Using Python Regular Expressions with StyledTextCtrl
I wanted to use a StyledTextCtrl to interactively apply python regular expressions to large unicode text files. My requirements were that the system should be fast, information in the control should be disturbed as little as possible and it should be possible to undo/redo changes.
I met with two main problems.
The first is that data stored in the STC is utf8 byte data, so a non-ascii character takes up more than one position. This is unlike python, where every character takes up exactly one position in a Unicode string. Conversion between these two systems was required.
The second is how to get data from a text box and use it as a regular expression when that data may include utf8 characters and Unicode escape sequences (\xa3 or \u0143).
I have attached my solution to these problems in the form of a wxPython demo. It is my hope that others may find this useful or someone may be able to show me how to do it better. Some of the methods can be used by scripts independently of the gui.
In case it is not obvious:
The top STC is for the text to be edited, fill using cut and paste from somewhere or other.
The middle STC is for entering the regular expression, multiline and (?x) forms can be used. Unicode escapes and hex escapes can be used.
The bottom STC is for replacement text, \1 and \g<name> forms can be used as can \u00A3 type escapes (but not \xA3 type escapes).
1 ## Demo program to illustrate the interactive application of python regular
2 # expressions to utf-8 contents of a StyledTextCtrl in such a way that
3 # minimum disturbance is caused to other data in the control and undo/redo
4 # can be used normally.
5
6 # Author: Robert Ledger (ledgerbob at gmail.com) 2006.
7
8 # Last Modification: 11 July 2006
9
10 # 11 July 2006: Added test for zero length source text to avoid crashing.
11 # Added extra style and size information for GNU/Linux.
12
13 # Program tested on:
14 # windows xp (python2.4 wx2.3.6)
15 # Debian GNU/Linux 3.1 (python2.3 wx2.3.6)
16
17 # Please be aware, I am not a professional programmer and nothing in
18 # this code is guaranteed not to munch your vital data.
19
20 import wx, re
21 import wx.stc as stc
22
23 MainEditor = None
24
25 # The following functions are general purpose functions for regular
26 # expression manipulation of the contents of StyledTextCtrls using python
27 # regular expression syntax.
28 #
29 # They are independent of the SearchPanel gui and can be cut out and put into
30 # their own module or included in a utility module if desired.
31 #
32 # All changes made by these functions can be undone by the STC's undo/redo
33 # commands.
34 #
35 # In all the following, parameter sFind should be a regular expression object
36 # or of type unicode. The sReplace parameters are expected to be of type
37 # unicode. Where a unicode object is expected, an object of type string will do
38 # as long as it only contains ascii characters.
39 #
40
def _norm(sFind, pos, endpos, ed):
    """
    Resolve default search bounds in STC ed and compile the pattern.

    Returns a 4-tuple (text, rFind, pos, endpos):

    text    -- the text of ed between pos and endpos.
    rFind   -- sFind as a regular expression object.  A str or unicode
               sFind is compiled; anything else is assumed to be a
               regular expression object already and is passed through.
    pos     -- start of the region.  None means the end of the current
               selection in ed.
    endpos  -- end of the region.  None means the end of the text in ed.

    If pos is greater than endpos the two are swapped.  For an empty
    region (pos equal to endpos) the result is ('', rFind, 0, 0).

    """
    if pos is None:
        # Default start: just after whatever is currently selected.
        pos = ed.GetSelection()[1]

    if endpos is None:
        endpos = ed.GetTextLength()

    if pos > endpos:
        pos, endpos = endpos, pos

    try:
        stringTypes = (str, unicode)
    except NameError:
        # Interpreter without a separate unicode type.
        stringTypes = (str,)

    if isinstance(sFind, stringTypes):
        sFind = re.compile(sFind)

    if pos == endpos:
        # Nothing to search; do not bother fetching text from the control.
        return '', sFind, 0, 0

    return ed.GetTextRange(pos, endpos), sFind, pos, endpos
75
76
def Search(sFind, pos, endpos, ed):
    """
    Find the first match of re sFind between pos and endpos in STC ed.

    sFind may be a str, a unicode or a regular expression object.  A
    plain str must contain ascii data only.

    The result is a tuple (match, start, end): the re match object and
    the start and end positions of the matched text in ed.  If nothing
    matches, or the region is empty, the result is (None, 0, 0).

    """
    region, pattern, base, endpos = _norm(sFind, pos, endpos, ed)

    found = None
    if region:
        found = pattern.search(region)
    if found is None:
        return None, 0, 0

    # Match offsets index characters of region; they are converted to
    # utf8 byte counts before being added to the STC position.
    leadBytes = len(region[:found.start(0)].encode('utf8'))
    matchBytes = len(found.group(0).encode('utf8'))
    first = base + leadBytes
    return found, first, first + matchBytes
98
99
def SearchList(sFind, pos, endpos, ed):
    """
    Collect every match of re sFind between pos and endpos in STC ed.

    sFind may be a str, a unicode or a regular expression object.  A
    plain str must contain ascii data only.

    Returns a list of tuples (match, start, end), in document order,
    where match is an re match object and start and end delimit the
    matched text in ed.  An empty region gives an empty list.

    """
    region, pattern, base, endpos = _norm(sFind, pos, endpos, ed)

    hits = []
    if not region:
        return hits

    charCursor = 0   # index in region just past the previous match
    byteCursor = 0   # the same place counted in utf8 bytes
    for found in pattern.finditer(region):
        charStart, charEnd = found.span(0)
        # Only the gap since the previous match is encoded here, so the
        # byte offset is built up incrementally from match to match.
        gapBytes = len(region[charCursor:charStart].encode('utf8'))
        byteStart = byteCursor + gapBytes
        byteCursor = byteStart + len(found.group(0).encode('utf8'))
        charCursor = charEnd
        hits.append((found, base + byteStart, base + byteCursor))

    return hits
131
132
def ReplaceList(sReplace, lst, ed):
    """
    Replace each region listed in lst with the expansion of sReplace.

    lst is a list of tuples (match, start, end) as produced by
    SearchList.  For each tuple, the text of STC ed between start and
    end is replaced by match.expand(sReplace), so \\1 and \\g<name>
    references in sReplace refer to groups of that match.

    The regions must be ordered from the start of the document to the
    end and must not overlap.  They are replaced in reverse order,
    because once text is inserted all positions following the insertion
    point are invalid.  The caller's list is not modified.

    All replacement texts are expanded before the control is touched,
    so an invalid sReplace raises the re module's exception and leaves
    ed unchanged.  The replacements themselves are wrapped in a single
    BeginUndoAction/EndUndoAction pair, which is closed even if a
    replacement fails part way through.

    """
    # Expand first: a bad template must not leave the text half replaced.
    pending = [(start, end, m.expand(sReplace)) for m, start, end in lst]
    pending.reverse()

    ed.BeginUndoAction()
    try:
        for start, end, newText in pending:
            ed.SetTargetStart(start)
            ed.SetTargetEnd(end)
            ed.ReplaceTarget(newText)
    finally:
        ed.EndUndoAction()
156
157
def ReverseSearch(sFind, pos, endpos, ed):
    """
    Return the LAST match of re sFind between pos and endpos in STC ed.

    Every match in the region is collected with SearchList and the
    final (match, start, end) tuple is returned.  If there is no match
    the result is (None, 0, 0).

    """
    hits = SearchList(sFind, pos, endpos, ed)
    if hits:
        return hits[-1]
    return None, 0, 0
169
170
def ReplaceAll(sFind, sReplace, startpos, endpos, ed):
    """
    Replace all occurrences of re sFind between startpos and endpos in
    STC ed with re replacement expression sReplace after normal re
    transformations.

    The matches are found with SearchList and replaced with
    ReplaceList.

    """
    # SearchList is a module level function; there is no 'self' here.
    lst = SearchList(sFind, startpos, endpos, ed)
    ReplaceList(sReplace, lst, ed)
179
180
def SearchReplace(sFind, sReplace, pos, endpos, ed):
    """
    Search the text in STC ed between integer pos and integer endpos
    for re sFind and replace the first match with sReplace after re
    transformation.

    Returns the tuple (reMatchObject, start, end) obtained from Search,
    where start and end are the positions in ed of the matched text
    that was replaced.  If nothing matches, ed is left untouched and
    (None, 0, 0) is returned.

    """
    m, uStart, uEnd = Search(sFind, pos, endpos, ed)
    if m is None:
        # No match: there is nothing to expand and nothing to replace.
        return None, 0, 0

    ed.SetTargetStart(uStart)
    ed.SetTargetEnd(uEnd)
    ed.ReplaceTarget(m.expand(sReplace))

    return m, uStart, uEnd
198
199 #
200 # End of independent functions for external use
201 #
202
class MessageButton(wx.Button):
    """
    A wx.Button that reports clicks to its parent.

    When clicked, the button calls parent.OnMessageButton(message, button)
    with the message it was constructed with.
    """

    def __init__(self, parent, label, message=None):
        """
        Create the button on parent showing label.

        message is the value handed to the parent on each click; when
        it is omitted the label itself is used.

        """
        wx.Button.__init__(self, parent, -1, label)
        if message is None:
            message = label
        self.myparent = parent
        self.message = message
        self.Bind(wx.EVT_BUTTON, self.OnButton)

    def OnButton(self, event):
        """ Forward the click to the parent's OnMessageButton method. """
        notify = self.myparent.OnMessageButton
        notify(self.message, self)
225
def Alert( message ):
    """
    Show message in a modal dialog with an exclamation icon and an OK
    button, then destroy the dialog.

    """
    flags = wx.ICON_EXCLAMATION | wx.OK
    box = wx.MessageDialog(None, message, style=flags)
    box.ShowModal()
    box.Destroy()
230
231
class MySTC(stc.StyledTextCtrl):
    """
    StyledTextCtrl with a highlighted caret line and helpers for
    centering a line or position in the view.

    """
    def __init__(self, parent, size=wx.DefaultSize, style=0):
        """
        Create the control and show the caret line on a yellow
        background.

        """
        stc.StyledTextCtrl.__init__(self, parent, size=size, style=style)

        self.SetCaretLineBack('yellow')
        self.SetCaretLineVisible(True)

    def CenterPosInView(self, pos):
        """
        Center the line containing position pos in the view and move
        the caret to pos.

        """
        targetLine = self.LineFromPosition(pos)
        self.CenterLineInView(targetLine)
        self.GotoLine(targetLine)
        self.GotoPos(pos)

    def CenterLineInView(self, line):
        """
        Scroll the view so that the given line sits in the middle of
        the visible lines.

        """
        # Line currently shown at the middle of the view (integer math).
        middle = self.GetFirstVisibleLine() + self.LinesOnScreen() // 2
        self.LineScroll(0, line - middle)
269
270
class SearchEditBox(MySTC):
    """
    MySTC set up for use as an input text box: simple border and no
    scroll bars.

    """
    def __init__(self, parent):
        """ Create the box on parent and switch both scroll bars off. """
        MySTC.__init__(self, parent, style=wx.SIMPLE_BORDER)
        for hideBar in (self.SetUseHorizontalScrollBar,
                        self.SetUseVerticalScrollBar):
            hideBar(False)
282
283
class SearchPanel(wx.Panel):
    """
    Panel that interactively applies Python regular expressions to an
    STC in such a way that other data in the control is disturbed as
    little as possible and changes can be done and undone using normal
    STC commands.

    The panel holds a 'find' box, a 'replace' box, a 'Direction' radio
    box and a column of MessageButtons.  Each button press arrives in
    OnMessageButton, which reads the gui into self.direction, self.sFind
    and self.sReplace, stores the module level MainEditor in self.ed and
    then calls the matching On<message> method.  The On* methods rely on
    those attributes having been set.

    """

    # Search directions.  The values are the indices of the choices in
    # the 'Direction' radio box, in the same order.
    FROMTOP = 0
    FORWARD = 1
    BACKWARD = 2
    WRAP = 3
    INSELECTION = 4


    def __init__(self, parent):
        """
        Default constructor.  Sets up the gui components of the panel.

        lastMatchObject:
            A place to store the match object returned from a python re
            search, for use by a following Replace.

        """
        wx.Panel.__init__(self, parent, -1)

        self.lastMatchObject = None

        mainVbox = wx.BoxSizer(wx.VERTICAL)
        hbox = wx.BoxSizer(wx.HORIZONTAL)

        # Left: the find box above the replace box.
        editBoxSizer = wx.BoxSizer(wx.VERTICAL)
        self.FindText = SearchEditBox(self)
        self.ReplaceText = SearchEditBox(self)
        editBoxSizer.Add(self.FindText, 1, wx.EXPAND)
        editBoxSizer.Add(self.ReplaceText, 1, wx.EXPAND)
        hbox.Add(editBoxSizer, 1, wx.EXPAND | wx.ALL, 5)

        # Middle: search direction.  Choice order must match the class
        # constants FROMTOP .. INSELECTION.
        rboxsizer = wx.BoxSizer(wx.VERTICAL)
        self.directionRadioBox = wx.RadioBox(
            self, -1,
            label='Direction',
            choices=['From Top', 'Forward', 'Backward', 'Wrap', 'In Selection'],
            style=wx.RA_SPECIFY_ROWS
        )
        rboxsizer.Add(self.directionRadioBox, 1, wx.EXPAND | wx.ALL, 5)
        hbox.Add(rboxsizer, 0, wx.EXPAND)

        # Right: one button per operation.  The message selects the
        # On<message> method called by OnMessageButton.
        buttonBoxSizer = wx.BoxSizer(wx.VERTICAL)
        for label, message in [
            ('Search', 'Search'),
            ('Replace', 'Replace'),
            ('R && S', 'ReplaceAndSearch'),
            ('Replace All', 'ReplaceAll'),
            ('Count', 'Count'),
            ('List', 'List'),
        ]:
            buttonBoxSizer.Add(MessageButton(self, label, message), 1, wx.EXPAND)

        hbox.Add(buttonBoxSizer, 0, wx.EXPAND | wx.ALL, 5)
        mainVbox.Add(hbox, 1, wx.EXPAND)

        self.SetSizer(mainVbox)
        mainVbox.Fit(self)


    def OnMessageButton(self, message, object):
        """
        Callback function for MessageButtons.

        Initialize self.direction, self.sFind and self.sReplace from
        the gui, point self.ed at the module level MainEditor and give
        it the focus, then call the method named 'On' + message (or
        OnUnknownSignal if there is no such method) and return its
        result.

        object is the button that was pressed; it is not used.

        """
        self.direction = self.GetDirection()
        self.sFind = self.GetFindStr()
        self.sReplace = self.GetReplaceStr()
        f = getattr(self, 'On' + message, self.OnUnknownSignal)
        self.ed = MainEditor
        self.ed.SetFocus()
        return f()


    def GetDirection(self):
        """
        Get the 'search direction' value from the gui component.

        The value is the index of the selected radio button, i.e. one
        of FROMTOP, FORWARD, BACKWARD, WRAP or INSELECTION.

        """
        return self.directionRadioBox.GetSelection()


    def SetDirection(self, i):
        """
        Set the value of the 'search direction' gui component to i.

        """
        self.directionRadioBox.SetSelection(i)


    def GetFindStr(self):
        """
        Get the search string from the 'find' text box.

        The text is encoded with 'raw_unicode_escape' and decoded
        again with the same codec.  Encoding escapes the characters the
        codec cannot represent directly; decoding then turns those, and
        any escapes of the form \\u00A3 the user typed, into characters.
        This allows a mixture of unicode characters and escaped unicode
        characters to be entered in the input box.

        """
        s = self.FindText.GetText().encode('raw_unicode_escape')
        return s.decode('raw_unicode_escape')


    def GetReplaceStr(self):
        """
        Get the replace string from the 'replace' text box and convert
        it to python internal representation.

        The text goes through the same 'raw_unicode_escape' round trip
        as the find string, so unicode escapes of the form \\u00A3 can
        be used.

        """
        s = self.ReplaceText.GetText().encode('raw_unicode_escape')
        return s.decode('raw_unicode_escape')


    def GetSearchRegion(self):
        """
        Returns the (start, end) of the region, in the target editor,
        to be searched.  Result depends on self.direction and on the
        current selection in self.ed.

        A value of None is returned for end to indicate that search
        should go to the end of the text in the control.

        INSELECTION     -- the current selection.
        FORWARD or WRAP -- from the end of the current selection, or
                           from the cursor position if there is no
                           selection, to the end of the text.
        BACKWARD        -- from the top of the text to the start of the
                           current selection.
        FROMTOP         -- the whole text.

        """
        startSel, endSel = self.ed.GetSelection()

        if self.direction == self.INSELECTION:
            return startSel, endSel

        if self.direction == self.FORWARD or self.direction == self.WRAP:
            # Continue after the selection so the same match is not
            # found again.
            if startSel != endSel:
                startSel = endSel
            return startSel, None

        if self.direction == self.BACKWARD:
            return 0, startSel

        if self.direction == self.FROMTOP:
            return 0, None


    def _CompileFind(self):
        """
        Compile self.sFind and return the regular expression object.

        If the pattern cannot be compiled the user is alerted and None
        is returned.

        """
        try:
            return re.compile(self.sFind)
        except Exception:
            # Any compile failure is reported the same way, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            Alert('Error in regular expression.')
            return None


    def OnSearch(self):
        """
        Method invoked by 'Search' button in gui.

        Searches the region given by GetSearchRegion.  On success the
        match object is stored in self.lastMatchObject and the matched
        text is selected and centered in the view.

        BACKWARD selects the last match in the region.  FROMTOP also
        switches the gui to FORWARD so that the next press continues
        from the match.  WRAP, after reporting that nothing was found
        below the starting point, searches again from the top of the
        text down to that point.

        Returns False if the regular expression is invalid or a
        non-wrapping forward search finds nothing; otherwise None.

        """
        rFind = self._CompileFind()
        if rFind is None:
            return False

        self.lastMatchObject = None

        startSel, endSel = self.GetSearchRegion()

        if self.direction == self.BACKWARD:
            m, uStart, uEnd = ReverseSearch(rFind, startSel, endSel, ed=self.ed)
            if not m:
                Alert('No match found')
                return

        else:
            if self.direction == self.FROMTOP:
                self.SetDirection(self.FORWARD)

            m, uStart, uEnd = Search(rFind, startSel, endSel, ed=self.ed)
            if not m:
                Alert('No match found')
                if not self.direction == self.WRAP:
                    return False
                # Wrap: look in the part of the text above the start.
                m, uStart, uEnd = Search(rFind, 0, startSel, ed=self.ed)
                if not m:
                    Alert('No match found after wrapping')
                    return

        self.lastMatchObject = m
        self.ed.CenterPosInView(uStart)
        self.ed.SetSelection(uStart, uEnd)


    def OnReplace(self):
        """
        Method invoked by 'Replace' button in gui.

        Replaces the text found by a previous press of the search
        button with the contents of the replace text box after re
        substitutions.

        Nothing is replaced, and the user is alerted, if no text is
        selected, if there is no stored match from a previous search,
        or if the selected text differs from the text of the stored
        match.  The stored match is cleared after a replacement.

        """
        startSel, endSel = self.ed.GetSelection()
        if startSel == endSel:
            Alert('Can\'t replace text -non selected')
            return

        m = self.lastMatchObject
        if m is None:
            Alert('Can\'t replace text!\nDo a search first')
            return

        # Guard against the selection having changed since the search.
        if m.group(0) != self.ed.GetSelectedText():
            self.lastMatchObject = None
            Alert('Can\'t replace text.\nSelected text does not match last found text ')
            return

        self.ed.ReplaceSelection(m.expand(self.sReplace))
        self.lastMatchObject = None


    def OnReplaceAndSearch(self):
        """
        Method invoked by 'R & S' button in gui.

        Equivalent of pressing the Replace button followed by
        pressing the Search button.

        """
        self.OnReplace()
        self.OnSearch()


    def OnReplaceAll(self):
        """
        Method invoked by 'Replace All' button in gui.

        Replaces all text matched by sFind with sReplace after re
        substitutions.  Operates on the entire text or on the selected
        text if the 'in selection' option is set.  The number of
        replacements is reported to the user.

        """
        lst = self.OnList(log=False)
        ReplaceList(self.sReplace, lst, ed=self.ed)
        Alert('Made %s replacements'%len(lst))


    def OnCount(self):
        """
        Method invoked by 'Count' button in gui.

        Counts the number of matches for sFind in the entire text, or in
        the selected text if the 'in selection' option is set, and
        reports the count to the user.

        """
        count = len(self.OnList(log=False))
        Alert( 'Found search string %s times.'%count)


    def OnList(self, log=True):
        """
        Method invoked by 'List' button in gui.

        Creates a list of matches, searching either the entire text or
        the selected text if the 'in selection' option is active, and
        returns it as a list of (match, start, end) tuples.

        If log is true, the line number and found string of each match
        are printed on the console and the number of matches is
        reported to the user.

        If the regular expression is invalid the user is alerted and an
        empty list is returned.

        """
        rFind = self._CompileFind()
        if rFind is None:
            return []

        if self.GetDirection() == self.INSELECTION:
            startSel, endSel = self.GetSearchRegion()
        else:
            startSel, endSel = 0, None

        lst = SearchList(rFind, startSel, endSel, ed=self.ed)

        if log:
            for m, uStart, uEnd in lst:
                print('%s: %r'%(
                    self.ed.LineFromPosition(uStart),
                    self.ed.GetTextRange(uStart,uEnd)
                ))

            Alert('List Search found %s matches.\n\nResults were printed on the console.'%len(lst))

        return lst

    def OnUnknownSignal(self):
        """ Fallback for button messages with no On<message> method. """
        pass
595
596
597
class TestFrame(wx.Frame):
    """
    Demo window: the main editor on top of a SearchPanel.

    """

    def __init__(self):
        """
        Build the frame and store the main editor in the module level
        MainEditor, where SearchPanel looks for it.

        """
        global MainEditor
        wx.Frame.__init__(self, None, -1, 'Ledgerbob\'s Python Search Demo')

        MainEditor = MySTC(self)
        layout = wx.BoxSizer(wx.VERTICAL)
        for widget, proportion in ((MainEditor, 1), (SearchPanel(self), 0)):
            layout.Add(widget, proportion, wx.EXPAND)
        self.SetSizer(layout)
        self.SetSize((400, 400))
610
# Run the demo when the file is executed as a script.
if __name__ == "__main__":
    application = wx.PySimpleApp()
    frame = TestFrame()
    frame.Show(True)
    application.MainLoop()