1 {
2 pcRegExp - Perl compatible regular expressions for Virtual Pascal
3 (c) 2001 Peter S. Voronov aka Chem O'Dun <petervrn@yahoo.com>
4
5 Based on PCRE library interface unit for Virtual Pascal.
6 (c) 2001 Alexander Tokarev <dwalin@dwalin.ru>
7
8 The current PCRE version is: 3.7
9
10 This software may be distributed under the terms of the modified BSD license
11 Copyright (c) 2001, Alexander Tokarev
12 All rights reserved.
13
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19 * Redistributions in binary form must reproduce the above copyright notice,
20 this list of conditions and the following disclaimer in the documentation
21 and/or other materials provided with the distribution.
22 * Neither the name of the <ORGANIZATION> nor the names of its contributors
23 may be used to endorse or promote products derived from this software without
24 specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
27 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
30 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
37 The PCRE library is written by: Philip Hazel <ph10@cam.ac.uk>
38 Copyright (c) 1997-2004 University of Cambridge
39
40 AngelsHolocaust 4-11-04 updated to use version v5.0
41 (INFO: this is regex-directed, NFA)
42 AH: 9-11-04 - pcre_free: removed var, pcre already gives the ptr, now
43 everything works as it should (no more crashes)
44 -> removed CheckRegExp because pcre handles errors perfectly
45 10-11-04 - added pcError (errorhandling), pcInit
46 13-11-04 - removed the ErrorPos = 0 check -> always print erroroffset
47 17-10-05 - support for \1-\9 backreferences in TpcRegExp.GetReplStr
48 17-02-06 - added RunTimeOptions: caller can set options while searching
49 19-02-06 - added SearchOfs(): let PCRE use the complete string and offset
50 into the string itself
51 20-12-06 - support for version 7.0
52 27.08.08 - support for v7.7
53 }
54
55 {$H+} {$DEFINE PCRE_3_7} {$DEFINE PCRE_5_0} {$DEFINE PCRE_7_0} {$DEFINE PCRE_7_7}
56
57 Unit pcregexp;
58
59 Interface
60
61 uses objects;
62
63 Type
64 PpcRegExp = ^TpcRegExp;
65 // TpcRegExp = object
66 TpcRegExp = object(TObject)
67 MatchesCount: integer;
68 RegExpC, RegExpExt : Pointer;
69 Matches:Pointer;
70 RegExp: shortstring;
71 SourceLen: integer;
72 PartialMatch : boolean;
73 Error : boolean;
74 ErrorMsg : Pchar;
75 ErrorPos : integer;
76 RunTimeOptions: Integer; // options which can be set by the caller
77 constructor Init(const ARegExp : shortstring; AOptions : integer; ALocale : Pointer);
Search(AStr: Pchar; ALen : longint)78 function Search(AStr: Pchar; ALen : longint) : boolean; virtual;
SearchNext( AStr: Pchar; ALen : longint)79 function SearchNext( AStr: Pchar; ALen : longint) : boolean; virtual;
SearchOfs( AStr: Pchar; ALen, AOfs : longint)80 function SearchOfs ( AStr: Pchar; ALen, AOfs : longint) : boolean; virtual;
MatchSub(ANom: integer; var Pos, Len : longint)81 function MatchSub(ANom: integer; var Pos, Len : longint) : boolean; virtual;
MatchFull(var Pos, Len : longint)82 function MatchFull(var Pos, Len : longint) : boolean; virtual;
GetSubStr(ANom: integer; AStr: Pchar)83 function GetSubStr(ANom: integer; AStr: Pchar) : string; virtual;
GetFullStr(AStr: Pchar)84 function GetFullStr(AStr: Pchar) : string; virtual;
GetReplStr(AStr: Pchar; const ARepl: string)85 function GetReplStr(AStr: Pchar; const ARepl: string) : string; virtual;
GetPreSubStr(AStr: Pchar)86 function GetPreSubStr(AStr: Pchar) : string; virtual;
GetPostSubStr(AStr: Pchar)87 function GetPostSubStr(AStr: Pchar) : string; virtual;
ErrorStr()88 function ErrorStr : string; virtual;
89 destructor Done; virtual;
90 end;
91
pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer)92 function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer)93 function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
94
pcFastGrepMatch(WildCard, aStr: string)95 function pcFastGrepMatch(WildCard, aStr: string): Boolean;
pcFastGrepSub(WildCard, aStr, aRepl: string)96 function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
97
98 {$IFDEF PCRE_5_0}
pcGetVersion()99 function pcGetVersion : pchar;
100 {$ENDIF}
101
pcError(var pRegExp : Pointer)102 function pcError (var pRegExp : Pointer) : Boolean;
pcInit(const Pattern: Shortstring; CaseSens: Boolean)103 function pcInit (const Pattern: Shortstring; CaseSens: Boolean) : Pointer;
104
105 Const { Options }
106 PCRE_CASELESS = $0001;
107 PCRE_MULTILINE = $0002;
108 PCRE_DOTALL = $0004;
109 PCRE_EXTENDED = $0008;
110 PCRE_ANCHORED = $0010;
111 PCRE_DOLLAR_ENDONLY = $0020;
112 PCRE_EXTRA = $0040;
113 PCRE_NOTBOL = $0080;
114 PCRE_NOTEOL = $0100;
115 PCRE_UNGREEDY = $0200;
116 PCRE_NOTEMPTY = $0400;
117 {$IFDEF PCRE_5_0}
118 PCRE_UTF8 = $0800;
119 PCRE_NO_AUTO_CAPTURE = $1000;
120 PCRE_NO_UTF8_CHECK = $2000;
121 PCRE_AUTO_CALLOUT = $4000;
122 PCRE_PARTIAL = $8000;
123 {$ENDIF}
124 {$IFDEF PCRE_7_0}
125 PCRE_DFA_SHORTEST = $00010000;
126 PCRE_DFA_RESTART = $00020000;
127 PCRE_FIRSTLINE = $00040000;
128 PCRE_DUPNAMES = $00080000;
129 PCRE_NEWLINE_CR = $00100000;
130 PCRE_NEWLINE_LF = $00200000;
131 PCRE_NEWLINE_CRLF = $00300000;
132 PCRE_NEWLINE_ANY = $00400000;
133 PCRE_NEWLINE_ANYCRLF = $00500000;
134
135 PCRE_NEWLINE_BITS = PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_ANY;
136
137 {$ENDIF}
138 {$IFDEF PCRE_7_7}
139 PCRE_BSR_ANYCRLF = $00800000;
140 PCRE_BSR_UNICODE = $01000000;
141 PCRE_JAVASCRIPT_COMPAT= $02000000;
142 {$ENDIF}
143
144 PCRE_COMPILE_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_AUTO_CALLOUT + PCRE_CASELESS +
145 PCRE_DOLLAR_ENDONLY + PCRE_DOTALL + PCRE_EXTENDED +
146 PCRE_EXTRA + PCRE_MULTILINE + PCRE_NO_AUTO_CAPTURE +
147 PCRE_UNGREEDY + PCRE_UTF8 + PCRE_NO_UTF8_CHECK
148 {$IFDEF PCRE_7_0}
149 + PCRE_DUPNAMES + PCRE_FIRSTLINE + PCRE_NEWLINE_BITS
150 {$ENDIF}
151 {$IFDEF PCRE_7_7}
152 + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE + PCRE_JAVASCRIPT_COMPAT
153 {$ENDIF}
154 ;
155
156 PCRE_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL +
157 PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL
158 {$IFDEF PCRE_7_0}
159 + PCRE_NEWLINE_BITS
160 {$ENDIF}
161 {$IFDEF PCRE_7_7}
162 + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE
163 {$ENDIF}
164 ;
165
166 {$IFDEF PCRE_7_0}
167 PCRE_DFA_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL +
168 PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL +
169 PCRE_DFA_SHORTEST + PCRE_DFA_RESTART +
170 PCRE_NEWLINE_BITS
171 {$IFDEF PCRE_7_7}
172 + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE
173 {$ENDIF}
174 ;
175 {$ENDIF}
176
177 { Exec-time and get/set-time error codes }
178 PCRE_ERROR_NOMATCH = -1;
179 PCRE_ERROR_NULL = -2;
180 PCRE_ERROR_BADOPTION = -3;
181 PCRE_ERROR_BADMAGIC = -4;
182 PCRE_ERROR_UNKNOWN_MODE = -5;
183 PCRE_ERROR_NOMEMORY = -6;
184 PCRE_ERROR_NOSUBSTRING = -7;
185 {$IFDEF PCRE_5_0}
186 PCRE_ERROR_MATCHLIMIT = -8;
187 PCRE_ERROR_CALLOUT = -9; { Never used by PCRE itself }
188 PCRE_ERROR_BADUTF8 = -10;
189 PCRE_ERROR_BADUTF8_OFFSET = -11;
190 PCRE_ERROR_PARTIAL = -12;
191 PCRE_ERROR_BADPARTIAL = -13;
192 PCRE_ERROR_INTERNAL = -14;
193 PCRE_ERROR_BADCOUNT = -15;
194 {$ENDIF}
195 {$IFDEF PCRE_7_0}
196 PCRE_ERROR_DFA_UITEM = -16;
197 PCRE_ERROR_DFA_UCOND = -17;
198 PCRE_ERROR_DFA_UMLIMIT = -18;
199 PCRE_ERROR_DFA_WSSIZE = -19;
200 PCRE_ERROR_DFA_RECURSE = -20;
201 PCRE_ERROR_RECURSIONLIMIT = -21;
202 PCRE_ERROR_NULLWSLIMIT = -22;
203 PCRE_ERROR_BADNEWLINE = -23;
204 {$ENDIF}
205
206 { Request types for pcre_fullinfo() }
207
208 PCRE_INFO_OPTIONS = 0;
209 PCRE_INFO_SIZE = 1;
210 PCRE_INFO_CAPTURECOUNT = 2;
211 PCRE_INFO_BACKREFMAX = 3;
212 PCRE_INFO_FIRSTBYTE = 4;
213 PCRE_INFO_FIRSTCHAR = 4; { For backwards compatibility }
214 PCRE_INFO_FIRSTTABLE = 5;
215 {$IFDEF PCRE_5_0}
216 PCRE_INFO_LASTLITERAL = 6;
217 PCRE_INFO_NAMEENTRYSIZE = 7;
218 PCRE_INFO_NAMECOUNT = 8;
219 PCRE_INFO_NAMETABLE = 9;
220 PCRE_INFO_STUDYSIZE = 10;
221 PCRE_INFO_DEFAULT_TABLES = 11;
222 {$ENDIF PCRE_5_0}
223 {$IFDEF PCRE_7_7}
224 PCRE_INFO_OKPARTIAL = 12;
225 PCRE_INFO_JCHANGED = 13;
226 PCRE_INFO_HASCRORLF = 14;
227 {$ENDIF}
228
229 { Request types for pcre_config() }
230 {$IFDEF PCRE_5_0}
231 PCRE_CONFIG_UTF8 = 0;
232 PCRE_CONFIG_NEWLINE = 1;
233 PCRE_CONFIG_LINK_SIZE = 2;
234 PCRE_CONFIG_POSIX_MALLOC_THRESHOLD = 3;
235 PCRE_CONFIG_MATCH_LIMIT = 4;
236 PCRE_CONFIG_STACKRECURSE = 5;
237 PCRE_CONFIG_UNICODE_PROPERTIES = 6;
238 {$ENDIF PCRE_5_0}
239 {$IFDEF PCRE_7_0}
240 PCRE_CONFIG_MATCH_LIMIT_RECURSION = 7;
241 {$ENDIF}
242 {$IFDEF PCRE_7_7}
243 PCRE_CONFIG_BSR = 8;
244 {$ENDIF}
245
246 { Bit flags for the pcre_extra structure }
247 {$IFDEF PCRE_5_0}
248 PCRE_EXTRA_STUDY_DATA = $0001;
249 PCRE_EXTRA_MATCH_LIMIT = $0002;
250 PCRE_EXTRA_CALLOUT_DATA = $0004;
251 PCRE_EXTRA_TABLES = $0008;
252 {$ENDIF PCRE_5_0}
253 {$IFDEF PCRE_7_0}
254 PCRE_EXTRA_MATCH_LIMIT_RECURSION = $0010;
255 {$ENDIF}
256
257 Const
258 // DefaultOptions : integer = 0;
259 DefaultLocaleTable : pointer = nil;
260
261 {$IFDEF PCRE_5_0}
262 { The structure for passing additional data to pcre_exec(). This is defined in
263 such as way as to be extensible. Always add new fields at the end, in order to
264 remain compatible. }
265
266 type ppcre_extra = ^tpcre_extra;
267 tpcre_extra = record
268 flags : longint; { Bits for which fields are set }
269 study_data : pointer; { Opaque data from pcre_study() }
270 match_limit : longint; { Maximum number of calls to match() }
271 callout_data : pointer; { Data passed back in callouts }
272 tables : pointer; { Pointer to character tables }
273 match_limit_recursion: longint; { Max recursive calls to match() }
274 end;
275
276 type ppcre_callout_block = ^pcre_callout_block;
277 pcre_callout_block = record
278 version,
279 (* ------------------------ Version 0 ------------------------------- *)
280 callout_number : integer;
281 offset_vector : pointer;
282 subject : pchar;
283 subject_length, start_match, current_position, capture_top,
284 capture_last : integer;
285 callout_data : pointer;
286 (* ------------------- Added for Version 1 -------------------------- *)
287 pattern_position, next_item_length : integer;
288 end;
289 {$ENDIF PCRE_5_0}
290
291 {$OrgName+}
292 {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}
293
294 { local replacement of external pcre memory management functions }
pcre_malloc( size : integer )295 function pcre_malloc( size : integer ) : pointer;
296 procedure pcre_free( {var} p : pointer );
297 {$IFDEF PCRE_5_0}
size()298 const pcre_stack_malloc: function ( size : integer ): pointer = pcre_malloc;
299 pcre_stack_free: procedure ( {var} p : pointer ) = pcre_free;
pcre_callout(var p : ppcre_callout_block)300 function pcre_callout(var p : ppcre_callout_block) : integer;
301 {$ENDIF PCRE_5_0}
302 {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
303
304 Implementation
305
306 Uses strings, collect, messages, dnapp, commands, advance0, stringsx
307 {$IFDEF VIRTUALPASCAL} ,vpsyslow {$ENDIF VIRTUALPASCAL};
308
309 Const
310 MAGIC_NUMBER = $50435245; { 'PCRE' }
311 MAX_MATCHES = 90; { changed in 3.5 version; should be divisible by 3, was 64}
312
313 Type
314 PMatchArray = ^TMatchArray;
315 TMatchArray = array[0..( MAX_MATCHES * 3 )] of integer;
316
317 PRegExpCollection = ^TRegExpCollection;
318 TRegExpCollection = object(TSortedCollection)
319 MaxRegExp : integer;
320 SearchRegExp : shortstring;
321 CompareModeInsert : boolean;
322 constructor Init(AMaxRegExp:integer);
323 procedure FreeItem(P: Pointer); virtual;
Compare(P1, P2: Pointer)324 function Compare(P1, P2: Pointer): Integer; virtual;
Find(ARegExp:shortstring;var P: PpcRegExp)325 function Find(ARegExp:shortstring;var P: PpcRegExp):boolean; virtual;
CheckNew(ARegExp:shortstring)326 function CheckNew(ARegExp:shortstring):PpcRegExp;virtual;
327 end;
328
329 Var
330 PRegExpCache : PRegExpCollection;
331
332
333 {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}
334
335 { imported original pcre functions }
336
pcre_compile( const pattern : PChar; options : integer;337 function pcre_compile( const pattern : PChar; options : integer;
338 var errorptr : PChar; var erroroffset : integer;
339 const tables : PChar ) : pointer {pcre}; external;
340 {$IFDEF PCRE_7_0}
pcre_compile2( const pattern : PChar; options : integer;341 function pcre_compile2( const pattern : PChar; options : integer;
342 var errorcodeptr : Integer;
343 var errorptr : PChar; var erroroffset : integer;
344 const tables : PChar ) : pointer {pcre}; external;
345 {$ENDIF}
346 {$IFDEF PCRE_5_0}
pcre_config( what : integer; where : pointer)347 function pcre_config( what : integer; where : pointer) : integer; external;
pcre_copy_named_substring( const code : pointer {pcre};348 function pcre_copy_named_substring( const code : pointer {pcre};
349 const subject : pchar;
350 var ovector : integer;
351 stringcount : integer;
352 const stringname : pchar;
353 var buffer : pchar;
354 size : integer) : integer; external;
pcre_copy_substring( const subject : pchar; var ovector : integer;355 function pcre_copy_substring( const subject : pchar; var ovector : integer;
356 stringcount, stringnumber : integer;
357 var buffer : pchar; size : integer )
358 : integer; external;
pcre_exec( const argument_re : pointer {pcre};359 function pcre_exec( const argument_re : pointer {pcre};
360 const extra_data : pointer {pcre_extra};
361 {$ELSE}
362 function pcre_exec( const external_re : pointer;
363 const external_extra : pointer;
364 {$ENDIF}
365 const subject : PChar;
366 length, start_offset, options : integer;
367 offsets : pointer;
368 offsetcount : integer ) : integer; external;
369 {$IFDEF PCRE_7_0}
pcre_dfa_exec( const argument_re : pointer {pcre};370 function pcre_dfa_exec( const argument_re : pointer {pcre};
371 const extra_data : pointer {pcre_extra};
372 const subject : pchar;
373 length, start_offset, options : integer;
374 offsets : pointer;
375 offsetcount : integer;
376 workspace : pointer;
377 wscount : integer ) : integer; external;
378 {$ENDIF}
379 {$IFDEF PCRE_5_0}
380 procedure pcre_free_substring( const p : pchar ); external;
381 procedure pcre_free_substring_list( var p : pchar ); external;
pcre_fullinfo( const argument_re : pointer {pcre};382 function pcre_fullinfo( const argument_re : pointer {pcre};
383 const extra_data : pointer {pcre_extra};
384 what : integer;
385 where : pointer ) : integer; external;
pcre_get_named_substring( const code : pointer {pcre};386 function pcre_get_named_substring( const code : pointer {pcre};
387 const subject : pchar;
388 var ovector : integer;
389 stringcount : integer;
390 const stringname : pchar;
391 var stringptr : pchar ) : integer; external;
pcre_get_stringnumber( const code : pointer {pcre};392 function pcre_get_stringnumber( const code : pointer {pcre};
393 const stringname : pchar ) : integer; external;
pcre_get_stringtable_entries( const code : pointer {pcre};394 function pcre_get_stringtable_entries( const code : pointer {pcre};
395 const stringname : pchar;
396 var firstptr,
397 lastptr : pchar ) : integer; external;
pcre_get_substring( const subject : pchar; var ovector : integer;398 function pcre_get_substring( const subject : pchar; var ovector : integer;
399 stringcount, stringnumber : integer;
400 var stringptr : pchar ) : integer; external;
pcre_get_substring_list( const subject : pchar; var ovector : integer;401 function pcre_get_substring_list( const subject : pchar; var ovector : integer;
402 stringcount : integer;
403 listptr : pointer {const char ***listptr}) : integer; external;
pcre_info( const argument_re : pointer {pcre};404 function pcre_info( const argument_re : pointer {pcre};
405 var optptr : integer;
406 var first_byte : integer ) : integer; external;
pcre_maketables()407 function pcre_maketables : pchar; external;
408 {$ENDIF}
409 {$IFDEF PCRE_7_0}
pcre_refcount( const argument_re : pointer {pcre};410 function pcre_refcount( const argument_re : pointer {pcre};
411 adjust : integer ) : pchar; external;
412 {$ENDIF}
pcre_study( const external_re : pointer {pcre};413 function pcre_study( const external_re : pointer {pcre};
414 options : integer;
415 var errorptr : PChar ) : pointer {pcre_extra}; external;
416 {$IFDEF PCRE_5_0}
pcre_version()417 function pcre_version : pchar; external;
418 {$ENDIF}
419
pcre_malloc( size : integer )420 function pcre_malloc( size : integer ) : pointer;
421 begin
422 GetMem( result, size );
423 end;
424
425 procedure pcre_free( {var} p : pointer );
426 begin
427 if (p <> nil) then
428 FreeMem( p, 0 );
429 {@p := nil;}
430 end;
431
432 {$IFDEF PCRE_5_0}
433 (* Called from PCRE as a result of the (?C) item. We print out where we are in
434 the match. Yield zero unless more callouts than the fail count, or the callout
435 data is not zero. *)
436
pcre_callout()437 function pcre_callout;
438 begin
439 end;
440 {$ENDIF}
441
442 {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
443
444 // Always include the newest version of the library
445 {$IFDEF PCRE_7_7}
446 {$L pcre77.lib}
447 {$ELSE}
448 {$IFDEF PCRE_7_0}
449 {$L pcre70.lib}
450 {$ELSE}
451 {$IFDEF PCRE_5_0}
452 {$L pcre50.lib}
453 {$ELSE}
454 {$IFDEF PCRE_3_7}
455 {$L pcre37.lib}
456 {$ENDIF PCRE_3_7}
457 {$ENDIF PCRE_5_0}
458 {$ENDIF PCRE_7_0}
459 {$ENDIF PCRE_7_7}
460
461 {TpcRegExp}
462
463 constructor TpcRegExp.Init(const ARegExp:shortstring; AOptions:integer; ALocale : Pointer);
464 var
465 pRegExp : PChar;
466 begin
467 RegExp:=ARegExp;
468 RegExpC:=nil;
469 RegExpExt:=nil;
470 Matches:=nil;
471 MatchesCount:=0;
472 Error:=true;
473 ErrorMsg:=nil;
474 ErrorPos:=0;
475 RunTimeOptions := 0;
476 if length(RegExp) < 255 then
477 begin
478 RegExp[length(RegExp)+1]:=#0;
479 pRegExp:=@RegExp[1];
480 end
481 else
482 begin
483 GetMem(pRegExp,length(RegExp)+1);
484 pRegExp:=strpcopy(pRegExp,RegExp);
485 end;
486 RegExpC := pcre_compile( pRegExp,
487 AOptions and PCRE_COMPILE_ALLOWED_OPTIONS,
488 ErrorMsg, ErrorPos, ALocale);
489 if length(RegExp) = 255 then
490 StrDispose(pRegExp);
491 if RegExpC = nil then
492 exit;
493 ErrorMsg:=nil;
494 RegExpExt := pcre_study( RegExpC, 0, ErrorMsg );
495 if (RegExpExt = nil) and (ErrorMsg <> nil) then
496 begin
497 pcre_free(RegExpC);
498 exit;
499 end;
500 GetMem(Matches,SizeOf(TMatchArray));
501 Error:=false;
502 end;
503
504 destructor TpcRegExp.Done;
505 begin
506 if RegExpC <> nil then
507 pcre_free(RegExpC);
508 if RegExpExt <> nil then
509 pcre_free(RegExpExt);
510 if Matches <> nil then
511 FreeMem(Matches,SizeOf(TMatchArray));
512 end;
513
TpcRegExp.SearchNext( AStr: Pchar; ALen : longint )514 function TpcRegExp.SearchNext( AStr: Pchar; ALen : longint ) : boolean;
515 var Options: Integer;
516 begin // must handle PCRE_ERROR_PARTIAL here
517 Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and
518 PCRE_EXEC_ALLOWED_OPTIONS;
519 if MatchesCount > 0 then
520 MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, PMatchArray(Matches)^[1],
521 Options, Matches, MAX_MATCHES ) else
522 MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, 0,
523 Options, Matches, MAX_MATCHES );
524 { if MatchesCount = 0 then
525 MatchesCount := MatchesCount div 3;}
526 PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL;
527 SearchNext := MatchesCount > 0;
528 end;
529
TpcRegExp.Search( AStr: Pchar; ALen : longint)530 function TpcRegExp.Search( AStr: Pchar; ALen : longint):boolean;
531 begin
532 MatchesCount:=0;
533 Search:=SearchNext(AStr,ALen);
534 SourceLen:=ALen;
535 end;
536
TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint )537 function TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint ) : boolean;
538 var Options: Integer;
539 begin
540 MatchesCount:=0;
541 Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and
542 PCRE_EXEC_ALLOWED_OPTIONS;
543 MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, AOfs,
544 Options, Matches, MAX_MATCHES );
545 PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL;
546 SearchOfs := MatchesCount > 0;
547 SourceLen := ALen-AOfs;
548 end;
549
TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint)550 function TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint):boolean;
551 begin
552 if (MatchesCount > 0) and (ANom <= (MatchesCount-1)) then
553 begin
554 ANom:=ANom*2;
555 Pos:=PMatchArray(Matches)^[ANom];
556 Len:=PMatchArray(Matches)^[ANom+1]-Pos;
557 MatchSub:=true;
558 end
559 else
560 MatchSub:=false;
561 end;
562
TpcRegExp.MatchFull(var Pos,Len:longint)563 function TpcRegExp.MatchFull(var Pos,Len:longint):boolean;
564 begin
565 MatchFull:=MatchSub(0,Pos,Len);
566 end;
567
TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar)568 function TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar):string;
569 var
570 s: ansistring;
571 pos,len: longint;
572 begin
573 s:='';
574 if MatchSub(ANom, pos, len) then
575 begin
576 setlength(s, len);
577 Move(AStr[pos], s[1], len);
578 end;
579 GetSubStr:=s;
580 end;
581
TpcRegExp.GetPreSubStr(AStr: Pchar)582 function TpcRegExp.GetPreSubStr(AStr: Pchar):string;
583 var
584 s: ansistring;
585 l: longint;
586 begin
587 s:='';
588 if (MatchesCount > 0) then
589 begin
590 l:=PMatchArray(Matches)^[0]-1;
591 if l > 0 then
592 begin
593 setlength(s,l);
594 Move(AStr[1],s[1],l);
595 end;
596 end;
597 GetPreSubStr:=s;
598 end;
599
TpcRegExp.GetPostSubStr(AStr: Pchar)600 function TpcRegExp.GetPostSubStr(AStr: Pchar):string;
601 var
602 s: ansistring;
603 l: longint;
604 ANom: integer;
605 begin
606 s:='';
607 if (MatchesCount > 0) then
608 begin
609 ANom:=(MatchesCount-1){*2} shl 1;
610 l:=SourceLen-PMatchArray(Matches)^[ANom+1]+1;
611 if l > 0 then
612 begin
613 setlength(s,l);
614 Move(AStr[PMatchArray(Matches)^[ANom+1]],s[1],l);
615 end;
616 end;
617 GetPostSubStr:=s;
618 end;
619
620
TpcRegExp.GetFullStr(AStr: Pchar)621 function TpcRegExp.GetFullStr(AStr: Pchar):string;
622 var
623 s: ansistring;
624 l: longint;
625 begin
626 GetFullStr:=GetSubStr(0,AStr);
627 end;
628
TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string)629 function TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string):string;
630 var
631 s: ansistring;
632 l,i,lasti: longint;
633 begin
634 l:=length(ARepl);
635 i:=1;
636 lasti:=1;
637 s:='';
638 while i <= l do
639 begin
640 case ARepl[i] of
641 '\' :
642 begin
643 if i < l then
644 begin
645 s:=s+copy(ARepl,lasti,i-lasti){+ARepl[i+1]};
646 {AH 17-10-05 support for POSIX \1-\9 backreferences}
647 case ARepl[i+1] of
648 '0' : s:=s+GetFullStr(AStr);
649 '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr);
650 else s:=s+ARepl[i+1]; // copy the escaped character
651 end;
652 end;
653 inc(i);
654 lasti:=i+1;
655 end;
656 '$' :
657 begin
658 if i < l then
659 begin
660 s:=s+copy(ARepl,lasti,i-lasti);
661 case ARepl[i+1] of
662 '&' : s:=s+GetFullStr(AStr);
663 '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr);
664 '`' : s:=s+GetPreSubStr(AStr);
665 #39 : s:=s+GetPostSubStr(AStr);
666 end;
667 end;
668 inc(i);
669 lasti:=i+1;
670 end;
671 end;
672 inc(i);
673 end;
674 if lasti <= {AH 25-10-2004 added =, else l==1 won't work} l then
675 s:=s+copy(ARepl,lasti,l-lasti+1);
676 GetReplStr:=s;
677 end;
678
TpcRegExp.ErrorStr()679 function TpcRegExp.ErrorStr:string;
680 begin
681 ErrorStr:=StrPas(ErrorMsg);
682 end;
683
684 {TRegExpCollection}
685
686 constructor TRegExpCollection.Init(AMaxRegExp: integer);
687 begin
688 Inherited Init(1,1);
689 MaxRegExp:=AMaxRegExp;
690 CompareModeInsert:=true;
691 end;
692
693 procedure TRegExpCollection.FreeItem(P: Pointer);
694 begin
695 if P <> nil then
696 begin
697 Dispose(PpcRegExp(P),Done);
698 end;
699 end;
700
TRegExpCollection.Compare(P1, P2: Pointer)701 function TRegExpCollection.Compare(P1, P2: Pointer): Integer;
702 //var
703 // l,l1,l2,i : byte;
704 //// wPos: pchar;
705 begin
706 if CompareModeInsert then
707 begin
708 // l1:=length(PpcRegExp(P1)^.RegExp);
709 // l2:=length(PpcRegExp(P2)^.RegExp);
710 // if l1 > l2 then l:=l2 else
711 // l:=l1;
712 // for i:=1 to l do
713 // if PpcRegExp(P1).RegExp[i] <> PpcRegExp(P2).RegExp[i] then break;
714 // if i <=l then
715 // Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(PpcRegExp(P2).RegExp[i]) else
716 // Compare:=l1-l2;
717 Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, PpcRegExp(P2).RegExp, False);
718 end
719 else
720 begin
721 // l1:=length(PpcRegExp(P1)^.RegExp);
722 // l2:=length(SearchRegExp);
723 // if l1 > l2 then l:=l2 else
724 // l:=l1;
725 // for i:=1 to l do
726 // if PpcRegExp(P1).RegExp[i] <> SearchRegExp[i] then
727 // begin
728 // Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(SearchRegExp[i]);
729 // break;
730 // end;
731 // if i > l then Compare:=l1-l2;
732 Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, SearchRegExp, False);
733 end;
734 end;
735
TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp)736 function TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp):boolean;
737 var I : integer;
738 begin
739 CompareModeInsert:=false;
740 SearchRegExp:=ARegExp;
741 if Search(nil,I) then
742 begin
743 P:=PpcRegExp(At(I));
744 Find:=true;
745 end
746 else
747 begin
748 P:=nil;
749 Find:=false;
750 end;
751 CompareModeInsert:=true;
752 end;
753
TRegExpCollection.CheckNew(ARegExp:shortstring)754 function TRegExpCollection.CheckNew(ARegExp:shortstring):PpcRegExp;
755 var
756 P : PpcRegExp;
757 begin
758 if not Find(ARegExp,P) then
759 begin
760 if Count = MaxRegExp then
761 AtFree(0);
762 P:=New(ppcRegExp,Init(ARegExp,PCRE_CASELESS,nil));
763 Insert(P);
764 end;
765 CheckNew:=P;
766 end;
767
pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer)768 function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
769 var
770 PpcRE:PpcRegExp;
771 begin
772 PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale));
773 pcGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr));
774 Dispose(PpcRE,Done);
775 end;
776
pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer)777 function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
778 var
779 PpcRE:PpcRegExp;
780 begin
781 PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale));
782 if PpcRE^.Search(pchar(AStr),Length(AStr)) then
783 pcGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl)
784 else
785 pcGrepSub:='';
786 Dispose(PpcRE,Done);
787 end;
788
pcFastGrepMatch(WildCard, aStr: string)789 function pcFastGrepMatch(WildCard, aStr: string): Boolean;
790 var
791 PpcRE:PpcRegExp;
792 begin
793 PpcRE:=PRegExpCache^.CheckNew(WildCard);
794 pcFastGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr));
795 end;
796
pcFastGrepSub(WildCard, aStr, aRepl: string)797 function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
798 var
799 PpcRE:PpcRegExp;
800 begin
801 PpcRE:=PRegExpCache^.CheckNew(WildCard);
802 if PpcRE^.Search(pchar(AStr),Length(AStr)) then
803 pcFastGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl)
804 else
805 pcFastGrepSub:='';
806 end;
807
808 {$IFDEF PCRE_5_0}
pcGetVersion()809 function pcGetVersion : pchar; assembler; {$FRAME-}{$USES none}
810 asm
811 call pcre_version
812 end;
813 {$ENDIF PCRE_5_0}
814
pcError()815 function pcError;
816 var P: ppcRegExp absolute pRegExp;
817 begin
818 Result := (P = nil) or P^.Error;
819 If Result and (P <> nil) then
820 begin
821 { if P^.ErrorPos = 0 then
822 MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"', nil,mfConfirmation+mfOkButton)
823 else}
824 MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"'+GetString(erRegExpCompPos),
825 @P^.ErrorPos,mfConfirmation+mfOkButton);
826 Dispose(P, Done);
827 P:=nil;
828 end;
829 end;
830
pcInit()831 function pcInit;
832 var Options : Integer;
833 begin
834 If CaseSens then Options := 0 else Options := PCRE_CASELESS;
835 Result := New( PpcRegExp, Init( Pattern,
836 {DefaultOptions}
837 startup.MiscMultiData.cfgRegEx.DefaultOptions or Options,
838 DefaultLocaleTable) );
839 end;
840
841 Initialization
842 PRegExpCache:=New(PRegExpCollection,Init(64));
843 Finalization
844 Dispose(PRegExpCache,Done);
845 End.
846