1 {
2   pcRegExp - Perl compatible regular expressions for Virtual Pascal
3   (c) 2001 Peter S. Voronov aka Chem O'Dun <petervrn@yahoo.com>
4 
5   Based on PCRE library interface unit for Virtual Pascal.
6   (c) 2001 Alexander Tokarev <dwalin@dwalin.ru>
7 
8   The current PCRE version is: 3.7
9 
10   This software may be distributed under the terms of the modified BSD license
11   Copyright (c) 2001, Alexander Tokarev
12   All rights reserved.
13 
14   Redistribution and use in source and binary forms, with or without
15   modification, are permitted provided that the following conditions are met:
16 
17     * Redistributions of source code must retain the above copyright notice,
18       this list of conditions and the following disclaimer.
19     * Redistributions in binary form must reproduce the above copyright notice,
20       this list of conditions and the following disclaimer in the documentation
21       and/or other materials provided with the distribution.
22     * Neither the name of the <ORGANIZATION> nor the names of its contributors
23       may be used to endorse or promote products derived from this software without
24       specific prior written permission.
25 
26   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
27   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
30   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 
37   The PCRE library is written by: Philip Hazel <ph10@cam.ac.uk>
38   Copyright (c) 1997-2004 University of Cambridge
39 
40   AngelsHolocaust 4-11-04 updated to use version v5.0
41   (INFO: this is regex-directed, NFA)
42   AH:  9-11-04 - pcre_free: removed var, pcre already gives the ptr, now
43 			    everything works as it should (no more crashes)
44 		 -> removed CheckRegExp because pcre handles errors perfectly
45       10-11-04 - added pcError (errorhandling), pcInit
46       13-11-04 - removed the ErrorPos = 0 check -> always print erroroffset
47       17-10-05 - support for \1-\9 backreferences in TpcRegExp.GetReplStr
48       17-02-06 - added RunTimeOptions: caller can set options while searching
49       19-02-06 - added SearchOfs(): let PCRE use the complete string and offset
50 		 into the string itself
51       20-12-06 - support for version 7.0
52       27.08.08 - support for v7.7
53 }
54 
55 {$H+} {$DEFINE PCRE_3_7} {$DEFINE PCRE_5_0} {$DEFINE PCRE_7_0} {$DEFINE PCRE_7_7}
56 
57 Unit pcregexp;
58 
59 Interface
60 
61 uses objects;
62 
63 Type
64  PpcRegExp = ^TpcRegExp;
65 // TpcRegExp = object
66  TpcRegExp = object(TObject)
67   MatchesCount: integer;
68   RegExpC, RegExpExt : Pointer;
69   Matches:Pointer;
70   RegExp: shortstring;
71   SourceLen: integer;
72   PartialMatch : boolean;
73   Error : boolean;
74   ErrorMsg : Pchar;
75   ErrorPos : integer;
76   RunTimeOptions: Integer; // options which can be set by the caller
77   constructor Init(const ARegExp : shortstring; AOptions : integer; ALocale : Pointer);
Search(AStr: Pchar; ALen : longint)78   function Search(AStr: Pchar; ALen : longint) : boolean; virtual;
SearchNext( AStr: Pchar; ALen : longint)79   function SearchNext( AStr: Pchar; ALen : longint) : boolean; virtual;
SearchOfs( AStr: Pchar; ALen, AOfs : longint)80   function SearchOfs ( AStr: Pchar; ALen, AOfs : longint) : boolean; virtual;
MatchSub(ANom: integer; var Pos, Len : longint)81   function MatchSub(ANom: integer; var Pos, Len : longint) : boolean; virtual;
MatchFull(var Pos, Len : longint)82   function MatchFull(var Pos, Len : longint) : boolean; virtual;
GetSubStr(ANom: integer; AStr: Pchar)83   function GetSubStr(ANom: integer; AStr: Pchar) : string; virtual;
GetFullStr(AStr: Pchar)84   function GetFullStr(AStr: Pchar) : string; virtual;
GetReplStr(AStr: Pchar; const ARepl: string)85   function GetReplStr(AStr: Pchar; const ARepl: string) : string; virtual;
GetPreSubStr(AStr: Pchar)86   function GetPreSubStr(AStr: Pchar) : string; virtual;
GetPostSubStr(AStr: Pchar)87   function GetPostSubStr(AStr: Pchar) : string; virtual;
ErrorStr()88   function ErrorStr : string; virtual;
89   destructor Done; virtual;
90  end;
91 
pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer)92  function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer)93  function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
94 
pcFastGrepMatch(WildCard, aStr: string)95  function pcFastGrepMatch(WildCard, aStr: string): Boolean;
pcFastGrepSub(WildCard, aStr, aRepl: string)96  function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
97 
98 {$IFDEF PCRE_5_0}
pcGetVersion()99  function pcGetVersion : pchar;
100 {$ENDIF}
101 
pcError(var pRegExp : Pointer)102  function pcError (var pRegExp : Pointer) : Boolean;
pcInit(const Pattern: Shortstring; CaseSens: Boolean)103  function pcInit  (const Pattern: Shortstring; CaseSens: Boolean) : Pointer;
104 
105 Const { Options }
106  PCRE_CASELESS         = $0001;
107  PCRE_MULTILINE        = $0002;
108  PCRE_DOTALL           = $0004;
109  PCRE_EXTENDED         = $0008;
110  PCRE_ANCHORED         = $0010;
111  PCRE_DOLLAR_ENDONLY   = $0020;
112  PCRE_EXTRA            = $0040;
113  PCRE_NOTBOL           = $0080;
114  PCRE_NOTEOL           = $0100;
115  PCRE_UNGREEDY         = $0200;
116  PCRE_NOTEMPTY         = $0400;
117 {$IFDEF PCRE_5_0}
118  PCRE_UTF8             = $0800;
119  PCRE_NO_AUTO_CAPTURE  = $1000;
120  PCRE_NO_UTF8_CHECK    = $2000;
121  PCRE_AUTO_CALLOUT     = $4000;
122  PCRE_PARTIAL          = $8000;
123 {$ENDIF}
124 {$IFDEF PCRE_7_0}
125  PCRE_DFA_SHORTEST     = $00010000;
126  PCRE_DFA_RESTART      = $00020000;
127  PCRE_FIRSTLINE        = $00040000;
128  PCRE_DUPNAMES         = $00080000;
129  PCRE_NEWLINE_CR       = $00100000;
130  PCRE_NEWLINE_LF       = $00200000;
131  PCRE_NEWLINE_CRLF     = $00300000;
132  PCRE_NEWLINE_ANY      = $00400000;
133  PCRE_NEWLINE_ANYCRLF  = $00500000;
134 
135  PCRE_NEWLINE_BITS     = PCRE_NEWLINE_CR or PCRE_NEWLINE_LF or PCRE_NEWLINE_ANY;
136 
137 {$ENDIF}
138 {$IFDEF PCRE_7_7}
139  PCRE_BSR_ANYCRLF      = $00800000;
140  PCRE_BSR_UNICODE      = $01000000;
141  PCRE_JAVASCRIPT_COMPAT= $02000000;
142 {$ENDIF}
143 
144  PCRE_COMPILE_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_AUTO_CALLOUT + PCRE_CASELESS  +
145 				PCRE_DOLLAR_ENDONLY + PCRE_DOTALL + PCRE_EXTENDED  +
146 				PCRE_EXTRA + PCRE_MULTILINE + PCRE_NO_AUTO_CAPTURE +
147 				PCRE_UNGREEDY + PCRE_UTF8 + PCRE_NO_UTF8_CHECK
148 				{$IFDEF PCRE_7_0}
149 				+ PCRE_DUPNAMES + PCRE_FIRSTLINE + PCRE_NEWLINE_BITS
150 				{$ENDIF}
151 				{$IFDEF PCRE_7_7}
152 				+ PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE + PCRE_JAVASCRIPT_COMPAT
153 				{$ENDIF}
154 				;
155 
156  PCRE_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL +
157 			     PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL
158 			     {$IFDEF PCRE_7_0}
159 			     + PCRE_NEWLINE_BITS
160 			     {$ENDIF}
161 			     {$IFDEF PCRE_7_7}
162 			     + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE
163 			     {$ENDIF}
164 			     ;
165 
166 {$IFDEF PCRE_7_0}
167  PCRE_DFA_EXEC_ALLOWED_OPTIONS = PCRE_ANCHORED + PCRE_NOTBOL + PCRE_NOTEOL +
168 				 PCRE_NOTEMPTY + PCRE_NO_UTF8_CHECK + PCRE_PARTIAL +
169 				 PCRE_DFA_SHORTEST + PCRE_DFA_RESTART +
170 				 PCRE_NEWLINE_BITS
171 				 {$IFDEF PCRE_7_7}
172 				 + PCRE_BSR_ANYCRLF + PCRE_BSR_UNICODE
173 				 {$ENDIF}
174 				 ;
175 {$ENDIF}
176 
177 { Exec-time and get/set-time error codes }
178  PCRE_ERROR_NOMATCH        =  -1;
179  PCRE_ERROR_NULL	   =  -2;
180  PCRE_ERROR_BADOPTION      =  -3;
181  PCRE_ERROR_BADMAGIC       =  -4;
182  PCRE_ERROR_UNKNOWN_MODE   =  -5;
183  PCRE_ERROR_NOMEMORY       =  -6;
184  PCRE_ERROR_NOSUBSTRING    =  -7;
185 {$IFDEF PCRE_5_0}
186  PCRE_ERROR_MATCHLIMIT     =  -8;
187  PCRE_ERROR_CALLOUT        =  -9;  { Never used by PCRE itself }
188  PCRE_ERROR_BADUTF8        = -10;
189  PCRE_ERROR_BADUTF8_OFFSET = -11;
190  PCRE_ERROR_PARTIAL        = -12;
191  PCRE_ERROR_BADPARTIAL     = -13;
192  PCRE_ERROR_INTERNAL       = -14;
193  PCRE_ERROR_BADCOUNT       = -15;
194 {$ENDIF}
195 {$IFDEF PCRE_7_0}
196  PCRE_ERROR_DFA_UITEM      = -16;
197  PCRE_ERROR_DFA_UCOND      = -17;
198  PCRE_ERROR_DFA_UMLIMIT    = -18;
199  PCRE_ERROR_DFA_WSSIZE     = -19;
200  PCRE_ERROR_DFA_RECURSE    = -20;
201  PCRE_ERROR_RECURSIONLIMIT = -21;
202  PCRE_ERROR_NULLWSLIMIT    = -22;
203  PCRE_ERROR_BADNEWLINE     = -23;
204 {$ENDIF}
205 
206 { Request types for pcre_fullinfo() }
207 
208  PCRE_INFO_OPTIONS         =  0;
209  PCRE_INFO_SIZE 	   =  1;
210  PCRE_INFO_CAPTURECOUNT    =  2;
211  PCRE_INFO_BACKREFMAX      =  3;
212  PCRE_INFO_FIRSTBYTE       =  4;
213  PCRE_INFO_FIRSTCHAR       =  4; { For backwards compatibility }
214  PCRE_INFO_FIRSTTABLE      =  5;
215 {$IFDEF PCRE_5_0}
216  PCRE_INFO_LASTLITERAL     =  6;
217  PCRE_INFO_NAMEENTRYSIZE   =  7;
218  PCRE_INFO_NAMECOUNT       =  8;
219  PCRE_INFO_NAMETABLE       =  9;
220  PCRE_INFO_STUDYSIZE       = 10;
221  PCRE_INFO_DEFAULT_TABLES  = 11;
222 {$ENDIF PCRE_5_0}
223 {$IFDEF PCRE_7_7}
224  PCRE_INFO_OKPARTIAL       = 12;
225  PCRE_INFO_JCHANGED        = 13;
226  PCRE_INFO_HASCRORLF       = 14;
227 {$ENDIF}
228 
229 { Request types for pcre_config() }
230 {$IFDEF PCRE_5_0}
231  PCRE_CONFIG_UTF8       	    = 0;
232  PCRE_CONFIG_NEWLINE    	    = 1;
233  PCRE_CONFIG_LINK_SIZE  	    = 2;
234  PCRE_CONFIG_POSIX_MALLOC_THRESHOLD = 3;
235  PCRE_CONFIG_MATCH_LIMIT	    = 4;
236  PCRE_CONFIG_STACKRECURSE           = 5;
237  PCRE_CONFIG_UNICODE_PROPERTIES     = 6;
238 {$ENDIF PCRE_5_0}
239 {$IFDEF PCRE_7_0}
240  PCRE_CONFIG_MATCH_LIMIT_RECURSION  = 7;
241 {$ENDIF}
242 {$IFDEF PCRE_7_7}
243  PCRE_CONFIG_BSR		    = 8;
244 {$ENDIF}
245 
246 { Bit flags for the pcre_extra structure }
247 {$IFDEF PCRE_5_0}
248  PCRE_EXTRA_STUDY_DATA  	  = $0001;
249  PCRE_EXTRA_MATCH_LIMIT 	  = $0002;
250  PCRE_EXTRA_CALLOUT_DATA	  = $0004;
251  PCRE_EXTRA_TABLES      	  = $0008;
252 {$ENDIF PCRE_5_0}
253 {$IFDEF PCRE_7_0}
254  PCRE_EXTRA_MATCH_LIMIT_RECURSION = $0010;
255 {$ENDIF}
256 
257 Const
258 // DefaultOptions : integer = 0;
259  DefaultLocaleTable : pointer = nil;
260 
261 {$IFDEF PCRE_5_0}
262 { The structure for passing additional data to pcre_exec(). This is defined in
263 such as way as to be extensible. Always add new fields at the end, in order to
264 remain compatible. }
265 
266 type ppcre_extra = ^tpcre_extra;
267      tpcre_extra = record
268        flags : longint; 	       { Bits for which fields are set }
269        study_data : pointer;           { Opaque data from pcre_study() }
270        match_limit : longint;          { Maximum number of calls to match() }
271        callout_data : pointer;         { Data passed back in callouts }
272        tables : pointer;	       { Pointer to character tables }
273        match_limit_recursion: longint; { Max recursive calls to match() }
274      end;
275 
276 type ppcre_callout_block = ^pcre_callout_block;
277      pcre_callout_block = record
278        version,
279   (* ------------------------ Version 0 ------------------------------- *)
280        callout_number : integer;
281        offset_vector : pointer;
282        subject : pchar;
283        subject_length, start_match, current_position, capture_top,
284        capture_last : integer;
285        callout_data : pointer;
286   (* ------------------- Added for Version 1 -------------------------- *)
287        pattern_position, next_item_length : integer;
288      end;
289 {$ENDIF PCRE_5_0}
290 
291 {$OrgName+}
292 {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}
293 
294  { local replacement of external pcre memory management functions }
pcre_malloc( size : integer )295  function pcre_malloc( size : integer ) : pointer;
296  procedure pcre_free( {var} p : pointer );
297 {$IFDEF PCRE_5_0}
size()298  const pcre_stack_malloc: function ( size : integer ): pointer = pcre_malloc;
299        pcre_stack_free: procedure ( {var} p : pointer ) = pcre_free;
pcre_callout(var p : ppcre_callout_block)300  function pcre_callout(var p : ppcre_callout_block) : integer;
301 {$ENDIF PCRE_5_0}
302 {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
303 
304 Implementation
305 
306 Uses strings, collect, messages, dnapp, commands, advance0, stringsx
307     {$IFDEF VIRTUALPASCAL} ,vpsyslow {$ENDIF VIRTUALPASCAL};
308 
309 Const
310  MAGIC_NUMBER = $50435245; { 'PCRE' }
311  MAX_MATCHES = 90; { changed in 3.5 version; should be divisible by 3, was 64}
312 
313 Type
314  PMatchArray = ^TMatchArray;
315  TMatchArray = array[0..( MAX_MATCHES * 3 )] of integer;
316 
317  PRegExpCollection = ^TRegExpCollection;
318  TRegExpCollection =  object(TSortedCollection)
319    MaxRegExp : integer;
320    SearchRegExp : shortstring;
321    CompareModeInsert : boolean;
322    constructor Init(AMaxRegExp:integer);
323    procedure FreeItem(P: Pointer); virtual;
Compare(P1, P2: Pointer)324    function  Compare(P1, P2: Pointer): Integer; virtual;
Find(ARegExp:shortstring;var P: PpcRegExp)325    function  Find(ARegExp:shortstring;var P: PpcRegExp):boolean; virtual;
CheckNew(ARegExp:shortstring)326    function CheckNew(ARegExp:shortstring):PpcRegExp;virtual;
327  end;
328 
329 Var
330  PRegExpCache : PRegExpCollection;
331 
332 
333 {$IFDEF VIRTUALPASCAL} {&Cdecl+} {$ENDIF VIRTUALPASCAL}
334 
335  { imported original pcre functions }
336 
pcre_compile( const pattern : PChar; options : integer;337  function pcre_compile( const pattern : PChar; options : integer;
338 			var errorptr : PChar; var erroroffset : integer;
339 			const tables : PChar ) : pointer {pcre}; external;
340 {$IFDEF PCRE_7_0}
pcre_compile2( const pattern : PChar; options : integer;341  function pcre_compile2( const pattern : PChar; options : integer;
342 			 var errorcodeptr : Integer;
343 			 var errorptr : PChar; var erroroffset : integer;
344 			 const tables : PChar ) : pointer {pcre}; external;
345 {$ENDIF}
346 {$IFDEF PCRE_5_0}
pcre_config( what : integer; where : pointer)347  function pcre_config( what : integer; where : pointer) : integer; external;
pcre_copy_named_substring( const code : pointer {pcre};348  function pcre_copy_named_substring( const code : pointer {pcre};
349 				     const subject : pchar;
350 				     var ovector : integer;
351 				     stringcount : integer;
352 				     const stringname : pchar;
353 				     var buffer : pchar;
354 				     size : integer) : integer; external;
pcre_copy_substring( const subject : pchar; var ovector : integer;355  function pcre_copy_substring( const subject : pchar; var ovector : integer;
356 			       stringcount, stringnumber : integer;
357 			       var buffer : pchar; size : integer )
358 			       : integer; external;
pcre_exec( const argument_re : pointer {pcre};359  function pcre_exec( const argument_re : pointer {pcre};
360 		     const extra_data : pointer {pcre_extra};
361 {$ELSE}
362  function pcre_exec( const external_re : pointer;
363 		     const external_extra : pointer;
364 {$ENDIF}
365 		     const subject : PChar;
366 		     length, start_offset, options : integer;
367 		     offsets : pointer;
368 		     offsetcount : integer ) : integer; external;
369 {$IFDEF PCRE_7_0}
pcre_dfa_exec( const argument_re : pointer {pcre};370  function pcre_dfa_exec( const argument_re : pointer {pcre};
371 			 const extra_data : pointer {pcre_extra};
372 			 const subject : pchar;
373 			 length, start_offset, options : integer;
374 			 offsets : pointer;
375 			 offsetcount : integer;
376 			 workspace : pointer;
377 			 wscount : integer ) : integer; external;
378 {$ENDIF}
379 {$IFDEF PCRE_5_0}
380  procedure pcre_free_substring( const p : pchar ); external;
381  procedure pcre_free_substring_list( var p : pchar ); external;
pcre_fullinfo( const argument_re : pointer {pcre};382  function pcre_fullinfo( const argument_re : pointer {pcre};
383 			 const extra_data : pointer {pcre_extra};
384 			 what : integer;
385 			 where : pointer ) : integer; external;
pcre_get_named_substring( const code : pointer {pcre};386  function pcre_get_named_substring( const code : pointer {pcre};
387 				    const subject : pchar;
388 				    var ovector : integer;
389 				    stringcount : integer;
390 				    const stringname : pchar;
391 				    var stringptr : pchar ) : integer; external;
pcre_get_stringnumber( const code : pointer {pcre};392  function pcre_get_stringnumber( const code : pointer {pcre};
393 				 const stringname : pchar ) : integer; external;
pcre_get_stringtable_entries( const code : pointer {pcre};394  function pcre_get_stringtable_entries( const code : pointer {pcre};
395 					const stringname : pchar;
396 					var firstptr,
397 					    lastptr : pchar ) : integer; external;
pcre_get_substring( const subject : pchar; var ovector : integer;398  function pcre_get_substring( const subject : pchar; var ovector : integer;
399 			      stringcount, stringnumber : integer;
400 			      var stringptr : pchar ) : integer; external;
pcre_get_substring_list( const subject : pchar; var ovector : integer;401  function pcre_get_substring_list( const subject : pchar; var ovector : integer;
402 				   stringcount : integer;
403 				   listptr : pointer {const char ***listptr}) : integer; external;
pcre_info( const argument_re : pointer {pcre};404  function pcre_info( const argument_re : pointer {pcre};
405 		     var optptr : integer;
406 		     var first_byte : integer ) : integer; external;
pcre_maketables()407  function pcre_maketables : pchar; external;
408 {$ENDIF}
409 {$IFDEF PCRE_7_0}
pcre_refcount( const argument_re : pointer {pcre};410  function pcre_refcount( const argument_re : pointer {pcre};
411 			 adjust : integer ) : pchar; external;
412 {$ENDIF}
pcre_study( const external_re : pointer {pcre};413  function pcre_study( const external_re : pointer {pcre};
414 		      options : integer;
415 		      var errorptr : PChar ) : pointer {pcre_extra}; external;
416 {$IFDEF PCRE_5_0}
pcre_version()417  function pcre_version : pchar; external;
418 {$ENDIF}
419 
pcre_malloc( size : integer )420  function pcre_malloc( size : integer ) : pointer;
421  begin
422   GetMem( result, size );
423  end;
424 
425  procedure pcre_free( {var} p : pointer );
426  begin
427   if (p <> nil) then
428     FreeMem( p, 0 );
429   {@p := nil;}
430  end;
431 
432 {$IFDEF PCRE_5_0}
433 (* Called from PCRE as a result of the (?C) item. We print out where we are in
434 the match. Yield zero unless more callouts than the fail count, or the callout
435 data is not zero. *)
436 
pcre_callout()437  function pcre_callout;
438  begin
439  end;
440 {$ENDIF}
441 
442 {$IFDEF VIRTUALPASCAL} {&Cdecl-} {$ENDIF VIRTUALPASCAL}
443 
444 // Always include the newest version of the library
445 {$IFDEF PCRE_7_7}
446   {$L pcre77.lib}
447 {$ELSE}
448   {$IFDEF PCRE_7_0}
449     {$L pcre70.lib}
450   {$ELSE}
451     {$IFDEF PCRE_5_0}
452       {$L pcre50.lib}
453     {$ELSE}
454       {$IFDEF PCRE_3_7}
455 	{$L pcre37.lib}
456       {$ENDIF PCRE_3_7}
457     {$ENDIF PCRE_5_0}
458   {$ENDIF PCRE_7_0}
459 {$ENDIF PCRE_7_7}
460 
461 {TpcRegExp}
462 
463  constructor TpcRegExp.Init(const ARegExp:shortstring; AOptions:integer; ALocale : Pointer);
464  var
465   pRegExp : PChar;
466  begin
467   RegExp:=ARegExp;
468   RegExpC:=nil;
469   RegExpExt:=nil;
470   Matches:=nil;
471   MatchesCount:=0;
472   Error:=true;
473   ErrorMsg:=nil;
474   ErrorPos:=0;
475   RunTimeOptions := 0;
476   if length(RegExp) < 255 then
477    begin
478     RegExp[length(RegExp)+1]:=#0;
479     pRegExp:=@RegExp[1];
480    end
481   else
482    begin
483     GetMem(pRegExp,length(RegExp)+1);
484     pRegExp:=strpcopy(pRegExp,RegExp);
485    end;
486   RegExpC := pcre_compile( pRegExp,
487 			   AOptions and PCRE_COMPILE_ALLOWED_OPTIONS,
488 			   ErrorMsg, ErrorPos, ALocale);
489   if length(RegExp) = 255 then
490    StrDispose(pRegExp);
491   if RegExpC = nil then
492    exit;
493   ErrorMsg:=nil;
494   RegExpExt := pcre_study( RegExpC, 0, ErrorMsg );
495   if (RegExpExt = nil) and (ErrorMsg <> nil) then
496    begin
497     pcre_free(RegExpC);
498     exit;
499    end;
500   GetMem(Matches,SizeOf(TMatchArray));
501   Error:=false;
502  end;
503 
504  destructor TpcRegExp.Done;
505  begin
506   if RegExpC <> nil then
507     pcre_free(RegExpC);
508   if RegExpExt <> nil then
509     pcre_free(RegExpExt);
510   if Matches <> nil then
511     FreeMem(Matches,SizeOf(TMatchArray));
512  end;
513 
TpcRegExp.SearchNext( AStr: Pchar; ALen : longint )514  function TpcRegExp.SearchNext( AStr: Pchar; ALen : longint ) : boolean;
515  var Options: Integer;
516  begin // must handle PCRE_ERROR_PARTIAL here
517   Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and
518 	     PCRE_EXEC_ALLOWED_OPTIONS;
519   if MatchesCount > 0 then
520     MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, PMatchArray(Matches)^[1],
521 			     Options, Matches, MAX_MATCHES ) else
522     MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, 0,
523 			     Options, Matches, MAX_MATCHES );
524 {  if MatchesCount = 0 then
525     MatchesCount := MatchesCount div 3;}
526   PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL;
527   SearchNext := MatchesCount > 0;
528  end;
529 
TpcRegExp.Search( AStr: Pchar; ALen : longint)530  function TpcRegExp.Search( AStr: Pchar; ALen : longint):boolean;
531  begin
532   MatchesCount:=0;
533   Search:=SearchNext(AStr,ALen);
534   SourceLen:=ALen;
535  end;
536 
TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint )537  function TpcRegExp.SearchOfs( AStr: Pchar; ALen, AOfs: longint ) : boolean;
538  var Options: Integer;
539  begin
540   MatchesCount:=0;
541   Options := (RunTimeOptions or startup.MiscMultiData.cfgRegEx.DefaultOptions) and
542 	     PCRE_EXEC_ALLOWED_OPTIONS;
543   MatchesCount:=pcre_exec( RegExpC, RegExpExt, AStr, ALen, AOfs,
544 			   Options, Matches, MAX_MATCHES );
545   PartialMatch := MatchesCount = PCRE_ERROR_PARTIAL;
546   SearchOfs := MatchesCount > 0;
547   SourceLen := ALen-AOfs;
548  end;
549 
TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint)550  function TpcRegExp.MatchSub(ANom:integer; var Pos,Len:longint):boolean;
551  begin
552   if (MatchesCount > 0) and (ANom <= (MatchesCount-1)) then
553    begin
554     ANom:=ANom*2;
555     Pos:=PMatchArray(Matches)^[ANom];
556     Len:=PMatchArray(Matches)^[ANom+1]-Pos;
557     MatchSub:=true;
558    end
559   else
560    MatchSub:=false;
561  end;
562 
TpcRegExp.MatchFull(var Pos,Len:longint)563  function TpcRegExp.MatchFull(var Pos,Len:longint):boolean;
564  begin
565   MatchFull:=MatchSub(0,Pos,Len);
566  end;
567 
TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar)568  function TpcRegExp.GetSubStr(ANom: integer; AStr: Pchar):string;
569  var
570   s: ansistring;
571   pos,len: longint;
572  begin
573   s:='';
574   if MatchSub(ANom, pos, len) then
575    begin
576     setlength(s, len);
577     Move(AStr[pos], s[1], len);
578    end;
579   GetSubStr:=s;
580  end;
581 
TpcRegExp.GetPreSubStr(AStr: Pchar)582  function TpcRegExp.GetPreSubStr(AStr: Pchar):string;
583  var
584   s: ansistring;
585   l: longint;
586  begin
587   s:='';
588   if (MatchesCount > 0) then
589    begin
590     l:=PMatchArray(Matches)^[0]-1;
591     if l > 0 then
592      begin
593       setlength(s,l);
594       Move(AStr[1],s[1],l);
595      end;
596    end;
597   GetPreSubStr:=s;
598  end;
599 
TpcRegExp.GetPostSubStr(AStr: Pchar)600  function TpcRegExp.GetPostSubStr(AStr: Pchar):string;
601  var
602   s: ansistring;
603   l: longint;
604   ANom: integer;
605  begin
606   s:='';
607   if (MatchesCount > 0) then
608    begin
609     ANom:=(MatchesCount-1){*2} shl 1;
610     l:=SourceLen-PMatchArray(Matches)^[ANom+1]+1;
611     if l > 0 then
612      begin
613       setlength(s,l);
614       Move(AStr[PMatchArray(Matches)^[ANom+1]],s[1],l);
615      end;
616    end;
617   GetPostSubStr:=s;
618  end;
619 
620 
TpcRegExp.GetFullStr(AStr: Pchar)621  function TpcRegExp.GetFullStr(AStr: Pchar):string;
622  var
623   s: ansistring;
624   l: longint;
625  begin
626   GetFullStr:=GetSubStr(0,AStr);
627  end;
628 
TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string)629  function TpcRegExp.GetReplStr(AStr: Pchar; const ARepl: string):string;
630  var
631   s: ansistring;
632   l,i,lasti: longint;
633  begin
634   l:=length(ARepl);
635   i:=1;
636   lasti:=1;
637   s:='';
638   while i <= l do
639    begin
640     case ARepl[i] of
641      '\' :
642       begin
643        if i < l then
644 	begin
645 	 s:=s+copy(ARepl,lasti,i-lasti){+ARepl[i+1]};
646 	 {AH 17-10-05 support for POSIX \1-\9 backreferences}
647 	 case ARepl[i+1] of
648 	  '0' : s:=s+GetFullStr(AStr);
649 	  '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr);
650 	  else s:=s+ARepl[i+1]; // copy the escaped character
651 	 end;
652 	end;
653        inc(i);
654        lasti:=i+1;
655       end;
656      '$' :
657       begin
658        if i < l then
659 	begin
660 	 s:=s+copy(ARepl,lasti,i-lasti);
661 	 case ARepl[i+1] of
662 	  '&' : s:=s+GetFullStr(AStr);
663 	  '1'..'9' : s:=s+GetSubStr(ord(ARepl[i+1])-ord('0'),AStr);
664 	  '`' : s:=s+GetPreSubStr(AStr);
665 	  #39 : s:=s+GetPostSubStr(AStr);
666 	 end;
667 	end;
668        inc(i);
669        lasti:=i+1;
670       end;
671     end;
672     inc(i);
673    end;
674   if lasti <= {AH 25-10-2004 added =, else l==1 won't work} l then
675     s:=s+copy(ARepl,lasti,l-lasti+1);
676   GetReplStr:=s;
677  end;
678 
TpcRegExp.ErrorStr()679  function TpcRegExp.ErrorStr:string;
680   begin
681    ErrorStr:=StrPas(ErrorMsg);
682   end;
683 
684 {TRegExpCollection}
685 
686 constructor TRegExpCollection.Init(AMaxRegExp: integer);
687 begin
688  Inherited Init(1,1);
689  MaxRegExp:=AMaxRegExp;
690  CompareModeInsert:=true;
691 end;
692 
693 procedure TRegExpCollection.FreeItem(P: Pointer);
694 begin
695  if P <> nil then
696   begin
697    Dispose(PpcRegExp(P),Done);
698   end;
699 end;
700 
TRegExpCollection.Compare(P1, P2: Pointer)701 function  TRegExpCollection.Compare(P1, P2: Pointer): Integer;
702 //var
703 // l,l1,l2,i : byte;
704 //// wPos: pchar;
705 begin
706  if CompareModeInsert then
707   begin
708 //   l1:=length(PpcRegExp(P1)^.RegExp);
709 //   l2:=length(PpcRegExp(P2)^.RegExp);
710 //   if l1 > l2 then l:=l2 else
711 //      	     l:=l1;
712 //   for i:=1 to l do
713 //     if PpcRegExp(P1).RegExp[i] <> PpcRegExp(P2).RegExp[i] then break;
714 //   if i <=l then
715 //     Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(PpcRegExp(P2).RegExp[i]) else
716 //     Compare:=l1-l2;
717     Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, PpcRegExp(P2).RegExp, False);
718   end
719  else
720   begin
721 //   l1:=length(PpcRegExp(P1)^.RegExp);
722 //   l2:=length(SearchRegExp);
723 //   if l1 > l2 then l:=l2 else
724 //      	     l:=l1;
725 //   for i:=1 to l do
726 //     if PpcRegExp(P1).RegExp[i] <> SearchRegExp[i] then
727 //     begin
728 //       Compare:=ord(PpcRegExp(P1).RegExp[i])-ord(SearchRegExp[i]);
729 //       break;
730 //     end;
731 //   if i > l then Compare:=l1-l2;
732     Compare := stringsx.PasStrCmp(PpcRegExp(P1).RegExp, SearchRegExp, False);
733   end;
734 end;
735 
TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp)736 function  TRegExpCollection.Find(ARegExp:shortstring;var P: PpcRegExp):boolean;
737 var I : integer;
738 begin
739  CompareModeInsert:=false;
740  SearchRegExp:=ARegExp;
741  if Search(nil,I) then
742   begin
743    P:=PpcRegExp(At(I));
744    Find:=true;
745   end
746  else
747   begin
748    P:=nil;
749    Find:=false;
750   end;
751  CompareModeInsert:=true;
752 end;
753 
TRegExpCollection.CheckNew(ARegExp:shortstring)754 function TRegExpCollection.CheckNew(ARegExp:shortstring):PpcRegExp;
755 var
756  P : PpcRegExp;
757 begin
758  if not Find(ARegExp,P) then
759   begin
760    if Count = MaxRegExp then
761     AtFree(0);
762    P:=New(ppcRegExp,Init(ARegExp,PCRE_CASELESS,nil));
763    Insert(P);
764   end;
765  CheckNew:=P;
766 end;
767 
pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer)768 function pcGrepMatch(WildCard, aStr: string; AOptions:integer; ALocale : Pointer): Boolean;
769 var
770  PpcRE:PpcRegExp;
771 begin
772  PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale));
773  pcGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr));
774  Dispose(PpcRE,Done);
775 end;
776 
pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer)777 function pcGrepSub(WildCard, aStr, aRepl: string; AOptions:integer; ALocale : Pointer): string;
778 var
779  PpcRE:PpcRegExp;
780 begin
781  PpcRE:=New(ppcRegExp,Init(WildCard,AOptions,Alocale));
782  if PpcRE^.Search(pchar(AStr),Length(AStr)) then
783   pcGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl)
784  else
785   pcGrepSub:='';
786  Dispose(PpcRE,Done);
787 end;
788 
pcFastGrepMatch(WildCard, aStr: string)789 function pcFastGrepMatch(WildCard, aStr: string): Boolean;
790 var
791  PpcRE:PpcRegExp;
792 begin
793  PpcRE:=PRegExpCache^.CheckNew(WildCard);
794  pcFastGrepMatch:=PpcRE^.Search(pchar(AStr),Length(AStr));
795 end;
796 
pcFastGrepSub(WildCard, aStr, aRepl: string)797 function pcFastGrepSub(WildCard, aStr, aRepl: string): string;
798 var
799  PpcRE:PpcRegExp;
800 begin
801  PpcRE:=PRegExpCache^.CheckNew(WildCard);
802  if PpcRE^.Search(pchar(AStr),Length(AStr)) then
803   pcFastGrepSub:=PpcRE^.GetReplStr(pchar(AStr),ARepl)
804  else
805   pcFastGrepSub:='';
806 end;
807 
808 {$IFDEF PCRE_5_0}
pcGetVersion()809 function pcGetVersion : pchar; assembler; {$FRAME-}{$USES none}
810 asm
811   call pcre_version
812 end;
813 {$ENDIF PCRE_5_0}
814 
pcError()815 function pcError;
816 var P: ppcRegExp absolute pRegExp;
817 begin
818   Result := (P = nil) or P^.Error;
819   If Result and (P <> nil) then
820   begin
821 {     if P^.ErrorPos = 0 then
822       MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"', nil,mfConfirmation+mfOkButton)
823     else}
824       MessageBox(GetString(erRegExpCompile)+'"'+P^.ErrorStr+'"'+GetString(erRegExpCompPos),
825 		 @P^.ErrorPos,mfConfirmation+mfOkButton);
826     Dispose(P, Done);
827     P:=nil;
828   end;
829 end;
830 
pcInit()831 function pcInit;
832 var Options : Integer;
833 begin
834   If CaseSens then Options := 0 else Options := PCRE_CASELESS;
835   Result := New( PpcRegExp, Init( Pattern,
836 				  {DefaultOptions}
837 				  startup.MiscMultiData.cfgRegEx.DefaultOptions or Options,
838 				  DefaultLocaleTable) );
839 end;
840 
841 Initialization
842  PRegExpCache:=New(PRegExpCollection,Init(64));
843 Finalization
844  Dispose(PRegExpCache,Done);
845 End.
846