1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2013 University of Cambridge
10 
11   The machine code generator part (this module) was written by Zoltan Herczeg
12                       Copyright (c) 2010-2013
13 
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17 
18     * Redistributions of source code must retain the above copyright notice,
19       this list of conditions and the following disclaimer.
20 
21     * Redistributions in binary form must reproduce the above copyright
22       notice, this list of conditions and the following disclaimer in the
23       documentation and/or other materials provided with the distribution.
24 
25     * Neither the name of the University of Cambridge nor the names of its
26       contributors may be used to endorse or promote products derived from
27       this software without specific prior written permission.
28 
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42 
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include "pcre_internal.h"
48 
49 #if defined SUPPORT_JIT
50 
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54 
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61 
62 #include "sljit/sljitLir.c"
63 
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67 
68 /* Defines for debugging purposes. */
69 
70 /* 1 - Use unoptimized capturing brackets.
71    2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73 
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76 
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
DESTROY_REGISTERS is only defined when SLJIT_DEBUG is defined to a
non-zero value. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89 
90 /*
Short summary about the backtracking mechanism employed by the jit code generator:
92 
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
98 
99   'ab' - 'a' and 'b' regexps are concatenated
100   'a+' - 'a' is the sub-expression of the '+' operator
101 
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107 
108  Greedy star operator (*) :
109    Matching path: match happens.
110    Backtrack path: match failed.
111  Non-greedy star operator (*?) :
112    Matching path: no need to perform a match.
113    Backtrack path: match is required.
114 
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
118 
119    A(B|C)D
120 
121 The generated code will be the following:
122 
123  A matching path
124  '(' matching path (pushing arguments to the stack)
125  B matching path
126  ')' matching path (pushing arguments to the stack)
127  D matching path
128  return with successful match
129 
130  D backtrack path
131  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132  B backtrack path
133  C expected path
134  jump to D matching path
135  C backtrack path
136  A backtrack path
137 
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
144 */
145 
146 /*
147 Saved stack frames:
148 
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153 
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156 
157 Thus we can restore the private data to a particular point in the stack.
158 */
159 
/* Argument block passed by pointer to a compiled matcher (see the
jit_function typedef). The pointer-sized members are declared first,
followed by the 32-bit / int members and finally the byte-sized flags. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  void *callout_data;
  /* Everything else after. */
  pcre_uint32 limit_match;
  int real_offset_count;
  int offset_count;
  /* Byte-sized flags (presumably mirroring the PCRE_NOTBOL, PCRE_NOTEOL,
  PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART options - confirm at the call site). */
  pcre_uint8 notbol;
  pcre_uint8 noteol;
  pcre_uint8 notempty;
  pcre_uint8 notempty_atstart;
} jit_arguments;
179 
/* Bookkeeping for generated code. The three arrays have
JIT_NUMBER_OF_COMPILE_MODES entries each (presumably one slot per
compile mode, all indexed the same way - confirm where they are filled). */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  /* User supplied callback and the opaque pointer stored with it. */
  PUBL(jit_callback) callback;
  void *userdata;
  pcre_uint32 top_bracket;
  pcre_uint32 limit_match;
} executable_functions;
189 
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194 
/* Node of a singly linked list of stubs; each node pairs a jump (start)
with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200 
/* Node of a singly linked list that pairs a label with a pointer to an
sljit_uw word (presumably the word that receives the final address of the
label - confirm where the list is consumed). */
typedef struct label_addr_list {
  struct sljit_label *label;
  sljit_uw *update_addr;
  struct label_addr_list *next;
} label_addr_list;
206 
/* Negative sentinel values for frame sizes (the backtrack structures
document "less than 0" as "frame not needed"). */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211 
/* Tags for control entries (presumably the entry kinds stored in the
control verb chain - confirm at the users). */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216 
/* Signature of a generated matcher: takes the argument block and returns
an int, using the SLJIT_CALL calling convention. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218 
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants (every *_backtrack structure in this file starts
with a backtrack_common member named "common"). */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
233 
/* Backtrack record for assertions. */
typedef struct assert_backtrack {
  backtrack_common common;
  /* Jumps collected for a failed condition. */
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
244 
/* Backtrack record for brackets. The union member in use depends on the
kind of bracket, as noted on each member. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
264 
/* Backtrack record for possessive brackets (presumably the *POS bracket
opcodes - confirm at the allocation site). */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
274 
/* Backtrack record that only adds a matching path label to the common
part (presumably used for OP_BRAMINZERO - confirm at the allocation site). */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
279 
/* Backtrack record for iterators. */
typedef struct iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} iterator_backtrack;
285 
/* Node of a singly linked list describing one recursion target. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
295 
/* Backtrack record for recursion; adds a single flag to the common part
(presumably set when the recursed pattern is compiled inline - confirm). */
typedef struct recurse_backtrack {
  backtrack_common common;
  BOOL inlined_pattern;
} recurse_backtrack;
300 
/* Pseudo opcode with the value OP_TABLE_LENGTH (presumably one past the
last real opcode, so it cannot collide with one - confirm in pcre_internal.h). */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack record for a then trap. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
315 
#define MAX_RANGE_SIZE 4

/* State shared by all code generator functions while one pattern is
compiled. The functions in this file receive it as "common". */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* First byte code. */
  pcre_uchar *start;
  /* Maps private data offset to each opcode. */
  sljit_si *private_data_ptrs;
  /* This read-only data is available during runtime. */
  sljit_uw *read_only_data;
  /* The total size of the read-only data. */
  sljit_uw read_only_data_size;
  /* The next free entry of the read_only_data. */
  sljit_uw *read_only_data_ptr;
  /* Tells whether the capturing bracket is optimized. */
  pcre_uint8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  pcre_uint8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  int cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  int ovector_start;
  /* Last known position of the requested byte. */
  int req_char_ptr;
  /* Head of the last recursion. */
  int recursive_head_ptr;
  /* First inspected character for partial matching. */
  int start_used_ptr;
  /* Starting pointer for partial soft matches. */
  int hit_start;
  /* End pointer of the first line. */
  int first_line_end;
  /* Points to the marked string. */
  int mark_ptr;
  /* Recursive control verb management chain. */
  int control_head_ptr;
  /* Points to the last matched capture block index. */
  int capture_last_ptr;
  /* Points to the starting position of the current match. */
  int start_ptr;

  /* Flipped and lower case tables. */
  const pcre_uint8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* TRUE, when minlength is greater than 0.
  NOTE(review): the member name suggests the opposite polarity, and
  check_opcode_types sets it to TRUE when OP_SET_SOM is seen - confirm
  against the place where it is initialised. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* Needs to know the start position anytime. */
  BOOL needs_start_ptr;
  /* Currently in recurse or negative assert. */
  BOOL local_exit;
  /* Currently in a positive assert. */
  BOOL positive_assert;
  /* Newline control. */
  int nltype;
  pcre_uint32 nlmax;
  pcre_uint32 nlmin;
  int newline;
  int bsr_nltype;
  pcre_uint32 bsr_nlmax;
  pcre_uint32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  pcre_uchar *name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  label_addr_list *label_addrs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assert_quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL jscript_compat;
  /* The members below exist only in UTF / UCP enabled builds. */
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
#endif
#ifdef COMPILE_PCRE8
  jump_list *utfreadchar;
  jump_list *utfreadchar16;
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
  jump_list *getucd;
#endif
} compiler_common;
437 
/* For byte_sequence_compare. */

/* The members after sourcereg exist only when SLJIT_UNALIGNED is defined to
a non-zero value. The unions "c" and "oc" have the same layout: the same
storage viewed as an int, a 16 bit value, or an array of code units whose
element type and count depend on the code unit width being compiled. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
471 
/* Undefine sljit macros. CMP is redefined further down in this file as a
shortcut for sljit_emit_cmp. */
#undef CMP

/* Used for accessing the elements of the stack. Element i is at a negative
byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Fixed mapping of the roles used by the code generator to sljit
scratch (SLJIT_Rn) and saved (SLJIT_Sn) registers. */
#define TMP1          SLJIT_R0
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_TOP     SLJIT_R1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* Local space layout. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet
been reached. The macros below expect a variable named "common" in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc; the table is indexed
by the distance of cc from common->start. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
506 
/* Select the sljit move opcodes used for one code unit, depending on which
of COMPILE_PCRE8 / COMPILE_PCRE16 / COMPILE_PCRE32 is defined. Compilation
stops with an error if none of them is defined. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR  SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
#elif defined COMPILE_PCRE16
#define MOV_UCHAR  SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
#elif defined COMPILE_PCRE32
#define MOV_UCHAR  SLJIT_MOV_UI
#define MOVU_UCHAR SLJIT_MOVU_UI
#else
#error Unsupported compiling mode
#endif
519 
/* Shortcuts. DEFINE_COMPILER declares a local variable named "compiler"
initialised from common->compiler. Every other macro here that expands to an
sljit call passes that "compiler" variable, so DEFINE_COMPILER (or an
equivalent declaration) must be in scope where they are used; SET_LABEL
needs only the jump and the label. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds the jump to it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* NOTE(review): presumably the "no upper limit" value for character
reading helpers - confirm at the users. */
#define READ_CHAR_MAX 0x7fffffff
547 
/* Returns a pointer to the first opcode after the bracket that starts at cc.
The alternatives are skipped one by one through their link value (GET(cc, 1))
until an opcode other than OP_ALT is reached; that opcode is asserted to be
in the OP_KET..OP_KETRPOS range and is stepped over (1 + LINK_SIZE units). */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first hop is unconditional, the following ones are taken only
while we land on another alternative. */
for (;;)
  {
  cc += GET(cc, 1);
  if (*cc != OP_ALT)
    break;
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
556 
/* Returns the number of alternatives of the bracket that starts at cc.
The result is at least 1: the opening opcode starts the first alternative,
and every OP_ALT reached through the link values adds one more. */
static int no_alternatives(pcre_uchar *cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Skip the first alternative, then count each further one. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  {
  alternatives++;
  cc += GET(cc, 1);
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
570 
/* Number of set bits in each 4 bit value: ones_in_half_byte[v] is the
population count of v for 0 <= v <= 15. The table is never written, so it
is declared const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
575 
/* Functions which might need modification for every newly supported opcode:
577  next_opcode
578  check_opcode_types
579  set_private_data_ptrs
580  get_framesize
581  init_frame
582  get_private_data_copy_length
583  copy_private_data
584  compile_matchingpath
585  compile_backtrackingpath
586 */
587 
/* Returns a pointer to the opcode that follows the one at cc.

common is consulted only in SUPPORT_UTF builds (its utf flag).
Returns NULL when the opcode cannot be handled: OP_ANYBYTE while
common->utf is set, or an opcode that is not listed here at all (the
latter also triggers SLJIT_ASSERT_STOP). */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
const pcre_uchar opcode = *cc;

SLJIT_UNUSED_ARG(common);

switch(opcode)
  {
  /* Opcodes whose length is taken directly from the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[opcode];

  /* Opcodes that end with a literal character: the table length, plus the
  extra code units of that character when it is multi-unit in UTF mode. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[opcode];
#ifdef SUPPORT_UTF
  /* cc[-1] is the last code unit covered by the table length. */
  if (common->utf && HAS_EXTRALEN(cc[-1]))
    cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */

  /* Type repeats: one code unit less than the table length (presumably so
  that the trailing type opcode is visited as an opcode of its own - confirm
  against the OP_lengths table). */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[opcode] - 1;

  /* Not handled in UTF mode; a single code unit otherwise. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* The total length is stored right after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] is added to a fixed part of 1 + 2 units. */
  case OP_MARK:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_ASSERT_STOP();
  return NULL;
  }
}
782 
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)783 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
784 {
785 int count;
786 pcre_uchar *slot;
787 
788 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
789 while (cc < ccend)
790   {
791   switch(*cc)
792     {
793     case OP_SET_SOM:
794     common->has_set_som = TRUE;
795     common->might_be_empty = TRUE;
796     cc += 1;
797     break;
798 
799     case OP_REF:
800     case OP_REFI:
801     common->optimized_cbracket[GET2(cc, 1)] = 0;
802     cc += 1 + IMM2_SIZE;
803     break;
804 
805     case OP_BRA:
806     case OP_CBRA:
807     case OP_SBRA:
808     case OP_SCBRA:
809     count = no_alternatives(cc);
810     if (count > 4)
811       common->read_only_data_size += count * sizeof(sljit_uw);
812     cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
813     break;
814 
815     case OP_CBRAPOS:
816     case OP_SCBRAPOS:
817     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818     cc += 1 + LINK_SIZE + IMM2_SIZE;
819     break;
820 
821     case OP_COND:
822     case OP_SCOND:
823     /* Only AUTO_CALLOUT can insert this opcode. We do
824        not intend to support this case. */
825     if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826       return FALSE;
827     cc += 1 + LINK_SIZE;
828     break;
829 
830     case OP_CREF:
831     common->optimized_cbracket[GET2(cc, 1)] = 0;
832     cc += 1 + IMM2_SIZE;
833     break;
834 
835     case OP_DNREF:
836     case OP_DNREFI:
837     case OP_DNCREF:
838     count = GET2(cc, 1 + IMM2_SIZE);
839     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
840     while (count-- > 0)
841       {
842       common->optimized_cbracket[GET2(slot, 0)] = 0;
843       slot += common->name_entry_size;
844       }
845     cc += 1 + 2 * IMM2_SIZE;
846     break;
847 
848     case OP_RECURSE:
849     /* Set its value only once. */
850     if (common->recursive_head_ptr == 0)
851       {
852       common->recursive_head_ptr = common->ovector_start;
853       common->ovector_start += sizeof(sljit_sw);
854       }
855     cc += 1 + LINK_SIZE;
856     break;
857 
858     case OP_CALLOUT:
859     if (common->capture_last_ptr == 0)
860       {
861       common->capture_last_ptr = common->ovector_start;
862       common->ovector_start += sizeof(sljit_sw);
863       }
864     cc += 2 + 2 * LINK_SIZE;
865     break;
866 
867     case OP_THEN_ARG:
868     common->has_then = TRUE;
869     common->control_head_ptr = 1;
870     /* Fall through. */
871 
872     case OP_PRUNE_ARG:
873     common->needs_start_ptr = TRUE;
874     /* Fall through. */
875 
876     case OP_MARK:
877     if (common->mark_ptr == 0)
878       {
879       common->mark_ptr = common->ovector_start;
880       common->ovector_start += sizeof(sljit_sw);
881       }
882     cc += 1 + 2 + cc[1];
883     break;
884 
885     case OP_THEN:
886     common->has_then = TRUE;
887     common->control_head_ptr = 1;
888     /* Fall through. */
889 
890     case OP_PRUNE:
891     case OP_SKIP:
892     common->needs_start_ptr = TRUE;
893     cc += 1;
894     break;
895 
896     case OP_SKIP_ARG:
897     common->control_head_ptr = 1;
898     common->has_skip_arg = TRUE;
899     cc += 1 + 2 + cc[1];
900     break;
901 
902     default:
903     cc = next_opcode(common, cc);
904     if (cc == NULL)
905       return FALSE;
906     break;
907     }
908   }
909 return TRUE;
910 }
911 
/* Returns a size (0, 1 or 2) for the class repeat opcode at cc:
   2 for OP_CRSTAR and OP_CRPLUS,
   1 for OP_CRMINSTAR, OP_CRMINPLUS, OP_CRQUERY and OP_CRMINQUERY,
   0 or 2 for OP_CRRANGE and OP_CRMINRANGE (0 when the two stored limits
     are equal),
   0 for every other opcode. */
static int get_class_iterator_size(pcre_uchar *cc)
{
const pcre_uchar repeat = *cc;

if (repeat == OP_CRSTAR || repeat == OP_CRPLUS)
  return 2;

if (repeat == OP_CRMINSTAR || repeat == OP_CRMINPLUS
    || repeat == OP_CRQUERY || repeat == OP_CRMINQUERY)
  return 1;

if (repeat == OP_CRRANGE || repeat == OP_CRMINRANGE)
  return (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) ? 0 : 2;

return 0;
}
936 
/* Checks whether the bracket starting at begin is followed by identical
copies of itself, i.e. whether the byte code contains an unrolled repeat.

When a repeat is found, three consecutive private_data_ptrs entries are
written, starting at the index (position - common->start - LINK_SIZE), where
position is either the end of the first bracket or max_end:
  [0] a length in code units (next_end - max_end, or max_end - end),
  [1] OP_EXACT, OP_UPTO or OP_MINUPTO,
  [2] the repeat count.

Returns TRUE if such data was stored by this call or by a previous call,
FALSE otherwise. */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
pcre_uchar *end = bracketend(begin);
pcre_uchar *next;
pcre_uchar *next_end;
pcre_uchar *max_end;
pcre_uchar type;
sljit_sw length = end - begin;
int min, max, i;

/* Detect fixed iterations first. */
/* Only brackets closed by a plain OP_KET are considered. */
if (end[-(1 + LINK_SIZE)] != OP_KET)
  return FALSE;

/* Already detected repeat. */
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
  return TRUE;

/* Count the consecutive copies: a copy must start with the same opcode,
have the same length and be identical code unit by code unit. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* Exactly two copies are not treated as a repeat. */
if (min == 2)
  return FALSE;

/* Look for a chain of optional copies after the mandatory ones. Each link
of the chain is "type OP_BRA copy", where type is the OP_BRAZERO or
OP_BRAMINZERO found first. */
max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The chain must end with "type copy" (no OP_BRA in between). */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* One OP_KET is expected for each OP_BRA opened above. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last mandatory copy is counted in the "max + 2" above, so it
        is removed from the fixed part: max_end steps back over one bracket,
        using the link value stored in its closing opcode. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

/* Three or more mandatory copies are recorded as an exact repeat. */
if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1021 
/* Case-label groups shared by the switch statements that deal with the
private data of character iterators (set_private_data_ptrs,
get_private_data_copy_length and copy_private_data). Each macro expands to a
run of "case" labels and must be followed by the statements of that case.
The numeric suffix matches the number of machine words that
set_private_data_ptrs reserves for the group. */

/* Single character iterators that get one private word. The opcode is
followed by the character (callers skip 2 code units plus any UTF extra
length). */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators that get two private words; same operand
layout as the group above. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators that get two private words and carry an
additional IMM2_SIZE operand before the character (callers skip
2 + IMM2_SIZE code units plus any UTF extra length). */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators that get one private word. Callers step over
the iterator opcode only (1 code unit). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators that get two private words, except that
set_private_data_ptrs reserves nothing when the following type opcode is
OP_ANYNL or OP_EXTUNI. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* As the 2A type group, but the iterator opcode is followed by an
IMM2_SIZE operand (callers skip 1 + IMM2_SIZE code units). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1073 
/* Assigns private data slots to the opcodes of the compiled pattern.

The byte code is scanned from common->start up to ccend. For every opcode
that needs private storage the next free offset is recorded in
common->private_data_ptrs (indexed by the position of the opcode relative to
common->start) and the offset is advanced by one sljit_sw per reserved word.

Arguments:
  common              compiler state; private_data_ptrs is updated
  private_data_start  in:  first free private data offset
                      out: first free offset after all assignments
  ccend               end of the byte code to scan

If the running offset grows beyond SLJIT_MAX_LOCAL_SIZE the scan stops, but
the offset reached so far is still written back, so that the caller can
observe that the limit has been exceeded. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
/* End of the bracket inside which character iterators must not receive
a private slot (NULL: no such bracket is active). */
pcre_uchar *end = NULL;
int private_data_ptr = *private_data_start;
int space, size, bracketlen;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  /* Stop scanning, but fall through to the write-back below: returning
  here would leave *private_data_start at its old value and hide the
  overflow from the caller. */
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET is the skip length stored by
    detect_repeat: the bracket gets a slot and the repeated copies of
    the bracket are stepped over. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always get one private word. */
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    /* One private word as well; the opcode has an extra IMM2_SIZE operand. */
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    /* For the iterators below "space" is the number of words to reserve
    and "size" the number of code units to skip; a negative size means
    that a character follows, whose UTF extra length must be skipped too. */
    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The space depends on the iterator that follows the class. */
    size += 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators which are not inside a repeated bracket
     get a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Entering a bracket outside the currently tracked one: track it,
    unless it is closed by a plain OP_KET. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1238 
1239 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL recursive,BOOL * needs_control_head)1240 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1241 {
1242 int length = 0;
1243 int possessive = 0;
1244 BOOL stack_restore = FALSE;
1245 BOOL setsom_found = recursive;
1246 BOOL setmark_found = recursive;
1247 /* The last capture is a local variable even for recursions. */
1248 BOOL capture_last_found = FALSE;
1249 
1250 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1251 SLJIT_ASSERT(common->control_head_ptr != 0);
1252 *needs_control_head = TRUE;
1253 #else
1254 *needs_control_head = FALSE;
1255 #endif
1256 
1257 if (ccend == NULL)
1258   {
1259   ccend = bracketend(cc) - (1 + LINK_SIZE);
1260   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1261     {
1262     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1263     /* This is correct regardless of common->capture_last_ptr. */
1264     capture_last_found = TRUE;
1265     }
1266   cc = next_opcode(common, cc);
1267   }
1268 
1269 SLJIT_ASSERT(cc != NULL);
1270 while (cc < ccend)
1271   switch(*cc)
1272     {
1273     case OP_SET_SOM:
1274     SLJIT_ASSERT(common->has_set_som);
1275     stack_restore = TRUE;
1276     if (!setsom_found)
1277       {
1278       length += 2;
1279       setsom_found = TRUE;
1280       }
1281     cc += 1;
1282     break;
1283 
1284     case OP_MARK:
1285     case OP_PRUNE_ARG:
1286     case OP_THEN_ARG:
1287     SLJIT_ASSERT(common->mark_ptr != 0);
1288     stack_restore = TRUE;
1289     if (!setmark_found)
1290       {
1291       length += 2;
1292       setmark_found = TRUE;
1293       }
1294     if (common->control_head_ptr != 0)
1295       *needs_control_head = TRUE;
1296     cc += 1 + 2 + cc[1];
1297     break;
1298 
1299     case OP_RECURSE:
1300     stack_restore = TRUE;
1301     if (common->has_set_som && !setsom_found)
1302       {
1303       length += 2;
1304       setsom_found = TRUE;
1305       }
1306     if (common->mark_ptr != 0 && !setmark_found)
1307       {
1308       length += 2;
1309       setmark_found = TRUE;
1310       }
1311     if (common->capture_last_ptr != 0 && !capture_last_found)
1312       {
1313       length += 2;
1314       capture_last_found = TRUE;
1315       }
1316     cc += 1 + LINK_SIZE;
1317     break;
1318 
1319     case OP_CBRA:
1320     case OP_CBRAPOS:
1321     case OP_SCBRA:
1322     case OP_SCBRAPOS:
1323     stack_restore = TRUE;
1324     if (common->capture_last_ptr != 0 && !capture_last_found)
1325       {
1326       length += 2;
1327       capture_last_found = TRUE;
1328       }
1329     length += 3;
1330     cc += 1 + LINK_SIZE + IMM2_SIZE;
1331     break;
1332 
1333     default:
1334     stack_restore = TRUE;
1335     /* Fall through. */
1336 
1337     case OP_NOT_WORD_BOUNDARY:
1338     case OP_WORD_BOUNDARY:
1339     case OP_NOT_DIGIT:
1340     case OP_DIGIT:
1341     case OP_NOT_WHITESPACE:
1342     case OP_WHITESPACE:
1343     case OP_NOT_WORDCHAR:
1344     case OP_WORDCHAR:
1345     case OP_ANY:
1346     case OP_ALLANY:
1347     case OP_ANYBYTE:
1348     case OP_NOTPROP:
1349     case OP_PROP:
1350     case OP_ANYNL:
1351     case OP_NOT_HSPACE:
1352     case OP_HSPACE:
1353     case OP_NOT_VSPACE:
1354     case OP_VSPACE:
1355     case OP_EXTUNI:
1356     case OP_EODN:
1357     case OP_EOD:
1358     case OP_CIRC:
1359     case OP_CIRCM:
1360     case OP_DOLL:
1361     case OP_DOLLM:
1362     case OP_CHAR:
1363     case OP_CHARI:
1364     case OP_NOT:
1365     case OP_NOTI:
1366 
1367     case OP_EXACT:
1368     case OP_POSSTAR:
1369     case OP_POSPLUS:
1370     case OP_POSQUERY:
1371     case OP_POSUPTO:
1372 
1373     case OP_EXACTI:
1374     case OP_POSSTARI:
1375     case OP_POSPLUSI:
1376     case OP_POSQUERYI:
1377     case OP_POSUPTOI:
1378 
1379     case OP_NOTEXACT:
1380     case OP_NOTPOSSTAR:
1381     case OP_NOTPOSPLUS:
1382     case OP_NOTPOSQUERY:
1383     case OP_NOTPOSUPTO:
1384 
1385     case OP_NOTEXACTI:
1386     case OP_NOTPOSSTARI:
1387     case OP_NOTPOSPLUSI:
1388     case OP_NOTPOSQUERYI:
1389     case OP_NOTPOSUPTOI:
1390 
1391     case OP_TYPEEXACT:
1392     case OP_TYPEPOSSTAR:
1393     case OP_TYPEPOSPLUS:
1394     case OP_TYPEPOSQUERY:
1395     case OP_TYPEPOSUPTO:
1396 
1397     case OP_CLASS:
1398     case OP_NCLASS:
1399     case OP_XCLASS:
1400 
1401     cc = next_opcode(common, cc);
1402     SLJIT_ASSERT(cc != NULL);
1403     break;
1404     }
1405 
1406 /* Possessive quantifiers can use a special case. */
1407 if (SLJIT_UNLIKELY(possessive == length))
1408   return stack_restore ? no_frame : no_stack;
1409 
1410 if (length > 0)
1411   return length + 1;
1412 return stack_restore ? no_frame : no_stack;
1413 }
1414 
init_frame(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,int stackpos,int stacktop,BOOL recursive)1415 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1416 {
1417 DEFINE_COMPILER;
1418 BOOL setsom_found = recursive;
1419 BOOL setmark_found = recursive;
1420 /* The last capture is a local variable even for recursions. */
1421 BOOL capture_last_found = FALSE;
1422 int offset;
1423 
1424 /* >= 1 + shortest item size (2) */
1425 SLJIT_UNUSED_ARG(stacktop);
1426 SLJIT_ASSERT(stackpos >= stacktop + 2);
1427 
1428 stackpos = STACK(stackpos);
1429 if (ccend == NULL)
1430   {
1431   ccend = bracketend(cc) - (1 + LINK_SIZE);
1432   if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1433     cc = next_opcode(common, cc);
1434   }
1435 
1436 SLJIT_ASSERT(cc != NULL);
1437 while (cc < ccend)
1438   switch(*cc)
1439     {
1440     case OP_SET_SOM:
1441     SLJIT_ASSERT(common->has_set_som);
1442     if (!setsom_found)
1443       {
1444       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1445       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1446       stackpos += (int)sizeof(sljit_sw);
1447       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1448       stackpos += (int)sizeof(sljit_sw);
1449       setsom_found = TRUE;
1450       }
1451     cc += 1;
1452     break;
1453 
1454     case OP_MARK:
1455     case OP_PRUNE_ARG:
1456     case OP_THEN_ARG:
1457     SLJIT_ASSERT(common->mark_ptr != 0);
1458     if (!setmark_found)
1459       {
1460       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1461       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1462       stackpos += (int)sizeof(sljit_sw);
1463       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1464       stackpos += (int)sizeof(sljit_sw);
1465       setmark_found = TRUE;
1466       }
1467     cc += 1 + 2 + cc[1];
1468     break;
1469 
1470     case OP_RECURSE:
1471     if (common->has_set_som && !setsom_found)
1472       {
1473       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1474       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1475       stackpos += (int)sizeof(sljit_sw);
1476       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1477       stackpos += (int)sizeof(sljit_sw);
1478       setsom_found = TRUE;
1479       }
1480     if (common->mark_ptr != 0 && !setmark_found)
1481       {
1482       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1483       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1484       stackpos += (int)sizeof(sljit_sw);
1485       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486       stackpos += (int)sizeof(sljit_sw);
1487       setmark_found = TRUE;
1488       }
1489     if (common->capture_last_ptr != 0 && !capture_last_found)
1490       {
1491       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1492       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1493       stackpos += (int)sizeof(sljit_sw);
1494       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495       stackpos += (int)sizeof(sljit_sw);
1496       capture_last_found = TRUE;
1497       }
1498     cc += 1 + LINK_SIZE;
1499     break;
1500 
1501     case OP_CBRA:
1502     case OP_CBRAPOS:
1503     case OP_SCBRA:
1504     case OP_SCBRAPOS:
1505     if (common->capture_last_ptr != 0 && !capture_last_found)
1506       {
1507       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1508       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1509       stackpos += (int)sizeof(sljit_sw);
1510       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1511       stackpos += (int)sizeof(sljit_sw);
1512       capture_last_found = TRUE;
1513       }
1514     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1515     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1516     stackpos += (int)sizeof(sljit_sw);
1517     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1518     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1519     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520     stackpos += (int)sizeof(sljit_sw);
1521     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1522     stackpos += (int)sizeof(sljit_sw);
1523 
1524     cc += 1 + LINK_SIZE + IMM2_SIZE;
1525     break;
1526 
1527     default:
1528     cc = next_opcode(common, cc);
1529     SLJIT_ASSERT(cc != NULL);
1530     break;
1531     }
1532 
1533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1534 SLJIT_ASSERT(stackpos == STACK(stacktop));
1535 }
1536 
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1537 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1538 {
1539 int private_data_length = needs_control_head ? 3 : 2;
1540 int size;
1541 pcre_uchar *alternative;
1542 /* Calculate the sum of the private machine words. */
1543 while (cc < ccend)
1544   {
1545   size = 0;
1546   switch(*cc)
1547     {
1548     case OP_KET:
1549     if (PRIVATE_DATA(cc) != 0)
1550       private_data_length++;
1551     cc += 1 + LINK_SIZE;
1552     break;
1553 
1554     case OP_ASSERT:
1555     case OP_ASSERT_NOT:
1556     case OP_ASSERTBACK:
1557     case OP_ASSERTBACK_NOT:
1558     case OP_ONCE:
1559     case OP_ONCE_NC:
1560     case OP_BRAPOS:
1561     case OP_SBRA:
1562     case OP_SBRAPOS:
1563     case OP_SCOND:
1564     private_data_length++;
1565     cc += 1 + LINK_SIZE;
1566     break;
1567 
1568     case OP_CBRA:
1569     case OP_SCBRA:
1570     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1571       private_data_length++;
1572     cc += 1 + LINK_SIZE + IMM2_SIZE;
1573     break;
1574 
1575     case OP_CBRAPOS:
1576     case OP_SCBRAPOS:
1577     private_data_length += 2;
1578     cc += 1 + LINK_SIZE + IMM2_SIZE;
1579     break;
1580 
1581     case OP_COND:
1582     /* Might be a hidden SCOND. */
1583     alternative = cc + GET(cc, 1);
1584     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1585       private_data_length++;
1586     cc += 1 + LINK_SIZE;
1587     break;
1588 
1589     CASE_ITERATOR_PRIVATE_DATA_1
1590     if (PRIVATE_DATA(cc))
1591       private_data_length++;
1592     cc += 2;
1593 #ifdef SUPPORT_UTF
1594     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1595 #endif
1596     break;
1597 
1598     CASE_ITERATOR_PRIVATE_DATA_2A
1599     if (PRIVATE_DATA(cc))
1600       private_data_length += 2;
1601     cc += 2;
1602 #ifdef SUPPORT_UTF
1603     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1604 #endif
1605     break;
1606 
1607     CASE_ITERATOR_PRIVATE_DATA_2B
1608     if (PRIVATE_DATA(cc))
1609       private_data_length += 2;
1610     cc += 2 + IMM2_SIZE;
1611 #ifdef SUPPORT_UTF
1612     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1613 #endif
1614     break;
1615 
1616     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1617     if (PRIVATE_DATA(cc))
1618       private_data_length++;
1619     cc += 1;
1620     break;
1621 
1622     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1623     if (PRIVATE_DATA(cc))
1624       private_data_length += 2;
1625     cc += 1;
1626     break;
1627 
1628     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1629     if (PRIVATE_DATA(cc))
1630       private_data_length += 2;
1631     cc += 1 + IMM2_SIZE;
1632     break;
1633 
1634     case OP_CLASS:
1635     case OP_NCLASS:
1636 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1637     case OP_XCLASS:
1638     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1639 #else
1640     size = 1 + 32 / (int)sizeof(pcre_uchar);
1641 #endif
1642     if (PRIVATE_DATA(cc))
1643       private_data_length += get_class_iterator_size(cc + size);
1644     cc += size;
1645     break;
1646 
1647     default:
1648     cc = next_opcode(common, cc);
1649     SLJIT_ASSERT(cc != NULL);
1650     break;
1651     }
1652   }
1653 SLJIT_ASSERT(cc == ccend);
1654 return private_data_length;
1655 }
1656 
/* Emits the code which copies the private data words of the opcodes between
cc and ccend to the stack (save is TRUE) or back from the stack (save is
FALSE).

Arguments:
  common              compiler state
  cc, ccend           byte code range whose private words are copied
  save                TRUE: locals -> stack, FALSE: stack -> locals
  stackptr, stacktop  stack slot indexes; they are converted to offsets with
                      STACK() and the copy must end exactly at the converted
                      stacktop (asserted at the end)
  needs_control_head  when saving, common->control_head_ptr is saved as well

The words are moved through TMP1 and TMP2 in turns: each load into a register
is separated from the store of its previous content by the handling of the
other register. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Offsets of the (at most two) locals selected by the current step. */
int srcw[2];
int count, size;
/* Which register is used by the next transfer, and whether the registers
currently hold a word that has not been written out yet. */
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
/* start: select the head pointer(s), only used when saving;
   loop:  select the private words of the next opcode;
   end:   the byte code range is exhausted. */
enum {
  start,
  loop,
  end
} status;

status = save ? start : loop;
stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* The one or two words written first in save mode (the "start" state) are
  skipped: they are not copied back by this function. */
  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  /* Preload the registers with the first two saved words, if there are any. */
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

do
  {
  /* Each iteration selects 0, 1 or 2 locals (count, srcw), which are
  transferred by the inner loop at the bottom. */
  count = 0;
  switch(status)
    {
    case start:
    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[1] = common->control_head_ptr;
      }
    status = loop;
    break;

    case loop:
    if (cc >= ccend)
      {
      status = end;
      break;
      }

    /* The cases below mirror get_private_data_copy_length. */
    switch(*cc)
      {
      case OP_KET:
      if (PRIVATE_DATA(cc) != 0)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1 + LINK_SIZE;
      break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      /* Only capturing brackets which are not marked as optimized
      have a private word. */
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      /* Iterators: copied only if a private slot was assigned; two word
      iterators occupy consecutive words. */
      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* The word count depends on the iterator which follows the class. */
      if (PRIVATE_DATA(cc))
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_ASSERT_STOP();
          break;
          }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    break;

    case end:
    /* The loop terminates as soon as the end state is reached. */
    SLJIT_ASSERT_STOP();
    break;
    }

  /* Transfer the selected words, the last one (srcw[count - 1]) first. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Write out the previous content of the register (if any),
      then load the next local into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Store the preloaded register into the local, then refill the
      register from the stack unless all saved words have been read. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the words still held in the registers, oldest first: the register
  which would be used next holds the older word. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
/* The whole range was processed, the stack area was used exactly, and in
restore mode nothing is left in the registers. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
1976 
set_then_offsets(compiler_common * common,pcre_uchar * cc,pcre_uint8 * current_offset)1977 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1978 {
1979 pcre_uchar *end = bracketend(cc);
1980 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1981 
1982 /* Assert captures then. */
1983 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1984   current_offset = NULL;
1985 /* Conditional block does not. */
1986 if (*cc == OP_COND || *cc == OP_SCOND)
1987   has_alternatives = FALSE;
1988 
1989 cc = next_opcode(common, cc);
1990 if (has_alternatives)
1991   current_offset = common->then_offsets + (cc - common->start);
1992 
1993 while (cc < end)
1994   {
1995   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1996     cc = set_then_offsets(common, cc, current_offset);
1997   else
1998     {
1999     if (*cc == OP_ALT && has_alternatives)
2000       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2001     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2002       *current_offset = 1;
2003     cc = next_opcode(common, cc);
2004     }
2005   }
2006 
2007 return end;
2008 }
2009 
2010 #undef CASE_ITERATOR_PRIVATE_DATA_1
2011 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2012 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2013 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2014 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2015 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2016 
is_powerof2(unsigned int value)2017 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2018 {
2019 return (value & (value - 1)) == 0;
2020 }
2021 
set_jumps(jump_list * list,struct sljit_label * label)2022 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2023 {
2024 while (list)
2025   {
2026   /* sljit_set_label is clever enough to do nothing
2027   if either the jump or the label is NULL. */
2028   SET_LABEL(list->jump, label);
2029   list = list->next;
2030   }
2031 }
2032 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2033 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
2034 {
2035 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2036 if (list_item)
2037   {
2038   list_item->next = *list;
2039   list_item->jump = jump;
2040   *list = list_item;
2041   }
2042 }
2043 
/* Records a stub for the jump start: a label is emitted at the current
position as the point where the stub continues (quit), and the new entry is
prepended to common->stubs. Nothing happens if the allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2057 
/* Emits the code of all recorded stubs and empties the stub list. Each stub
starts where its start jump lands, performs a fast call (the jump is added to
common->stackalloc), and then jumps back to its quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;
struct sljit_jump *alloc_call;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  alloc_call = JUMP(SLJIT_FAST_CALL);
  add_jump(compiler, &common->stackalloc, alloc_call);
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2072 
/* Emits a label at the current position and records it together with
update_addr at the head of common->label_addrs. Nothing is emitted or
recorded if the allocation fails. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2086 
count_match(compiler_common * common)2087 static SLJIT_INLINE void count_match(compiler_common *common)
2088 {
2089 DEFINE_COMPILER;
2090 
2091 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2092 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2093 }
2094 
allocate_stack(compiler_common * common,int size)2095 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2096 {
2097 /* May destroy all locals and registers except TMP2. */
2098 DEFINE_COMPILER;
2099 
2100 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2101 #ifdef DESTROY_REGISTERS
2102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2103 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2104 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2107 #endif
2108 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2109 }
2110 
free_stack(compiler_common * common,int size)2111 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2112 {
2113 DEFINE_COMPILER;
2114 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2115 }
2116 
reset_ovector(compiler_common * common,int length)2117 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2118 {
2119 DEFINE_COMPILER;
2120 struct sljit_label *loop;
2121 int i;
2122 
2123 /* At this point we can freely use all temporary registers. */
2124 SLJIT_ASSERT(length > 1);
2125 /* TMP1 returns with begin - 1. */
2126 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2127 if (length < 8)
2128   {
2129   for (i = 1; i < length; i++)
2130     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2131   }
2132 else
2133   {
2134   GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2135   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2136   loop = LABEL();
2137   OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2138   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2139   JUMPTO(SLJIT_C_NOT_ZERO, loop);
2140   }
2141 }
2142 
do_reset_match(compiler_common * common,int length)2143 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2144 {
2145 DEFINE_COMPILER;
2146 struct sljit_label *loop;
2147 int i;
2148 
2149 SLJIT_ASSERT(length > 1);
2150 /* OVECTOR(1) contains the "string begin - 1" constant. */
2151 if (length > 2)
2152   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2153 if (length < 8)
2154   {
2155   for (i = 2; i < length; i++)
2156     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2157   }
2158 else
2159   {
2160   GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2161   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2162   loop = LABEL();
2163   OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2164   OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2165   JUMPTO(SLJIT_C_NOT_ZERO, loop);
2166   }
2167 
2168 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2169 if (common->mark_ptr != 0)
2170   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2171 if (common->control_head_ptr != 0)
2172   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2173 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2175 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2176 }
2177 
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2178 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2179 {
2180 while (current != NULL)
2181   {
2182   switch (current[-2])
2183     {
2184     case type_then_trap:
2185     break;
2186 
2187     case type_mark:
2188     if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2189       return current[-4];
2190     break;
2191 
2192     default:
2193     SLJIT_ASSERT_STOP();
2194     break;
2195     }
2196   current = (sljit_sw*)current[-1];
2197   }
2198 return -1;
2199 }
2200 
copy_ovector(compiler_common * common,int topbracket)2201 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2202 {
2203 DEFINE_COMPILER;
2204 struct sljit_label *loop;
2205 struct sljit_jump *early_quit;
2206 
2207 /* At this point we can freely use all registers. */
2208 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2210 
2211 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2212 if (common->mark_ptr != 0)
2213   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2214 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2215 if (common->mark_ptr != 0)
2216   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2217 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2218 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2219 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2220 /* Unlikely, but possible */
2221 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2222 loop = LABEL();
2223 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2224 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2225 /* Copy the integer value to the output buffer */
2226 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2227 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2228 #endif
2229 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2230 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2231 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2232 JUMPHERE(early_quit);
2233 
2234 /* Calculate the return value, which is the maximum ovector value. */
2235 if (topbracket > 1)
2236   {
2237   GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2238   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2239 
2240   /* OVECTOR(0) is never equal to SLJIT_S2. */
2241   loop = LABEL();
2242   OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2243   OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2244   CMPTO(SLJIT_C_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2245   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2246   }
2247 else
2248   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2249 }
2250 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)2251 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2252 {
2253 DEFINE_COMPILER;
2254 struct sljit_jump *jump;
2255 
2256 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2257 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2258   && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2259 
2260 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2262 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2263 CMPTO(SLJIT_C_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2264 
2265 /* Store match begin and end. */
2266 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2267 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2268 
2269 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2270 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2271 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2272 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2273 #endif
2274 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2275 JUMPHERE(jump);
2276 
2277 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2278 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2279 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2280 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2281 #endif
2282 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2283 
2284 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2285 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2286 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2287 #endif
2288 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2289 
2290 JUMPTO(SLJIT_JUMP, quit);
2291 }
2292 
check_start_used_ptr(compiler_common * common)2293 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2294 {
2295 /* May destroy TMP1. */
2296 DEFINE_COMPILER;
2297 struct sljit_jump *jump;
2298 
2299 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2300   {
2301   /* The value of -1 must be kept for start_used_ptr! */
2302   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2303   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2304   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2305   jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2306   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2307   JUMPHERE(jump);
2308   }
2309 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2310   {
2311   jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2312   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2313   JUMPHERE(jump);
2314   }
2315 }
2316 
char_has_othercase(compiler_common * common,pcre_uchar * cc)2317 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2318 {
2319 /* Detects if the character has an othercase. */
2320 unsigned int c;
2321 
2322 #ifdef SUPPORT_UTF
2323 if (common->utf)
2324   {
2325   GETCHAR(c, cc);
2326   if (c > 127)
2327     {
2328 #ifdef SUPPORT_UCP
2329     return c != UCD_OTHERCASE(c);
2330 #else
2331     return FALSE;
2332 #endif
2333     }
2334 #ifndef COMPILE_PCRE8
2335   return common->fcc[c] != c;
2336 #endif
2337   }
2338 else
2339 #endif
2340   c = *cc;
2341 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2342 }
2343 
char_othercase(compiler_common * common,unsigned int c)2344 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2345 {
2346 /* Returns with the othercase. */
2347 #ifdef SUPPORT_UTF
2348 if (common->utf && c > 127)
2349   {
2350 #ifdef SUPPORT_UCP
2351   return UCD_OTHERCASE(c);
2352 #else
2353   return c;
2354 #endif
2355   }
2356 #endif
2357 return TABLE_GET(c, common->fcc, c);
2358 }
2359 
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 when they do not. Otherwise the low 8 bits of the result
hold the differing bit as it appears inside one byte of the encoded
character, and the bits above hold the index of that byte. The caller must
pass a character which has an other case (asserted below). */
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
{
unsigned int chr, other, diff;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int index;
#endif

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(chr, cc);
  if (chr > 127)
    {
#ifdef SUPPORT_UCP
    other = UCD_OTHERCASE(chr);
#else
    other = chr;
#endif
    }
  else
    other = common->fcc[chr];
  }
else
#endif
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }

SLJIT_ASSERT(chr != other);

diff = chr ^ other;
/* Fast path for the English alphabet: byte 0, bit 0x20. */
if (chr <= 127 && diff == 0x20)
  return (0 << 8) | 0x20;

/* chr != other, so at least one bit is set; reject more than one. */
if (!is_powerof2(diff))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && chr > 127)
  {
  /* Each trailing byte carries six bits of the character. Start at the
  last byte and move towards the first one until the bit is found. */
  index = GET_EXTRALEN(*cc);
  while ((diff & 0x3f) == 0)
    {
    index--;
    diff >>= 6;
    }
  return (index << 8) | diff;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | diff;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && chr > 65535)
  {
  /* Differences below bit 10 are reported with byte index 2 or 3;
  anything higher is shifted down by 10 and handled by the common
  return below (byte index 0 or 1). */
  if (diff < (1 << 10))
    {
    if (diff < 256)
      return (2 << 8) | diff;
    return (3 << 8) | (diff >> 8);
    }
  diff >>= 10;
  }
#endif /* SUPPORT_UTF */
if (diff < 256)
  return (0 << 8) | diff;
return (1 << 8) | (diff >> 8);

#endif /* COMPILE_PCRE[8|16|32] */
}
2435 
/* Emits the check for a partial match. Nothing is emitted in normal
(JIT_COMPILE) mode. In soft partial mode the hit_start local is cleared; in
hard partial mode control is transferred to the partial match exit. Unless
force is set, the action is skipped when start_used_ptr >= STR_PTR; with
force it is skipped only in soft mode when start_used_ptr is -1. The emitted
code does not modify registers. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
int soft_mode = (common->mode == JIT_PARTIAL_SOFT_COMPILE);

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (force)
  {
  if (soft_mode)
    skip = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
2465 
/* Emits the end-of-subject check. When STR_PTR >= STR_END the generated
code leaves through the end_reached list; in the partial matching modes it
first records the partial hit (soft mode) or goes to the partial match exit
(hard mode), unless start_used_ptr >= STR_PTR. The emitted code does not
affect registers. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
/* Both partial modes start with the same start_used_ptr comparison. */
add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
JUMPHERE(not_at_end);
}
2495 
/* Emits the end-of-subject check used on matching paths. When STR_PTR >=
STR_END the generated code leaves through the backtracks list; in the
partial matching modes it first records the partial hit (soft mode) or goes
to the partial match exit (hard mode), unless start_used_ptr >= STR_PTR. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching modes from here on. */
not_at_end = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
JUMPHERE(not_at_end);
}
2524 
/* Emits code which reads the character at STR_PTR into TMP1 and leaves
STR_PTR unchanged. STR_END is not checked and TMP2 is destroyed. The max
argument is the largest character value the caller cares about: when it is
too small for a multi-unit sequence to matter (below 128 in UTF-8, below
0xd800 in UTF-16) only the first code unit is loaded. */
static void peek_char(compiler_common *common, pcre_uint32 max)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* The utfreadchar helper is entered with STR_PTR after the first byte.
  It advances STR_PTR over the rest of the character and reports the total
  length in TMP2, which is subtracted here to restore STR_PTR. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR has not been advanced in
  this function, so the low surrogate is the NEXT code unit: it must be
  loaded from offset 1. Offset 0 would reload the high surrogate and
  produce a wrong code point. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2567 
2568 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2569 
/* Tells whether the character codes below 128 are enough to decide a class
match: returns TRUE when bytes 16..31 of the 32 byte bitset are all 0xff
(nclass set) or all zero (nclass clear). */
static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
{
const pcre_uint8 expected = nclass ? 0xff : 0;
int index;

for (index = 16; index < 32; index++)
  {
  if (bitset[index] != expected)
    return FALSE;
  }
return TRUE;
}
2586 
/* Emits code which reads one byte, advances STR_PTR by one and loads the
ctypes entry of that byte into TMP1. The original note states that the
precise type is obtained for characters below 128 and zero otherwise.
STR_END is not checked. When full_read is set, the remaining bytes of a
multi-byte character (first byte >= 0xc0) are skipped as well, using
utf8_table4. TMP2 is destroyed. */
static void read_char7_type(compiler_common *common, BOOL full_read)
{
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

/* Skip the trailing bytes: utf8_table4 is indexed by first byte - 0xc0. */
single_byte = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_byte);
}
2610 
2611 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2612 
/* Emits code which reads a character into TMP1 and advances STR_PTR. The
value is precise when the character lies in [min, max]; for any other
character TMP1 receives some value outside that range. STR_END is not
checked. In the UTF modes update_str_ptr requests that STR_PTR is moved past
the whole character even when it is outside the range. TMP2 (and RETURN_ADDR
in some UTF-8 forms) is used as scratch. */
static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *done;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *out_of_range;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

/* First code unit; STR_PTR now points to the second one. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128 && !update_str_ptr) return;

  /* Bytes below 0xc0 do not start a multi-byte sequence. */
  done = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences (first byte 0xf0..0xf7) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    out_of_range = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(out_of_range);
    /* With update_str_ptr the table supplied length is added on every
    path, including the out of range one. */
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences (first byte 0xe0..0xef) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    out_of_range = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(out_of_range);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* General case: call the shared decoder; the shorter variant is
    enough when four byte sequences cannot be in range. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* Reached only with update_str_ptr set: just skip the extra bytes. */
    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is in 128..0x7ff: decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(done);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Surrogate pairs are in range: combine them into one code point. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    done = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate; STR_PTR is at the low one. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(done);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Surrogate pairs are out of range: step over the low surrogate when
  requested, and replace the value by 0x10000 when a high surrogate
  could otherwise be mistaken for an in-range character. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  done = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(done);
  }
#endif
}
2740 
read_char(compiler_common * common)2741 static SLJIT_INLINE void read_char(compiler_common *common)
2742 {
2743 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2744 }
2745 
/* Emits code which reads a character, loads its ctypes entry into TMP1 and
advances STR_PTR. The ctypes table has 256 entries, so characters above 255
yield zero in the paths emitted directly here. STR_END is not checked and
TMP2 is destroyed. In the UTF modes update_str_ptr selects whether STR_PTR
has to be moved past a complete multi-unit character. */
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *skip;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *above_255;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

/* First code unit goes to TMP2; STR_PTR moves to the next one. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* The table is read before the first byte is classified. This load is
  wasted for multi-byte characters, but those are expected to be rare. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  skip = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (update_str_ptr)
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  else
    {
    /* Decode as a two byte sequence and look the result up when it
    fits into the table; the type is zero otherwise. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    above_255 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(above_255);
    }
  JUMPHERE(skip);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* Wide code units: only values up to 255 have a ctypes entry. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
skip = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(skip);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* A high surrogate was read: step over the low surrogate as well. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  skip = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(skip);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2810 
/* Emits code which moves STR_PTR back by one character. The subject start
is not checked. TMP1 is destroyed in the UTF-8 and UTF-16 forms. */
static void skip_char_back(compiler_common *common)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *step_back;

if (common->utf)
  {
  /* Keep stepping back while the byte just passed is a continuation
  byte, i.e. its top two bits are 10. */
  step_back = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, step_back);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* When the unit just passed is a low surrogate (0xdc00..0xdfff), step
  back one more unit. This is done without a branch: the equality flag is
  turned into 0 or 1, doubled into a byte offset and subtracted. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed width case: a character is exactly one code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
2845 
/* Emits a newline test for the character held in TMP1. A jump is added to
the backtracks list which is taken when the character is a newline
(jumpifmatch set) or when it is not (jumpifmatch clear). TMP2 may be
destroyed. */
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper leaves its answer in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: either CR or NL counts as a newline. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

is_cr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
2877 
2878 #ifdef SUPPORT_UTF
2879 
2880 #if defined COMPILE_PCRE8
/* Emits the shared helper which decodes a multi-byte UTF-8 character. It is
entered by a fast call with the first byte (>= 0xc0) in TMP1 and STR_PTR
pointing to the second byte. On return TMP1 holds the character value, TMP2
its length, and STR_PTR points after the character. */
static void do_utfreadchar(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first and the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The length is found by searching for the first zero among the length
bits of the first byte. Bit 0x20 of that byte is now at 0x800. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_C_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(longer);
/* Drop the length bit just tested and append the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10 of the first byte is now at 0x10000. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_C_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence: drop the length bit and append the fourth byte. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
2928 
do_utfreadchar16(compiler_common * common)2929 static void do_utfreadchar16(compiler_common *common)
2930 {
2931 /* Fast decoding of a UTF-8 character, reduced variant of do_utfreadchar.
On entry TMP1 contains the first byte of the character (>= 0xc0); the
continuation bytes are loaded relative to STR_PTR. The value is returned in
TMP1 and, unlike do_utfreadchar, no length is returned in TMP2 (TMP2 is only
used as scratch). There is no separate four-byte decoding path here: when
bit 0x400 of the partially decoded value is set, STR_PTR is advanced by one
extra unit and the three-byte combination is still performed.
NOTE(review): presumably callers only rely on the exact value for characters
that fit in 16 bits - confirm at the call sites. */
2933 DEFINE_COMPILER;
2934 struct sljit_jump *jump;
2935
2936 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first byte & 0x3f) << 6) | (first continuation byte & 0x3f). */
2937 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2938 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2939 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2940 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2941 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2942
2943 /* Searching for the first zero. Bit 0x800 of TMP1 is bit 5 of the first
byte; when it is clear the sequence is two bytes long. */
2944 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2945 jump = JUMP(SLJIT_C_NOT_ZERO);
2946 /* Two byte sequence. */
2947 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2948 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2949
2950 JUMPHERE(jump);
/* Bit 0x400 of TMP1 is bit 4 of the first byte. The outcome of the test is
stored in TMP2 as a flag value and added to STR_PTR. */
2951 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2952 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2953 /* This code runs only in 8 bit mode. No need to shift the value. */
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Remove the marker bit (0x800) and append the low six bits of the unit at
STR_PTR + 1. */
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960 /* Three byte sequence. */
2961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2962 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2963 }
2964 
do_utfreadtype8(compiler_common * common)2965 static void do_utfreadtype8(compiler_common *common)
2966 {
2967 /* Fast decoding of a UTF-8 character type. On entry TMP2 contains the
first byte of the character (>= 0xc0). The value returned in TMP1 is either
a byte loaded from the table at common->ctypes (two-byte sequences whose
value is below 256) or 0 (every other case). STR_PTR is advanced past the
remaining bytes of the character; TMP2 is clobbered. */
2969 DEFINE_COMPILER;
2970 struct sljit_jump *jump;
2971 struct sljit_jump *compare;
2972
2973 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2974
/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
2975 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2976 jump = JUMP(SLJIT_C_NOT_ZERO);
2977 /* Two byte sequence. */
2978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2980 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2981 /* The upper 5 bits are known at this point. If they are greater than 3,
the character value is at least 256 and has no entry in the type table. */
2982 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
/* TMP2 = character value (below 256), used as the index into ctypes. */
2983 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2984 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2985 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2986 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2987 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2988
/* Two byte sequence with a value of 256 or more: the type is 0. */
2989 JUMPHERE(compare);
2990 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2991 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2992
2993 /* We only have types for characters less than 256. For longer sequences
the amount to skip is loaded from utf8_table4, indexed by (first byte - 0xc0),
and the returned type is 0. */
2994 JUMPHERE(jump);
2995 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2998 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999 }
3000 
3001 #endif /* COMPILE_PCRE8 */
3002 
3003 #endif /* SUPPORT_UTF */
3004 
3005 #ifdef SUPPORT_UCP
3006 
3007 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3008 #define UCD_BLOCK_MASK 127
3009 #define UCD_BLOCK_SHIFT 7
3010 
do_getucd(compiler_common * common)3011 static void do_getucd(compiler_common *common)
3012 {
3013 /* Looks up the UCD record of the character passed in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2. The lookup is done in two
stages: ucd_stage1 is indexed by the block number (character value shifted
right by UCD_BLOCK_SHIFT), and ucd_stage2 is indexed by that result shifted
left by UCD_BLOCK_SHIFT plus the position inside the block (character value
masked with UCD_BLOCK_MASK). */
3015 DEFINE_COMPILER;
3016
/* The shift amounts used below (7 for the block size, 3 for the record size)
depend on these two values. */
3017 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3018
3019 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (byte load). */
3020 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3021 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT). */
3022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3023 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3024 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (half-word load, index scaled by a shift of 1). */
3025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3026 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype (byte load, index scaled by a shift of 3,
which matches the asserted record size of 8). */
3027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3028 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3029 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3030 }
3031 #endif
3032 
mainloop_entry(compiler_common * common,BOOL hascrorlf,BOOL firstline)3033 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3034 {
/* Emits the code that moves STR_PTR to the next start position and returns
the label of that code ("mainloop").

  common      compiler state (newline settings, utf flag, first_line_end)
  hascrorlf   when set (or when firstline is set) no extra newline check is
              emitted
  firstline   when set, code which finds the end of the first line and stores
              it at common->first_line_end is emitted first

The extra newline check (newlinecheck) is emitted only when neither flag is
set and the newline type is NLTYPE_ANY, NLTYPE_ANYCRLF or common->newline is
above 255. An unconditional jump ("start") is emitted before the loop and is
resolved after the increment code, so the code emitted before the returned
label does not perform an increment on the first pass. */
3035 DEFINE_COMPILER;
3036 struct sljit_label *mainloop;
3037 struct sljit_label *newlinelabel = NULL;
3038 struct sljit_jump *start;
3039 struct sljit_jump *end = NULL;
3040 struct sljit_jump *nl = NULL;
3041 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3042 struct sljit_jump *singlechar;
3043 #endif
3044 jump_list *newline = NULL;
3045 BOOL newlinecheck = FALSE;
3046 BOOL readuchar = FALSE;
3047
3048 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3049     common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3050   newlinecheck = TRUE;
3051
3052 if (firstline)
3053   {
3054   /* Search for the end of the first line. STR_PTR is saved in TMP3 and
  restored after the search. */
3055   SLJIT_ASSERT(common->first_line_end != 0);
3056   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3057
3058   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3059     {
    /* Fixed two-unit newline: advance one unit at a time and compare the
    previous and the current unit with the high and low byte of
    common->newline. The stored position is STR_PTR minus one unit. */
3060     mainloop = LABEL();
3061     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3062     end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3063     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3064     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3065     CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3066     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3067     JUMPHERE(end);
3068     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3069     }
3070   else
3071     {
    /* Other newline types: the position before each character is stored,
    then the character is read and tested with check_newlinechar. A match
    leaves the loop through the "newline" jump list, which skips the final
    store executed when the end of the subject is reached. */
3072     end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3073     mainloop = LABEL();
3074     /* Continual stores do not cause data dependency. */
3075     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3076     read_char_range(common, common->nlmin, common->nlmax, TRUE);
3077     check_newlinechar(common, common->nltype, &newline, TRUE);
3078     CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3079     JUMPHERE(end);
3080     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3081     set_jumps(newline, LABEL());
3082     }
3083
3084   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3085   }
3086
/* Skip the increment code below when this code is reached by falling
through; the jump is resolved at JUMPHERE(start). */
3087 start = JUMP(SLJIT_JUMP);
3088
3089 if (newlinecheck)
3090   {
  /* Reached from the main loop when the current unit equals the high byte
  of common->newline. STR_PTR is advanced by one unit and, unless the end of
  the subject is reached, by one more unit when the next unit equals the low
  byte of common->newline. */
3091   newlinelabel = LABEL();
3092   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3093   end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3094   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3095   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3096   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3097 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Convert the flag value into a byte offset. */
3098   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3099 #endif
3100   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3101   nl = JUMP(SLJIT_JUMP);
3102   }
3103
3104 mainloop = LABEL();
3105
3106 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3107 #ifdef SUPPORT_UTF
3108 if (common->utf) readuchar = TRUE;
3109 #endif
3110 if (newlinecheck) readuchar = TRUE;
3111
/* The current unit is only loaded when it is needed by the newline check or
by the UTF length calculation below. */
3112 if (readuchar)
3113   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3114
3115 if (newlinecheck)
3116   CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3117
3118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3120 #if defined COMPILE_PCRE8
3121 if (common->utf)
3122   {
  /* UTF-8: for a first byte of 0xc0 or above, the extra amount to skip is
  loaded from utf8_table4, indexed by (first byte - 0xc0). */
3123   singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3124   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3125   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3126   JUMPHERE(singlechar);
3127   }
3128 #elif defined COMPILE_PCRE16
3129 if (common->utf)
3130   {
  /* UTF-16: when (unit & 0xfc00) == 0xd800, one more unit is skipped (the
  flag value shifted left by 1 is added to STR_PTR). */
3131   singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3132   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3133   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3134   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3135   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3136   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3137   JUMPHERE(singlechar);
3138   }
3139 #endif /* COMPILE_PCRE[8|16] */
3140 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3141 JUMPHERE(start);
3142
3143 if (newlinecheck)
3144   {
  /* Both exits of the newline check code continue here. */
3145   JUMPHERE(end);
3146   JUMPHERE(nl);
3147   }
3148
3149 return mainloop;
3150 }
3151 
3152 #define MAX_N_CHARS 16
3153 #define MAX_N_BYTES 8
3154 
add_prefix_byte(pcre_uint8 byte,pcre_uint8 * bytes)3155 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3156 {
3157 pcre_uint8 len = bytes[0];
3158 int i;
3159 
3160 if (len == 255)
3161   return;
3162 
3163 if (len == 0)
3164   {
3165   bytes[0] = 1;
3166   bytes[1] = byte;
3167   return;
3168   }
3169 
3170 for (i = len; i > 0; i--)
3171   if (bytes[i] == byte)
3172     return;
3173 
3174 if (len >= MAX_N_BYTES - 1)
3175   {
3176   bytes[0] = 255;
3177   return;
3178   }
3179 
3180 len++;
3181 bytes[len] = byte;
3182 bytes[0] = len;
3183 }
3184 
scan_prefix(compiler_common * common,pcre_uchar * cc,pcre_uint32 * chars,pcre_uint8 * bytes,int max_chars)3185 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3186 {
3187 /* Recursive function, which scans prefix literals.

  common      compiler state (utf flag, fcc and ctypes tables are read)
  cc          points to the compiled pattern code to scan
  chars       two entries per character position: chars[0] is the
              accumulated character value, chars[1] is the mask of bits in
              which the possible characters differ
  bytes       MAX_N_BYTES entries per character position, maintained with
              add_prefix_byte; bytes[0] is set to 255 for positions which
              can match an unrestricted character
  max_chars   maximum number of character positions to fill

Returns the number of character positions consumed. Alternatives and the
code following an optional character are scanned recursively on the same
chars / bytes arrays, and max_chars is replaced by the value returned from
the recursive call. */
3188 BOOL last, any, caseless;
3189 int len, repeat, len_save, consumed = 0;
3190 pcre_uint32 chr, mask;
3191 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other case of a character, sized for the library width. */
3192 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3193 pcre_uchar othercase[8];
3194 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3195 pcre_uchar othercase[2];
3196 #else
3197 pcre_uchar othercase[1];
3198 #endif
3199
3200 repeat = 1;
3201 while (TRUE)
3202   {
  /* last:     stop scanning after the current item has been recorded
     any:      the item matches a character set rather than a literal
     caseless: the literal is matched case-independently */
3203   last = TRUE;
3204   any = FALSE;
3205   caseless = FALSE;
3206   switch (*cc)
3207     {
3208     case OP_CHARI:
3209     caseless = TRUE;
    /* Fall through. */
3210     case OP_CHAR:
3211     last = FALSE;
3212     cc++;
3213     break;
3214
3215     case OP_SOD:
3216     case OP_SOM:
3217     case OP_SET_SOM:
3218     case OP_NOT_WORD_BOUNDARY:
3219     case OP_WORD_BOUNDARY:
3220     case OP_EODN:
3221     case OP_EOD:
3222     case OP_CIRC:
3223     case OP_CIRCM:
3224     case OP_DOLL:
3225     case OP_DOLLM:
3226     /* Zero width assertions. They are skipped without consuming a position. */
3227     cc++;
3228     continue;
3229
    /* Assertion groups are skipped as a whole. */
3230     case OP_ASSERT:
3231     case OP_ASSERT_NOT:
3232     case OP_ASSERTBACK:
3233     case OP_ASSERTBACK_NOT:
3234     cc = bracketend(cc);
3235     continue;
3236
    /* One or more repetitions: the character is recorded once and, since
    "last" stays TRUE, the scan ends after it. */
3237     case OP_PLUSI:
3238     case OP_MINPLUSI:
3239     case OP_POSPLUSI:
3240     caseless = TRUE;
    /* Fall through. */
3241     case OP_PLUS:
3242     case OP_MINPLUS:
3243     case OP_POSPLUS:
3244     cc++;
3245     break;
3246
    /* Exact repetition: the character is recorded "repeat" times. */
3247     case OP_EXACTI:
3248     caseless = TRUE;
    /* Fall through. */
3249     case OP_EXACT:
3250     repeat = GET2(cc, 1);
3251     last = FALSE;
3252     cc += 1 + IMM2_SIZE;
3253     break;
3254
    /* Optional character: the code which follows the character is scanned
    first by a recursive call, then the character itself is merged into the
    same positions by the code after the switch. */
3255     case OP_QUERYI:
3256     case OP_MINQUERYI:
3257     case OP_POSQUERYI:
3258     caseless = TRUE;
    /* Fall through. */
3259     case OP_QUERY:
3260     case OP_MINQUERY:
3261     case OP_POSQUERY:
3262     len = 1;
3263     cc++;
3264 #ifdef SUPPORT_UTF
3265     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3266 #endif
3267     max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
3268     if (max_chars == 0)
3269       return consumed;
3270     last = FALSE;
3271     break;
3272
3273     case OP_KET:
3274     cc += 1 + LINK_SIZE;
3275     continue;
3276
    /* The end of the current alternative: skip by the stored link value. */
3277     case OP_ALT:
3278     cc += GET(cc, 1);
3279     continue;
3280
    /* Groups: every alternative after the first one is scanned by a
    recursive call, then the scan continues inside the first alternative. */
3281     case OP_ONCE:
3282     case OP_ONCE_NC:
3283     case OP_BRA:
3284     case OP_BRAPOS:
3285     case OP_CBRA:
3286     case OP_CBRAPOS:
3287     alternative = cc + GET(cc, 1);
3288     while (*alternative == OP_ALT)
3289       {
3290       max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3291       if (max_chars == 0)
3292         return consumed;
3293       alternative += GET(alternative, 1);
3294       }
3295
    /* Capturing groups carry an extra IMM2_SIZE field. */
3296     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3297       cc += IMM2_SIZE;
3298     cc += 1 + LINK_SIZE;
3299     continue;
3300
    /* Character classes and character types are recorded as "any" positions.
    In 8-bit UTF mode the scan stops unless is_char7_bitset accepts the
    bitmap (NOTE(review): is_char7_bitset is defined outside this chunk;
    presumably it checks that only 7-bit characters are present - confirm). */
3301     case OP_CLASS:
3302 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3303     if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3304 #endif
3305     any = TRUE;
3306     cc += 1 + 32 / sizeof(pcre_uchar);
3307     break;
3308
3309     case OP_NCLASS:
3310 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3311     if (common->utf) return consumed;
3312 #endif
3313     any = TRUE;
3314     cc += 1 + 32 / sizeof(pcre_uchar);
3315     break;
3316
3317 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3318     case OP_XCLASS:
3319 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3320     if (common->utf) return consumed;
3321 #endif
3322     any = TRUE;
3323     cc += GET(cc, 1);
3324     break;
3325 #endif
3326
3327     case OP_DIGIT:
3328 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3329     if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3330       return consumed;
3331 #endif
3332     any = TRUE;
3333     cc++;
3334     break;
3335
3336     case OP_WHITESPACE:
3337 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3338     if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3339       return consumed;
3340 #endif
3341     any = TRUE;
3342     cc++;
3343     break;
3344
3345     case OP_WORDCHAR:
3346 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3347     if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3348       return consumed;
3349 #endif
3350     any = TRUE;
3351     cc++;
3352     break;
3353
    /* Negated literals skip one extra unit (in UTF mode of the 8 and 16 bit
    libraries the scan stops below before that matters). */
3354     case OP_NOT:
3355     case OP_NOTI:
3356     cc++;
3357     /* Fall through. */
3358     case OP_NOT_DIGIT:
3359     case OP_NOT_WHITESPACE:
3360     case OP_NOT_WORDCHAR:
3361     case OP_ANY:
3362     case OP_ALLANY:
3363 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3364     if (common->utf) return consumed;
3365 #endif
3366     any = TRUE;
3367     cc++;
3368     break;
3369
3370 #ifdef SUPPORT_UCP
3371     case OP_NOTPROP:
3372     case OP_PROP:
3373 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3374     if (common->utf) return consumed;
3375 #endif
3376     any = TRUE;
3377     cc += 1 + 2;
3378     break;
3379 #endif
3380
    /* Only the repeat count is taken here; the character type which follows
    is processed by the next iteration of the loop. */
3381     case OP_TYPEEXACT:
3382     repeat = GET2(cc, 1);
3383     cc += 1 + IMM2_SIZE;
3384     continue;
3385
3386     case OP_NOTEXACT:
3387     case OP_NOTEXACTI:
3388 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3389     if (common->utf) return consumed;
3390 #endif
3391     any = TRUE;
3392     repeat = GET2(cc, 1);
3393     cc += 1 + IMM2_SIZE + 1;
3394     break;
3395
    /* Any other opcode ends the prefix. */
3396     default:
3397     return consumed;
3398     }
3399
3400   if (any)
3401     {
    /* Record "repeat" unrestricted positions: every bit of the code unit is
    set both in the value and in the mask, and the byte set is marked with 255. */
3402 #if defined COMPILE_PCRE8
3403     mask = 0xff;
3404 #elif defined COMPILE_PCRE16
3405     mask = 0xffff;
3406 #elif defined COMPILE_PCRE32
3407     mask = 0xffffffff;
3408 #else
3409     SLJIT_ASSERT_STOP();
3410 #endif
3411
3412     do
3413       {
3414       chars[0] = mask;
3415       chars[1] = mask;
3416       bytes[0] = 255;
3417
3418       consumed++;
3419       if (--max_chars == 0)
3420         return consumed;
3421       chars += 2;
3422       bytes += MAX_N_BYTES;
3423       }
3424     while (--repeat > 0);
3425
3426     repeat = 1;
3427     continue;
3428     }
3429
  /* A literal character: cc points to its first code unit. len is the
  number of code units it occupies. */
3430   len = 1;
3431 #ifdef SUPPORT_UTF
3432   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3433 #endif
3434
3435   if (caseless && char_has_othercase(common, cc))
3436     {
3437 #ifdef SUPPORT_UTF
3438     if (common->utf)
3439       {
      /* The other case is only usable when it is encoded in the same number
      of code units; otherwise the scan stops here. */
3440       GETCHAR(chr, cc);
3441       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3442         return consumed;
3443       }
3444     else
3445 #endif
3446       {
3447       chr = *cc;
3448       othercase[0] = TABLE_GET(chr, common->fcc, chr);
3449       }
3450     }
3451   else
3452     caseless = FALSE;
3453
  /* The character is recorded "repeat" times; cc and len are restored
  before each repetition. */
3454   len_save = len;
3455   cc_save = cc;
3456   while (TRUE)
3457     {
3458     oc = othercase;
3459     do
3460       {
3461       chr = *cc;
3462 #ifdef COMPILE_PCRE32
3463       if (SLJIT_UNLIKELY(chr == NOTACHAR))
3464         return consumed;
3465 #endif
      /* Only the low eight bits of the code unit are added to the byte set. */
3466       add_prefix_byte((pcre_uint8)chr, bytes);
3467
3468       mask = 0;
3469       if (caseless)
3470         {
        /* The mask contains the bits in which the two cases differ; these
        bits are also set in the stored value. */
3471         add_prefix_byte((pcre_uint8)*oc, bytes);
3472         mask = *cc ^ *oc;
3473         chr |= mask;
3474         }
3475
      /* NOTACHAR in chars[0] (together with a zero mask in 32 bit mode)
      marks a position which has not been written yet. */
3476 #ifdef COMPILE_PCRE32
3477       if (chars[0] == NOTACHAR && chars[1] == 0)
3478 #else
3479       if (chars[0] == NOTACHAR)
3480 #endif
3481         {
3482         chars[0] = chr;
3483         chars[1] = mask;
3484         }
3485       else
3486         {
        /* Merge with the value recorded earlier for this position: the bits
        which differ are added to the mask. */
3487         mask |= chars[0] ^ chr;
3488         chr |= mask;
3489         chars[0] = chr;
3490         chars[1] |= mask;
3491         }
3492
3493       len--;
3494       consumed++;
3495       if (--max_chars == 0)
3496         return consumed;
3497       chars += 2;
3498       bytes += MAX_N_BYTES;
3499       cc++;
3500       oc++;
3501       }
3502     while (len > 0);
3503
3504     if (--repeat == 0)
3505       break;
3506
3507     len = len_save;
3508     cc = cc_save;
3509     }
3510
3511   repeat = 1;
3512   if (last)
3513     return consumed;
3514   }
3515 }
3516 
fast_forward_first_n_chars(compiler_common * common,BOOL firstline)3517 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3518 {
/* Emits a fast forward search based on the first characters of the pattern,
which are collected by scan_prefix starting at common->start.

  common      compiler state; common->read_only_data may be reallocated here
  firstline   when set, the search is limited by common->first_line_end

Returns FALSE without emitting any code when the collected prefix is not
suitable (fewer than two positions, or no usable position was found).
Returns TRUE otherwise. TRUE is also returned when the allocation of the
skip table fails; in that case common->read_only_data is NULL and no code
has been emitted (NOTE(review): the caller is presumably expected to check
read_only_data - confirm). */
3519 DEFINE_COMPILER;
3520 struct sljit_label *start;
3521 struct sljit_jump *quit;
3522 pcre_uint32 chars[MAX_N_CHARS * 2];
3523 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3524 pcre_uint8 ones[MAX_N_CHARS];
3525 int offsets[3];
3526 pcre_uint32 mask;
3527 pcre_uint8 *byte_set, *byte_set_end;
3528 int i, max, from;
3529 int range_right = -1, range_len = 3 - 1;
3530 sljit_ub *update_table = NULL;
3531 BOOL in_range;
3532
3533 /* This is even TRUE, if both are NULL. */
3534 SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3535
/* Mark every position as unwritten (NOTACHAR value, zero mask, empty byte
set) before the prefix is scanned. */
3536 for (i = 0; i < MAX_N_CHARS; i++)
3537   {
3538   chars[i << 1] = NOTACHAR;
3539   chars[(i << 1) + 1] = 0;
3540   bytes[i * MAX_N_BYTES] = 0;
3541   }
3542
3543 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3544
3545 if (max <= 1)
3546   return FALSE;
3547
/* ones[i] is the sum of ones_in_half_byte[] over the four bit groups of the
mask of position i (ones_in_half_byte is defined outside this chunk;
presumably it holds the number of set bits of each value). */
3548 for (i = 0; i < max; i++)
3549   {
3550   mask = chars[(i << 1) + 1];
3551   ones[i] = ones_in_half_byte[mask & 0xf];
3552   mask >>= 4;
3553   while (mask != 0)
3554     {
3555     ones[i] += ones_in_half_byte[mask & 0xf];
3556     mask >>= 4;
3557     }
3558   }
3559
/* Search for the longest run of consecutive positions whose byte set has not
overflowed (counter below 255). A run is only accepted when it is longer than
range_len (initially 2) and the byte set of its last position has at most
four members. range_right is the index of the last position of the accepted
run and range_len is its length. */
3560 in_range = FALSE;
3561 from = 0;   /* Prevent compiler "uninitialized" warning */
3562 for (i = 0; i <= max; i++)
3563   {
3564   if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3565     {
3566     range_len = i - from;
3567     range_right = i - 1;
3568     }
3569
3570   if (i < max && bytes[i * MAX_N_BYTES] < 255)
3571     {
3572     if (!in_range)
3573       {
3574       in_range = TRUE;
3575       from = i;
3576       }
3577     }
3578   else if (in_range)
3579     in_range = FALSE;
3580   }
3581
3582 if (range_right >= 0)
3583   {
3584   /* Since no data is consumed (see the assert in the beginning
  of this function), this space can be reallocated. The first 256 bytes of
  the new area are used for the skip table. */
3586   if (common->read_only_data)
3587     SLJIT_FREE(common->read_only_data);
3588
3589   common->read_only_data_size += 256;
3590   common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3591   if (common->read_only_data == NULL)
3592     return TRUE;
3593
3594   update_table = (sljit_ub *)common->read_only_data;
3595   common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
  /* Every entry starts with the length of the whole range. */
3596   memset(update_table, IN_UCHARS(range_len), 256);
3597
  /* For each position of the range, counted backwards from range_right, the
  entry of every byte in its set is lowered to IN_UCHARS(distance from
  range_right). Entries of bytes accepted at range_right itself become 0. */
3598   for (i = 0; i < range_len; i++)
3599     {
3600     byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3601     SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3602     byte_set_end = byte_set + byte_set[0];
3603     byte_set++;
3604     while (byte_set <= byte_set_end)
3605       {
3606       if (update_table[*byte_set] > IN_UCHARS(i))
3607         update_table[*byte_set] = IN_UCHARS(i);
3608       byte_set++;
3609       }
3610     }
3611   }
3612
/* offsets[0] is the first position whose mask has at most two counted bits,
or -1 when there is no such position. */
3613 offsets[0] = -1;
3614 /* Scan forward. */
3615 for (i = 0; i < max; i++)
3616   if (ones[i] <= 2) {
3617     offsets[0] = i;
3618     break;
3619   }
3620
3621 if (offsets[0] < 0 && range_right < 0)
3622   return FALSE;
3623
3624 if (offsets[0] >= 0)
3625   {
3626   /* Scan backward. offsets[1] is the last suitable position after
  offsets[0] which is not the right end of the range, or -1. */
3627   offsets[1] = -1;
3628   for (i = max - 1; i > offsets[0]; i--)
3629     if (ones[i] <= 2 && i != range_right)
3630       {
3631       offsets[1] = i;
3632       break;
3633       }
3634
3635   /* This case is handled better by fast_forward_first_char. */
3636   if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3637     return FALSE;
3638
3639   offsets[2] = -1;
3640   /* We only search for a middle character if there is no range check. */
3641   if (offsets[1] >= 0 && range_right == -1)
3642     {
3643     /* Scan from middle. First towards offsets[1], then towards offsets[0]. */
3644     for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3645       if (ones[i] <= 2)
3646         {
3647         offsets[2] = i;
3648         break;
3649         }
3650
3651     if (offsets[2] == -1)
3652       {
3653       for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3654         if (ones[i] <= 2)
3655           {
3656           offsets[2] = i;
3657           break;
3658           }
3659       }
3660     }
3661
3662   SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3663   SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3664
  /* The selected (value, mask) pairs are moved to the front of chars[]:
  chars[0..1] for offsets[0], chars[2..3] for offsets[2] and chars[4..5]
  for offsets[1]. */
3665   chars[0] = chars[offsets[0] << 1];
3666   chars[1] = chars[(offsets[0] << 1) + 1];
3667   if (offsets[2] >= 0)
3668     {
3669     chars[2] = chars[offsets[2] << 1];
3670     chars[3] = chars[(offsets[2] << 1) + 1];
3671     }
3672   if (offsets[1] >= 0)
3673     {
3674     chars[4] = chars[offsets[1] << 1];
3675     chars[5] = chars[(offsets[1] << 1) + 1];
3676     }
3677   }
3678
/* From here on max is the offset of the last prefix position. STR_END is
lowered by that amount for the duration of the search and restored at the
end of the emitted code. */
3679 max -= 1;
3680 if (firstline)
3681   {
  /* The original STR_END is saved in TMP3. The limit used by the search is
  the lowered STR_END, unless that is greater than first_line_end, in which
  case first_line_end is used. */
3682   SLJIT_ASSERT(common->first_line_end != 0);
3683   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3684   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3685   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3686   quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3687   OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3688   JUMPHERE(quit);
3689   }
3690 else
3691   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3692
/* Except on 32 bit x86, the address of the skip table is kept in RETURN_ADDR
while the search loop runs. */
3693 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3694 if (range_right >= 0)
3695   OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3696 #endif
3697
3698 start = LABEL();
3699 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3700
3701 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3702
3703 if (range_right >= 0)
3704   {
  /* Load one byte of the code unit at range_right (the address is adjusted
  when the units are wider than a byte and the target is not little endian),
  look it up in the skip table, advance STR_PTR by the result and restart
  the loop while the result is not zero. */
3705 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3706   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3707 #else
3708   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3709 #endif
3710
3711 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3712   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3713 #else
3714   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3715 #endif
3716   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3717   CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3718   }
3719
3720 if (offsets[0] >= 0)
3721   {
  /* Compare up to three selected positions. STR_PTR is incremented before
  the comparisons, so a failed comparison restarts the loop at the next
  position; the offset of the middle character is reduced by one for the
  same reason. On success the increment is undone at the end. */
3722   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3723   if (offsets[1] >= 0)
3724     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3725   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3726
  /* A non-zero mask is OR-ed into the loaded unit before it is compared with
  the stored value (which already has the mask bits set). */
3727   if (chars[1] != 0)
3728     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3729   CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3730   if (offsets[2] >= 0)
3731     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3732
3733   if (offsets[1] >= 0)
3734     {
3735     if (chars[5] != 0)
3736       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3737     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3738     }
3739
3740   if (offsets[2] >= 0)
3741     {
3742     if (chars[3] != 0)
3743       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3744     CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3745     }
3746   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3747   }
3748
3749 JUMPHERE(quit);
3750
3751 if (firstline)
3752   {
  /* Restore STR_END from TMP3. When the skip table was used, STR_PTR is also
  limited to first_line_end. */
3753   if (range_right >= 0)
3754     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3755   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3756   if (range_right >= 0)
3757     {
3758     quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3759     OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3760     JUMPHERE(quit);
3761     }
3762   }
3763 else
3764   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3765 return TRUE;
3766 }
3767 
3768 #undef MAX_N_CHARS
3769 #undef MAX_N_BYTES
3770 
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless,BOOL firstline)3771 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3772 {
/* Emits a loop which advances STR_PTR until the code unit at STR_PTR equals
first_char (or its other case), or until STR_END is reached.

  common       compiler state (fcc table, utf flag, first_line_end)
  first_char   the code unit to search for
  caseless     when set, the other case of first_char is accepted as well
  firstline    when set, STR_END is temporarily replaced by the value stored
               at common->first_line_end (the original is kept in TMP3) */
3773 DEFINE_COMPILER;
3774 struct sljit_label *start;
3775 struct sljit_jump *quit;
3776 struct sljit_jump *found;
3777 pcre_uchar oc, bit;
3778
3779 if (firstline)
3780   {
3781   SLJIT_ASSERT(common->first_line_end != 0);
3782   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3783   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3784   }
3785
3786 start = LABEL();
3787 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3788 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3789
/* oc is the other case of first_char, or first_char itself when the search
is case sensitive. */
3790 oc = first_char;
3791 if (caseless)
3792   {
3793   oc = TABLE_GET(first_char, common->fcc, first_char);
3794 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3795   if (first_char > 127 && common->utf)
3796     oc = UCD_OTHERCASE(first_char);
3797 #endif
3798   }
3799 if (first_char == oc)
3800   found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3801 else
3802   {
3803   bit = first_char ^ oc;
3804   if (is_powerof2(bit))
3805     {
    /* The two cases differ in the bits of "bit" only, and is_powerof2
    accepted that value: OR it into the unit and compare once. */
3806     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3807     found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3808     }
3809   else
3810     {
    /* Otherwise the unit is compared with both values and the two results
    are combined with OR. */
3811     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3812     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3813     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3814     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3815     found = JUMP(SLJIT_C_NOT_ZERO);
3816     }
3817   }
3818
/* No match: step to the next code unit and try again. */
3819 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3820 JUMPTO(SLJIT_JUMP, start);
3821 JUMPHERE(found);
3822 JUMPHERE(quit);
3823
3824 if (firstline)
3825   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3826 }
3827 
fast_forward_newline(compiler_common * common,BOOL firstline)3828 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3829 {
/* Emits code which advances STR_PTR to the position after the next newline.
Nothing is skipped when STR_PTR is not after the "str" field of the
jit_arguments structure, or when the end of the subject has been reached.

  common      compiler state (newline type and value, nlmin / nlmax,
              first_line_end); common->ff_newline_shortcut is set to the
              label of the search loop unless the newline is a fixed value
              above 255
  firstline   when set, STR_END is temporarily replaced by the value stored
              at common->first_line_end (the original is kept in TMP3) */
3830 DEFINE_COMPILER;
3831 struct sljit_label *loop;
3832 struct sljit_jump *lastchar;
3833 struct sljit_jump *firstchar;
3834 struct sljit_jump *quit;
3835 struct sljit_jump *foundcr = NULL;
3836 struct sljit_jump *notfoundnl;
3837 jump_list *newline = NULL;
3838
3839 if (firstline)
3840   {
3841   SLJIT_ASSERT(common->first_line_end != 0);
3842   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3843   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3844   }
3845
3846 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3847   {
  /* Fixed newline stored in two bytes of common->newline (high byte first). */
3848   lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3849   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3850   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3851   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3852   firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3853
  /* Step back by one unit when STR_PTR is at least two units after the
  "begin" field, so that the two units before the current position are
  tested by the first iteration of the loop. */
3854   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3855   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3856   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3857 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3858   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3859 #endif
3860   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3861
  /* Advance one unit at a time until the two units before STR_PTR are equal
  to the two bytes of the newline, or until STR_END is reached. */
3862   loop = LABEL();
3863   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3864   quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3865   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3866   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3867   CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3868   CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3869
3870   JUMPHERE(quit);
3871   JUMPHERE(firstchar);
3872   JUMPHERE(lastchar);
3873
3874   if (firstline)
3875     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3876   return;
3877   }
3878
/* All other newline types. The search starts one character before STR_PTR
(see skip_char_back), unless STR_PTR is not after the "str" field. */
3879 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3880 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3881 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3882 skip_char_back(common);
3883
3884 loop = LABEL();
3885 common->ff_newline_shortcut = loop;
3886
/* Read the next character and leave the loop at the end of the subject.
check_newlinechar is called with jumpifmatch set to FALSE, so the jumps
collected in "newline" are taken when the character is not a newline; they
are bound to the loop label. */
3887 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3888 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3889 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3890   foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3891 check_newlinechar(common, common->nltype, &newline, FALSE);
3892 set_jumps(newline, loop);
3893
3894 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3895   {
  /* After a CR, a directly following NL is skipped as well, unless the end
  of the subject has been reached. */
3896   quit = JUMP(SLJIT_JUMP);
3897   JUMPHERE(foundcr);
3898   notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3899   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3900   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3901   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3902 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3903   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3904 #endif
3905   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3906   JUMPHERE(notfoundnl);
3907   JUMPHERE(quit);
3908   }
3909 JUMPHERE(lastchar);
3910 JUMPHERE(firstchar);
3911
3912 if (firstline)
3913   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3914 }
3915 
/* Forward declaration: fast_forward_start_bits() below calls this helper
before its definition appears further down in this file. */
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3917 
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
{
/* Emits a scan loop that advances STR_PTR until the code unit at STR_PTR has
its bit set in start_bits, or until STR_PTR reaches STR_END.

  common      compiler state; code is emitted through its sljit compiler
  start_bits  bitmap indexed by code unit value (bytes 0..31 are read); its
              address is embedded in the generated code as an immediate
  firstline   when TRUE, STR_END is temporarily replaced by the value stored
              at common->first_line_end and restored before leaving

The generated code overwrites TMP1 and may overwrite TMP2, TMP3 (UTF mode)
and RETURN_ADDR (firstline mode). */
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found = NULL;
jump_list *matches = NULL;
#ifndef COMPILE_PCRE8
struct sljit_jump *jump;
#endif

if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  /* RETURN_ADDR is used as scratch storage for the real STR_END. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  }

start = LABEL();
quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF
/* The tests below modify TMP1, so keep a copy of the code unit in TMP3 for
the UTF skip logic further down. */
if (common->utf)
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif

/* First try to express the bitmap as a few range comparisons. The jumps it
collects in 'matches' are bound to the loop exit below. If that is not
possible, fall back to an explicit bitmap lookup. */
if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
  {
#ifndef COMPILE_PCRE8
  /* The bitmap only covers values 0..255: anything not below 255 is tested
  as 255. */
  jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
  JUMPHERE(jump);
#endif
  /* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]; leave when the bit is set. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  found = JUMP(SLJIT_C_NOT_ZERO);
  }

/* No match at this position: step over the current character. */
#ifdef SUPPORT_UTF
if (common->utf)
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UTF
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* Units below 0xc0 need no extra skip. Otherwise the value read from
  utf8_table4[unit - 0xc0] is added to STR_PTR (presumably the number of
  trailing bytes of the sequence - confirm against the table definition). */
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* Units below 0xd800 need no extra skip. If (unit & 0xfc00) == 0xd800 the
  flag value 1 is shifted left by one and added, i.e. STR_PTR moves two more
  bytes. */
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF */
JUMPTO(SLJIT_JUMP, start);
/* Common exit point of the loop: bitmap hit, range hit, or end of subject. */
if (found != NULL)
  JUMPHERE(found);
if (matches != NULL)
  set_jumps(matches, LABEL());
JUMPHERE(quit);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
3994 
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)3995 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3996 {
3997 DEFINE_COMPILER;
3998 struct sljit_label *loop;
3999 struct sljit_jump *toolong;
4000 struct sljit_jump *alreadyfound;
4001 struct sljit_jump *found;
4002 struct sljit_jump *foundoc = NULL;
4003 struct sljit_jump *notfound;
4004 pcre_uint32 oc, bit;
4005 
4006 SLJIT_ASSERT(common->req_char_ptr != 0);
4007 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4008 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4009 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
4010 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
4011 
4012 if (has_firstchar)
4013   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4014 else
4015   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4016 
4017 loop = LABEL();
4018 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4019 
4020 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4021 oc = req_char;
4022 if (caseless)
4023   {
4024   oc = TABLE_GET(req_char, common->fcc, req_char);
4025 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4026   if (req_char > 127 && common->utf)
4027     oc = UCD_OTHERCASE(req_char);
4028 #endif
4029   }
4030 if (req_char == oc)
4031   found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4032 else
4033   {
4034   bit = req_char ^ oc;
4035   if (is_powerof2(bit))
4036     {
4037     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4038     found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4039     }
4040   else
4041     {
4042     found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4043     foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4044     }
4045   }
4046 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4047 JUMPTO(SLJIT_JUMP, loop);
4048 
4049 JUMPHERE(found);
4050 if (foundoc)
4051   JUMPHERE(foundoc);
4052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4053 JUMPHERE(alreadyfound);
4054 JUMPHERE(toolong);
4055 return notfound;
4056 }
4057 
static void do_revertframes(compiler_common *common)
{
/* Emits a fast-call subroutine (return address kept in RETURN_ADDR) that
walks a list of records starting at STACK_TOP and writes the saved words back
to locations relative to the local base address. The first word of each
record selects its kind:

  > 0   offset from the local base; the next two words are stored there,
        and the walk advances by three words
  == 0  end of the list; the subroutine returns
  < 0   negated offset from the local base; the next single word is stored
        there, and the walk advances by two words

TMP1 (cursor), TMP2 (tag / target address) and TMP3 (local base) are
overwritten by the generated code. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the tag with zero; the signed flags are used twice below. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);

/* Positive tag: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* The flags of the comparison above are still valid here: negative tags
branch on, a zero tag falls through to the return. */
jump = JUMP(SLJIT_C_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative tag: restore one word. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
4092 
static void check_wordboundary(compiler_common *common)
{
/* Emits a fast-call subroutine that decides whether STR_PTR is at a word
boundary. It computes a 0/1 "is word character" value for the character
before STR_PTR (kept in the LOCALS1 stack slot) and for the character at
STR_PTR (kept in TMP2), then XORs the two with flag setting, so the caller
can branch on the resulting flags. A missing character on either side
(STR_PTR at or before the subject start, or the jumps produced by
check_str_end) counts as 0.

The return address is stored in the LOCALS0 stack slot. TMP1 and TMP2 are
overwritten by the generated code. */
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The table lookups below shift by 4 to extract the ctype_word bit. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* No previous character at the start of the subject: LOCALS1 stays 0. */
skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Underscore is a word character (TMP2 = 1). Otherwise TMP2 becomes 1
  when the value left in TMP1 by the getucd call lies in ucp_Ll..ucp_Lu or
  in ucp_Nd..ucp_No (unsigned range checks after subtracting the lower
  bound). */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Table based test: characters above 255 skip the lookup and keep the
  zero already stored in LOCALS1. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; its type is built in TMP2. TMP2 is zeroed
first so that the jumps collected by check_str_end arrive with 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* Only the flags of the XOR are kept: the result is zero when both sides
have the same word-ness. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4201 
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries to replace a 256-bit class bitmap test by a few comparisons on TMP1.

  common      compiler state; code is emitted through its sljit compiler
  bits        class bitmap, bit (c & 7) of bits[c >> 3] describes value c
  nclass      TRUE when values of 256 and above belong to the class as well
  invert      FALSE: the emitted jumps are taken when TMP1 is NOT in the class
              TRUE:  the emitted jumps are taken when TMP1 IS in the class
  backtracks  list that receives the emitted jumps

Returns TRUE when the comparisons were emitted (TMP1 may have been modified by
the generated code). Returns FALSE, without emitting anything, when the bitmap
needs more than four range boundaries; the caller then has to emit a generic
bitmap lookup. */
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
pcre_uint8 bit, cbit, all;
int i, byte, length = 0;

/* Collect the values at which the bitmap toggles between "in" and "out". */
bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

for (i = 0; i < 256; )
  {
  byte = i >> 3;
  /* Fast path: a whole byte that continues the current run. */
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* The state at 255 differs from the state requested for 256 and above, so
256 is a boundary as well. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From here on 'bit' is the state of value 0, flipped when inverting. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  if (ranges[0] + 1 != ranges[1])
    {
    /* Unsigned "c - lo < hi - lo" tests lo <= c < hi in one comparison. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two ranges of equal length whose start values differ in exactly one bit
  (for example A-Z and a-z) can be merged: OR that bit into the character and
  test the upper range only. This is valid only if no character of the lower
  range already has the bit set; otherwise such characters are left unchanged
  by the OR and fail the upper range test, and characters above the upper
  range can be misjudged in the same way. Because the lower range starts with
  the bit clear and is shorter than the bit value, checking its (exclusive)
  end value is sufficient. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* 'i' tracks how much has already been subtracted from TMP1. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_ASSERT_STOP();
  return FALSE;
  }
}
4350 
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* Emits a fast-call subroutine (return address in RETURN_ADDR). Accepted
values are 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in the 16/32 bit
libraries and in 8 bit UTF mode. The 0/1 result is accumulated in TMP2 and
the final OR also sets the flags. TMP1 is modified as well: it is rebased by
0x0a and may get its lowest bit set. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After rebasing, one unsigned comparison covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both rebased), so a
  single equality test covers the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last equality test and set the flags. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4377 
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal whitespace character. TMP2
destroyed. */
/* Emits a fast-call subroutine (return address in RETURN_ADDR). Accepted
values are 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000..0x200a, 0x202f,
0x205f and 0x3000 in the 16/32 bit libraries and in 8 bit UTF mode. The 0/1
result is accumulated in TMP2 and the final OR also sets the flags. In the
wide/UTF case TMP1 is modified too (rebased by 0x2000). */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* Rebase to 0x2000 so that one unsigned comparison covers 0x2000..0x200a;
  the remaining constants are expressed relative to the same base. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last equality test and set the flags. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4416 
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical whitespace character. TMP2
destroyed. */
/* Emits a fast-call subroutine (return address in RETURN_ADDR). Accepted
values are 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in the 16/32 bit
libraries and in 8 bit UTF mode. The 0/1 result is accumulated in TMP2 and
the final OR also sets the flags. TMP1 is modified as well: it is rebased by
0x0a and may get its lowest bit set. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After rebasing, one unsigned comparison covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  /* NOTE(review): the flags requested here via SLJIT_SET_E are replaced by
  the SUB | SET_E two lines below before anything reads them; the request
  looks unnecessary - confirm before removing. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both rebased), so a
  single equality test covers the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last equality test and set the flags. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4444 
/* Inside the two comparison subroutines below, the STR_END and STACK_TOP
registers are borrowed to hold the pair of code units being compared. Their
original contents are saved on entry and restored before returning. */
#define CHAR1 STR_END
#define CHAR2 STACK_TOP

static void do_casefulcmp(compiler_common *common)
{
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that compares
two code unit sequences exactly.

Register use of the generated code:
  TMP1     cursor of the first sequence
  STR_PTR  reduced by TMP2 on entry, then used as cursor of the second one
  TMP2     remaining length, decreased by IN_UCHARS(1) per compared unit
  TMP3 and the LOCALS0 stack slot hold the saved CHAR1 / CHAR2 registers

The loop ends at the first differing pair or when TMP2 reaches zero, so TMP2
is zero on return only if every pair was equal (presumably this is what the
caller tests - confirm at the call sites). */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both cursors start one unit early because every load in the loop uses an
offset of IN_UCHARS(1) (MOVU_UCHAR is presumably a load that also updates
the base register - confirm against its definition). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_C_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the initial one unit bias of STR_PTR and restore the borrowed
registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4474 
/* In do_caselesscmp the STACK_LIMIT register is borrowed to hold the address
of the lower casing table. Its original content is saved and restored. */
#define LCC_TABLE STACK_LIMIT

static void do_caselesscmp(compiler_common *common)
{
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that compares
two code unit sequences after mapping each unit through the common->lcc
table. In the 16/32 bit libraries units above 255 skip the table lookup and
are compared unchanged.

Register use of the generated code:
  TMP1     cursor of the first sequence
  STR_PTR  reduced by TMP2 on entry, then used as cursor of the second one
  TMP2     remaining length, decreased by IN_UCHARS(1) per compared unit
  TMP3 and the LOCALS0 / LOCALS1 stack slots hold the saved LCC_TABLE, CHAR1
  and CHAR2 registers

The loop ends at the first differing pair or when TMP2 reaches zero, so TMP2
is zero on return only if every pair was equal (presumably this is what the
caller tests - confirm at the call sites). */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both cursors start one unit early because every load in the loop uses an
offset of IN_UCHARS(1). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
#ifndef COMPILE_PCRE8
jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif
jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_C_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the initial one unit bias of STR_PTR and restore the borrowed
registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

/* The register aliases are local to the comparison subroutines above. */
#undef LCC_TABLE
#undef CHAR1
#undef CHAR2
4523 
4524 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4525 
do_utf_caselesscmp(pcre_uchar * src1,jit_arguments * args,pcre_uchar * end1)4526 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4527 {
4528 /* This function would be ineffective to do in JIT level. */
4529 pcre_uint32 c1, c2;
4530 const pcre_uchar *src2 = args->uchar_ptr;
4531 const pcre_uchar *end2 = args->end;
4532 const ucd_record *ur;
4533 const pcre_uint32 *pp;
4534 
4535 while (src1 < end1)
4536   {
4537   if (src2 >= end2)
4538     return (pcre_uchar*)1;
4539   GETCHARINC(c1, src1);
4540   GETCHARINC(c2, src2);
4541   ur = GET_UCD(c2);
4542   if (c1 != c2 && c1 != c2 + ur->other_case)
4543     {
4544     pp = PRIV(ucd_caseless_sets) + ur->caseset;
4545     for (;;)
4546       {
4547       if (c1 < *pp) return NULL;
4548       if (c1 == *pp++) break;
4549       }
4550     }
4551   }
4552 return src2;
4553 }
4554 
4555 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4556 
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context* context, jump_list **backtracks)
{
/* Emits the comparison of one pattern character (one code unit, or a whole
multi-unit sequence in UTF mode) against the subject text located
context->length bytes before STR_PTR.

  common      compiler state; code is emitted through its sljit compiler
  caseless    accept the other case of the character when it has one
  cc          pattern pointer to the first code unit of the character
  context     running state shared by consecutive calls: remaining length in
              bytes, the register that receives the next load (sourcereg, -1
              before the first call) and, when unaligned loads are used, the
              code units gathered so far for a combined comparison
  backtracks  list that receives the "no match" jumps

Returns cc advanced past the processed character. TMP1 and TMP2 are used
alternately: while one is compared, the other already receives the next
load. */
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
  /* The value packs two things: the bits above the low byte select the code
  unit (relative to cc) that differs between the cases, the low byte is the
  differing bit itself. */
#if defined COMPILE_PCRE8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  /* Here bit 0x100 marks that the differing bit belongs to the upper byte
  of the code unit. */
  othercasechar = cc + (othercasebit >> 9);
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

/* First call for this context: emit the initial load into TMP1, using the
widest access that the remaining length allows. */
if (context->sourcereg == -1)
  {
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
/* Number of code units that make up this character. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. */
  /* Gather the expected code unit (c) and its case mask (oc); the actual
  comparison is emitted once a whole load unit has been collected. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush when four bytes are gathered, when the input is exhausted, or (8
  bit only) when two bytes are gathered and a single byte remains. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Emit the load of the next chunk before comparing the current one. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register that holds the chunk loaded earlier. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are counts of gathered code units, hence the division
    of the byte width by the code unit size. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One code unit per comparison: emit the load of the next unit first, then
  compare the unit loaded earlier. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
4709 
4710 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4711 
/* Changes the base of the value kept in typereg. The users of this macro
compare typereg against (constant - typeoffset), so when the offset moves from
typeoffset to value, the register is adjusted by the difference and typeoffset
is updated. Nothing is emitted when the offset is unchanged.

The macro expects typereg and typeoffset to be in scope at the point of use.
It expands to a single statement (so it is safe under an unbraced if / else),
and its argument is evaluated exactly once. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    sljit_uw new_typeoffset_ = (value); \
    if (new_typeoffset_ != typeoffset) \
      { \
      if (new_typeoffset_ < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - new_typeoffset_); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, new_typeoffset_ - typeoffset); \
      typeoffset = new_typeoffset_; \
      } \
    } \
  while (0)
4721 
/* Changes the base of the character value kept in TMP1. The users of this
macro compare TMP1 against (character - charoffset), so when the offset moves
from charoffset to value, TMP1 is adjusted by the difference and charoffset is
updated. Nothing is emitted when the offset is unchanged.

The macro expects charoffset to be in scope at the point of use. It expands to
a single statement (so it is safe under an unbraced if / else), and its
argument is evaluated exactly once. */

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    sljit_uw new_charoffset_ = (value); \
    if (new_charoffset_ != charoffset) \
      { \
      if (new_charoffset_ < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - new_charoffset_)); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(new_charoffset_ - charoffset)); \
      charoffset = new_charoffset_; \
      } \
    } \
  while (0)
4731 
/* Emits the code that matches one subject character against an extended
class (XCLASS).

Arguments:
  common      compiler state
  cc          points to the class flag unit (XCL_NOT, XCL_MAP and XCL_HASPROP
              bits). If XCL_MAP is set, a 32 byte bitmap follows, which is
              consulted for character values up to 255. After that comes the
              item list: XCL_SINGLE (one character), XCL_RANGE (two
              characters) and, with SUPPORT_UCP, XCL_PROP / XCL_NOTPROP
              (a property type unit and a value unit). The list is terminated
              by XCL_END.
  backtracks  list that collects the jumps taken when the character is
              rejected by the class

The item list is processed twice. The first pass counts the items, computes
the smallest (min) and largest (max) character value that has to be told
apart, and records whether the character itself, its Unicode type or its
script is needed. The second pass emits the comparisons. For a normal class a
successful comparison jumps to the local "found" list, which is bound to the
end of the emitted code; for a negated class (XCL_NOT) a successful
comparison jumps to backtracks instead. */

static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
/* Target of a successful item comparison: "found" for a normal class,
backtracks for a negated one. */
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
BOOL utf = common->utf;
#endif

#ifdef SUPPORT_UCP
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1, scriptreg = TMP1;
const pcre_uint32 *other_cases;
sljit_uw typeoffset;
#endif

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;
if (cc[-1] & XCL_MAP)
  {
  /* The bitmap starts at character 0; step over it to reach the items. */
  min = 0;
  cc += 32 / sizeof(pcre_uchar);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    /* Lower bound of the range. */
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    /* Upper bound of the range. */
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
#ifdef SUPPORT_UCP
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set is an explicit, NOTACHAR terminated character list,
      so it only widens [min, max] by its own members. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property is treated as covering the whole range. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    /* Step over the property type and value units. */
    cc += 2;
    }
#endif
  }

/* We are not necessarily in utf mode, even in 8 bit mode. */
cc = ccbegin;
detect_partial_match(common, backtracks);
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 are not covered by the bitmap: they skip the
    bitmap test and continue with the item list. */
    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: load byte (char >> 3) of the map and test
      bit (char & 7) in it. */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
      }

    /* A character <= 255 which is not in the bitmap is rejected here. */
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(pcre_uchar);
    }
  else
    {
    /* No bitmap and no properties: anything outside [min, max] cannot be
    listed, so the outcome is already known for such characters. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test below overwrites TMP1, so the character is kept in TMP3
  and restored afterwards. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#ifdef SUPPORT_UCP
  charsaved = TRUE;
#endif
  if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
    {
#ifdef COMPILE_PCRE8
    SLJIT_ASSERT(common->utf);
#endif
    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);

    /* Same bitmap test as above: byte (char >> 3), bit (char & 7). */
    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));

    JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  cc += 32 / sizeof(pcre_uchar);
  }

#ifdef SUPPORT_UCP
/* Simple register allocation. TMP1 is preferred if possible. */
if (needstype || needsscript)
  {
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
  /* NOTE(review): the code below relies on the getucd helper leaving the
  character type in TMP1 and the ucd record index in TMP2 - confirm against
  the helper's definition. */
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  if (needschar)
    {
    /* TMP1 must hold the character again, so the type is moved away. */
    if (needstype)
      {
      OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
      typereg = RETURN_ADDR;
      }

    /* TMP3 is read back into TMP1 first; only after that (see below) is it
    overwritten with the script. */
    if (needsscript)
      scriptreg = TMP3;
    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
    }
  else if (needstype && needsscript)
    scriptreg = TMP3;
  /* In all other cases only one of them was specified, and it can go to TMP1. */

  if (needsscript)
    {
    if (scriptreg == TMP1)
      {
      OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
      }
    else
      {
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
      }
    }
  }
#endif

/* Generating code. From here on TMP1 holds (character - charoffset) and
typereg holds (type - typeoffset); the SET_*_OFFSET macros move the bases. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UCP
typeoffset = 0;
#endif

while (*cc != XCL_END)
  {
  compares--;
  /* For a normal class the last item is emitted inverted: it jumps to
  backtracks on mismatch and falls through to the "found" label on match. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      /* Another character item follows: accumulate the result in TMP2
      instead of emitting a jump (at most three results are merged). */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      /* Close the accumulated group with a single conditional jump. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    /* Rebase TMP1 to the lower bound, so the range test becomes one
    unsigned comparison against (upper - lower). */
    GETCHARINCTEST(c, cc);
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UCP
  else
    {
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      /* The outcome of this item is known at compile time. When no jump is
      required the item is skipped. The type and value units must still be
      stepped over before continuing, otherwise the next iteration would
      interpret the type unit as an item opcode. */
      if (list != backtracks)
        {
        if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
          {
          cc += 2;
          continue;
          }
        }
      else if (cc[-1] == XCL_NOTPROP)
        {
        cc += 2;
        continue;
        }
      jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      /* Lu, Ll or Lt. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      /* ucp_typerange holds a (first, last) pair of types per category. */
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      /* Characters 0x9..0xd, 0x85, 0x180e, or a type in Zl..Zs. */
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      /* Underscore, or anything accepted by PT_ALNUM. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      /* A type in Ll..Lu or in Nd..No. When entered through PT_WORD the
      first result is ORed into TMP2 instead of overwriting it. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2.
      Two characters a < b which differ in a single bit are both matched by
      the test (ch | (a ^ b)) == b. The real character value is needed for
      this, so charoffset is added back when TMP1 has been rebased. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The pair tested here is (other_cases[1], other_cases[2]), so
          the mask must be their difference, as in the branch above. */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);

        /* The first character is not part of the pair: test it alone. */
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
        }

      /* The remaining characters are tested one by one; only the last
      test sets the flags used by the jump below. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
        }
      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      /* '$', '@', '`', 0xa0..0xd7ff, or anything from 0xe000 upwards. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);

      jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the excluded characters, hence the test
      for zero. */
      jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);

      /* Unlike PT_PXGRAPH, the Zs type is not excluded. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);

      jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      /* A type in Pc..Ps, or a type in Sc..So when the character is not
      above 0xff. */
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);

      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
      break;

      default:
      /* Unknown property types were already rejected by the first pass. */
      SLJIT_ASSERT_STOP();
      break;
      }
    /* Step over the property type and value units. */
    cc += 2;
    }
#endif

  /* The jump of the last item goes to backtracks (it is emitted inverted
  for a normal class); all earlier jumps go to the success list. */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
5227 
5228 #undef SET_TYPE_OFFSET
5229 #undef SET_CHAR_OFFSET
5230 
5231 #endif
5232 
compile_char1_matchingpath(compiler_common * common,pcre_uchar type,pcre_uchar * cc,jump_list ** backtracks)5233 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5234 {
5235 DEFINE_COMPILER;
5236 int length;
5237 unsigned int c, oc, bit;
5238 compare_context context;
5239 struct sljit_jump *jump[4];
5240 jump_list *end_list;
5241 #ifdef SUPPORT_UTF
5242 struct sljit_label *label;
5243 #ifdef SUPPORT_UCP
5244 pcre_uchar propdata[5];
5245 #endif
5246 #endif /* SUPPORT_UTF */
5247 
5248 switch(type)
5249   {
5250   case OP_SOD:
5251   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5252   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5253   add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5254   return cc;
5255 
5256   case OP_SOM:
5257   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5258   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5259   add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5260   return cc;
5261 
5262   case OP_NOT_WORD_BOUNDARY:
5263   case OP_WORD_BOUNDARY:
5264   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5265   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5266   return cc;
5267 
5268   case OP_NOT_DIGIT:
5269   case OP_DIGIT:
5270   /* Digits are usually 0-9, so it is worth optimizing them. */
5271   detect_partial_match(common, backtracks);
5272 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5273   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5274     read_char7_type(common, type == OP_NOT_DIGIT);
5275   else
5276 #endif
5277     read_char8_type(common, type == OP_NOT_DIGIT);
5278     /* Flip the starting bit in the negative case. */
5279   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5280   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5281   return cc;
5282 
5283   case OP_NOT_WHITESPACE:
5284   case OP_WHITESPACE:
5285   detect_partial_match(common, backtracks);
5286 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5287   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5288     read_char7_type(common, type == OP_NOT_WHITESPACE);
5289   else
5290 #endif
5291     read_char8_type(common, type == OP_NOT_WHITESPACE);
5292   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5293   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5294   return cc;
5295 
5296   case OP_NOT_WORDCHAR:
5297   case OP_WORDCHAR:
5298   detect_partial_match(common, backtracks);
5299 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5300   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5301     read_char7_type(common, type == OP_NOT_WORDCHAR);
5302   else
5303 #endif
5304     read_char8_type(common, type == OP_NOT_WORDCHAR);
5305   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5306   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5307   return cc;
5308 
5309   case OP_ANY:
5310   detect_partial_match(common, backtracks);
5311   read_char_range(common, common->nlmin, common->nlmax, TRUE);
5312   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5313     {
5314     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5315     end_list = NULL;
5316     if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5317       add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5318     else
5319       check_str_end(common, &end_list);
5320 
5321     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5322     add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5323     set_jumps(end_list, LABEL());
5324     JUMPHERE(jump[0]);
5325     }
5326   else
5327     check_newlinechar(common, common->nltype, backtracks, TRUE);
5328   return cc;
5329 
5330   case OP_ALLANY:
5331   detect_partial_match(common, backtracks);
5332 #ifdef SUPPORT_UTF
5333   if (common->utf)
5334     {
5335     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5336     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5337 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5338 #if defined COMPILE_PCRE8
5339     jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5340     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5341     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5342 #elif defined COMPILE_PCRE16
5343     jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5344     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5345     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5346     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5347     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5348     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5349 #endif
5350     JUMPHERE(jump[0]);
5351 #endif /* COMPILE_PCRE[8|16] */
5352     return cc;
5353     }
5354 #endif
5355   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5356   return cc;
5357 
5358   case OP_ANYBYTE:
5359   detect_partial_match(common, backtracks);
5360   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5361   return cc;
5362 
5363 #ifdef SUPPORT_UTF
5364 #ifdef SUPPORT_UCP
5365   case OP_NOTPROP:
5366   case OP_PROP:
5367   propdata[0] = XCL_HASPROP;
5368   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5369   propdata[2] = cc[0];
5370   propdata[3] = cc[1];
5371   propdata[4] = XCL_END;
5372   compile_xclass_matchingpath(common, propdata, backtracks);
5373   return cc + 2;
5374 #endif
5375 #endif
5376 
5377   case OP_ANYNL:
5378   detect_partial_match(common, backtracks);
5379   read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5380   jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5381   /* We don't need to handle soft partial matching case. */
5382   end_list = NULL;
5383   if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5384     add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5385   else
5386     check_str_end(common, &end_list);
5387   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5388   jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5389   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5390   jump[2] = JUMP(SLJIT_JUMP);
5391   JUMPHERE(jump[0]);
5392   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5393   set_jumps(end_list, LABEL());
5394   JUMPHERE(jump[1]);
5395   JUMPHERE(jump[2]);
5396   return cc;
5397 
5398   case OP_NOT_HSPACE:
5399   case OP_HSPACE:
5400   detect_partial_match(common, backtracks);
5401   read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5402   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5403   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5404   return cc;
5405 
5406   case OP_NOT_VSPACE:
5407   case OP_VSPACE:
5408   detect_partial_match(common, backtracks);
5409   read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5410   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5411   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5412   return cc;
5413 
5414 #ifdef SUPPORT_UCP
5415   case OP_EXTUNI:
5416   detect_partial_match(common, backtracks);
5417   read_char(common);
5418   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5419   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5420   /* Optimize register allocation: use a real register. */
5421   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5422   OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5423 
5424   label = LABEL();
5425   jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5426   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5427   read_char(common);
5428   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5429   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5430   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5431 
5432   OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5433   OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5434   OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5435   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5436   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5437   JUMPTO(SLJIT_C_NOT_ZERO, label);
5438 
5439   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5440   JUMPHERE(jump[0]);
5441   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5442 
5443   if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5444     {
5445     jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5446     /* Since we successfully read a char above, partial matching must occur. */
5447     check_partial(common, TRUE);
5448     JUMPHERE(jump[0]);
5449     }
5450   return cc;
5451 #endif
5452 
5453   case OP_EODN:
5454   /* Requires rather complex checks. */
5455   jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5456   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5457     {
5458     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5459     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5460     if (common->mode == JIT_COMPILE)
5461       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5462     else
5463       {
5464       jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5465       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5466       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5467       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5468       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5469       add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5470       check_partial(common, TRUE);
5471       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5472       JUMPHERE(jump[1]);
5473       }
5474     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5475     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5476     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5477     }
5478   else if (common->nltype == NLTYPE_FIXED)
5479     {
5480     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5481     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5482     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5483     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5484     }
5485   else
5486     {
5487     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5488     jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5489     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5490     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5491     jump[2] = JUMP(SLJIT_C_GREATER);
5492     add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5493     /* Equal. */
5494     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5495     jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5496     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5497 
5498     JUMPHERE(jump[1]);
5499     if (common->nltype == NLTYPE_ANYCRLF)
5500       {
5501       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5502       add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5503       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5504       }
5505     else
5506       {
5507       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5508       read_char_range(common, common->nlmin, common->nlmax, TRUE);
5509       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5510       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5511       add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5512       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5513       }
5514     JUMPHERE(jump[2]);
5515     JUMPHERE(jump[3]);
5516     }
5517   JUMPHERE(jump[0]);
5518   check_partial(common, FALSE);
5519   return cc;
5520 
5521   case OP_EOD:
5522   add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5523   check_partial(common, FALSE);
5524   return cc;
5525 
5526   case OP_CIRC:
5527   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5528   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5529   add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5530   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5531   add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5532   return cc;
5533 
5534   case OP_CIRCM:
5535   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5536   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5537   jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5538   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5539   add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5540   jump[0] = JUMP(SLJIT_JUMP);
5541   JUMPHERE(jump[1]);
5542 
5543   add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5544   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5545     {
5546     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5547     add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5548     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5549     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5550     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5551     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5552     }
5553   else
5554     {
5555     skip_char_back(common);
5556     read_char_range(common, common->nlmin, common->nlmax, TRUE);
5557     check_newlinechar(common, common->nltype, backtracks, FALSE);
5558     }
5559   JUMPHERE(jump[0]);
5560   return cc;
5561 
5562   case OP_DOLL:
5563   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5564   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5565   add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5566 
5567   if (!common->endonly)
5568     compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5569   else
5570     {
5571     add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5572     check_partial(common, FALSE);
5573     }
5574   return cc;
5575 
5576   case OP_DOLLM:
5577   jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5578   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5579   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5580   add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5581   check_partial(common, FALSE);
5582   jump[0] = JUMP(SLJIT_JUMP);
5583   JUMPHERE(jump[1]);
5584 
5585   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5586     {
5587     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5588     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5589     if (common->mode == JIT_COMPILE)
5590       add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5591     else
5592       {
5593       jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5594       /* STR_PTR = STR_END - IN_UCHARS(1) */
5595       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5596       check_partial(common, TRUE);
5597       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5598       JUMPHERE(jump[1]);
5599       }
5600 
5601     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5602     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5603     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5604     }
5605   else
5606     {
5607     peek_char(common, common->nlmax);
5608     check_newlinechar(common, common->nltype, backtracks, FALSE);
5609     }
5610   JUMPHERE(jump[0]);
5611   return cc;
5612 
5613   case OP_CHAR:
5614   case OP_CHARI:
5615   length = 1;
5616 #ifdef SUPPORT_UTF
5617   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5618 #endif
5619   if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5620     {
5621     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5622     add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5623 
5624     context.length = IN_UCHARS(length);
5625     context.sourcereg = -1;
5626 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5627     context.ucharptr = 0;
5628 #endif
5629     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5630     }
5631 
5632   detect_partial_match(common, backtracks);
5633 #ifdef SUPPORT_UTF
5634   if (common->utf)
5635     {
5636     GETCHAR(c, cc);
5637     }
5638   else
5639 #endif
5640     c = *cc;
5641 
5642   if (type == OP_CHAR || !char_has_othercase(common, cc))
5643     {
5644     read_char_range(common, c, c, FALSE);
5645     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5646     return cc + length;
5647     }
5648   oc = char_othercase(common, c);
5649   read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5650   bit = c ^ oc;
5651   if (is_powerof2(bit))
5652     {
5653     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5654     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5655     return cc + length;
5656     }
5657   jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5658   add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5659   JUMPHERE(jump[0]);
5660   return cc + length;
5661 
5662   case OP_NOT:
5663   case OP_NOTI:
5664   detect_partial_match(common, backtracks);
5665   length = 1;
5666 #ifdef SUPPORT_UTF
5667   if (common->utf)
5668     {
5669 #ifdef COMPILE_PCRE8
5670     c = *cc;
5671     if (c < 128)
5672       {
5673       OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5674       if (type == OP_NOT || !char_has_othercase(common, cc))
5675         add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5676       else
5677         {
5678         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5679         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5680         add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5681         }
5682       /* Skip the variable-length character. */
5683       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5684       jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5685       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5686       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5687       JUMPHERE(jump[0]);
5688       return cc + 1;
5689       }
5690     else
5691 #endif /* COMPILE_PCRE8 */
5692       {
5693       GETCHARLEN(c, cc, length);
5694       }
5695     }
5696   else
5697 #endif /* SUPPORT_UTF */
5698     c = *cc;
5699 
5700   if (type == OP_NOT || !char_has_othercase(common, cc))
5701     {
5702     read_char_range(common, c, c, TRUE);
5703     add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5704     }
5705   else
5706     {
5707     oc = char_othercase(common, c);
5708     read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5709     bit = c ^ oc;
5710     if (is_powerof2(bit))
5711       {
5712       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5713       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5714       }
5715     else
5716       {
5717       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5718       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5719       }
5720     }
5721   return cc + length;
5722 
5723   case OP_CLASS:
5724   case OP_NCLASS:
5725   detect_partial_match(common, backtracks);
5726 
5727 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5728   bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5729   read_char_range(common, 0, bit, type == OP_NCLASS);
5730 #else
5731   read_char_range(common, 0, 255, type == OP_NCLASS);
5732 #endif
5733 
5734   if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5735     return cc + 32 / sizeof(pcre_uchar);
5736 
5737 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5738   jump[0] = NULL;
5739   if (common->utf)
5740     {
5741     jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5742     if (type == OP_CLASS)
5743       {
5744       add_jump(compiler, backtracks, jump[0]);
5745       jump[0] = NULL;
5746       }
5747     }
5748 #elif !defined COMPILE_PCRE8
5749   jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5750   if (type == OP_CLASS)
5751     {
5752     add_jump(compiler, backtracks, jump[0]);
5753     jump[0] = NULL;
5754     }
5755 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5756 
5757   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5758   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5759   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5760   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5761   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5762   add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5763 
5764 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5765   if (jump[0] != NULL)
5766     JUMPHERE(jump[0]);
5767 #endif
5768 
5769   return cc + 32 / sizeof(pcre_uchar);
5770 
5771 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5772   case OP_XCLASS:
5773   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5774   return cc + GET(cc, 0) - 1;
5775 #endif
5776 
5777   case OP_REVERSE:
5778   length = GET(cc, 0);
5779   if (length == 0)
5780     return cc + LINK_SIZE;
5781   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5782 #ifdef SUPPORT_UTF
5783   if (common->utf)
5784     {
5785     OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5786     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5787     label = LABEL();
5788     add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5789     skip_char_back(common);
5790     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5791     JUMPTO(SLJIT_C_NOT_ZERO, label);
5792     }
5793   else
5794 #endif
5795     {
5796     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5797     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5798     add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5799     }
5800   check_start_used_ptr(common);
5801   return cc + LINK_SIZE;
5802   }
5803 SLJIT_ASSERT_STOP();
5804 return cc;
5805 }
5806 
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)5807 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5808 {
5809 /* This function consumes at least one input character. */
5810 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5811 DEFINE_COMPILER;
5812 pcre_uchar *ccbegin = cc;
5813 compare_context context;
5814 int size;
5815 
5816 context.length = 0;
5817 do
5818   {
5819   if (cc >= ccend)
5820     break;
5821 
5822   if (*cc == OP_CHAR)
5823     {
5824     size = 1;
5825 #ifdef SUPPORT_UTF
5826     if (common->utf && HAS_EXTRALEN(cc[1]))
5827       size += GET_EXTRALEN(cc[1]);
5828 #endif
5829     }
5830   else if (*cc == OP_CHARI)
5831     {
5832     size = 1;
5833 #ifdef SUPPORT_UTF
5834     if (common->utf)
5835       {
5836       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5837         size = 0;
5838       else if (HAS_EXTRALEN(cc[1]))
5839         size += GET_EXTRALEN(cc[1]);
5840       }
5841     else
5842 #endif
5843     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5844       size = 0;
5845     }
5846   else
5847     size = 0;
5848 
5849   cc += 1 + size;
5850   context.length += IN_UCHARS(size);
5851   }
5852 while (size > 0 && context.length <= 128);
5853 
5854 cc = ccbegin;
5855 if (context.length > 0)
5856   {
5857   /* We have a fixed-length byte sequence. */
5858   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5859   add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5860 
5861   context.sourcereg = -1;
5862 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5863   context.ucharptr = 0;
5864 #endif
5865   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5866   return cc;
5867   }
5868 
5869 /* A non-fixed length character will be checked if length == 0. */
5870 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5871 }
5872 
5873 /* Forward definitions. */
5874 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5875 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5876 
/* Allocates a zero-filled backtrack record of (size) bytes with
sljit_alloc_memory(), stores it in the local variable "backtrack", records
(ccstart) in it and pushes it on top of parent->top. If the compiler reports
an error after the allocation, the enclosing function returns (error).

The expansion site must have "compiler", "backtrack" and "parent" in scope. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
  } while (0)
5889 
/* Same as PUSH_BACKTRACK, for use in functions returning void: allocates a
zero-filled backtrack record of (size) bytes, stores it in the local variable
"backtrack", records (ccstart) in it and pushes it on top of parent->top. If
the compiler reports an error after the allocation, the enclosing function
simply returns.

The expansion site must have "compiler", "backtrack" and "parent" in scope. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
  } while (0)
5902 
/* Views the local variable "backtrack" as a pointer to the given record type.
The expansion site must have "backtrack" in scope. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5904 
/* Emits the code that selects which capture pair a duplicate-name reference
(OP_DNREF / OP_DNREFI) uses. The emitted code leaves the address of the
selected OVECTOR pair in TMP2 and overwrites TMP1 with the value of OVECTOR(1).

The name table entries belonging to the reference are tried in order. The
first entry whose start slot differs from OVECTOR(1) is selected. If none of
the earlier entries qualifies, the last entry is selected without such a jump;
in that case, when backtracks is not NULL and JavaScript compatibility is off,
a backtrack jump is emitted for a last entry whose start slot equals
OVECTOR(1).

Arguments:
  common      compiler state
  cc          points at the OP_DNREF / OP_DNREFI opcode
  backtracks  list for the "no usable group" jump, or NULL to emit none */
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = GET2(cc, 1 + IMM2_SIZE);
unsigned int offset;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every entry except the last one: leave through "found" as soon as the start
slot of the entry differs from TMP1. */
for (remaining--; remaining > 0; remaining--)
  {
  offset = GET2(slot, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  slot += common->name_entry_size;
  }

/* The last entry is the fallback, so TMP2 is set unconditionally. */
offset = GET2(slot, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

set_jumps(found, LABEL());
}
5934 
/* Emits the code that matches the subject against the text captured by a
back reference.

For OP_REF / OP_REFI the capture pair is read from the OVECTOR slots of the
referenced group. For the duplicate-name forms (OP_DNREF / OP_DNREFI) the
address of the capture pair is expected in TMP2 on entry; compile_dnref_search()
produces it.

The comparison is caseless for OP_REFI and OP_DNREFI, and caseful for OP_REF
and OP_DNREF.

Arguments:
  common      compiler state
  cc          points at the reference opcode
  backtracks  list that receives the jumps taken when the reference fails
  withchecks  when TRUE, emit the check for an empty capture and, for numbered
              references outside JavaScript compatibility mode, the check
              against OVECTOR(1)
  emptyfail   only meaningful with withchecks: TRUE turns an empty capture into
              a backtrack, FALSE lets it skip the comparison and continue */
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
/* Both caseless opcodes must be recognised. Testing only for OP_REF / OP_REFI
would compare OP_DNREF caselessly and would keep OP_DNREFI away from the
UTF-aware comparison below. */
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && caseless)
  {
  /* The comparison is done by the C helper do_utf_caselesscmp(), called with
  three arguments; the assertion pins the registers the code relies on. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
  /* TMP2 = end of the captured text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Empty capture: handled at the end of the function. */
  if (withchecks)
    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  if (common->mode == JIT_COMPILE)
    /* Return values 0 and 1 both mean failure here. */
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    /* Partial matching modes: 0 is a plain failure, 1 is routed through
    check_partial() before backtracking. */
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  /* Any other return value is the new subject position. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = length of the captured text (end - start); the flags are set so
  that a zero length can be detected right away. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  /* Empty capture: handled at the end of the function. */
  if (withchecks)
    jump = JUMP(SLJIT_C_ZERO);

  /* Advance STR_PTR over the expected text and check the subject end. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  /* The shared comparison routine leaves a non-zero TMP2 on a mismatch. */
  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    /* Partial matching modes: the subject ended inside the referenced text.
    Compare only the part that is available, then report through
    check_partial() and backtrack. */
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty-capture jump: either a failure or a successful no-op. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6030 
/* Emits the matching path for a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE). Numbered references (OP_REF / OP_REFI)
read their capture pair from OVECTOR directly; for any other opcode the pair
is located with compile_dnref_search().

Arguments:
  common   compiler state
  cc       points at the reference opcode
  parent   backtrack record on which the new iterator_backtrack is pushed

Returns: pointer to the first opcode after the repeat, or NULL when the
compiler is in an error state after allocating the backtrack record. */
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
pcre_uchar type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin = cc;
/* A max of 0 is treated as "no upper limit" by the code below. */
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);

/* The non-numbered form has one more IMM2 operand (compile_dnref_search()
reads two of them), so cc is moved forward to make the repeat opcode sit at
the same relative position in both cases. ccbegin keeps the original opcode. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The minimizing variant of each repeat opcode is the odd one. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc over the whole item. */
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* Maximizing repeat: match as many copies as allowed, pushing the subject
position on the stack after the repetitions that may be given back. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      /* Keep the address of the capture pair; it is reloaded at the top of
      the loop below. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one copy is required, so a start slot equal to OVECTOR(1)
      is a failure here rather than an empty match. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 serves as the repeat counter when one is needed. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Top of the repeat loop: match one copy of the captured text. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: repeat without saving a position. */
    if (min > 1)
      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Leave the loop once the maximum is reached; otherwise push the
      current position and try another copy. */
      jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. Stack layout used below: STACK(0) is a subject position
slot (initially 0), STACK(1) is the repeat counter, and for the non-numbered
form STACK(2) keeps the address of the capture pair. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* No counter initialization is emitted for OP_CRMINSTAR (min and max are
both 0, so none of the counter accesses below is emitted either). */
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero copies are acceptable: go straight to the end of this function's
  code without matching a copy first. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The code from this label on matches one more copy of the captured text. */
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Refuse another copy when the counter has already reached the maximum. */
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Count the copy and keep matching until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
6237 
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6238 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6239 {
6240 DEFINE_COMPILER;
6241 backtrack_common *backtrack;
6242 recurse_entry *entry = common->entries;
6243 recurse_entry *prev = NULL;
6244 sljit_sw start = GET(cc, 1);
6245 pcre_uchar *start_cc;
6246 BOOL needs_control_head;
6247 
6248 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6249 
6250 /* Inlining simple patterns. */
6251 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6252   {
6253   start_cc = common->start + start;
6254   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6255   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6256   return cc + 1 + LINK_SIZE;
6257   }
6258 
6259 while (entry != NULL)
6260   {
6261   if (entry->start == start)
6262     break;
6263   prev = entry;
6264   entry = entry->next;
6265   }
6266 
6267 if (entry == NULL)
6268   {
6269   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6270   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6271     return NULL;
6272   entry->next = NULL;
6273   entry->entry = NULL;
6274   entry->calls = NULL;
6275   entry->start = start;
6276 
6277   if (prev != NULL)
6278     prev->next = entry;
6279   else
6280     common->entries = entry;
6281   }
6282 
6283 if (common->has_set_som && common->mark_ptr != 0)
6284   {
6285   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6286   allocate_stack(common, 2);
6287   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6288   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6289   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6290   }
6291 else if (common->has_set_som || common->mark_ptr != 0)
6292   {
6293   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6294   allocate_stack(common, 1);
6295   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6296   }
6297 
6298 if (entry->entry == NULL)
6299   add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6300 else
6301   JUMPTO(SLJIT_FAST_CALL, entry->entry);
6302 /* Leave if the match is failed. */
6303 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6304 return cc + 1 + LINK_SIZE;
6305 }
6306 
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)6307 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6308 {
6309 const pcre_uchar *begin = arguments->begin;
6310 int *offset_vector = arguments->offsets;
6311 int offset_count = arguments->offset_count;
6312 int i;
6313 
6314 if (PUBL(callout) == NULL)
6315   return 0;
6316 
6317 callout_block->version = 2;
6318 callout_block->callout_data = arguments->callout_data;
6319 
6320 /* Offsets in subject. */
6321 callout_block->subject_length = arguments->end - arguments->begin;
6322 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6323 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6324 #if defined COMPILE_PCRE8
6325 callout_block->subject = (PCRE_SPTR)begin;
6326 #elif defined COMPILE_PCRE16
6327 callout_block->subject = (PCRE_SPTR16)begin;
6328 #elif defined COMPILE_PCRE32
6329 callout_block->subject = (PCRE_SPTR32)begin;
6330 #endif
6331 
6332 /* Convert and copy the JIT offset vector to the offset_vector array. */
6333 callout_block->capture_top = 0;
6334 callout_block->offset_vector = offset_vector;
6335 for (i = 2; i < offset_count; i += 2)
6336   {
6337   offset_vector[i] = jit_ovector[i] - begin;
6338   offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6339   if (jit_ovector[i] >= begin)
6340     callout_block->capture_top = i;
6341   }
6342 
6343 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6344 if (offset_count > 0)
6345   offset_vector[0] = -1;
6346 if (offset_count > 1)
6347   offset_vector[1] = -1;
6348 return (*PUBL(callout))(callout_block);
6349 }
6350 
6351 /* Aligning to 8 byte. */
6352 #define CALLOUT_ARG_SIZE \
6353     (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6354 
6355 #define CALLOUT_ARG_OFFSET(arg) \
6356     (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6357 
compile_callout_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6358 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6359 {
6360 DEFINE_COMPILER;
6361 backtrack_common *backtrack;
6362 
6363 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6364 
6365 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6366 
6367 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6368 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6369 SLJIT_ASSERT(common->capture_last_ptr != 0);
6370 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6371 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6372 
6373 /* These pointer sized fields temporarly stores internal variables. */
6374 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6375 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6377 
6378 if (common->mark_ptr != 0)
6379   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6380 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6381 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6383 
6384 /* Needed to save important temporary registers. */
6385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6386 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6387 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6388 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6389 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6390 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6391 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6392 
6393 /* Check return value. */
6394 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6395 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6396 if (common->forced_quit_label == NULL)
6397   add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6398 else
6399   JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6400 return cc + 2 + 2 * LINK_SIZE;
6401 }
6402 
6403 #undef CALLOUT_ARG_SIZE
6404 #undef CALLOUT_ARG_OFFSET
6405 
compile_assert_matchingpath(compiler_common * common,pcre_uchar * cc,assert_backtrack * backtrack,BOOL conditional)6406 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6407 {
6408 DEFINE_COMPILER;
6409 int framesize;
6410 int extrasize;
6411 BOOL needs_control_head;
6412 int private_data_ptr;
6413 backtrack_common altbacktrack;
6414 pcre_uchar *ccbegin;
6415 pcre_uchar opcode;
6416 pcre_uchar bra = OP_BRA;
6417 jump_list *tmp = NULL;
6418 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6419 jump_list **found;
6420 /* Saving previous accept variables. */
6421 BOOL save_local_exit = common->local_exit;
6422 BOOL save_positive_assert = common->positive_assert;
6423 then_trap_backtrack *save_then_trap = common->then_trap;
6424 struct sljit_label *save_quit_label = common->quit_label;
6425 struct sljit_label *save_accept_label = common->accept_label;
6426 jump_list *save_quit = common->quit;
6427 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6428 jump_list *save_accept = common->accept;
6429 struct sljit_jump *jump;
6430 struct sljit_jump *brajump = NULL;
6431 
6432 /* Assert captures then. */
6433 common->then_trap = NULL;
6434 
6435 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6436   {
6437   SLJIT_ASSERT(!conditional);
6438   bra = *cc;
6439   cc++;
6440   }
6441 private_data_ptr = PRIVATE_DATA(cc);
6442 SLJIT_ASSERT(private_data_ptr != 0);
6443 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6444 backtrack->framesize = framesize;
6445 backtrack->private_data_ptr = private_data_ptr;
6446 opcode = *cc;
6447 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6448 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6449 ccbegin = cc;
6450 cc += GET(cc, 1);
6451 
6452 if (bra == OP_BRAMINZERO)
6453   {
6454   /* This is a braminzero backtrack path. */
6455   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6456   free_stack(common, 1);
6457   brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6458   }
6459 
6460 if (framesize < 0)
6461   {
6462   extrasize = needs_control_head ? 2 : 1;
6463   if (framesize == no_frame)
6464     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6465   allocate_stack(common, extrasize);
6466   if (needs_control_head)
6467     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6468   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6469   if (needs_control_head)
6470     {
6471     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6472     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6473     }
6474   }
6475 else
6476   {
6477   extrasize = needs_control_head ? 3 : 2;
6478   allocate_stack(common, framesize + extrasize);
6479   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6480   OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6481   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6482   if (needs_control_head)
6483     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6484   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6485   if (needs_control_head)
6486     {
6487     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6488     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6489     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6490     }
6491   else
6492     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6493   init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6494   }
6495 
6496 memset(&altbacktrack, 0, sizeof(backtrack_common));
6497 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6498   {
6499   /* Negative assert is stronger than positive assert. */
6500   common->local_exit = TRUE;
6501   common->quit_label = NULL;
6502   common->quit = NULL;
6503   common->positive_assert = FALSE;
6504   }
6505 else
6506   common->positive_assert = TRUE;
6507 common->positive_assert_quit = NULL;
6508 
6509 while (1)
6510   {
6511   common->accept_label = NULL;
6512   common->accept = NULL;
6513   altbacktrack.top = NULL;
6514   altbacktrack.topbacktracks = NULL;
6515 
6516   if (*ccbegin == OP_ALT)
6517     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6518 
6519   altbacktrack.cc = ccbegin;
6520   compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6521   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6522     {
6523     if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6524       {
6525       common->local_exit = save_local_exit;
6526       common->quit_label = save_quit_label;
6527       common->quit = save_quit;
6528       }
6529     common->positive_assert = save_positive_assert;
6530     common->then_trap = save_then_trap;
6531     common->accept_label = save_accept_label;
6532     common->positive_assert_quit = save_positive_assert_quit;
6533     common->accept = save_accept;
6534     return NULL;
6535     }
6536   common->accept_label = LABEL();
6537   if (common->accept != NULL)
6538     set_jumps(common->accept, common->accept_label);
6539 
6540   /* Reset stack. */
6541   if (framesize < 0)
6542     {
6543     if (framesize == no_frame)
6544       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6545     else
6546       free_stack(common, extrasize);
6547     if (needs_control_head)
6548       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6549     }
6550   else
6551     {
6552     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6553       {
6554       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6555       OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6556       if (needs_control_head)
6557         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6558       }
6559     else
6560       {
6561       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6562       if (needs_control_head)
6563         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6564       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6565       }
6566     }
6567 
6568   if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6569     {
6570     /* We know that STR_PTR was stored on the top of the stack. */
6571     if (conditional)
6572       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6573     else if (bra == OP_BRAZERO)
6574       {
6575       if (framesize < 0)
6576         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6577       else
6578         {
6579         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6580         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6581         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6582         }
6583       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6584       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6585       }
6586     else if (framesize >= 0)
6587       {
6588       /* For OP_BRA and OP_BRAMINZERO. */
6589       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6590       }
6591     }
6592   add_jump(compiler, found, JUMP(SLJIT_JUMP));
6593 
6594   compile_backtrackingpath(common, altbacktrack.top);
6595   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6596     {
6597     if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6598       {
6599       common->local_exit = save_local_exit;
6600       common->quit_label = save_quit_label;
6601       common->quit = save_quit;
6602       }
6603     common->positive_assert = save_positive_assert;
6604     common->then_trap = save_then_trap;
6605     common->accept_label = save_accept_label;
6606     common->positive_assert_quit = save_positive_assert_quit;
6607     common->accept = save_accept;
6608     return NULL;
6609     }
6610   set_jumps(altbacktrack.topbacktracks, LABEL());
6611 
6612   if (*cc != OP_ALT)
6613     break;
6614 
6615   ccbegin = cc;
6616   cc += GET(cc, 1);
6617   }
6618 
6619 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6620   {
6621   SLJIT_ASSERT(common->positive_assert_quit == NULL);
6622   /* Makes the check less complicated below. */
6623   common->positive_assert_quit = common->quit;
6624   }
6625 
6626 /* None of them matched. */
6627 if (common->positive_assert_quit != NULL)
6628   {
6629   jump = JUMP(SLJIT_JUMP);
6630   set_jumps(common->positive_assert_quit, LABEL());
6631   SLJIT_ASSERT(framesize != no_stack);
6632   if (framesize < 0)
6633     OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6634   else
6635     {
6636     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6637     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6638     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6639     }
6640   JUMPHERE(jump);
6641   }
6642 
6643 if (needs_control_head)
6644   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6645 
6646 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6647   {
6648   /* Assert is failed. */
6649   if (conditional || bra == OP_BRAZERO)
6650     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6651 
6652   if (framesize < 0)
6653     {
6654     /* The topmost item should be 0. */
6655     if (bra == OP_BRAZERO)
6656       {
6657       if (extrasize == 2)
6658         free_stack(common, 1);
6659       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6660       }
6661     else
6662       free_stack(common, extrasize);
6663     }
6664   else
6665     {
6666     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6667     /* The topmost item should be 0. */
6668     if (bra == OP_BRAZERO)
6669       {
6670       free_stack(common, framesize + extrasize - 1);
6671       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6672       }
6673     else
6674       free_stack(common, framesize + extrasize);
6675     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6676     }
6677   jump = JUMP(SLJIT_JUMP);
6678   if (bra != OP_BRAZERO)
6679     add_jump(compiler, target, jump);
6680 
6681   /* Assert is successful. */
6682   set_jumps(tmp, LABEL());
6683   if (framesize < 0)
6684     {
6685     /* We know that STR_PTR was stored on the top of the stack. */
6686     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6687     /* Keep the STR_PTR on the top of the stack. */
6688     if (bra == OP_BRAZERO)
6689       {
6690       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6691       if (extrasize == 2)
6692         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6693       }
6694     else if (bra == OP_BRAMINZERO)
6695       {
6696       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6697       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6698       }
6699     }
6700   else
6701     {
6702     if (bra == OP_BRA)
6703       {
6704       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6705       OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6706       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6707       }
6708     else
6709       {
6710       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6711       OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6712       if (extrasize == 2)
6713         {
6714         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6715         if (bra == OP_BRAMINZERO)
6716           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6717         }
6718       else
6719         {
6720         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6721         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6722         }
6723       }
6724     }
6725 
6726   if (bra == OP_BRAZERO)
6727     {
6728     backtrack->matchingpath = LABEL();
6729     SET_LABEL(jump, backtrack->matchingpath);
6730     }
6731   else if (bra == OP_BRAMINZERO)
6732     {
6733     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6734     JUMPHERE(brajump);
6735     if (framesize >= 0)
6736       {
6737       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6738       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6739       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6740       }
6741     set_jumps(backtrack->common.topbacktracks, LABEL());
6742     }
6743   }
6744 else
6745   {
6746   /* AssertNot is successful. */
6747   if (framesize < 0)
6748     {
6749     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6750     if (bra != OP_BRA)
6751       {
6752       if (extrasize == 2)
6753         free_stack(common, 1);
6754       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6755       }
6756     else
6757       free_stack(common, extrasize);
6758     }
6759   else
6760     {
6761     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6762     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6763     /* The topmost item should be 0. */
6764     if (bra != OP_BRA)
6765       {
6766       free_stack(common, framesize + extrasize - 1);
6767       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6768       }
6769     else
6770       free_stack(common, framesize + extrasize);
6771     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6772     }
6773 
6774   if (bra == OP_BRAZERO)
6775     backtrack->matchingpath = LABEL();
6776   else if (bra == OP_BRAMINZERO)
6777     {
6778     JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6779     JUMPHERE(brajump);
6780     }
6781 
6782   if (bra != OP_BRA)
6783     {
6784     SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6785     set_jumps(backtrack->common.topbacktracks, LABEL());
6786     backtrack->common.topbacktracks = NULL;
6787     }
6788   }
6789 
6790 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6791   {
6792   common->local_exit = save_local_exit;
6793   common->quit_label = save_quit_label;
6794   common->quit = save_quit;
6795   }
6796 common->positive_assert = save_positive_assert;
6797 common->then_trap = save_then_trap;
6798 common->accept_label = save_accept_label;
6799 common->positive_assert_quit = save_positive_assert_quit;
6800 common->accept = save_accept;
6801 return cc + 1 + LINK_SIZE;
6802 }
6803 
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)6804 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6805 {
6806 DEFINE_COMPILER;
6807 int stacksize;
6808 
6809 if (framesize < 0)
6810   {
6811   if (framesize == no_frame)
6812     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6813   else
6814     {
6815     stacksize = needs_control_head ? 1 : 0;
6816     if (ket != OP_KET || has_alternatives)
6817       stacksize++;
6818     free_stack(common, stacksize);
6819     }
6820 
6821   if (needs_control_head)
6822     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6823 
6824   /* TMP2 which is set here used by OP_KETRMAX below. */
6825   if (ket == OP_KETRMAX)
6826     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6827   else if (ket == OP_KETRMIN)
6828     {
6829     /* Move the STR_PTR to the private_data_ptr. */
6830     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6831     }
6832   }
6833 else
6834   {
6835   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6836   OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6837   if (needs_control_head)
6838     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6839 
6840   if (ket == OP_KETRMAX)
6841     {
6842     /* TMP2 which is set here used by OP_KETRMAX below. */
6843     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6844     }
6845   }
6846 if (needs_control_head)
6847   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
6848 }
6849 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)6850 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6851 {
6852 DEFINE_COMPILER;
6853 
6854 if (common->capture_last_ptr != 0)
6855   {
6856   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6857   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6858   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6859   stacksize++;
6860   }
6861 if (common->optimized_cbracket[offset >> 1] == 0)
6862   {
6863   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6864   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6865   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6866   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6867   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6868   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
6869   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
6870   stacksize += 2;
6871   }
6872 return stacksize;
6873 }
6874 
6875 /*
6876   Handling bracketed expressions is probably the most complex part.
6877 
6878   Stack layout naming characters:
6879     S - Push the current STR_PTR
6880     0 - Push a 0 (NULL)
6881     A - Push the current STR_PTR. Needed for restoring the STR_PTR
6882         before the next alternative. Not pushed if there are no alternatives.
6883     M - Any values pushed by the current alternative. Can be empty, or anything.
6884     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6885     L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
6888 
6889   The following list shows the regular expression templates, their PCRE byte codes
6890   and stack layout supported by pcre-sljit.
6891 
6892   (?:)                     OP_BRA     | OP_KET                A M
6893   ()                       OP_CBRA    | OP_KET                C M
6894   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
6895                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
6896   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
6897                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
6898   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
6899                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
6900   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
6901                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
6902   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
6903   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
6904   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
6905   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
6906   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
6907            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
6908   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
6909            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
6910   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
6911            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
6912   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
6913            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
6914 
6915 
6916   Stack layout naming characters:
6917     A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
6919     M - Any values pushed by the current alternative. Can be empty, or anything.
6920 
6921   The next list shows the possible content of a bracket:
6922   (|)     OP_*BRA    | OP_ALT ...         M A
6923   (?()|)  OP_*COND   | OP_ALT             M A
6924   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
6925   (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
6926                                           Or nothing, if trace is unnecessary
6927 */
6928 
compile_bracket_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6929 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6930 {
6931 DEFINE_COMPILER;
6932 backtrack_common *backtrack;
6933 pcre_uchar opcode;
6934 int private_data_ptr = 0;
6935 int offset = 0;
6936 int i, stacksize;
6937 int repeat_ptr = 0, repeat_length = 0;
6938 int repeat_type = 0, repeat_count = 0;
6939 pcre_uchar *ccbegin;
6940 pcre_uchar *matchingpath;
6941 pcre_uchar *slot;
6942 pcre_uchar bra = OP_BRA;
6943 pcre_uchar ket;
6944 assert_backtrack *assert;
6945 BOOL has_alternatives;
6946 BOOL needs_control_head = FALSE;
6947 struct sljit_jump *jump;
6948 struct sljit_jump *skip;
6949 struct sljit_label *rmax_label = NULL;
6950 struct sljit_jump *braminzero = NULL;
6951 
6952 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6953 
6954 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6955   {
6956   bra = *cc;
6957   cc++;
6958   opcode = *cc;
6959   }
6960 
6961 opcode = *cc;
6962 ccbegin = cc;
6963 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6964 ket = *matchingpath;
6965 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6966   {
6967   repeat_ptr = PRIVATE_DATA(matchingpath);
6968   repeat_length = PRIVATE_DATA(matchingpath + 1);
6969   repeat_type = PRIVATE_DATA(matchingpath + 2);
6970   repeat_count = PRIVATE_DATA(matchingpath + 3);
6971   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6972   if (repeat_type == OP_UPTO)
6973     ket = OP_KETRMAX;
6974   if (repeat_type == OP_MINUPTO)
6975     ket = OP_KETRMIN;
6976   }
6977 
6978 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6979   {
6980   /* Drop this bracket_backtrack. */
6981   parent->top = backtrack->prev;
6982   return matchingpath + 1 + LINK_SIZE + repeat_length;
6983   }
6984 
6985 matchingpath = ccbegin + 1 + LINK_SIZE;
6986 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6987 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6988 cc += GET(cc, 1);
6989 
6990 has_alternatives = *cc == OP_ALT;
6991 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6992   has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6993 
6994 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6995   opcode = OP_SCOND;
6996 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6997   opcode = OP_ONCE;
6998 
6999 if (opcode == OP_CBRA || opcode == OP_SCBRA)
7000   {
7001   /* Capturing brackets has a pre-allocated space. */
7002   offset = GET2(ccbegin, 1 + LINK_SIZE);
7003   if (common->optimized_cbracket[offset] == 0)
7004     {
7005     private_data_ptr = OVECTOR_PRIV(offset);
7006     offset <<= 1;
7007     }
7008   else
7009     {
7010     offset <<= 1;
7011     private_data_ptr = OVECTOR(offset);
7012     }
7013   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7014   matchingpath += IMM2_SIZE;
7015   }
7016 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7017   {
7018   /* Other brackets simply allocate the next entry. */
7019   private_data_ptr = PRIVATE_DATA(ccbegin);
7020   SLJIT_ASSERT(private_data_ptr != 0);
7021   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7022   if (opcode == OP_ONCE)
7023     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7024   }
7025 
7026 /* Instructions before the first alternative. */
7027 stacksize = 0;
7028 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7029   stacksize++;
7030 if (bra == OP_BRAZERO)
7031   stacksize++;
7032 
7033 if (stacksize > 0)
7034   allocate_stack(common, stacksize);
7035 
7036 stacksize = 0;
7037 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7038   {
7039   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7040   stacksize++;
7041   }
7042 
7043 if (bra == OP_BRAZERO)
7044   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7045 
7046 if (bra == OP_BRAMINZERO)
7047   {
7048   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7049   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7050   if (ket != OP_KETRMIN)
7051     {
7052     free_stack(common, 1);
7053     braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7054     }
7055   else
7056     {
7057     if (opcode == OP_ONCE || opcode >= OP_SBRA)
7058       {
7059       jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7060       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7061       /* Nothing stored during the first run. */
7062       skip = JUMP(SLJIT_JUMP);
7063       JUMPHERE(jump);
7064       /* Checking zero-length iteration. */
7065       if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7066         {
7067         /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7068         braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7069         }
7070       else
7071         {
7072         /* Except when the whole stack frame must be saved. */
7073         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7074         braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
7075         }
7076       JUMPHERE(skip);
7077       }
7078     else
7079       {
7080       jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7081       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7082       JUMPHERE(jump);
7083       }
7084     }
7085   }
7086 
7087 if (repeat_type != 0)
7088   {
7089   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
7090   if (repeat_type == OP_EXACT)
7091     rmax_label = LABEL();
7092   }
7093 
7094 if (ket == OP_KETRMIN)
7095   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7096 
7097 if (ket == OP_KETRMAX)
7098   {
7099   rmax_label = LABEL();
7100   if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7101     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7102   }
7103 
7104 /* Handling capturing brackets and alternatives. */
7105 if (opcode == OP_ONCE)
7106   {
7107   stacksize = 0;
7108   if (needs_control_head)
7109     {
7110     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7111     stacksize++;
7112     }
7113 
7114   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7115     {
7116     /* Neither capturing brackets nor recursions are found in the block. */
7117     if (ket == OP_KETRMIN)
7118       {
7119       stacksize += 2;
7120       if (!needs_control_head)
7121         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7122       }
7123     else
7124       {
7125       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7126         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7127       if (ket == OP_KETRMAX || has_alternatives)
7128         stacksize++;
7129       }
7130 
7131     if (stacksize > 0)
7132       allocate_stack(common, stacksize);
7133 
7134     stacksize = 0;
7135     if (needs_control_head)
7136       {
7137       stacksize++;
7138       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7139       }
7140 
7141     if (ket == OP_KETRMIN)
7142       {
7143       if (needs_control_head)
7144         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7145       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7146       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7147         OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
7148       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7149       }
7150     else if (ket == OP_KETRMAX || has_alternatives)
7151       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7152     }
7153   else
7154     {
7155     if (ket != OP_KET || has_alternatives)
7156       stacksize++;
7157 
7158     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
7159     allocate_stack(common, stacksize);
7160 
7161     if (needs_control_head)
7162       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7163 
7164     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7165     OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7166 
7167     stacksize = needs_control_head ? 1 : 0;
7168     if (ket != OP_KET || has_alternatives)
7169       {
7170       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7171       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7172       stacksize++;
7173       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7174       }
7175     else
7176       {
7177       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7178       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7179       }
7180     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
7181     }
7182   }
7183 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
7184   {
7185   /* Saving the previous values. */
7186   if (common->optimized_cbracket[offset >> 1] != 0)
7187     {
7188     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
7189     allocate_stack(common, 2);
7190     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7191     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
7192     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7193     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7194     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7195     }
7196   else
7197     {
7198     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7199     allocate_stack(common, 1);
7200     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7201     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7202     }
7203   }
7204 else if (opcode == OP_SBRA || opcode == OP_SCOND)
7205   {
7206   /* Saving the previous value. */
7207   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7208   allocate_stack(common, 1);
7209   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7210   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7211   }
7212 else if (has_alternatives)
7213   {
7214   /* Pushing the starting string pointer. */
7215   allocate_stack(common, 1);
7216   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7217   }
7218 
7219 /* Generating code for the first alternative. */
7220 if (opcode == OP_COND || opcode == OP_SCOND)
7221   {
7222   if (*matchingpath == OP_CREF)
7223     {
7224     SLJIT_ASSERT(has_alternatives);
7225     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
7226       CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7227     matchingpath += 1 + IMM2_SIZE;
7228     }
7229   else if (*matchingpath == OP_DNCREF)
7230     {
7231     SLJIT_ASSERT(has_alternatives);
7232 
7233     i = GET2(matchingpath, 1 + IMM2_SIZE);
7234     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7235     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
7236     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
7237     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7238     slot += common->name_entry_size;
7239     i--;
7240     while (i-- > 0)
7241       {
7242       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7243       OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
7244       slot += common->name_entry_size;
7245       }
7246     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
7247     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
7248     matchingpath += 1 + 2 * IMM2_SIZE;
7249     }
7250   else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
7251     {
7252     /* Never has other case. */
7253     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
7254     SLJIT_ASSERT(!has_alternatives);
7255 
7256     if (*matchingpath == OP_RREF)
7257       {
7258       stacksize = GET2(matchingpath, 1);
7259       if (common->currententry == NULL)
7260         stacksize = 0;
7261       else if (stacksize == RREF_ANY)
7262         stacksize = 1;
7263       else if (common->currententry->start == 0)
7264         stacksize = stacksize == 0;
7265       else
7266         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7267 
7268       if (stacksize != 0)
7269         matchingpath += 1 + IMM2_SIZE;
7270       }
7271     else
7272       {
7273       if (common->currententry == NULL || common->currententry->start == 0)
7274         stacksize = 0;
7275       else
7276         {
7277         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
7278         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7279         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7280         while (stacksize > 0)
7281           {
7282           if ((int)GET2(slot, 0) == i)
7283             break;
7284           slot += common->name_entry_size;
7285           stacksize--;
7286           }
7287         }
7288 
7289       if (stacksize != 0)
7290         matchingpath += 1 + 2 * IMM2_SIZE;
7291       }
7292 
7293       /* The stacksize == 0 is a common "else" case. */
7294       if (stacksize == 0)
7295         {
7296         if (*cc == OP_ALT)
7297           {
7298           matchingpath = cc + 1 + LINK_SIZE;
7299           cc += GET(cc, 1);
7300           }
7301         else
7302           matchingpath = cc;
7303         }
7304     }
7305   else
7306     {
7307     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
7308     /* Similar code as PUSH_BACKTRACK macro. */
7309     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
7310     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7311       return NULL;
7312     memset(assert, 0, sizeof(assert_backtrack));
7313     assert->common.cc = matchingpath;
7314     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
7315     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
7316     }
7317   }
7318 
7319 compile_matchingpath(common, matchingpath, cc, backtrack);
7320 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7321   return NULL;
7322 
7323 if (opcode == OP_ONCE)
7324   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
7325 
7326 stacksize = 0;
7327 if (repeat_type == OP_MINUPTO)
7328   {
7329   /* We need to preserve the counter. TMP2 will be used below. */
7330   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
7331   stacksize++;
7332   }
7333 if (ket != OP_KET || bra != OP_BRA)
7334   stacksize++;
7335 if (offset != 0)
7336   {
7337   if (common->capture_last_ptr != 0)
7338     stacksize++;
7339   if (common->optimized_cbracket[offset >> 1] == 0)
7340     stacksize += 2;
7341   }
7342 if (has_alternatives && opcode != OP_ONCE)
7343   stacksize++;
7344 
7345 if (stacksize > 0)
7346   allocate_stack(common, stacksize);
7347 
7348 stacksize = 0;
7349 if (repeat_type == OP_MINUPTO)
7350   {
7351   /* TMP2 was set above. */
7352   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
7353   stacksize++;
7354   }
7355 
7356 if (ket != OP_KET || bra != OP_BRA)
7357   {
7358   if (ket != OP_KET)
7359     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7360   else
7361     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7362   stacksize++;
7363   }
7364 
7365 if (offset != 0)
7366   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
7367 
7368 if (has_alternatives)
7369   {
7370   if (opcode != OP_ONCE)
7371     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7372   if (ket != OP_KETRMAX)
7373     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7374   }
7375 
7376 /* Must be after the matchingpath label. */
7377 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
7378   {
7379   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
7380   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7381   }
7382 
7383 if (ket == OP_KETRMAX)
7384   {
7385   if (repeat_type != 0)
7386     {
7387     if (has_alternatives)
7388       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7389     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
7390     JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
7391     /* Drop STR_PTR for greedy plus quantifier. */
7392     if (opcode != OP_ONCE)
7393       free_stack(common, 1);
7394     }
7395   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
7396     {
7397     if (has_alternatives)
7398       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7399     /* Checking zero-length iteration. */
7400     if (opcode != OP_ONCE)
7401       {
7402       CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
7403       /* Drop STR_PTR for greedy plus quantifier. */
7404       if (bra != OP_BRAZERO)
7405         free_stack(common, 1);
7406       }
7407     else
7408       /* TMP2 must contain the starting STR_PTR. */
7409       CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
7410     }
7411   else
7412     JUMPTO(SLJIT_JUMP, rmax_label);
7413   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7414   }
7415 
7416 if (repeat_type == OP_EXACT)
7417   {
7418   count_match(common);
7419   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
7420   JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
7421   }
7422 else if (repeat_type == OP_UPTO)
7423   {
7424   /* We need to preserve the counter. */
7425   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
7426   allocate_stack(common, 1);
7427   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7428   }
7429 
7430 if (bra == OP_BRAZERO)
7431   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
7432 
7433 if (bra == OP_BRAMINZERO)
7434   {
7435   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
7436   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
7437   if (braminzero != NULL)
7438     {
7439     JUMPHERE(braminzero);
7440     /* We need to release the end pointer to perform the
7441     backtrack for the zero-length iteration. When
7442     framesize is < 0, OP_ONCE will do the release itself. */
7443     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
7444       {
7445       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7446       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7447       }
7448     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
7449       free_stack(common, 1);
7450     }
7451   /* Continue to the normal backtrack. */
7452   }
7453 
7454 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
7455   count_match(common);
7456 
7457 /* Skip the other alternatives. */
7458 while (*cc == OP_ALT)
7459   cc += GET(cc, 1);
7460 cc += 1 + LINK_SIZE;
7461 
7462 /* Temporarily encoding the needs_control_head in framesize. */
7463 if (opcode == OP_ONCE)
7464   BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
7465 return cc + repeat_length;
7466 }
7467 
/* Generates the matching path for a possessively repeated bracket:
OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by
OP_BRAPOSZERO.

Arguments:
  common   the shared compiler state
  cc       points to the (optional OP_BRAPOSZERO and the) opening bracket
  parent   the parent backtrack; not referenced directly in this function
           (presumably consumed by PUSH_BACKTRACK - confirm at the macro)

Returns:   pointer to the code unit following the closing OP_KETRPOS
           (cc + 1 + LINK_SIZE), or NULL when the sljit compiler reports
           an error

Emitted code layout: the saved state is pushed once, then every alternative
is compiled in turn. A successful alternative records the new position and
jumps back to "loop"; the backtracking path of an alternative restores
STR_PTR and falls through to the next alternative. When the group may not
match zero times (no OP_BRAPOSZERO), a flag slot is initialised to 1, cleared
by every successful iteration and checked after the last alternative. */
static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
pcre_uchar *ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on "offset" is the index of the first ovector slot of the
  capture (capture number times two). */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No stack frame is needed. Count the slots: either the ovector pair
  (plus capture_last when it is tracked) or the starting STR_PTR, then the
  control head and the "not matched yet" flag when required. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag is stored above the control head slot; afterwards "stack" is
  moved back so that it indexes the control head slot itself. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A stack frame is required. Slots in order: the flag (when !zero), the
  control head (when needed), the starting STR_PTR (non-capturing only), the
  previous private data value, and then the frame itself. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step back over the STR_PTR slot (if any) and one more, so that "stack"
  indexes the control head slot when there is one. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  /* Each alternative gets a fresh backtrack list; cc is advanced to the
  end of the current alternative before it is compiled. */
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 = start of this iteration; it is also the value compared
      against STR_PTR by the empty match check below. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* An iteration which did not advance STR_PTR ends the repetition. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration has matched: clear the flag. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
      }

    /* An iteration which did not advance STR_PTR ends the repetition. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration has matched: clear the flag. framesize is
    never negative on this path, and the frame setup above stored the flag
    in STACK(0). */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  if (needs_control_head)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* Backtracking path of this alternative: restore STR_PTR to the start of
  the current iteration and continue with the next alternative. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag which is still non-zero means that no iteration succeeded. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
7745 
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,int * max,int * min,pcre_uchar ** end)7746 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7747 {
7748 int class_len;
7749 
7750 *opcode = *cc;
7751 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
7752   {
7753   cc++;
7754   *type = OP_CHAR;
7755   }
7756 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
7757   {
7758   cc++;
7759   *type = OP_CHARI;
7760   *opcode -= OP_STARI - OP_STAR;
7761   }
7762 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
7763   {
7764   cc++;
7765   *type = OP_NOT;
7766   *opcode -= OP_NOTSTAR - OP_STAR;
7767   }
7768 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
7769   {
7770   cc++;
7771   *type = OP_NOTI;
7772   *opcode -= OP_NOTSTARI - OP_STAR;
7773   }
7774 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
7775   {
7776   cc++;
7777   *opcode -= OP_TYPESTAR - OP_STAR;
7778   *type = 0;
7779   }
7780 else
7781   {
7782   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7783   *type = *opcode;
7784   cc++;
7785   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
7786   *opcode = cc[class_len - 1];
7787   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7788     {
7789     *opcode -= OP_CRSTAR - OP_STAR;
7790     if (end != NULL)
7791       *end = cc + class_len;
7792     }
7793   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7794     {
7795     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7796     if (end != NULL)
7797       *end = cc + class_len;
7798     }
7799   else
7800     {
7801     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7802     *max = GET2(cc, (class_len + IMM2_SIZE));
7803     *min = GET2(cc, class_len);
7804 
7805     if (*min == 0)
7806       {
7807       SLJIT_ASSERT(*max != 0);
7808       *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7809       }
7810     if (*max == *min)
7811       *opcode = OP_EXACT;
7812 
7813     if (end != NULL)
7814       *end = cc + class_len + 2 * IMM2_SIZE;
7815     }
7816   return cc;
7817   }
7818 
7819 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7820   {
7821   *max = GET2(cc, 0);
7822   cc += IMM2_SIZE;
7823   }
7824 
7825 if (*type == 0)
7826   {
7827   *type = *cc;
7828   if (end != NULL)
7829     *end = next_opcode(common, cc);
7830   cc++;
7831   return cc;
7832   }
7833 
7834 if (end != NULL)
7835   {
7836   *end = cc + 1;
7837 #ifdef SUPPORT_UTF
7838   if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
7839 #endif
7840   }
7841 return cc;
7842 }
7843 
compile_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7844 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7845 {
7846 DEFINE_COMPILER;
7847 backtrack_common *backtrack;
7848 pcre_uchar opcode;
7849 pcre_uchar type;
7850 int max = -1, min = -1;
7851 pcre_uchar* end;
7852 jump_list *nomatch = NULL;
7853 struct sljit_jump *jump = NULL;
7854 struct sljit_label *label;
7855 int private_data_ptr = PRIVATE_DATA(cc);
7856 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
7857 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7858 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
7859 int tmp_base, tmp_offset;
7860 
7861 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
7862 
7863 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
7864 
7865 switch(type)
7866   {
7867   case OP_NOT_DIGIT:
7868   case OP_DIGIT:
7869   case OP_NOT_WHITESPACE:
7870   case OP_WHITESPACE:
7871   case OP_NOT_WORDCHAR:
7872   case OP_WORDCHAR:
7873   case OP_ANY:
7874   case OP_ALLANY:
7875   case OP_ANYBYTE:
7876   case OP_ANYNL:
7877   case OP_NOT_HSPACE:
7878   case OP_HSPACE:
7879   case OP_NOT_VSPACE:
7880   case OP_VSPACE:
7881   case OP_CHAR:
7882   case OP_CHARI:
7883   case OP_NOT:
7884   case OP_NOTI:
7885   case OP_CLASS:
7886   case OP_NCLASS:
7887   tmp_base = TMP3;
7888   tmp_offset = 0;
7889   break;
7890 
7891   default:
7892   SLJIT_ASSERT_STOP();
7893   /* Fall through. */
7894 
7895   case OP_EXTUNI:
7896   case OP_XCLASS:
7897   case OP_NOTPROP:
7898   case OP_PROP:
7899   tmp_base = SLJIT_MEM1(SLJIT_SP);
7900   tmp_offset = POSSESSIVE0;
7901   break;
7902   }
7903 
7904 switch(opcode)
7905   {
7906   case OP_STAR:
7907   case OP_PLUS:
7908   case OP_UPTO:
7909   case OP_CRRANGE:
7910   if (type == OP_ANYNL || type == OP_EXTUNI)
7911     {
7912     SLJIT_ASSERT(private_data_ptr == 0);
7913     if (opcode == OP_STAR || opcode == OP_UPTO)
7914       {
7915       allocate_stack(common, 2);
7916       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7917       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7918       }
7919     else
7920       {
7921       allocate_stack(common, 1);
7922       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7923       }
7924 
7925     if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7926       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
7927 
7928     label = LABEL();
7929     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7930     if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7931       {
7932       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
7933       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7934       if (opcode == OP_CRRANGE && min > 0)
7935         CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
7936       if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
7937         jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7938       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
7939       }
7940 
7941     /* We cannot use TMP3 because of this allocate_stack. */
7942     allocate_stack(common, 1);
7943     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7944     JUMPTO(SLJIT_JUMP, label);
7945     if (jump != NULL)
7946       JUMPHERE(jump);
7947     }
7948   else
7949     {
7950     if (opcode == OP_PLUS)
7951       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7952     if (private_data_ptr == 0)
7953       allocate_stack(common, 2);
7954     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7955     if (opcode <= OP_PLUS)
7956       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
7957     else
7958       OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
7959     label = LABEL();
7960     compile_char1_matchingpath(common, type, cc, &nomatch);
7961     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7962     if (opcode <= OP_PLUS)
7963       JUMPTO(SLJIT_JUMP, label);
7964     else if (opcode == OP_CRRANGE && max == 0)
7965       {
7966       OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
7967       JUMPTO(SLJIT_JUMP, label);
7968       }
7969     else
7970       {
7971       OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7972       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7973       OP1(SLJIT_MOV, base, offset1, TMP1, 0);
7974       CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
7975       }
7976     set_jumps(nomatch, LABEL());
7977     if (opcode == OP_CRRANGE)
7978       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1));
7979     OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7980     }
7981   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7982   break;
7983 
7984   case OP_MINSTAR:
7985   case OP_MINPLUS:
7986   if (opcode == OP_MINPLUS)
7987     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7988   if (private_data_ptr == 0)
7989     allocate_stack(common, 1);
7990   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7991   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7992   break;
7993 
7994   case OP_MINUPTO:
7995   case OP_CRMINRANGE:
7996   if (private_data_ptr == 0)
7997     allocate_stack(common, 2);
7998   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7999   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
8000   if (opcode == OP_CRMINRANGE)
8001     add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8002   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8003   break;
8004 
8005   case OP_QUERY:
8006   case OP_MINQUERY:
8007   if (private_data_ptr == 0)
8008     allocate_stack(common, 1);
8009   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8010   if (opcode == OP_QUERY)
8011     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8012   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8013   break;
8014 
8015   case OP_EXACT:
8016   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
8017   label = LABEL();
8018   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8019   OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8020   JUMPTO(SLJIT_C_NOT_ZERO, label);
8021   break;
8022 
8023   case OP_POSSTAR:
8024   case OP_POSPLUS:
8025   case OP_POSUPTO:
8026   if (opcode == OP_POSPLUS)
8027     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8028   if (opcode == OP_POSUPTO)
8029     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
8030   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8031   label = LABEL();
8032   compile_char1_matchingpath(common, type, cc, &nomatch);
8033   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8034   if (opcode != OP_POSUPTO)
8035     JUMPTO(SLJIT_JUMP, label);
8036   else
8037     {
8038     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
8039     JUMPTO(SLJIT_C_NOT_ZERO, label);
8040     }
8041   set_jumps(nomatch, LABEL());
8042   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8043   break;
8044 
8045   case OP_POSQUERY:
8046   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8047   compile_char1_matchingpath(common, type, cc, &nomatch);
8048   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8049   set_jumps(nomatch, LABEL());
8050   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8051   break;
8052 
8053   case OP_CRPOSRANGE:
8054   /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
8055   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
8056   label = LABEL();
8057   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8058   OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8059   JUMPTO(SLJIT_C_NOT_ZERO, label);
8060 
8061   if (max != 0)
8062     {
8063     SLJIT_ASSERT(max - min > 0);
8064     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
8065     }
8066   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8067   label = LABEL();
8068   compile_char1_matchingpath(common, type, cc, &nomatch);
8069   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8070   if (max == 0)
8071     JUMPTO(SLJIT_JUMP, label);
8072   else
8073     {
8074     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
8075     JUMPTO(SLJIT_C_NOT_ZERO, label);
8076     }
8077   set_jumps(nomatch, LABEL());
8078   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8079   break;
8080 
8081   default:
8082   SLJIT_ASSERT_STOP();
8083   break;
8084   }
8085 
8086 count_match(common);
8087 return end;
8088 }
8089 
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8090 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8091 {
8092 DEFINE_COMPILER;
8093 backtrack_common *backtrack;
8094 
8095 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8096 
8097 if (*cc == OP_FAIL)
8098   {
8099   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8100   return cc + 1;
8101   }
8102 
8103 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8104   {
8105   /* No need to check notempty conditions. */
8106   if (common->accept_label == NULL)
8107     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8108   else
8109     JUMPTO(SLJIT_JUMP, common->accept_label);
8110   return cc + 1;
8111   }
8112 
8113 if (common->accept_label == NULL)
8114   add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
8115 else
8116   CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
8117 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8118 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
8119 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8120 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
8121 if (common->accept_label == NULL)
8122   add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8123 else
8124   CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
8125 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8126 if (common->accept_label == NULL)
8127   add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
8128 else
8129   CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
8130 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8131 return cc + 1;
8132 }
8133 
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)8134 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
8135 {
8136 DEFINE_COMPILER;
8137 int offset = GET2(cc, 1);
8138 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
8139 
8140 /* Data will be discarded anyway... */
8141 if (common->currententry != NULL)
8142   return cc + 1 + IMM2_SIZE;
8143 
8144 if (!optimized_cbracket)
8145   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
8146 offset <<= 1;
8147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8148 if (!optimized_cbracket)
8149   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8150 return cc + 1 + IMM2_SIZE;
8151 }
8152 
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8153 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8154 {
8155 DEFINE_COMPILER;
8156 backtrack_common *backtrack;
8157 pcre_uchar opcode = *cc;
8158 pcre_uchar *ccend = cc + 1;
8159 
8160 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
8161   ccend += 2 + cc[1];
8162 
8163 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8164 
8165 if (opcode == OP_SKIP)
8166   {
8167   allocate_stack(common, 1);
8168   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8169   return ccend;
8170   }
8171 
8172 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
8173   {
8174   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8175   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8176   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
8177   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8178   }
8179 
8180 return ccend;
8181 }
8182 
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
records created in this file point their common.cc field at it. */
8183 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
8184 
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)8185 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8186 {
8187 DEFINE_COMPILER;
8188 backtrack_common *backtrack;
8189 BOOL needs_control_head;
8190 int size;
8191 
8192 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8193 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
8194 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8195 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
8196 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
8197 
8198 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8199 size = 3 + (size < 0 ? 0 : size);
8200 
8201 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8202 allocate_stack(common, size);
8203 if (size > 3)
8204   OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
8205 else
8206   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
8207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
8208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
8209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
8210 
8211 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8212 if (size >= 0)
8213   init_frame(common, cc, ccend, size - 1, 0, FALSE);
8214 }
8215 
/* Compiles the matching path of the opcode sequence [cc, ccend).

Each opcode is dispatched to its specialised generator. The generators return
the address of the next opcode to compile; when one of them returns NULL this
function returns immediately. ccend must point at OP_END or at an opcode in
the OP_ALT .. OP_KETRPOS range (asserted below).

When common->has_then is set and common->then_offsets marks the starting
offset, the sequence is wrapped in a then-trap: a tail record is created
before the loop by compile_then_trap_matchingpath and a head record is pushed
after it, at which point the previous common->then_trap is restored.

Backtrack records are pushed onto parent; jumps of items that do not create a
record of their own are linked to the nextbacktracks list of the most recent
record (parent->top), or to parent->topbacktracks when there is none yet. */
compile_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)8216 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8217 {
8218 DEFINE_COMPILER;
8219 backtrack_common *backtrack;
8220 BOOL has_then_trap = FALSE;
8221 then_trap_backtrack *save_then_trap = NULL;
8222
8223 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
8224
8225 if (common->has_then && common->then_offsets[cc - common->start] != 0)
8226   {
8227   SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
8228   has_then_trap = TRUE;
8229   save_then_trap = common->then_trap;
8230   /* Tail item on backtrack. */
8231   compile_then_trap_matchingpath(common, cc, ccend, parent);
8232   }
8233
8234 while (cc < ccend)
8235   {
8236   switch(*cc)
8237     {
         /* Items compiled directly by compile_char1_matchingpath. */
8238     case OP_SOD:
8239     case OP_SOM:
8240     case OP_NOT_WORD_BOUNDARY:
8241     case OP_WORD_BOUNDARY:
8242     case OP_NOT_DIGIT:
8243     case OP_DIGIT:
8244     case OP_NOT_WHITESPACE:
8245     case OP_WHITESPACE:
8246     case OP_NOT_WORDCHAR:
8247     case OP_WORDCHAR:
8248     case OP_ANY:
8249     case OP_ALLANY:
8250     case OP_ANYBYTE:
8251     case OP_NOTPROP:
8252     case OP_PROP:
8253     case OP_ANYNL:
8254     case OP_NOT_HSPACE:
8255     case OP_HSPACE:
8256     case OP_NOT_VSPACE:
8257     case OP_VSPACE:
8258     case OP_EXTUNI:
8259     case OP_EODN:
8260     case OP_EOD:
8261     case OP_CIRC:
8262     case OP_CIRCM:
8263     case OP_DOLL:
8264     case OP_DOLLM:
8265     case OP_NOT:
8266     case OP_NOTI:
8267     case OP_REVERSE:
8268     cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8269     break;
8270
         /* The previous OVECTOR(0) value is saved in a new stack slot and
         replaced by the current STR_PTR. */
8271     case OP_SET_SOM:
8272     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
8273     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
8274     allocate_stack(common, 1);
8275     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
8276     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8277     cc++;
8278     break;
8279
         /* In JIT_COMPILE mode literal characters go through
         compile_charn_matchingpath, which also receives ccend; in the other
         modes they are compiled one at a time. */
8280     case OP_CHAR:
8281     case OP_CHARI:
8282     if (common->mode == JIT_COMPILE)
8283       cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8284     else
8285       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8286     break;
8287
         /* Repeated single items. */
8288     case OP_STAR:
8289     case OP_MINSTAR:
8290     case OP_PLUS:
8291     case OP_MINPLUS:
8292     case OP_QUERY:
8293     case OP_MINQUERY:
8294     case OP_UPTO:
8295     case OP_MINUPTO:
8296     case OP_EXACT:
8297     case OP_POSSTAR:
8298     case OP_POSPLUS:
8299     case OP_POSQUERY:
8300     case OP_POSUPTO:
8301     case OP_STARI:
8302     case OP_MINSTARI:
8303     case OP_PLUSI:
8304     case OP_MINPLUSI:
8305     case OP_QUERYI:
8306     case OP_MINQUERYI:
8307     case OP_UPTOI:
8308     case OP_MINUPTOI:
8309     case OP_EXACTI:
8310     case OP_POSSTARI:
8311     case OP_POSPLUSI:
8312     case OP_POSQUERYI:
8313     case OP_POSUPTOI:
8314     case OP_NOTSTAR:
8315     case OP_NOTMINSTAR:
8316     case OP_NOTPLUS:
8317     case OP_NOTMINPLUS:
8318     case OP_NOTQUERY:
8319     case OP_NOTMINQUERY:
8320     case OP_NOTUPTO:
8321     case OP_NOTMINUPTO:
8322     case OP_NOTEXACT:
8323     case OP_NOTPOSSTAR:
8324     case OP_NOTPOSPLUS:
8325     case OP_NOTPOSQUERY:
8326     case OP_NOTPOSUPTO:
8327     case OP_NOTSTARI:
8328     case OP_NOTMINSTARI:
8329     case OP_NOTPLUSI:
8330     case OP_NOTMINPLUSI:
8331     case OP_NOTQUERYI:
8332     case OP_NOTMINQUERYI:
8333     case OP_NOTUPTOI:
8334     case OP_NOTMINUPTOI:
8335     case OP_NOTEXACTI:
8336     case OP_NOTPOSSTARI:
8337     case OP_NOTPOSPLUSI:
8338     case OP_NOTPOSQUERYI:
8339     case OP_NOTPOSUPTOI:
8340     case OP_TYPESTAR:
8341     case OP_TYPEMINSTAR:
8342     case OP_TYPEPLUS:
8343     case OP_TYPEMINPLUS:
8344     case OP_TYPEQUERY:
8345     case OP_TYPEMINQUERY:
8346     case OP_TYPEUPTO:
8347     case OP_TYPEMINUPTO:
8348     case OP_TYPEEXACT:
8349     case OP_TYPEPOSSTAR:
8350     case OP_TYPEPOSPLUS:
8351     case OP_TYPEPOSQUERY:
8352     case OP_TYPEPOSUPTO:
8353     cc = compile_iterator_matchingpath(common, cc, parent);
8354     break;
8355
         /* The unit found 32 bytes past the opcode tells whether the class is
         followed by a repeat (OP_CRSTAR .. OP_CRPOSRANGE). */
8356     case OP_CLASS:
8357     case OP_NCLASS:
8358     if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
8359       cc = compile_iterator_matchingpath(common, cc, parent);
8360     else
8361       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8362     break;
8363
8364 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
         /* Same test as above; here the following opcode is located through
         the value read by GET(cc, 1). */
8365     case OP_XCLASS:
8366     if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
8367       cc = compile_iterator_matchingpath(common, cc, parent);
8368     else
8369       cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8370     break;
8371 #endif
8372
         /* Back references, with or without a repeat after them. */
8373     case OP_REF:
8374     case OP_REFI:
8375     if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
8376       cc = compile_ref_iterator_matchingpath(common, cc, parent);
8377     else
8378       {
8379       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8380       cc += 1 + IMM2_SIZE;
8381       }
8382     break;
8383
         /* These carry two IMM2 values; compile_dnref_search is emitted
         before the reference itself is matched. */
8384     case OP_DNREF:
8385     case OP_DNREFI:
8386     if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
8387       cc = compile_ref_iterator_matchingpath(common, cc, parent);
8388     else
8389       {
8390       compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8391       compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8392       cc += 1 + 2 * IMM2_SIZE;
8393       }
8394     break;
8395
8396     case OP_RECURSE:
8397     cc = compile_recurse_matchingpath(common, cc, parent);
8398     break;
8399
8400     case OP_CALLOUT:
8401     cc = compile_callout_matchingpath(common, cc, parent);
8402     break;
8403
8404     case OP_ASSERT:
8405     case OP_ASSERT_NOT:
8406     case OP_ASSERTBACK:
8407     case OP_ASSERTBACK_NOT:
8408     PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
8409     cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
8410     break;
8411
         /* The whole bracket is stepped over with bracketend(); only stack
         data is stored on the matching path. One slot (STR_PTR) is used
         unless the bracket ends with OP_KETRMIN, in which case two slots
         (zero and STR_PTR) are used. */
8412     case OP_BRAMINZERO:
8413     PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
8414     cc = bracketend(cc + 1);
8415     if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
8416       {
8417       allocate_stack(common, 1);
8418       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8419       }
8420     else
8421       {
8422       allocate_stack(common, 2);
8423       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8424       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
8425       }
8426     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
8427     if (cc[1] > OP_ASSERTBACK_NOT)
8428       count_match(common);
8429     break;
8430
8431     case OP_ONCE:
8432     case OP_ONCE_NC:
8433     case OP_BRA:
8434     case OP_CBRA:
8435     case OP_COND:
8436     case OP_SBRA:
8437     case OP_SCBRA:
8438     case OP_SCOND:
8439     cc = compile_bracket_matchingpath(common, cc, parent);
8440     break;
8441
         /* OP_BRAZERO in front of an assertion opcode (anything up to
         OP_ASSERTBACK_NOT) is compiled as an assertion, otherwise as a
         bracket. */
8442     case OP_BRAZERO:
8443     if (cc[1] > OP_ASSERTBACK_NOT)
8444       cc = compile_bracket_matchingpath(common, cc, parent);
8445     else
8446       {
8447       PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
8448       cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
8449       }
8450     break;
8451
8452     case OP_BRAPOS:
8453     case OP_CBRAPOS:
8454     case OP_SBRAPOS:
8455     case OP_SCBRAPOS:
8456     case OP_BRAPOSZERO:
8457     cc = compile_bracketpos_matchingpath(common, cc, parent);
8458     break;
8459
         /* The previous mark value is saved on the stack and cc + 2 is
         stored as the new mark, both in the frame and in the arguments
         structure. With has_skip_arg five slots are allocated instead of
         one: the other four hold the previous control head, the type_mark
         tag, cc + 2 and STR_PTR, and the control head is pointed at
         STACK_TOP. */
8460     case OP_MARK:
8461     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
8462     SLJIT_ASSERT(common->mark_ptr != 0);
8463     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
8464     allocate_stack(common, common->has_skip_arg ? 5 : 1);
8465     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8466     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
8467     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8468     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
8469     OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8470     if (common->has_skip_arg)
8471       {
8472       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8473       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
8474       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
8475       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
8476       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
8477       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8478       }
8479     cc += 1 + 2 + cc[1];
8480     break;
8481
8482     case OP_PRUNE:
8483     case OP_PRUNE_ARG:
8484     case OP_SKIP:
8485     case OP_SKIP_ARG:
8486     case OP_THEN:
8487     case OP_THEN_ARG:
8488     case OP_COMMIT:
8489     cc = compile_control_verb_matchingpath(common, cc, parent);
8490     break;
8491
8492     case OP_FAIL:
8493     case OP_ACCEPT:
8494     case OP_ASSERT_ACCEPT:
8495     cc = compile_fail_accept_matchingpath(common, cc, parent);
8496     break;
8497
8498     case OP_CLOSE:
8499     cc = compile_close_matchingpath(common, cc);
8500     break;
8501
         /* No code is emitted for the bracket that follows OP_SKIPZERO. */
8502     case OP_SKIPZERO:
8503     cc = bracketend(cc + 1);
8504     break;
8505
8506     default:
8507     SLJIT_ASSERT_STOP();
8508     return;
8509     }
       /* A NULL result from a generator ends the compilation of this
       sequence. */
8510   if (cc == NULL)
8511     return;
8512   }
8513
8514 if (has_then_trap)
8515   {
8516   /* Head item on backtrack. */
8517   PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8518   BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8519   BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
8520   common->then_trap = save_then_trap;
8521   }
8522 SLJIT_ASSERT(cc == ccend);
8523 }
8524 
/* The backtrack-record helper macros used by the matching-path generators
above are retired here. */
8525 #undef PUSH_BACKTRACK
8526 #undef PUSH_BACKTRACK_NOVALUE
8527 #undef BACKTRACK_AS
8528
/* Compiles the backtracking path of `current` and returns from the calling
(void) function when the sljit compiler reports an error. `common` and
`compiler` must be in scope where the macro is used. */
8529 #define COMPILE_BACKTRACKINGPATH(current) \
8530   do \
8531     { \
8532     compile_backtrackingpath(common, (current)); \
8533     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
8534       return; \
8535     } \
8536   while (0)
8537
/* Views the local backtrack record pointer `current` as a pointer to the
given record type. */
8538 #define CURRENT_AS(type) ((type *)current)
8539 
/* Generates the backtracking path of a single-item iterator.

current->cc identifies the iterator; get_iterator_parameters decodes its
opcode, the repeated item type and the min / max counts. The iterator keeps
up to two words of state, called slot 0 and slot 1 below. When
PRIVATE_DATA(cc) is zero they are STACK(0) and STACK(1) relative to
STACK_TOP (and are released with free_stack once the iterator is exhausted);
otherwise they are private_data_ptr and the following word relative to
SLJIT_SP. */
compile_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)8540 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8541 {
8542 DEFINE_COMPILER;
8543 pcre_uchar *cc = current->cc;
8544 pcre_uchar opcode;
8545 pcre_uchar type;
8546 int max = -1, min = -1;
8547 struct sljit_label *label = NULL;
8548 struct sljit_jump *jump = NULL;
8549 jump_list *jumplist = NULL;
8550 int private_data_ptr = PRIVATE_DATA(cc);
8551 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8552 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8553 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8554
8555 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
8556
8557 switch(opcode)
8558   {
8559   case OP_STAR:
8560   case OP_PLUS:
8561   case OP_UPTO:
8562   case OP_CRRANGE:
8563   if (type == OP_ANYNL || type == OP_EXTUNI)
8564     {
         /* One saved STR_PTR is popped per backtrack; a non-zero value
         resumes the matching path, zero falls through. */
8565     SLJIT_ASSERT(private_data_ptr == 0);
8566     set_jumps(current->topbacktracks, LABEL());
8567     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8568     free_stack(common, 1);
8569     CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8570     }
8571   else
8572     {
8573     if (opcode == OP_UPTO)
8574       min = 0;
8575     if (opcode <= OP_PLUS)
8576       {
           /* Reload STR_PTR from slot 0 and leave the loop when it is not
           above the value kept in slot 1. */
8577       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8578       jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
8579       }
8580     else
8581       {
           /* Slot 1 is compared with min + 1: leave the loop when it is not
           greater, otherwise store it back decremented by one. */
8582       OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8583       OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8584       jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
8585       OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
8586       }
         /* Step back with skip_char_back, save the new position in slot 0
         and retry the matching path. */
8587     skip_char_back(common);
8588     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8589     JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
         /* For OP_CRRANGE the pending top backtracks land before the stack
         slots are released, for OP_PLUS after that. */
8590     if (opcode == OP_CRRANGE)
8591       set_jumps(current->topbacktracks, LABEL());
8592     JUMPHERE(jump);
8593     if (private_data_ptr == 0)
8594       free_stack(common, 2);
8595     if (opcode == OP_PLUS)
8596       set_jumps(current->topbacktracks, LABEL());
8597     }
8598   break;
8599
8600   case OP_MINSTAR:
8601   case OP_MINPLUS:
       /* Try to match one more item from the position saved in slot 0. On
       success the new position is saved and the matching path resumes; on
       failure (jumplist) the slot is released. */
8602   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8603   compile_char1_matchingpath(common, type, cc, &jumplist);
8604   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8605   JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8606   set_jumps(jumplist, LABEL());
8607   if (private_data_ptr == 0)
8608     free_stack(common, 1);
8609   if (opcode == OP_MINPLUS)
8610     set_jumps(current->topbacktracks, LABEL());
8611   break;
8612
8613   case OP_MINUPTO:
8614   case OP_CRMINRANGE:
       /* For OP_CRMINRANGE the top backtracks enter here, and the code
       loops back to this label while the incremented counter is below
       min + 1. */
8615   if (opcode == OP_CRMINRANGE)
8616     {
8617     label = LABEL();
8618     set_jumps(current->topbacktracks, label);
8619     }
8620   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8621   compile_char1_matchingpath(common, type, cc, &jumplist);
8622
       /* Save the new position in slot 0 and increment the counter in
       slot 1. */
8623   OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8624   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8625   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8626   OP1(SLJIT_MOV, base, offset1, TMP1, 0);
8627
8628   if (opcode == OP_CRMINRANGE)
8629     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
8630
       /* OP_CRMINRANGE with max == 0 always resumes the matching path; in
       every other case it is resumed only while the counter is below
       max + 2. */
8631   if (opcode == OP_CRMINRANGE && max == 0)
8632     JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8633   else
8634     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
8635
8636   set_jumps(jumplist, LABEL());
8637   if (private_data_ptr == 0)
8638     free_stack(common, 2);
8639   break;
8640
8641   case OP_QUERY:
       /* Slot 0 is cleared as soon as it has been read, so the matching
       path is resumed only when a non-zero position was stored there. The
       top backtracks get their own copy of the reload sequence, which
       resumes the matching path unconditionally. */
8642   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8643   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8644   CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8645   jump = JUMP(SLJIT_JUMP);
8646   set_jumps(current->topbacktracks, LABEL());
8647   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8648   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8649   JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8650   JUMPHERE(jump);
8651   if (private_data_ptr == 0)
8652     free_stack(common, 1);
8653   break;
8654
8655   case OP_MINQUERY:
       /* Slot 0 is read and cleared; when it held zero nothing more is
       tried. Otherwise one item is matched from the saved position and the
       matching path resumes if that succeeds. */
8656   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8657   OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8658   jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8659   compile_char1_matchingpath(common, type, cc, &jumplist);
8660   JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8661   set_jumps(jumplist, LABEL());
8662   JUMPHERE(jump);
8663   if (private_data_ptr == 0)
8664     free_stack(common, 1);
8665   break;
8666
       /* These only need a landing label for their top backtracks. */
8667   case OP_EXACT:
8668   case OP_POSPLUS:
8669   case OP_CRPOSRANGE:
8670   set_jumps(current->topbacktracks, LABEL());
8671   break;
8672
       /* Nothing is emitted for these. */
8673   case OP_POSSTAR:
8674   case OP_POSQUERY:
8675   case OP_POSUPTO:
8676   break;
8677
8678   default:
8679   SLJIT_ASSERT_STOP();
8680   break;
8681   }
8682 }
8683 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)8684 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8685 {
8686 DEFINE_COMPILER;
8687 pcre_uchar *cc = current->cc;
8688 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8689 pcre_uchar type;
8690 
8691 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
8692 
8693 if ((type & 0x1) == 0)
8694   {
8695   /* Maximize case. */
8696   set_jumps(current->topbacktracks, LABEL());
8697   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8698   free_stack(common, 1);
8699   CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8700   return;
8701   }
8702 
8703 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8704 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8705 set_jumps(current->topbacktracks, LABEL());
8706 free_stack(common, ref ? 2 : 3);
8707 }
8708 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)8709 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8710 {
8711 DEFINE_COMPILER;
8712 
8713 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8714   compile_backtrackingpath(common, current->top);
8715 set_jumps(current->topbacktracks, LABEL());
8716 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8717   return;
8718 
8719 if (common->has_set_som && common->mark_ptr != 0)
8720   {
8721   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8722   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8723   free_stack(common, 2);
8724   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
8725   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
8726   }
8727 else if (common->has_set_som || common->mark_ptr != 0)
8728   {
8729   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8730   free_stack(common, 1);
8731   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
8732   }
8733 }
8734 
/* Generates the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is not allowed here).

With OP_BRAZERO the saved STR_PTR is first reloaded from STACK(0). Then:
 - framesize < 0: the top backtracks land here; with OP_BRAZERO the stack
   slot is zeroed, the matching path is resumed when the reloaded STR_PTR is
   non-zero, and the slot is released otherwise.
 - framesize >= 0 with OP_BRAZERO in front of a negative assertion: the same
   zero / resume / release sequence, without a landing label.
 - framesize >= 0 otherwise: for OP_ASSERT and OP_ASSERTBACK the frame is
   reverted through common->revertframes and the private data slot is
   reloaded from the frame; the top backtracks land afterwards. With
   OP_BRAZERO the slot released earlier is re-taken, zeroed, and the matching
   path is resumed, unless the reloaded STR_PTR was zero. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *record = CURRENT_AS(assert_backtrack);
pcre_uchar *cc = current->cc;
BOOL optional = FALSE;
BOOL is_positive;
BOOL is_negative;
struct sljit_jump *exhausted = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  optional = TRUE;
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

is_positive = (*cc == OP_ASSERT || *cc == OP_ASSERTBACK);
is_negative = (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT);

if (record->framesize < 0)
  {
  set_jumps(current->topbacktracks, LABEL());

  if (optional)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, record->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (optional)
  {
  if (is_negative)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, record->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  exhausted = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (is_positive)
  {
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), record->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), record->private_data_ptr, SLJIT_MEM1(STACK_TOP), record->framesize * sizeof(sljit_sw));
  }
set_jumps(current->topbacktracks, LABEL());

if (optional)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, record->matchingpath);
  JUMPHERE(exhausted);
  }
}
8801 
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)8802 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8803 {
8804 DEFINE_COMPILER;
8805 int opcode, stacksize, alt_count, alt_max;
8806 int offset = 0;
8807 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8808 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
8809 pcre_uchar *cc = current->cc;
8810 pcre_uchar *ccbegin;
8811 pcre_uchar *ccprev;
8812 pcre_uchar bra = OP_BRA;
8813 pcre_uchar ket;
8814 assert_backtrack *assert;
8815 sljit_uw *next_update_addr = NULL;
8816 BOOL has_alternatives;
8817 BOOL needs_control_head = FALSE;
8818 struct sljit_jump *brazero = NULL;
8819 struct sljit_jump *alt1 = NULL;
8820 struct sljit_jump *alt2 = NULL;
8821 struct sljit_jump *once = NULL;
8822 struct sljit_jump *cond = NULL;
8823 struct sljit_label *rmin_label = NULL;
8824 struct sljit_label *exact_label = NULL;
8825 
8826 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8827   {
8828   bra = *cc;
8829   cc++;
8830   }
8831 
8832 opcode = *cc;
8833 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
8834 ket = *ccbegin;
8835 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
8836   {
8837   repeat_ptr = PRIVATE_DATA(ccbegin);
8838   repeat_type = PRIVATE_DATA(ccbegin + 2);
8839   repeat_count = PRIVATE_DATA(ccbegin + 3);
8840   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
8841   if (repeat_type == OP_UPTO)
8842     ket = OP_KETRMAX;
8843   if (repeat_type == OP_MINUPTO)
8844     ket = OP_KETRMIN;
8845   }
8846 ccbegin = cc;
8847 cc += GET(cc, 1);
8848 has_alternatives = *cc == OP_ALT;
8849 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8850   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
8851 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8852   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
8853 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8854   opcode = OP_SCOND;
8855 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8856   opcode = OP_ONCE;
8857 
8858 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
8859 
8860 /* Decoding the needs_control_head in framesize. */
8861 if (opcode == OP_ONCE)
8862   {
8863   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
8864   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
8865   }
8866 
8867 if (ket != OP_KET && repeat_type != 0)
8868   {
8869   /* TMP1 is used in OP_KETRMIN below. */
8870   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8871   free_stack(common, 1);
8872   if (repeat_type == OP_UPTO)
8873     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
8874   else
8875     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
8876   }
8877 
8878 if (ket == OP_KETRMAX)
8879   {
8880   if (bra == OP_BRAZERO)
8881     {
8882     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8883     free_stack(common, 1);
8884     brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8885     }
8886   }
8887 else if (ket == OP_KETRMIN)
8888   {
8889   if (bra != OP_BRAMINZERO)
8890     {
8891     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8892     if (repeat_type != 0)
8893       {
8894       /* TMP1 was set a few lines above. */
8895       CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8896       /* Drop STR_PTR for non-greedy plus quantifier. */
8897       if (opcode != OP_ONCE)
8898         free_stack(common, 1);
8899       }
8900     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
8901       {
8902       /* Checking zero-length iteration. */
8903       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
8904         CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8905       else
8906         {
8907         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8908         CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8909         }
8910       /* Drop STR_PTR for non-greedy plus quantifier. */
8911       if (opcode != OP_ONCE)
8912         free_stack(common, 1);
8913       }
8914     else
8915       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8916     }
8917   rmin_label = LABEL();
8918   if (repeat_type != 0)
8919     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8920   }
8921 else if (bra == OP_BRAZERO)
8922   {
8923   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8924   free_stack(common, 1);
8925   brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8926   }
8927 else if (repeat_type == OP_EXACT)
8928   {
8929   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8930   exact_label = LABEL();
8931   }
8932 
8933 if (offset != 0)
8934   {
8935   if (common->capture_last_ptr != 0)
8936     {
8937     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
8938     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8939     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8940     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
8941     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
8942     free_stack(common, 3);
8943     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
8944     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
8945     }
8946   else if (common->optimized_cbracket[offset >> 1] == 0)
8947     {
8948     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8949     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8950     free_stack(common, 2);
8951     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8952     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
8953     }
8954   }
8955 
8956 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
8957   {
8958   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
8959     {
8960     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8961     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8962     }
8963   once = JUMP(SLJIT_JUMP);
8964   }
8965 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8966   {
8967   if (has_alternatives)
8968     {
8969     /* Always exactly one alternative. */
8970     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8971     free_stack(common, 1);
8972 
8973     alt_max = 2;
8974     alt1 = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
8975     }
8976   }
8977 else if (has_alternatives)
8978   {
8979   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8980   free_stack(common, 1);
8981 
8982   if (alt_max > 4)
8983     {
8984     /* Table jump if alt_max is greater than 4. */
8985     next_update_addr = common->read_only_data_ptr;
8986     common->read_only_data_ptr += alt_max;
8987     sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
8988     add_label_addr(common, next_update_addr++);
8989     }
8990   else
8991     {
8992     if (alt_max == 4)
8993       alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
8994     alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
8995     }
8996   }
8997 
8998 COMPILE_BACKTRACKINGPATH(current->top);
8999 if (current->topbacktracks)
9000   set_jumps(current->topbacktracks, LABEL());
9001 
9002 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9003   {
9004   /* Conditional block always has at most one alternative. */
9005   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
9006     {
9007     SLJIT_ASSERT(has_alternatives);
9008     assert = CURRENT_AS(bracket_backtrack)->u.assert;
9009     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
9010       {
9011       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9012       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9013       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9014       }
9015     cond = JUMP(SLJIT_JUMP);
9016     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
9017     }
9018   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
9019     {
9020     SLJIT_ASSERT(has_alternatives);
9021     cond = JUMP(SLJIT_JUMP);
9022     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
9023     }
9024   else
9025     SLJIT_ASSERT(!has_alternatives);
9026   }
9027 
9028 if (has_alternatives)
9029   {
9030   alt_count = sizeof(sljit_uw);
9031   do
9032     {
9033     current->top = NULL;
9034     current->topbacktracks = NULL;
9035     current->nextbacktracks = NULL;
9036     /* Conditional blocks always have an additional alternative, even if it is empty. */
9037     if (*cc == OP_ALT)
9038       {
9039       ccprev = cc + 1 + LINK_SIZE;
9040       cc += GET(cc, 1);
9041       if (opcode != OP_COND && opcode != OP_SCOND)
9042         {
9043         if (opcode != OP_ONCE)
9044           {
9045           if (private_data_ptr != 0)
9046             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9047           else
9048             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9049           }
9050         else
9051           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
9052         }
9053       compile_matchingpath(common, ccprev, cc, current);
9054       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9055         return;
9056       }
9057 
9058     /* Instructions after the current alternative is successfully matched. */
9059     /* There is a similar code in compile_bracket_matchingpath. */
9060     if (opcode == OP_ONCE)
9061       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9062 
9063     stacksize = 0;
9064     if (repeat_type == OP_MINUPTO)
9065       {
9066       /* We need to preserve the counter. TMP2 will be used below. */
9067       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
9068       stacksize++;
9069       }
9070     if (ket != OP_KET || bra != OP_BRA)
9071       stacksize++;
9072     if (offset != 0)
9073       {
9074       if (common->capture_last_ptr != 0)
9075         stacksize++;
9076       if (common->optimized_cbracket[offset >> 1] == 0)
9077         stacksize += 2;
9078       }
9079     if (opcode != OP_ONCE)
9080       stacksize++;
9081 
9082     if (stacksize > 0)
9083       allocate_stack(common, stacksize);
9084 
9085     stacksize = 0;
9086     if (repeat_type == OP_MINUPTO)
9087       {
9088       /* TMP2 was set above. */
9089       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9090       stacksize++;
9091       }
9092 
9093     if (ket != OP_KET || bra != OP_BRA)
9094       {
9095       if (ket != OP_KET)
9096         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9097       else
9098         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9099       stacksize++;
9100       }
9101 
9102     if (offset != 0)
9103       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9104 
9105     if (opcode != OP_ONCE)
9106       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
9107 
9108     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
9109       {
9110       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
9111       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9112       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9113       }
9114 
9115     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
9116 
9117     if (opcode != OP_ONCE)
9118       {
9119       if (alt_max > 4)
9120         add_label_addr(common, next_update_addr++);
9121       else
9122         {
9123         if (alt_count != 2 * sizeof(sljit_uw))
9124           {
9125           JUMPHERE(alt1);
9126           if (alt_max == 3 && alt_count == sizeof(sljit_uw))
9127             alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9128           }
9129         else
9130           {
9131           JUMPHERE(alt2);
9132           if (alt_max == 4)
9133             alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
9134           }
9135         }
9136       alt_count += sizeof(sljit_uw);
9137       }
9138 
9139     COMPILE_BACKTRACKINGPATH(current->top);
9140     if (current->topbacktracks)
9141       set_jumps(current->topbacktracks, LABEL());
9142     SLJIT_ASSERT(!current->nextbacktracks);
9143     }
9144   while (*cc == OP_ALT);
9145 
9146   if (cond != NULL)
9147     {
9148     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
9149     assert = CURRENT_AS(bracket_backtrack)->u.assert;
9150     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
9151       {
9152       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9153       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9154       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9155       }
9156     JUMPHERE(cond);
9157     }
9158 
9159   /* Free the STR_PTR. */
9160   if (private_data_ptr == 0)
9161     free_stack(common, 1);
9162   }
9163 
9164 if (offset != 0)
9165   {
9166   /* Using both tmp register is better for instruction scheduling. */
9167   if (common->optimized_cbracket[offset >> 1] != 0)
9168     {
9169     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9170     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9171     free_stack(common, 2);
9172     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9173     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9174     }
9175   else
9176     {
9177     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9178     free_stack(common, 1);
9179     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9180     }
9181   }
9182 else if (opcode == OP_SBRA || opcode == OP_SCOND)
9183   {
9184   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
9185   free_stack(common, 1);
9186   }
9187 else if (opcode == OP_ONCE)
9188   {
9189   cc = ccbegin + GET(ccbegin, 1);
9190   stacksize = needs_control_head ? 1 : 0;
9191 
9192   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9193     {
9194     /* Reset head and drop saved frame. */
9195     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
9196     }
9197   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
9198     {
9199     /* The STR_PTR must be released. */
9200     stacksize++;
9201     }
9202   free_stack(common, stacksize);
9203 
9204   JUMPHERE(once);
9205   /* Restore previous private_data_ptr */
9206   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9207     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
9208   else if (ket == OP_KETRMIN)
9209     {
9210     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9211     /* See the comment below. */
9212     free_stack(common, 2);
9213     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9214     }
9215   }
9216 
9217 if (repeat_type == OP_EXACT)
9218   {
9219   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9220   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
9221   CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
9222   }
9223 else if (ket == OP_KETRMAX)
9224   {
9225   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9226   if (bra != OP_BRAZERO)
9227     free_stack(common, 1);
9228 
9229   CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9230   if (bra == OP_BRAZERO)
9231     {
9232     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9233     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9234     JUMPHERE(brazero);
9235     free_stack(common, 1);
9236     }
9237   }
9238 else if (ket == OP_KETRMIN)
9239   {
9240   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9241 
9242   /* OP_ONCE removes everything in case of a backtrack, so we don't
9243   need to explicitly release the STR_PTR. The extra release would
9244   affect badly the free_stack(2) above. */
9245   if (opcode != OP_ONCE)
9246     free_stack(common, 1);
9247   CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
9248   if (opcode == OP_ONCE)
9249     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
9250   else if (bra == OP_BRAMINZERO)
9251     free_stack(common, 1);
9252   }
9253 else if (bra == OP_BRAZERO)
9254   {
9255   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9256   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9257   JUMPHERE(brazero);
9258   }
9259 }
9260 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)9261 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9262 {
9263 DEFINE_COMPILER;
9264 int offset;
9265 struct sljit_jump *jump;
9266 
9267 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
9268   {
9269   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
9270     {
9271     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
9272     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9273     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9274     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9275     if (common->capture_last_ptr != 0)
9276       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9277     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9278     if (common->capture_last_ptr != 0)
9279       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
9280     }
9281   set_jumps(current->topbacktracks, LABEL());
9282   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9283   return;
9284   }
9285 
9286 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
9287 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9288 
9289 if (current->topbacktracks)
9290   {
9291   jump = JUMP(SLJIT_JUMP);
9292   set_jumps(current->topbacktracks, LABEL());
9293   /* Drop the stack frame. */
9294   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9295   JUMPHERE(jump);
9296   }
9297 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
9298 }
9299 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)9300 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9301 {
9302 assert_backtrack backtrack;
9303 
9304 current->top = NULL;
9305 current->topbacktracks = NULL;
9306 current->nextbacktracks = NULL;
9307 if (current->cc[1] > OP_ASSERTBACK_NOT)
9308   {
9309   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
9310   compile_bracket_matchingpath(common, current->cc, current);
9311   compile_bracket_backtrackingpath(common, current->top);
9312   }
9313 else
9314   {
9315   memset(&backtrack, 0, sizeof(backtrack));
9316   backtrack.common.cc = current->cc;
9317   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
9318   /* Manual call of compile_assert_matchingpath. */
9319   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
9320   }
9321 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
9322 }
9323 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)9324 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9325 {
9326 DEFINE_COMPILER;
9327 pcre_uchar opcode = *current->cc;
9328 struct sljit_label *loop;
9329 struct sljit_jump *jump;
9330 
9331 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
9332   {
9333   if (common->then_trap != NULL)
9334     {
9335     SLJIT_ASSERT(common->control_head_ptr != 0);
9336 
9337     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9338     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
9339     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
9340     jump = JUMP(SLJIT_JUMP);
9341 
9342     loop = LABEL();
9343     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
9344     JUMPHERE(jump);
9345     CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
9346     CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
9347     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
9348     return;
9349     }
9350   else if (common->positive_assert)
9351     {
9352     add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
9353     return;
9354     }
9355   }
9356 
9357 if (common->local_exit)
9358   {
9359   if (common->quit_label == NULL)
9360     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9361   else
9362     JUMPTO(SLJIT_JUMP, common->quit_label);
9363   return;
9364   }
9365 
9366 if (opcode == OP_SKIP_ARG)
9367   {
9368   SLJIT_ASSERT(common->control_head_ptr != 0);
9369   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9370   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
9371   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
9372   sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
9373   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9374 
9375   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9376   add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
9377   return;
9378   }
9379 
9380 if (opcode == OP_SKIP)
9381   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9382 else
9383   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
9384 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
9385 }
9386 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)9387 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9388 {
9389 DEFINE_COMPILER;
9390 struct sljit_jump *jump;
9391 int size;
9392 
9393 if (CURRENT_AS(then_trap_backtrack)->then_trap)
9394   {
9395   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
9396   return;
9397   }
9398 
9399 size = CURRENT_AS(then_trap_backtrack)->framesize;
9400 size = 3 + (size < 0 ? 0 : size);
9401 
9402 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
9403 free_stack(common, size);
9404 jump = JUMP(SLJIT_JUMP);
9405 
9406 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
9407 /* STACK_TOP is set by THEN. */
9408 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
9409   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9410 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9411 free_stack(common, 3);
9412 
9413 JUMPHERE(jump);
9414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
9415 }
9416 
/* Emits the backtracking code for a chain of backtrack records.

The chain is walked from `current` through the `prev` links. For each record
the pending `nextbacktracks` jumps are bound to a fresh label, and the code
for the record is produced by the handler selected from the opcode stored at
`current->cc`.

common->then_trap is saved on entry and restored on exit, because handling
an OP_THEN_TRAP record may replace it while the chain is processed. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *save_then_trap = common->then_trap;

while (current)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());
  switch(*current->cc)
    {
    /* Pop one stack item and write it back to OVECTOR(0). */
    case OP_SET_SOM:
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single-item repeats and character classes share one handler. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    /* Back references. */
    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    /* The opcode after OP_BRAZERO decides the handler: values above
    OP_ASSERTBACK_NOT go to the bracket handler, the rest to the assertion
    handler. */
    case OP_BRAZERO:
    if (current->cc[1] > OP_ASSERTBACK_NOT)
      compile_bracket_backtrackingpath(common, current);
    else
      compile_assert_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    /* Restore mark_ptr from the stack. When has_skip_arg is set the record
    occupies five stack items instead of one: the mark value is then read
    from item 4 and item 0 is written back to control_head_ptr. */
    case OP_MARK:
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
    if (common->has_skip_arg)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
    if (common->has_skip_arg)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
    break;

    /* Backtracking control verbs. */
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    /* Backtracking onto a commit leaves through the quit path. Unless
    local_exit is set, the return register is first loaded with
    PCRE_ERROR_NOMATCH. The jump targets quit_label directly when it
    already exists, otherwise it is queued on the quit list. */
    case OP_COMMIT:
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label == NULL)
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->quit_label);
    break;

    /* These opcodes emit no code of their own here; their pending top
    backtracks are simply bound to the current position. */
    case OP_CALLOUT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    /* Any other opcode reaching this point is an internal error. */
    default:
    SLJIT_ASSERT_STOP();
    break;
    }
  current = current->prev;
  }
common->then_trap = save_then_trap;
}
9603 
/* Emits the out-of-line routine for the recursion entry described by
common->currententry.

The routine is entered with a fast call. It reserves a stack area of
private_data_size + framesize + alternativesize items, saves private data
into it with copy_private_data, compiles every alternative of the group, and
finally restores the private data, releases the area and returns with a fast
return. The outcome is reported in TMP1: 1 when an alternative reached the
accept path, 0 when none matched or the quit path was taken.

Compilation stops early (plain return) when the sljit compiler reports an
error after compiling a matching or backtracking path. */
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
pcre_uchar *cc = common->start + common->currententry->start;
/* Start of the first alternative: past the opcode and link, and for every
opcode other than OP_BRA past IMM2_SIZE more units (presumably the capture
number - confirm against the opcode layout). */
pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
pcre_uchar *ccend = bracketend(cc);
BOOL needs_control_head;
int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
int alternativesize;
BOOL needs_frame;
backtrack_common altbacktrack;
struct sljit_jump *jump;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means no frame is needed; it is then treated as
zero in all the size computations below. */
needs_frame = framesize >= 0;
if (!needs_frame)
  framesize = 0;
/* One extra stack item is reserved when the group has more than one
alternative; it holds STR_PTR so later alternatives can restart from it. */
alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;

/* Define the entry label and bind every call recorded so far to it. */
SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
common->currententry->entry = LABEL();
set_jumps(common->currententry->calls, common->currententry->entry);

/* The value received in TMP2 on entry (the return address, per the sljit
fast-call interface) is stored in the last item of the new stack area. */
sljit_emit_fast_enter(compiler, TMP2, 0);
allocate_stack(common, private_data_size + framesize + alternativesize);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
/* Remember the current STACK_TOP; the quit and accept paths reload it. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
if (needs_frame)
  init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);

if (alternativesize > 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

/* The routine collects its own quit and accept jumps, so the lists and
labels in common are cleared before the alternatives are compiled. */
memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
cc += GET(cc, 1);
/* One iteration per alternative: [altbacktrack.cc, cc) is its body. */
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative after the first reloads the saved STR_PTR. */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  /* Reaching the end of the alternative means it matched. */
  add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* None of them matched. */
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_JUMP);

/* Quit path, emitted only when something queued a jump on common->quit
while the alternatives were compiled. It reloads STACK_TOP, reverts the
frame when there is one, sets the result to 0 and then joins the common
exit through a fresh quit list that is bound further below. */
if (common->quit != NULL)
  {
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  if (needs_frame)
    {
    /* STACK_TOP is moved by the frame and alternative items around the
    revertframes call and moved back afterwards. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
    }
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  common->quit = NULL;
  add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  }

/* Accept path: same stack handling as the quit path, but the result is 1. */
set_jumps(common->accept, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
if (needs_frame)
  {
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);

/* Common exit for the no-match, quit and accept paths; TMP3 holds the
result at this point. */
JUMPHERE(jump);
if (common->quit != NULL)
  set_jumps(common->quit, LABEL());
copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
free_stack(common, private_data_size + framesize + alternativesize);
/* The loads below use raw offsets from STACK_TOP after the area has been
released. They write the loaded values to recursive_head_ptr (and to
control_head_ptr when it is in use) and move the result from TMP3 to TMP1.
NOTE(review): the items read here are presumably the previous values saved
by copy_private_data on entry - confirm against that function. */
if (needs_control_head)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
  }
else
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
  }
/* Return through the word addressed by STACK_TOP. */
sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
}
9727 
9728 #undef COMPILE_BACKTRACKINGPATH
9729 #undef CURRENT_AS
9730 
/* Generates machine code for the compiled pattern "re" and records it in the
executable_functions descriptor referenced by extra->executable_jit.

Arguments:
  re     the compiled pattern
  extra  study block; PCRE_EXTRA_STUDY_DATA must be set (asserted below)
         because extra->study_data is dereferenced unconditionally
  mode   selects the executable_funcs[] / read_only_data[] / executable_sizes[]
         slot that is filled; the code below distinguishes JIT_COMPILE,
         JIT_PARTIAL_SOFT_COMPILE and JIT_PARTIAL_HARD_COMPILE

Returns nothing. On every failure path (unrecognised newline setting, failed
allocation, check_opcode_types() returning false, local frame larger than
SLJIT_MAX_LOCAL_SIZE, sljit compiler error, failed code generation) the
temporary buffers allocated so far are released and the function returns
without modifying "extra" or the descriptor it points to; no error code is
reported. */
9731 void
PRIV(jit_compile)9732 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
9733 {
9734 struct sljit_compiler *compiler;
9735 backtrack_common rootbacktrack;
9736 compiler_common common_data;
9737 compiler_common *common = &common_data;
9738 const pcre_uint8 *tables = re->tables;
9739 pcre_study_data *study;
9740 int private_data_size;
9741 pcre_uchar *ccend;
9742 executable_functions *functions;
9743 void *executable_func;
9744 sljit_uw executable_size;
9745 sljit_uw total_length;
9746 label_addr_list *label_addr;
9747 struct sljit_label *mainloop_label = NULL;
9748 struct sljit_label *continue_match_label;
9749 struct sljit_label *empty_match_found_label = NULL;
9750 struct sljit_label *empty_match_backtrack_label = NULL;
9751 struct sljit_label *reset_match_label;
9752 struct sljit_label *quit_label;
9753 struct sljit_jump *jump;
9754 struct sljit_jump *minlength_check_failed = NULL;
9755 struct sljit_jump *reqbyte_notfound = NULL;
9756 struct sljit_jump *empty_match = NULL;
9757
9758 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9759 study = extra->study_data;
9760
     /* Fall back to the built-in character tables when the pattern has none. */
9761 if (!tables)
9762   tables = PRIV(default_tables);
9763
9764 memset(&rootbacktrack, 0, sizeof(backtrack_common));
9765 memset(common, 0, sizeof(compiler_common));
     /* The byte code begins right behind the name table entries; it is asserted
     further down to start with OP_BRA. */
9766 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9767
9768 common->start = rootbacktrack.cc;
9769 common->read_only_data = NULL;
9770 common->read_only_data_size = 0;
9771 common->read_only_data_ptr = NULL;
9772 common->fcc = tables + fcc_offset;
9773 common->lcc = (sljit_sw)(tables + lcc_offset);
9774 common->mode = mode;
9775 common->might_be_empty = study->minlength == 0;
     /* Newline convention: NLTYPE_FIXED unless one of the ANY / ANYCRLF settings
     is selected. The CR+LF, ANY and ANYCRLF cases store
     (CHAR_CR << 8) | CHAR_NL in common->newline. */
9776 common->nltype = NLTYPE_FIXED;
9777 switch(re->options & PCRE_NEWLINE_BITS)
9778   {
9779   case 0:
9780   /* Compile-time default */
9781   switch(NEWLINE)
9782     {
9783     case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9784     case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9785     default: common->newline = NEWLINE; break;
9786     }
9787   break;
9788   case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
9789   case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
9790   case PCRE_NEWLINE_CR+
9791        PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
9792   case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9793   case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
       /* Unrecognised newline bits: nothing has been allocated yet, so a plain
       return is sufficient. */
9794   default: return;
9795   }
     /* Default newline bounds; they are tightened below only when UTF is on. */
9796 common->nlmax = READ_CHAR_MAX;
9797 common->nlmin = 0;
     /* \R behaviour: the pattern options win, otherwise the build-time default
     (BSR_ANYCRLF) decides. */
9798 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9799   common->bsr_nltype = NLTYPE_ANYCRLF;
9800 else if ((re->options & PCRE_BSR_UNICODE) != 0)
9801   common->bsr_nltype = NLTYPE_ANY;
9802 else
9803   {
9804 #ifdef BSR_ANYCRLF
9805   common->bsr_nltype = NLTYPE_ANYCRLF;
9806 #else
9807   common->bsr_nltype = NLTYPE_ANY;
9808 #endif
9809   }
9810 common->bsr_nlmax = READ_CHAR_MAX;
9811 common->bsr_nlmin = 0;
9812 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9813 common->ctypes = (sljit_sw)(tables + ctypes_offset);
9814 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9815 common->name_count = re->name_count;
9816 common->name_entry_size = re->name_entry_size;
9817 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
9818 #ifdef SUPPORT_UTF
9819 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
9820 common->utf = (re->options & PCRE_UTF8) != 0;
9821 #ifdef SUPPORT_UCP
9822 common->use_ucp = (re->options & PCRE_UCP) != 0;
9823 #endif
9824 if (common->utf)
9825   {
9826   if (common->nltype == NLTYPE_ANY)
9827     common->nlmax = 0x2029;
9828   else if (common->nltype == NLTYPE_ANYCRLF)
9829     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9830   else
9831     {
9832     /* We only care about the first newline character. */
9833     common->nlmax = common->newline & 0xff;
9834     }
9835
9836   if (common->nltype == NLTYPE_FIXED)
9837     common->nlmin = common->newline & 0xff;
9838   else
9839     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9840
9841   if (common->bsr_nltype == NLTYPE_ANY)
9842     common->bsr_nlmax = 0x2029;
9843   else
9844     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9845   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9846   }
9847 #endif /* SUPPORT_UTF */
9848 ccend = bracketend(common->start);
9849
9850 /* Calculate the local space size on the stack. */
     /* Every optional slot reserved below is placed at the current
     ovector_start, which is then advanced past the slot. */
9851 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
     /* One flag byte per capturing bracket (plus entry 0). */
9852 common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1);
9853 if (!common->optimized_cbracket)
9854   return;
9855 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
9856 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9857 #else
9858 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9859 #endif
9860
9861 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9862 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9863 common->capture_last_ptr = common->ovector_start;
9864 common->ovector_start += sizeof(sljit_sw);
9865 #endif
     /* A false result aborts the JIT compilation of this pattern. */
9866 if (!check_opcode_types(common, common->start, ccend))
9867   {
9868   SLJIT_FREE(common->optimized_cbracket);
9869   return;
9870   }
9871
9872 /* Checking flags and updating ovector_start. */
     /* The required-character slot is reserved only for normal (non-partial)
     matching, and only when start optimizations are enabled. */
9873 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9874   {
9875   common->req_char_ptr = common->ovector_start;
9876   common->ovector_start += sizeof(sljit_sw);
9877   }
     /* Both partial modes reserve start_used_ptr. Soft partial matching
     additionally reserves two words for hit_start; hard partial matching asks
     for a separate start pointer instead. */
9878 if (mode != JIT_COMPILE)
9879   {
9880   common->start_used_ptr = common->ovector_start;
9881   common->ovector_start += sizeof(sljit_sw);
9882   if (mode == JIT_PARTIAL_SOFT_COMPILE)
9883     {
9884     common->hit_start = common->ovector_start;
9885     common->ovector_start += 2 * sizeof(sljit_sw);
9886     }
9887   else
9888     {
9889     SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9890     common->needs_start_ptr = TRUE;
9891     }
9892   }
9893 if ((re->options & PCRE_FIRSTLINE) != 0)
9894   {
9895   common->first_line_end = common->ovector_start;
9896   common->ovector_start += sizeof(sljit_sw);
9897   }
9898 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9899 common->control_head_ptr = 1;
9900 #endif
     /* Up to this point control_head_ptr only acts as a non-zero "needed" flag
     (presumably set by check_opcode_types() - TODO confirm); it is replaced
     here by the real slot offset. */
9901 if (common->control_head_ptr != 0)
9902   {
9903   common->control_head_ptr = common->ovector_start;
9904   common->ovector_start += sizeof(sljit_sw);
9905   }
9906 if (common->needs_start_ptr && common->has_set_som)
9907   {
9908   /* Saving the real start pointer is necessary. */
9909   common->start_ptr = common->ovector_start;
9910   common->ovector_start += sizeof(sljit_sw);
9911   }
9912 else
9913   common->needs_start_ptr = FALSE;
9914
9915 /* Aligning ovector to even number of sljit words. */
9916 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9917   common->ovector_start += sizeof(sljit_sw);
9918
     /* Without a dedicated slot, OVECTOR(0) doubles as the start pointer. */
9919 if (common->start_ptr == 0)
9920   common->start_ptr = OVECTOR(0);
9921
9922 /* Capturing brackets cannot be optimized if callouts are allowed. */
9923 if (common->capture_last_ptr != 0)
9924   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9925
9926 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9927 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9928
9929 total_length = ccend - common->start;
     /* One sljit_si per code unit, plus one extra byte per code unit when the
     pattern has a THEN verb; those extra bytes become then_offsets below. */
9930 common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)));
9931 if (!common->private_data_ptrs)
9932   {
9933   SLJIT_FREE(common->optimized_cbracket);
9934   return;
9935   }
     /* Only the sljit_si part is cleared here; the then_offsets bytes are
     cleared separately once they are set up. */
9936 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9937
9938 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9939 set_private_data_ptrs(common, &private_data_size, ccend);
     /* Give up if the local frame would not fit into what sljit supports. */
9940 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
9941   {
9942   SLJIT_FREE(common->private_data_ptrs);
9943   SLJIT_FREE(common->optimized_cbracket);
9944   return;
9945   }
9946
9947 if (common->has_then)
9948   {
9949   common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
9950   memset(common->then_offsets, 0, total_length);
9951   set_then_offsets(common, common->start, NULL);
9952   }
9953
     /* Allocate the read-only data area if the earlier passes requested one. */
9954 if (common->read_only_data_size > 0)
9955   {
9956   common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
9957   if (common->read_only_data == NULL)
9958     {
9959     SLJIT_FREE(common->optimized_cbracket);
9960     SLJIT_FREE(common->private_data_ptrs);
9961     return;
9962     }
9963   common->read_only_data_ptr = common->read_only_data;
9964   }
9965
9966 compiler = sljit_create_compiler();
9967 if (!compiler)
9968   {
9969   SLJIT_FREE(common->optimized_cbracket);
9970   SLJIT_FREE(common->private_data_ptrs);
9971   if (common->read_only_data)
9972     SLJIT_FREE(common->read_only_data);
9973   return;
9974   }
9975 common->compiler = compiler;
9976
9977 /* Main pcre_jit_exec entry. */
9978 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
9979
9980 /* Register init. */
9981 reset_ovector(common, (re->top_bracket + 1) * 2);
     /* NOTE(review): SLJIT_R0 is presumably left by reset_ovector() holding the
     initial ovector value - confirm against reset_ovector(). */
9982 if (common->req_char_ptr != 0)
9983   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
9984
     /* Load the subject pointers, the stack descriptor and the match limit from
     the jit_arguments block passed in SLJIT_S0. */
9985 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
9986 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
9987 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9988 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
9989 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9990 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
9991 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
9992 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
9993 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
9994
     /* Initial values of the optional slots: hit_start = -1, mark and control
     head = 0. */
9995 if (mode == JIT_PARTIAL_SOFT_COMPILE)
9996   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
9997 if (common->mark_ptr != 0)
9998   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
9999 if (common->control_head_ptr != 0)
10000   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10001
10002 /* Main part of the matching */
10003 if ((re->options & PCRE_ANCHORED) == 0)
10004   {
10005   mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10006   continue_match_label = LABEL();
10007   /* Forward search if possible. */
        /* At most one fast-forward strategy is emitted; the else-if chain
        tries them in this order. */
10008   if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
10009     {
10010     if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
10011       {
10012       /* If read_only_data is reallocated, we might have an allocation failure. */
10013       if (common->read_only_data_size > 0 && common->read_only_data == NULL)
10014         {
10015         sljit_free_compiler(compiler);
10016         SLJIT_FREE(common->optimized_cbracket);
10017         SLJIT_FREE(common->private_data_ptrs);
10018         return;
10019         }
10020       }
10021     else if ((re->flags & PCRE_FIRSTSET) != 0)
10022       fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10023     else if ((re->flags & PCRE_STARTLINE) != 0)
10024       fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
10025     else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
10026       fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
10027     }
10028   }
10029 else
10030   continue_match_label = LABEL();
10031
      /* Minimum length check: if fewer than study->minlength characters remain,
      leave with PCRE_ERROR_NOMATCH already in the return register. The jump is
      bound to forced_quit_label further down. */
10032 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10033   {
10034   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10035   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
10036   minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0);
10037   }
10038 if (common->req_char_ptr != 0)
10039   reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
10040
10041 /* Store the current STR_PTR in OVECTOR(0). */
10042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
10043 /* Copy the limit of allowed recursions. */
10044 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
10045 if (common->capture_last_ptr != 0)
10046   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
10047
10048 if (common->needs_start_ptr)
10049   {
10050   SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
10051   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
10052   }
10053 else
10054   SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
10055
10056 /* Copy the beginning of the string. */
10057 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10058   {
        /* Skipped when hit_start no longer holds its initial value of -1. */
10059   jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10060   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10061   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
10062   JUMPHERE(jump);
10063   }
10064 else if (mode == JIT_PARTIAL_HARD_COMPILE)
10065   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10066
      /* Matching path of the whole pattern. */
10067 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
10068 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10069   {
10070   sljit_free_compiler(compiler);
10071   SLJIT_FREE(common->optimized_cbracket);
10072   SLJIT_FREE(common->private_data_ptrs);
10073   if (common->read_only_data)
10074     SLJIT_FREE(common->read_only_data);
10075   return;
10076   }
10077
      /* A match that ends where it started (STR_PTR == OVECTOR(0)) is diverted
      to the empty-match handling emitted after flush_stubs() below, which
      either returns to empty_match_found_label or backtracks. */
10078 if (common->might_be_empty)
10079   {
10080   empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
10081   empty_match_found_label = LABEL();
10082   }
10083
10084 common->accept_label = LABEL();
10085 if (common->accept != NULL)
10086   set_jumps(common->accept, common->accept_label);
10087
10088 /* This means we have a match. Update the ovector. */
10089 copy_ovector(common, re->top_bracket + 1);
      /* Both quit labels sit directly in front of the return sequence, so code
      jumping to them must already have set SLJIT_RETURN_REG. */
10090 common->quit_label = common->forced_quit_label = LABEL();
10091 if (common->quit != NULL)
10092   set_jumps(common->quit, common->quit_label);
10093 if (common->forced_quit != NULL)
10094   set_jumps(common->forced_quit, common->forced_quit_label);
10095 if (minlength_check_failed != NULL)
10096   SET_LABEL(minlength_check_failed, common->forced_quit_label);
10097 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
10098
      /* Partial-match exit; emitted only for the two partial modes. */
10099 if (mode != JIT_COMPILE)
10100   {
10101   common->partialmatchlabel = LABEL();
10102   set_jumps(common->partialmatch, common->partialmatchlabel);
10103   return_with_partial_match(common, common->quit_label);
10104   }
10105
      /* Backtracking path of the whole pattern; a rejected empty match
      re-enters here. */
10106 if (common->might_be_empty)
10107   empty_match_backtrack_label = LABEL();
10108 compile_backtrackingpath(common, rootbacktrack.top);
10109 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10110   {
10111   sljit_free_compiler(compiler);
10112   SLJIT_FREE(common->optimized_cbracket);
10113   SLJIT_FREE(common->private_data_ptrs);
10114   if (common->read_only_data)
10115     SLJIT_FREE(common->read_only_data);
10116   return;
10117   }
10118
10119 SLJIT_ASSERT(rootbacktrack.prev == NULL);
      /* Execution arrives here when the attempt at the current start position
      has failed; the reset_match helper emitted below also jumps here. */
10120 reset_match_label = LABEL();
10121
10122 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10123   {
10124   /* Update hit_start only in the first time. */
10125   jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
10126   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
10127   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
10128   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
10129   JUMPHERE(jump);
10130   }
10131
10132 /* Check we have remaining characters. */
      /* With PCRE_FIRSTLINE the limit is first_line_end (kept in TMP1) rather
      than STR_END. */
10133 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
10134   {
10135   SLJIT_ASSERT(common->first_line_end != 0);
10136   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
10137   }
10138
      /* Reload the position at which the failed attempt started. */
10139 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
10140
      /* Unanchored patterns loop back (to ff_newline_shortcut when it exists,
      otherwise to mainloop_label) while the position is below the limit. */
10141 if ((re->options & PCRE_ANCHORED) == 0)
10142   {
10143   if (common->ff_newline_shortcut != NULL)
10144     {
10145     if ((re->options & PCRE_FIRSTLINE) == 0)
10146       CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
10147     /* There cannot be more newlines here. */
10148     }
10149   else
10150     {
10151     if ((re->options & PCRE_FIRSTLINE) == 0)
10152       CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
10153     else
10154       CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
10155     }
10156   }
10157
10158 /* No more remaining characters. */
10159 if (reqbyte_notfound != NULL)
10160   JUMPHERE(reqbyte_notfound);
10161
      /* In soft partial mode a hit_start other than -1 leads to the
      partial-match exit instead of a plain no-match. */
10162 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10163   CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
10164
10165 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10166 JUMPTO(SLJIT_JUMP, common->quit_label);
10167
10168 flush_stubs(common);
10169
      /* Empty match handling: backtrack if notempty is set; accept if
      notempty_atstart is clear; otherwise accept only when the match is not at
      the starting position (arguments->str). */
10170 if (common->might_be_empty)
10171   {
10172   JUMPHERE(empty_match);
10173   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10174   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10175   CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10176   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10177   CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10178   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10179   CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10180   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
10181   }
10182
      /* Compile every recorded recursion entry. quit_label is saved and
      restored around the loop (presumably compile_recurse() overwrites
      common->quit_label - TODO confirm). */
10183 common->currententry = common->entries;
10184 common->local_exit = TRUE;
10185 quit_label = common->quit_label;
10186 while (common->currententry != NULL)
10187   {
10188   /* Might add new entries. */
10189   compile_recurse(common);
10190   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10191     {
10192     sljit_free_compiler(compiler);
10193     SLJIT_FREE(common->optimized_cbracket);
10194     SLJIT_FREE(common->private_data_ptrs);
10195     if (common->read_only_data)
10196       SLJIT_FREE(common->read_only_data);
10197     return;
10198     }
10199   flush_stubs(common);
10200   common->currententry = common->currententry->next;
10201   }
10202 common->local_exit = FALSE;
10203 common->quit_label = quit_label;
10204
10205 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
10206 /* This is a (really) rare case. */
10207 set_jumps(common->stackalloc, LABEL());
10208 /* RETURN_ADDR is not a saved register. */
10209 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
      /* TMP2 is preserved in LOCALS1 across the call and reloaded on success. */
10210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
10211 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10212 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10213 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
10214 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
10215
      /* Two-argument call of sljit_stack_resize(); TMP1 (the stack) and TMP2
      (old limit + STACK_GROWTH_RATE) presumably map to the argument registers
      - TODO confirm. A non-zero result is treated as failure. */
10216 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
10217 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10218 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10219 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10220 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
10221 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
10222 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
10223 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10224
10225 /* Allocation failed. */
10226 JUMPHERE(jump);
10227 /* We break the return address cache here, but this is a really rare case. */
10228 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
10229 JUMPTO(SLJIT_JUMP, common->quit_label);
10230
10231 /* Call limit reached. */
10232 set_jumps(common->calllimit, LABEL());
10233 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
10234 JUMPTO(SLJIT_JUMP, common->quit_label);
10235
      /* Shared helper routines. Each one is emitted only when its jump list is
      non-empty, i.e. when some generated code refers to it. */
10236 if (common->revertframes != NULL)
10237   {
10238   set_jumps(common->revertframes, LABEL());
10239   do_revertframes(common);
10240   }
10241 if (common->wordboundary != NULL)
10242   {
10243   set_jumps(common->wordboundary, LABEL());
10244   check_wordboundary(common);
10245   }
10246 if (common->anynewline != NULL)
10247   {
10248   set_jumps(common->anynewline, LABEL());
10249   check_anynewline(common);
10250   }
10251 if (common->hspace != NULL)
10252   {
10253   set_jumps(common->hspace, LABEL());
10254   check_hspace(common);
10255   }
10256 if (common->vspace != NULL)
10257   {
10258   set_jumps(common->vspace, LABEL());
10259   check_vspace(common);
10260   }
10261 if (common->casefulcmp != NULL)
10262   {
10263   set_jumps(common->casefulcmp, LABEL());
10264   do_casefulcmp(common);
10265   }
10266 if (common->caselesscmp != NULL)
10267   {
10268   set_jumps(common->caselesscmp, LABEL());
10269   do_caselesscmp(common);
10270   }
10271 if (common->reset_match != NULL)
10272   {
10273   set_jumps(common->reset_match, LABEL());
10274   do_reset_match(common, (re->top_bracket + 1) * 2);
        /* Continue matching from STR_PTR when it is beyond TMP1; otherwise
        take TMP1 as the position and go through the failed-attempt path. */
10275   CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
10276   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10277   JUMPTO(SLJIT_JUMP, reset_match_label);
10278   }
10279 #ifdef SUPPORT_UTF
10280 #ifdef COMPILE_PCRE8
10281 if (common->utfreadchar != NULL)
10282   {
10283   set_jumps(common->utfreadchar, LABEL());
10284   do_utfreadchar(common);
10285   }
10286 if (common->utfreadchar16 != NULL)
10287   {
10288   set_jumps(common->utfreadchar16, LABEL());
10289   do_utfreadchar16(common);
10290   }
10291 if (common->utfreadtype8 != NULL)
10292   {
10293   set_jumps(common->utfreadtype8, LABEL());
10294   do_utfreadtype8(common);
10295   }
10296 #endif /* COMPILE_PCRE8 */
10297 #endif /* SUPPORT_UTF */
10298 #ifdef SUPPORT_UCP
10299 if (common->getucd != NULL)
10300   {
10301   set_jumps(common->getucd, LABEL());
10302   do_getucd(common);
10303   }
10304 #endif
10305
      /* All code has been emitted: the read-only data area must be exactly
      used up, and the compile-time tables can be released. */
10306 SLJIT_ASSERT(common->read_only_data + (common->read_only_data_size >> SLJIT_WORD_SHIFT) == common->read_only_data_ptr);
10307 SLJIT_FREE(common->optimized_cbracket);
10308 SLJIT_FREE(common->private_data_ptrs);
10309
10310 executable_func = sljit_generate_code(compiler);
10311 executable_size = sljit_get_generated_code_size(compiler);
      /* Write the final address of every recorded label to its destination. */
10312 label_addr = common->label_addrs;
10313 while (label_addr != NULL)
10314   {
10315   *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
10316   label_addr = label_addr->next;
10317   }
10318 sljit_free_compiler(compiler);
10319 if (executable_func == NULL)
10320   {
10321   if (common->read_only_data)
10322     SLJIT_FREE(common->read_only_data);
10323   return;
10324   }
10325
10326 /* Reuse the function descriptor if possible. */
10327 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
10328   functions = (executable_functions *)extra->executable_jit;
10329 else
10330   {
10331   /* Note: If your memory-checker has flagged the allocation below as a
10332    * memory leak, it is probably because you either forgot to call
10333    * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
10334    * pcre16_extra) object, or you called said function after having
10335    * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
10336    * of the object. (The function will only free the JIT data if the
10337    * bit remains set, as the bit indicates that the pointer to the data
10338    * is valid.)
10339    */
10340   functions = SLJIT_MALLOC(sizeof(executable_functions));
10341   if (functions == NULL)
10342     {
10343     /* This case is highly unlikely since we just recently
10344     freed a lot of memory. Not impossible though. */
10345     sljit_free_code(executable_func);
10346     if (common->read_only_data)
10347       SLJIT_FREE(common->read_only_data);
10348     return;
10349     }
        /* Fresh descriptor: record the ovector size and the optional match
        limit of the pattern, then publish the descriptor through "extra". */
10350   memset(functions, 0, sizeof(executable_functions));
10351   functions->top_bracket = (re->top_bracket + 1) * 2;
10352   functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
10353   extra->executable_jit = functions;
10354   extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
10355   }
10356
      /* Store the results in the slots that belong to this mode. */
10357 functions->executable_funcs[mode] = executable_func;
10358 functions->read_only_data[mode] = common->read_only_data;
10359 functions->executable_sizes[mode] = executable_size;
10360 }
10361 
/* Runs the generated code on a temporary work area of MACHINE_STACK_SIZE bytes
that lives in this function's own frame.

Arguments:
  arguments        argument block handed to the generated code; its "stack"
                   field is overwritten with the temporary stack descriptor
  executable_func  entry point of the generated code

Returns:           the value returned by the generated code

The descriptor stored in arguments->stack points into this frame and must not
be used once the function has returned. */
static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func)
{
pcre_uint8 work_area[MACHINE_STACK_SIZE];
struct sljit_stack temp_stack;
/* Converts the data pointer into a callable function pointer. */
union {
   void* executable_func;
   jit_function call_executable_func;
} entry;

entry.executable_func = executable_func;

/* An empty stack: top equals base, and the whole work area may be used. */
temp_stack.base = (sljit_sw)&work_area;
temp_stack.top = temp_stack.base;
temp_stack.limit = temp_stack.base + MACHINE_STACK_SIZE;
temp_stack.max_limit = temp_stack.limit;

arguments->stack = &temp_stack;
return entry.call_executable_func(arguments);
}
10379 
10380 int
PRIV(jit_exec)10381 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
10382   int length, int start_offset, int options, int *offsets, int offset_count)
10383 {
10384 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10385 union {
10386    void* executable_func;
10387    jit_function call_executable_func;
10388 } convert_executable_func;
10389 jit_arguments arguments;
10390 int max_offset_count;
10391 int retval;
10392 int mode = JIT_COMPILE;
10393 
10394 if ((options & PCRE_PARTIAL_HARD) != 0)
10395   mode = JIT_PARTIAL_HARD_COMPILE;
10396 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10397   mode = JIT_PARTIAL_SOFT_COMPILE;
10398 
10399 if (functions->executable_funcs[mode] == NULL)
10400   return PCRE_ERROR_JIT_BADOPTION;
10401 
10402 /* Sanity checks should be handled by pcre_exec. */
10403 arguments.str = subject + start_offset;
10404 arguments.begin = subject;
10405 arguments.end = subject + length;
10406 arguments.mark_ptr = NULL;
10407 /* JIT decreases this value less frequently than the interpreter. */
10408 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10409 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10410   arguments.limit_match = functions->limit_match;
10411 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10412 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10413 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10414 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10415 arguments.offsets = offsets;
10416 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10417 arguments.real_offset_count = offset_count;
10418 
10419 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10420 the output vector for storing captured strings, with the remainder used as
10421 workspace. We don't need the workspace here. For compatibility, we limit the
10422 number of captured strings in the same way as pcre_exec(), so that the user
10423 gets the same result with and without JIT. */
10424 
10425 if (offset_count != 2)
10426   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10427 max_offset_count = functions->top_bracket;
10428 if (offset_count > max_offset_count)
10429   offset_count = max_offset_count;
10430 arguments.offset_count = offset_count;
10431 
10432 if (functions->callback)
10433   arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
10434 else
10435   arguments.stack = (struct sljit_stack *)functions->userdata;
10436 
10437 if (arguments.stack == NULL)
10438   retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
10439 else
10440   {
10441   convert_executable_func.executable_func = functions->executable_funcs[mode];
10442   retval = convert_executable_func.call_executable_func(&arguments);
10443   }
10444 
10445 if (retval * 2 > offset_count)
10446   retval = 0;
10447 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10448   *(extra_data->mark) = arguments.mark_ptr;
10449 
10450 return retval;
10451 }
10452 
10453 #if defined COMPILE_PCRE8
10454 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)10455 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
10456   PCRE_SPTR subject, int length, int start_offset, int options,
10457   int *offsets, int offset_count, pcre_jit_stack *stack)
10458 #elif defined COMPILE_PCRE16
10459 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10460 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
10461   PCRE_SPTR16 subject, int length, int start_offset, int options,
10462   int *offsets, int offset_count, pcre16_jit_stack *stack)
10463 #elif defined COMPILE_PCRE32
10464 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10465 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
10466   PCRE_SPTR32 subject, int length, int start_offset, int options,
10467   int *offsets, int offset_count, pcre32_jit_stack *stack)
10468 #endif
10469 {
10470 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
10471 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10472 union {
10473    void* executable_func;
10474    jit_function call_executable_func;
10475 } convert_executable_func;
10476 jit_arguments arguments;
10477 int max_offset_count;
10478 int retval;
10479 int mode = JIT_COMPILE;
10480 
10481 SLJIT_UNUSED_ARG(argument_re);
10482 
10483 /* Plausibility checks */
10484 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
10485 
10486 if ((options & PCRE_PARTIAL_HARD) != 0)
10487   mode = JIT_PARTIAL_HARD_COMPILE;
10488 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10489   mode = JIT_PARTIAL_SOFT_COMPILE;
10490 
10491 if (functions->executable_funcs[mode] == NULL)
10492   return PCRE_ERROR_JIT_BADOPTION;
10493 
10494 /* Sanity checks should be handled by pcre_exec. */
10495 arguments.stack = (struct sljit_stack *)stack;
10496 arguments.str = subject_ptr + start_offset;
10497 arguments.begin = subject_ptr;
10498 arguments.end = subject_ptr + length;
10499 arguments.mark_ptr = NULL;
10500 /* JIT decreases this value less frequently than the interpreter. */
10501 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10502 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10503   arguments.limit_match = functions->limit_match;
10504 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10505 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10506 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10507 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10508 arguments.offsets = offsets;
10509 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10510 arguments.real_offset_count = offset_count;
10511 
10512 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10513 the output vector for storing captured strings, with the remainder used as
10514 workspace. We don't need the workspace here. For compatibility, we limit the
10515 number of captured strings in the same way as pcre_exec(), so that the user
10516 gets the same result with and without JIT. */
10517 
10518 if (offset_count != 2)
10519   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10520 max_offset_count = functions->top_bracket;
10521 if (offset_count > max_offset_count)
10522   offset_count = max_offset_count;
10523 arguments.offset_count = offset_count;
10524 
10525 convert_executable_func.executable_func = functions->executable_funcs[mode];
10526 retval = convert_executable_func.call_executable_func(&arguments);
10527 
10528 if (retval * 2 > offset_count)
10529   retval = 0;
10530 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10531   *(extra_data->mark) = arguments.mark_ptr;
10532 
10533 return retval;
10534 }
10535 
10536 void
PRIV(jit_free)10537 PRIV(jit_free)(void *executable_funcs)
10538 {
10539 int i;
10540 executable_functions *functions = (executable_functions *)executable_funcs;
10541 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10542   {
10543   if (functions->executable_funcs[i] != NULL)
10544     sljit_free_code(functions->executable_funcs[i]);
10545   if (functions->read_only_data[i] != NULL)
10546     SLJIT_FREE(functions->read_only_data[i]);
10547   }
10548 SLJIT_FREE(functions);
10549 }
10550 
10551 int
PRIV(jit_get_size)10552 PRIV(jit_get_size)(void *executable_funcs)
10553 {
10554 int i;
10555 sljit_uw size = 0;
10556 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
10557 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10558   size += executable_sizes[i];
10559 return (int)size;
10560 }
10561 
10562 const char*
PRIV(jit_get_target)10563 PRIV(jit_get_target)(void)
10564 {
10565 return sljit_get_platform_name();
10566 }
10567 
10568 #if defined COMPILE_PCRE8
10569 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10570 pcre_jit_stack_alloc(int startsize, int maxsize)
10571 #elif defined COMPILE_PCRE16
10572 PCRE_EXP_DECL pcre16_jit_stack *
10573 pcre16_jit_stack_alloc(int startsize, int maxsize)
10574 #elif defined COMPILE_PCRE32
10575 PCRE_EXP_DECL pcre32_jit_stack *
10576 pcre32_jit_stack_alloc(int startsize, int maxsize)
10577 #endif
10578 {
10579 if (startsize < 1 || maxsize < 1)
10580   return NULL;
10581 if (startsize > maxsize)
10582   startsize = maxsize;
10583 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10584 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10585 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize);
10586 }
10587 
10588 #if defined COMPILE_PCRE8
10589 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)10590 pcre_jit_stack_free(pcre_jit_stack *stack)
10591 #elif defined COMPILE_PCRE16
10592 PCRE_EXP_DECL void
10593 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10594 #elif defined COMPILE_PCRE32
10595 PCRE_EXP_DECL void
10596 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10597 #endif
10598 {
10599 sljit_free_stack((struct sljit_stack *)stack);
10600 }
10601 
10602 #if defined COMPILE_PCRE8
10603 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10604 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10605 #elif defined COMPILE_PCRE16
10606 PCRE_EXP_DECL void
10607 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10608 #elif defined COMPILE_PCRE32
10609 PCRE_EXP_DECL void
10610 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10611 #endif
10612 {
10613 executable_functions *functions;
10614 if (extra != NULL &&
10615     (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
10616     extra->executable_jit != NULL)
10617   {
10618   functions = (executable_functions *)extra->executable_jit;
10619   functions->callback = callback;
10620   functions->userdata = userdata;
10621   }
10622 }
10623 
10624 #if defined COMPILE_PCRE8
10625 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)10626 pcre_jit_free_unused_memory(void)
10627 #elif defined COMPILE_PCRE16
10628 PCRE_EXP_DECL void
10629 pcre16_jit_free_unused_memory(void)
10630 #elif defined COMPILE_PCRE32
10631 PCRE_EXP_DECL void
10632 pcre32_jit_free_unused_memory(void)
10633 #endif
10634 {
10635 sljit_free_unused_memory_exec();
10636 }
10637 
10638 #else  /* SUPPORT_JIT */
10639 
10640 /* These are dummy functions to avoid linking errors when JIT support is not
10641 being compiled. */
10642 
10643 #if defined COMPILE_PCRE8
10644 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10645 pcre_jit_stack_alloc(int startsize, int maxsize)
10646 #elif defined COMPILE_PCRE16
10647 PCRE_EXP_DECL pcre16_jit_stack *
10648 pcre16_jit_stack_alloc(int startsize, int maxsize)
10649 #elif defined COMPILE_PCRE32
10650 PCRE_EXP_DECL pcre32_jit_stack *
10651 pcre32_jit_stack_alloc(int startsize, int maxsize)
10652 #endif
10653 {
10654 (void)startsize;
10655 (void)maxsize;
10656 return NULL;
10657 }
10658 
10659 #if defined COMPILE_PCRE8
10660 PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack * stack)10661 pcre_jit_stack_free(pcre_jit_stack *stack)
10662 #elif defined COMPILE_PCRE16
10663 PCRE_EXP_DECL void
10664 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10665 #elif defined COMPILE_PCRE32
10666 PCRE_EXP_DECL void
10667 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10668 #endif
10669 {
10670 (void)stack;
10671 }
10672 
10673 #if defined COMPILE_PCRE8
10674 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10675 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10676 #elif defined COMPILE_PCRE16
10677 PCRE_EXP_DECL void
10678 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10679 #elif defined COMPILE_PCRE32
10680 PCRE_EXP_DECL void
10681 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10682 #endif
10683 {
10684 (void)extra;
10685 (void)callback;
10686 (void)userdata;
10687 }
10688 
10689 #if defined COMPILE_PCRE8
10690 PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)10691 pcre_jit_free_unused_memory(void)
10692 #elif defined COMPILE_PCRE16
10693 PCRE_EXP_DECL void
10694 pcre16_jit_free_unused_memory(void)
10695 #elif defined COMPILE_PCRE32
10696 PCRE_EXP_DECL void
10697 pcre32_jit_free_unused_memory(void)
10698 #endif
10699 {
10700 }
10701 
10702 #endif
10703 
10704 /* End of pcre_jit_compile.c */
10705