1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2013 University of Cambridge
10 
11   The machine code generator part (this module) was written by Zoltan Herczeg
12                       Copyright (c) 2010-2013
13 
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17 
18     * Redistributions of source code must retain the above copyright notice,
19       this list of conditions and the following disclaimer.
20 
21     * Redistributions in binary form must reproduce the above copyright
22       notice, this list of conditions and the following disclaimer in the
23       documentation and/or other materials provided with the distribution.
24 
25     * Neither the name of the University of Cambridge nor the names of its
26       contributors may be used to endorse or promote products derived from
27       this software without specific prior written permission.
28 
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42 
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include "pcre_internal.h"
48 
49 #if defined SUPPORT_JIT
50 
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54 
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61 
62 #include "sljit/sljitLir.c"
63 
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67 
68 /* Defines for debugging purposes. */
69 
70 /* 1 - Use unoptimized capturing brackets.
71    2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73 
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76 
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80 
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84 
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89 
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92 
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98 
99   'ab' - 'a' and 'b' regexps are concatenated
100   'a+' - 'a' is the sub-expression of the '+' operator
101 
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called as the matching path (expected path), and the other is called as
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107 
108  Greedy star operator (*) :
109    Matching path: match happens.
110    Backtrack path: match failed.
111  Non-greedy star operator (*?) :
112    Matching path: no need to perform a match.
113    Backtrack path: match is required.
114 
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 suppose we have the following regular expression:
118 
119    A(B|C)D
120 
121 The generated code will be the following:
122 
123  A matching path
124  '(' matching path (pushing arguments to the stack)
125  B matching path
126  ')' matching path (pushing arguments to the stack)
127  D matching path
128  return with successful match
129 
130  D backtrack path
131  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132  B backtrack path
133  C expected path
134  jump to D matching path
135  C backtrack path
136  A backtrack path
137 
138  Notice that the order of the backtrack code paths is the opposite of the fast
139  code paths. In this way the topmost value on the stack always belongs
140  to the current backtrack code path. The backtrack path must check
141  whether there is a next alternative. If so, it needs to jump back to
142  the matching path eventually. Otherwise it needs to clear out its own stack
143  frame and continue the execution on the backtrack code paths.
144 */
145 
146 /*
147 Saved stack frames:
148 
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153 
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156 
157 Thus we can restore the private data to a particular point in the stack.
158 */
159 
/* Argument record for a compiled matcher: a jit_function (see its typedef
in this file) receives a pointer to one of these. All pointer members come
first and the remaining scalars follow, as the two grouping comments state. */
160 typedef struct jit_arguments {
161   /* Pointers first. */
162   struct sljit_stack *stack;
   /* NOTE(review): str / begin / end look like positions inside the subject
   string - confirm against the code that fills this record. */
163   const pcre_uchar *str;
164   const pcre_uchar *begin;
165   const pcre_uchar *end;
166   int *offsets;
167   pcre_uchar *uchar_ptr;
168   pcre_uchar *mark_ptr;
169   void *callout_data;
170   /* Everything else after. */
171   pcre_uint32 limit_match;
172   int real_offset_count;
173   int offset_count;
   /* Four byte-sized members. NOTE(review): presumably boolean match-time
   flags named after the corresponding options - confirm at the call site. */
174   pcre_uint8 notbol;
175   pcre_uint8 noteol;
176   pcre_uint8 notempty;
177   pcre_uint8 notempty_atstart;
178 } jit_arguments;
179 
/* Record describing generated code. The three arrays are parallel: each has
JIT_NUMBER_OF_COMPILE_MODES entries, i.e. one slot per compile mode. */
180 typedef struct executable_functions {
181   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182   void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
   /* Callback and the opaque pointer stored next to it.
   NOTE(review): presumably userdata is handed back to callback - confirm. */
184   PUBL(jit_callback) callback;
185   void *userdata;
186   pcre_uint32 top_bracket;
187   pcre_uint32 limit_match;
188 } executable_functions;
189 
/* Node of a singly linked list of sljit jumps. */
190 typedef struct jump_list {
191   struct sljit_jump *jump;
   /* Following node of the list. */
192   struct jump_list *next;
193 } jump_list;
194 
/* Node of a singly linked list of stubs. Each node pairs a jump (start)
with a label (quit).
NOTE(review): presumably start enters the stub and quit is where it
resumes - confirm against the code that emits stubs. */
195 typedef struct stub_list {
196   struct sljit_jump *start;
197   struct sljit_label *quit;
198   struct stub_list *next;
199 } stub_list;
200 
/* Node of a singly linked list that pairs a label with a pointer to an
sljit_uw word.
NOTE(review): presumably the word at update_addr receives the address of
label once it is known - confirm. */
201 typedef struct label_addr_list {
202   struct sljit_label *label;
203   sljit_uw *update_addr;
204   struct label_addr_list *next;
205 } label_addr_list;
206 
/* Negative sentinel values. NOTE(review): presumably stored in the
framesize members of the *_backtrack structures, whose comments say that a
value below 0 means no frame is needed - confirm. */
207 enum frame_types {
208   no_frame = -1,
209   no_stack = -2
210 };
211 
/* Tags distinguishing two kinds of control entries: mark (0) and
then-trap (1). */
212 enum control_types {
213   type_mark = 0,
214   type_then_trap = 1
215 };
216 
/* Pointer to a function that takes a jit_arguments record and returns an
int, using the SLJIT_CALL calling convention. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218 
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
/* Base record of the backtrack structures: every *_backtrack structure in
this file declares `backtrack_common common;` as its first member. */
223 typedef struct backtrack_common {
224   /* Concatenation stack. */
225   struct backtrack_common *prev;
226   jump_list *nextbacktracks;
227   /* Internal stack (for component operators). */
228   struct backtrack_common *top;
229   jump_list *topbacktracks;
230   /* Opcode pointer. */
231   pcre_uchar *cc;
232 } backtrack_common;
233 
/* Backtrack record for assertions. */
234 typedef struct assert_backtrack {
   /* Base record; kept as the first member. */
235   backtrack_common common;
236   jump_list *condfailed;
237   /* Less than 0 if a frame is not needed. */
238   int framesize;
239   /* Points to our private memory word on the stack. */
240   int private_data_ptr;
241   /* For iterators. */
242   struct sljit_label *matchingpath;
243 } assert_backtrack;
244 
/* Backtrack record for brackets. The union u holds exactly one of its
members at a time; the inline comments name the opcodes each member is
for. */
245 typedef struct bracket_backtrack {
   /* Base record; kept as the first member. */
246   backtrack_common common;
247   /* Where to continue if an alternative is successfully matched. */
248   struct sljit_label *alternative_matchingpath;
249   /* For rmin and rmax iterators. */
250   struct sljit_label *recursive_matchingpath;
251   /* For greedy ? operator. */
252   struct sljit_label *zero_matchingpath;
253   /* Contains the branches of a failed condition. */
254   union {
255     /* Both for OP_COND, OP_SCOND. */
256     jump_list *condfailed;
257     assert_backtrack *assert;
258     /* For OP_ONCE. Less than 0 if not needed. */
259     int framesize;
260   } u;
261   /* Points to our private memory word on the stack. */
262   int private_data_ptr;
263 } bracket_backtrack;
264 
/* Backtrack record used for the possessive bracket forms.
NOTE(review): association with OP_*BRAPOS inferred from the name - confirm. */
265 typedef struct bracketpos_backtrack {
   /* Base record; kept as the first member. */
266   backtrack_common common;
267   /* Points to our private memory word on the stack. */
268   int private_data_ptr;
269   /* Reverting stack is needed. */
270   int framesize;
271   /* Allocated stack size. */
272   int stacksize;
273 } bracketpos_backtrack;
274 
/* Backtrack record holding one label in addition to the base record.
NOTE(review): presumably used for OP_BRAMINZERO, judging by the name. */
275 typedef struct braminzero_backtrack {
   /* Base record; kept as the first member. */
276   backtrack_common common;
277   struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279 
/* Backtrack record for iterators: the base record plus the label of the
next iteration. */
280 typedef struct iterator_backtrack {
   /* Base record; kept as the first member. */
281   backtrack_common common;
282   /* Next iteration. */
283   struct sljit_label *matchingpath;
284 } iterator_backtrack;
285 
/* Node of a singly linked list describing one recursion target. Unlike the
*_backtrack structures it does not embed a backtrack_common. */
286 typedef struct recurse_entry {
287   struct recurse_entry *next;
288   /* Contains the function entry. */
289   struct sljit_label *entry;
290   /* Collects the calls until the function is not created. */
291   jump_list *calls;
292   /* Points to the starting opcode. */
293   sljit_sw start;
294 } recurse_entry;
295 
/* Backtrack record for a recursion: the base record plus one flag. */
296 typedef struct recurse_backtrack {
   /* Base record; kept as the first member. */
297   backtrack_common common;
   /* NOTE(review): presumably TRUE when the recursed pattern was compiled
   inline rather than called - confirm. */
298   BOOL inlined_pattern;
299 } recurse_backtrack;
300 
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302 
/* Backtrack record for a then-trap. compiler_common keeps a pointer to the
current one in its then_trap member. */
303 typedef struct then_trap_backtrack {
   /* Base record; kept as the first member. */
304   backtrack_common common;
305   /* If then_trap is not NULL, this structure contains the real
306   then_trap for the backtracking path. */
307   struct then_trap_backtrack *then_trap;
308   /* Points to the starting opcode. */
309   sljit_sw start;
310   /* Exit point for the then opcodes of this alternative. */
311   jump_list *quit;
312   /* Frame size of the current alternative. */
313   int framesize;
314 } then_trap_backtrack;
315 
316 #define MAX_RANGE_SIZE 4
317 
/* Shared state of one JIT compilation. It bundles the sljit compiler
handle, the byte code being processed, offsets of the private data slots,
pattern-wide flags (several of them are set by check_opcode_types), newline
settings, table addresses, and the labels and jump lists used by the code
generator. */
318 typedef struct compiler_common {
319   /* The sljit generic compiler. */
320   struct sljit_compiler *compiler;
321   /* First byte code. */
322   pcre_uchar *start;
323   /* Maps private data offset to each opcode. Indexed by the distance of
   the opcode from start (see the PRIVATE_DATA macro). */
324   sljit_si *private_data_ptrs;
325   /* Chain list of read-only data ptrs. */
326   void *read_only_data_head;
327   /* Tells whether the capturing bracket is optimized. Indexed by the
   capturing bracket number; check_opcode_types clears the entry of every
   bracket that is referenced. */
328   pcre_uint8 *optimized_cbracket;
329   /* Tells whether the starting offset is a target of then. */
330   pcre_uint8 *then_offsets;
331   /* Current position where a THEN must jump. */
332   then_trap_backtrack *then_trap;
333   /* Starting offset of private data for capturing brackets. */
334   int cbra_ptr;
335   /* Output vector starting point. Must be divisible by 2. */
336   int ovector_start;
337   /* Last known position of the requested byte. */
338   int req_char_ptr;
339   /* Head of the last recursion. Zero until check_opcode_types reserves a
   word for it at ovector_start. */
340   int recursive_head_ptr;
341   /* First inspected character for partial matching. */
342   int start_used_ptr;
343   /* Starting pointer for partial soft matches. */
344   int hit_start;
345   /* End pointer of the first line. */
346   int first_line_end;
347   /* Points to the marked string. Zero until check_opcode_types reserves a
   word for it at ovector_start. */
348   int mark_ptr;
349   /* Recursive control verb management chain. check_opcode_types only sets
   this to 1 as a "needed" marker. */
350   int control_head_ptr;
351   /* Points to the last matched capture block index. Zero until
   check_opcode_types reserves a word for it at ovector_start. */
352   int capture_last_ptr;
353   /* Points to the starting position of the current match. */
354   int start_ptr;
355 
356   /* Flipped and lower case tables. */
357   const pcre_uint8 *fcc;
358   sljit_sw lcc;
359   /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360   int mode;
361   /* TRUE, when minlength is greater than 0.
   NOTE(review): the member name, and the fact that check_opcode_types sets
   it to TRUE for OP_SET_SOM, suggest the opposite polarity (TRUE when an
   empty match is possible) - confirm where this is initialised. */
362   BOOL might_be_empty;
363   /* \K is found in the pattern. */
364   BOOL has_set_som;
365   /* (*SKIP:arg) is found in the pattern. */
366   BOOL has_skip_arg;
367   /* (*THEN) is found in the pattern. */
368   BOOL has_then;
369   /* Needs to know the start position anytime. */
370   BOOL needs_start_ptr;
371   /* Currently in recurse or negative assert. */
372   BOOL local_exit;
373   /* Currently in a positive assert. */
374   BOOL positive_assert;
375   /* Newline control. */
376   int nltype;
377   pcre_uint32 nlmax;
378   pcre_uint32 nlmin;
379   int newline;
380   int bsr_nltype;
381   pcre_uint32 bsr_nlmax;
382   pcre_uint32 bsr_nlmin;
383   /* Dollar endonly. */
384   int endonly;
385   /* Tables. */
386   sljit_sw ctypes;
387   /* Named capturing brackets. check_opcode_types addresses an entry as
   name_table + index * name_entry_size and reads the bracket number from
   the start of the entry. */
388   pcre_uchar *name_table;
389   sljit_sw name_count;
390   sljit_sw name_entry_size;
391 
392   /* Labels and jump lists. */
393   struct sljit_label *partialmatchlabel;
394   struct sljit_label *quit_label;
395   struct sljit_label *forced_quit_label;
396   struct sljit_label *accept_label;
397   struct sljit_label *ff_newline_shortcut;
398   stub_list *stubs;
399   label_addr_list *label_addrs;
400   recurse_entry *entries;
401   recurse_entry *currententry;
402   jump_list *partialmatch;
403   jump_list *quit;
404   jump_list *positive_assert_quit;
405   jump_list *forced_quit;
406   jump_list *accept;
407   jump_list *calllimit;
408   jump_list *stackalloc;
409   jump_list *revertframes;
410   jump_list *wordboundary;
411   jump_list *anynewline;
412   jump_list *hspace;
413   jump_list *vspace;
414   jump_list *casefulcmp;
415   jump_list *caselesscmp;
416   jump_list *reset_match;
417   BOOL jscript_compat;
   /* The members below exist only in builds with the matching support
   macro defined. */
418 #ifdef SUPPORT_UTF
419   BOOL utf;
420 #ifdef SUPPORT_UCP
421   BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424   jump_list *utfreadchar;
425   jump_list *utfreadchar16;
426   jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430   jump_list *getucd;
431 #endif
432 } compiler_common;
433 
434 /* For byte_sequence_compare. */
435 
/* State record used by byte_sequence_compare (per the comment preceding
this typedef). length and sourcereg are always present. The remaining
members exist only when SLJIT_UNALIGNED is defined and non-zero: an index
plus two unions (c and oc) that expose the same storage as an int, an
unsigned short, and an array of characters whose element type and count
depend on the configured character width (8, 16 or 32 bit).
NOTE(review): presumably c holds the characters to compare and oc their
other-case counterparts - confirm in byte_sequence_compare. */
436 typedef struct compare_context {
437   int length;
438   int sourcereg;
439 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
440   int ucharptr;
441   union {
442     sljit_si asint;
443     sljit_uh asushort;
444 #if defined COMPILE_PCRE8
445     sljit_ub asbyte;
446     sljit_ub asuchars[4];
447 #elif defined COMPILE_PCRE16
448     sljit_uh asuchars[2];
449 #elif defined COMPILE_PCRE32
450     sljit_ui asuchars[1];
451 #endif
452   } c;
453   union {
454     sljit_si asint;
455     sljit_uh asushort;
456 #if defined COMPILE_PCRE8
457     sljit_ub asbyte;
458     sljit_ub asuchars[4];
459 #elif defined COMPILE_PCRE16
460     sljit_uh asuchars[2];
461 #elif defined COMPILE_PCRE32
462     sljit_ui asuchars[1];
463 #endif
464   } oc;
465 #endif
466 } compare_context;
467 
468 /* Undefine sljit macros. */
469 #undef CMP
470 
471 /* Used for accessing the elements of the stack. */
472 #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
473 
474 #define TMP1          SLJIT_R0
475 #define TMP2          SLJIT_R2
476 #define TMP3          SLJIT_R3
477 #define STR_PTR       SLJIT_S0
478 #define STR_END       SLJIT_S1
479 #define STACK_TOP     SLJIT_R1
480 #define STACK_LIMIT   SLJIT_S2
481 #define COUNT_MATCH   SLJIT_S3
482 #define ARGUMENTS     SLJIT_S4
483 #define RETURN_ADDR   SLJIT_R4
484 
485 /* Local space layout. */
486 /* These two locals can be used by the current opcode. */
487 #define LOCALS0          (0 * sizeof(sljit_sw))
488 #define LOCALS1          (1 * sizeof(sljit_sw))
489 /* Two local variables for possessive quantifiers (char1 cannot use them). */
490 #define POSSESSIVE0      (2 * sizeof(sljit_sw))
491 #define POSSESSIVE1      (3 * sizeof(sljit_sw))
492 /* Max limit of recursions. */
493 #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
494 /* The output vector is stored on the stack, and contains pointers
495 to characters. The vector data is divided into two groups: the first
496 group contains the start / end character pointers, and the second is
497 the start pointers when the end of the capturing group has not yet reached. */
498 #define OVECTOR_START    (common->ovector_start)
499 #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
500 #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
501 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502 
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR  SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR  SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR  SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515 
516 /* Shortcuts. */
517 #define DEFINE_COMPILER \
518   struct sljit_compiler *compiler = common->compiler
519 #define OP1(op, dst, dstw, src, srcw) \
520   sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
521 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
522   sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
523 #define LABEL() \
524   sljit_emit_label(compiler)
525 #define JUMP(type) \
526   sljit_emit_jump(compiler, (type))
527 #define JUMPTO(type, label) \
528   sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
529 #define JUMPHERE(jump) \
530   sljit_set_label((jump), sljit_emit_label(compiler))
531 #define SET_LABEL(jump, label) \
532   sljit_set_label((jump), (label))
533 #define CMP(type, src1, src1w, src2, src2w) \
534   sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
535 #define CMPTO(type, src1, src1w, src2, src2w, label) \
536   sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
537 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
538   sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
539 #define GET_LOCAL_BASE(dst, dstw, offset) \
540   sljit_get_local_base(compiler, (dst), (dstw), (offset))
541 
542 #define READ_CHAR_MAX 0x7fffffff
543 
/* Skips a complete bracketed group.

cc must point at the opening opcode of an assertion (OP_ASSERT ..
OP_ASSERTBACK_NOT) or of a bracket (OP_ONCE .. OP_SCOND). The link stored
at offset 1 is followed once unconditionally and then again for every
OP_ALT that is reached, which leaves the pointer on the closing ket.

Returns the address of the first opcode after that ket. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
pcre_uchar *pos = cc;

SLJIT_ASSERT((*pos >= OP_ASSERT && *pos <= OP_ASSERTBACK_NOT) || (*pos >= OP_ONCE && *pos <= OP_SCOND));

/* First hop: from the opening opcode to the end of the first branch. */
pos += GET(pos, 1);
/* Further hops: one per additional alternative. */
while (*pos == OP_ALT)
  pos += GET(pos, 1);

SLJIT_ASSERT(*pos >= OP_KET && *pos <= OP_KETRPOS);
return pos + 1 + LINK_SIZE;
}
552 
/* Counts the alternatives of a bracketed group.

cc must point at the opening opcode of an assertion (OP_ASSERT ..
OP_ASSERTBACK_NOT) or of a bracket (OP_ONCE .. OP_SCOND). The group always
has one branch; every OP_ALT found while following the links adds one
more. */
static int no_alternatives(pcre_uchar *cc)
{
pcre_uchar *branch = cc;
int alternatives = 1;

SLJIT_ASSERT((*branch >= OP_ASSERT && *branch <= OP_ASSERTBACK_NOT) || (*branch >= OP_ONCE && *branch <= OP_SCOND));

for (branch += GET(branch, 1); *branch == OP_ALT; branch += GET(branch, 1))
  alternatives++;

SLJIT_ASSERT(*branch >= OP_KET && *branch <= OP_KETRPOS);
return alternatives;
}
566 
/* Bit-count lookup table: ones_in_half_byte[v] is the number of set bits
in the 4-bit value v (0 .. 15). It is read-only lookup data, hence const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571 
572 /* Functions which might need modification for every newly supported opcode:
573  next_opcode
574  check_opcode_types
575  set_private_data_ptrs
576  get_framesize
577  init_frame
578  get_private_data_copy_length
579  copy_private_data
580  compile_matchingpath
581  compile_backtrackingpath
582 */
583 
/* Returns a pointer to the opcode that follows the one at cc.

Most opcodes have a fixed length taken from PRIV(OP_lengths). The others
are handled individually:
  - single character opcodes may be followed by extra UTF code units;
  - OP_TYPE* repeats are advanced by their table length minus one;
  - OP_XCLASS stores its own length at offset 1;
  - the verbs carrying an argument are followed by that argument.

Returns NULL for OP_ANYBYTE in UTF mode and for any opcode not listed. */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
pcre_uchar *next;

SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Fixed length opcodes: positions, character types and anchors. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  /* Class repeats and classes. */
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  /* References, recursion and callouts. */
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  /* Bracket structure. */
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  /* Condition operands. */
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  /* Verbs without an argument. */
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes ending with a literal character. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  /* The table length covers one code unit of the character only. */
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Special cases. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] holds the argument length. */
  case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_ASSERT_STOP();
  return NULL;
  }
}
778 
check_opcode_types(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend)779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783 
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786   {
787   switch(*cc)
788     {
789     case OP_SET_SOM:
790     common->has_set_som = TRUE;
791     common->might_be_empty = TRUE;
792     cc += 1;
793     break;
794 
795     case OP_REF:
796     case OP_REFI:
797     common->optimized_cbracket[GET2(cc, 1)] = 0;
798     cc += 1 + IMM2_SIZE;
799     break;
800 
801     case OP_CBRAPOS:
802     case OP_SCBRAPOS:
803     common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804     cc += 1 + LINK_SIZE + IMM2_SIZE;
805     break;
806 
807     case OP_COND:
808     case OP_SCOND:
809     /* Only AUTO_CALLOUT can insert this opcode. We do
810        not intend to support this case. */
811     if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812       return FALSE;
813     cc += 1 + LINK_SIZE;
814     break;
815 
816     case OP_CREF:
817     common->optimized_cbracket[GET2(cc, 1)] = 0;
818     cc += 1 + IMM2_SIZE;
819     break;
820 
821     case OP_DNREF:
822     case OP_DNREFI:
823     case OP_DNCREF:
824     count = GET2(cc, 1 + IMM2_SIZE);
825     slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826     while (count-- > 0)
827       {
828       common->optimized_cbracket[GET2(slot, 0)] = 0;
829       slot += common->name_entry_size;
830       }
831     cc += 1 + 2 * IMM2_SIZE;
832     break;
833 
834     case OP_RECURSE:
835     /* Set its value only once. */
836     if (common->recursive_head_ptr == 0)
837       {
838       common->recursive_head_ptr = common->ovector_start;
839       common->ovector_start += sizeof(sljit_sw);
840       }
841     cc += 1 + LINK_SIZE;
842     break;
843 
844     case OP_CALLOUT:
845     if (common->capture_last_ptr == 0)
846       {
847       common->capture_last_ptr = common->ovector_start;
848       common->ovector_start += sizeof(sljit_sw);
849       }
850     cc += 2 + 2 * LINK_SIZE;
851     break;
852 
853     case OP_THEN_ARG:
854     common->has_then = TRUE;
855     common->control_head_ptr = 1;
856     /* Fall through. */
857 
858     case OP_PRUNE_ARG:
859     common->needs_start_ptr = TRUE;
860     /* Fall through. */
861 
862     case OP_MARK:
863     if (common->mark_ptr == 0)
864       {
865       common->mark_ptr = common->ovector_start;
866       common->ovector_start += sizeof(sljit_sw);
867       }
868     cc += 1 + 2 + cc[1];
869     break;
870 
871     case OP_THEN:
872     common->has_then = TRUE;
873     common->control_head_ptr = 1;
874     /* Fall through. */
875 
876     case OP_PRUNE:
877     case OP_SKIP:
878     common->needs_start_ptr = TRUE;
879     cc += 1;
880     break;
881 
882     case OP_SKIP_ARG:
883     common->control_head_ptr = 1;
884     common->has_skip_arg = TRUE;
885     cc += 1 + 2 + cc[1];
886     break;
887 
888     default:
889     cc = next_opcode(common, cc);
890     if (cc == NULL)
891       return FALSE;
892     break;
893     }
894   }
895 return TRUE;
896 }
897 
/* Returns the size (0, 1 or 2) associated with the class repeat opcode at
cc:
  2 for OP_CRSTAR and OP_CRPLUS,
  1 for OP_CRMINSTAR, OP_CRMINPLUS, OP_CRQUERY and OP_CRMINQUERY,
  2 for OP_CRRANGE and OP_CRMINRANGE, or 0 when both stored limits are
    equal,
  0 for every other opcode. */
static int get_class_iterator_size(pcre_uchar *cc)
{
const pcre_uchar op = *cc;

if (op == OP_CRSTAR || op == OP_CRPLUS)
  return 2;

if (op == OP_CRMINSTAR || op == OP_CRMINPLUS || op == OP_CRQUERY || op == OP_CRMINQUERY)
  return 1;

if (op == OP_CRRANGE || op == OP_CRMINRANGE)
  return (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) ? 0 : 2;

return 0;
}
922 
/* Checks whether the bracket starting at begin is followed by identical
copies of itself and, if so, records the repetition in
common->private_data_ptrs.

Two layouts are recognised after the first bracket:
  - further copies of the same bracket placed directly one after another
    (each compared against the first with memcmp);
  - a chain of nested optional copies, each introduced by OP_BRAZERO or
    OP_BRAMINZERO followed by OP_BRA, the innermost one without the OP_BRA.

A record is three consecutive private_data_ptrs entries, starting at the
index of the link field of a closing OP_KET: the distance to skip, the
repeat kind (OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.

Returns TRUE when a repeat has been recorded, or had already been recorded,
for this bracket; FALSE otherwise. */
detect_repeat(compiler_common * common,pcre_uchar * begin)923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932 
933 /* Detect fixed iterations first. Only brackets closed by a plain OP_KET
   are candidates. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935   return FALSE;
936 
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939   return TRUE;
940 
   /* Count the bracket itself plus every directly following identical copy. */
941 next = end;
942 min = 1;
943 while (1)
944   {
945   if (*next != *begin)
946     break;
947   next_end = bracketend(next);
948   if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949     break;
950   next = next_end;
951   min++;
952   }
953 
   /* Exactly two copies are not converted. */
954 if (min == 2)
955   return FALSE;
956 
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960   {
961   type = *next;
     /* Walk the optional levels of the form: type, OP_BRA, copy of the
     bracket. */
962   while (1)
963     {
964     if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965       break;
966     next_end = bracketend(next + 2 + LINK_SIZE);
967     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968       break;
969     next = next_end;
970     max++;
971     }
972 
     /* The last level has the copy directly after the type opcode. */
973   if (next[0] == type && next[1] == *begin && max >= 1)
974     {
975     next_end = bracketend(next + 1);
976     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977       {
         /* One OP_KET must follow for each of the max OP_BRA levels. */
978       for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979         if (*next_end != OP_KET)
980           break;
981 
982       if (i == max)
983         {
984         common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986         /* +2 the original and the last. */
987         common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988         if (min == 1)
989           return TRUE;
           /* One mandatory copy is given up: max_end is moved back by one
           whole bracket, whose length is read from the link of its ket. */
990         min--;
991         max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992         }
993       }
994     }
995   }
996 
   /* Three or more mandatory copies are recorded as an OP_EXACT repeat. */
997 if (min >= 3)
998   {
999   common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000   common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001   common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002   return TRUE;
1003   }
1004 
1005 return FALSE;
1006 }
1007 
/* Case label lists shared by the routines which walk the byte code. The
trailing number is the count of private data words set_private_data_ptrs()
reserves for an iterator of the list. The opcodes of the 2B lists occupy
IMM2_SIZE more code units than those of the matching 2A lists. */

/* Single character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR:      case OP_MINPLUS: \
    case OP_QUERY:        case OP_MINQUERY: \
    case OP_MINSTARI:     case OP_MINPLUSI: \
    case OP_QUERYI:       case OP_MINQUERYI: \
    case OP_NOTMINSTAR:   case OP_NOTMINPLUS: \
    case OP_NOTQUERY:     case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI:  case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI:    case OP_NOTMINQUERYI:

/* Single character iterators, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR:         case OP_PLUS: \
    case OP_STARI:        case OP_PLUSI: \
    case OP_NOTSTAR:      case OP_NOTPLUS: \
    case OP_NOTSTARI:     case OP_NOTPLUSI:

/* Single character iterators with a count, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO:         case OP_MINUPTO: \
    case OP_UPTOI:        case OP_MINUPTOI: \
    case OP_NOTUPTO:      case OP_NOTMINUPTO: \
    case OP_NOTUPTOI:     case OP_NOTMINUPTOI:

/* Character type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR:  case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY:    case OP_TYPEMINQUERY:

/* Character type iterators, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR:     case OP_TYPEPLUS:

/* Character type iterators with a count, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO:     case OP_TYPEMINUPTO:

1059 
/* Walks the compiled pattern from common->start up to ccend and hands out
offsets in the private data area, beginning with *private_data_start. Every
assigned offset is recorded in common->private_data_ptrs at the index of the
opcode it belongs to, and the first offset which is still free is stored
back into *private_data_start. The walk stops early when the next offset is
above SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *next_part;
/* End of the repeated bracket around cc, or NULL when there is none. */
pcre_uchar *repeat_end = NULL;
int next_offset = *private_data_start;
int words, skip, bracket_skip;
BOOL try_repeat = TRUE;

while (cc < ccend && next_offset <= SLJIT_MAX_LOCAL_SIZE)
  {
  words = 0;
  skip = 0;
  bracket_skip = 0;

  if (try_repeat)
    switch(*cc)
      {
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRA:
      case OP_CBRA:
      case OP_COND:
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (detect_repeat(common, cc) && cc >= repeat_end)
        repeat_end = bracketend(cc);
      break;

      default:
      break;
      }
  /* Only the opcode right after a BRAZERO-like prefix skips the check. */
  try_repeat = TRUE;

  switch(*cc)
    {
    case OP_KET:
    if (common->private_data_ptrs[cc + 1 - common->start] == 0)
      {
      cc += 1 + LINK_SIZE;
      break;
      }
    /* The entry after the ket holds a distance (presumably recorded by
    detect_repeat()) which is skipped together with the ket itself. */
    common->private_data_ptrs[cc - common->start] = next_offset;
    next_offset += sizeof(sljit_sw);
    cc += common->private_data_ptrs[cc + 1 - common->start] + 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    bracket_skip = IMM2_SIZE;
    /* Fall through. */

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = next_offset;
    next_offset += sizeof(sljit_sw);
    bracket_skip += 1 + LINK_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    next_part = cc + GET(cc, 1);
    if (*next_part == OP_KETRMAX || *next_part == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = next_offset;
      next_offset += sizeof(sljit_sw);
      }
    bracket_skip = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracket_skip = IMM2_SIZE;
    /* Fall through. */

    case OP_BRA:
    bracket_skip += 1 + LINK_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    try_repeat = FALSE;
    skip = 1;
    break;

    /* A negative skip marks an opcode followed by a character, which
    may have extra code units in UTF mode. */
    CASE_ITERATOR_PRIVATE_DATA_1
    words = 1;
    skip = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    words = 2;
    skip = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    words = 2;
    skip = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    words = 1;
    skip = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    words = (cc[1] == OP_ANYNL || cc[1] == OP_EXTUNI) ? 0 : 2;
    skip = 1;
    break;

    case OP_TYPEUPTO:
    words = (cc[1 + IMM2_SIZE] == OP_ANYNL || cc[1 + IMM2_SIZE] == OP_EXTUNI) ? 0 : 2;
    skip = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    words = 2;
    skip = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    skip = 1 + 32 / sizeof(pcre_uchar);
    words = get_class_iterator_size(cc + skip);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    skip = GET(cc, 1);
    words = get_class_iterator_size(cc + skip);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     get a private slot instead of allocating it on the stack. */
  if (words > 0 && cc >= repeat_end)
    {
    common->private_data_ptrs[cc - common->start] = next_offset;
    next_offset += sizeof(sljit_sw) * words;
    }

  if (skip > 0)
    cc += skip;
  else if (skip < 0)
    {
    cc -= skip;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }

  if (bracket_skip > 0)
    {
    if (cc >= repeat_end)
      {
      /* A bracket closed by a plain OP_KET does not start a repeated area. */
      repeat_end = bracketend(cc);
      if (repeat_end[-1 - LINK_SIZE] == OP_KET)
        repeat_end = NULL;
      }
    cc += bracket_skip;
    }
  }

*private_data_start = next_offset;
}
1240 
1241 /* Returns with a frame_types (always < 0) if no need for frame. */
get_framesize(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL recursive,BOOL * needs_control_head)1242 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1243 {
1244 int length = 0;
1245 int possessive = 0;
1246 BOOL stack_restore = FALSE;
1247 BOOL setsom_found = recursive;
1248 BOOL setmark_found = recursive;
1249 /* The last capture is a local variable even for recursions. */
1250 BOOL capture_last_found = FALSE;
1251 
1252 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1253 SLJIT_ASSERT(common->control_head_ptr != 0);
1254 *needs_control_head = TRUE;
1255 #else
1256 *needs_control_head = FALSE;
1257 #endif
1258 
1259 if (ccend == NULL)
1260   {
1261   ccend = bracketend(cc) - (1 + LINK_SIZE);
1262   if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1263     {
1264     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1265     /* This is correct regardless of common->capture_last_ptr. */
1266     capture_last_found = TRUE;
1267     }
1268   cc = next_opcode(common, cc);
1269   }
1270 
1271 SLJIT_ASSERT(cc != NULL);
1272 while (cc < ccend)
1273   switch(*cc)
1274     {
1275     case OP_SET_SOM:
1276     SLJIT_ASSERT(common->has_set_som);
1277     stack_restore = TRUE;
1278     if (!setsom_found)
1279       {
1280       length += 2;
1281       setsom_found = TRUE;
1282       }
1283     cc += 1;
1284     break;
1285 
1286     case OP_MARK:
1287     case OP_PRUNE_ARG:
1288     case OP_THEN_ARG:
1289     SLJIT_ASSERT(common->mark_ptr != 0);
1290     stack_restore = TRUE;
1291     if (!setmark_found)
1292       {
1293       length += 2;
1294       setmark_found = TRUE;
1295       }
1296     if (common->control_head_ptr != 0)
1297       *needs_control_head = TRUE;
1298     cc += 1 + 2 + cc[1];
1299     break;
1300 
1301     case OP_RECURSE:
1302     stack_restore = TRUE;
1303     if (common->has_set_som && !setsom_found)
1304       {
1305       length += 2;
1306       setsom_found = TRUE;
1307       }
1308     if (common->mark_ptr != 0 && !setmark_found)
1309       {
1310       length += 2;
1311       setmark_found = TRUE;
1312       }
1313     if (common->capture_last_ptr != 0 && !capture_last_found)
1314       {
1315       length += 2;
1316       capture_last_found = TRUE;
1317       }
1318     cc += 1 + LINK_SIZE;
1319     break;
1320 
1321     case OP_CBRA:
1322     case OP_CBRAPOS:
1323     case OP_SCBRA:
1324     case OP_SCBRAPOS:
1325     stack_restore = TRUE;
1326     if (common->capture_last_ptr != 0 && !capture_last_found)
1327       {
1328       length += 2;
1329       capture_last_found = TRUE;
1330       }
1331     length += 3;
1332     cc += 1 + LINK_SIZE + IMM2_SIZE;
1333     break;
1334 
1335     case OP_THEN:
1336     stack_restore = TRUE;
1337     if (common->control_head_ptr != 0)
1338       *needs_control_head = TRUE;
1339     cc ++;
1340     break;
1341 
1342     default:
1343     stack_restore = TRUE;
1344     /* Fall through. */
1345 
1346     case OP_NOT_WORD_BOUNDARY:
1347     case OP_WORD_BOUNDARY:
1348     case OP_NOT_DIGIT:
1349     case OP_DIGIT:
1350     case OP_NOT_WHITESPACE:
1351     case OP_WHITESPACE:
1352     case OP_NOT_WORDCHAR:
1353     case OP_WORDCHAR:
1354     case OP_ANY:
1355     case OP_ALLANY:
1356     case OP_ANYBYTE:
1357     case OP_NOTPROP:
1358     case OP_PROP:
1359     case OP_ANYNL:
1360     case OP_NOT_HSPACE:
1361     case OP_HSPACE:
1362     case OP_NOT_VSPACE:
1363     case OP_VSPACE:
1364     case OP_EXTUNI:
1365     case OP_EODN:
1366     case OP_EOD:
1367     case OP_CIRC:
1368     case OP_CIRCM:
1369     case OP_DOLL:
1370     case OP_DOLLM:
1371     case OP_CHAR:
1372     case OP_CHARI:
1373     case OP_NOT:
1374     case OP_NOTI:
1375 
1376     case OP_EXACT:
1377     case OP_POSSTAR:
1378     case OP_POSPLUS:
1379     case OP_POSQUERY:
1380     case OP_POSUPTO:
1381 
1382     case OP_EXACTI:
1383     case OP_POSSTARI:
1384     case OP_POSPLUSI:
1385     case OP_POSQUERYI:
1386     case OP_POSUPTOI:
1387 
1388     case OP_NOTEXACT:
1389     case OP_NOTPOSSTAR:
1390     case OP_NOTPOSPLUS:
1391     case OP_NOTPOSQUERY:
1392     case OP_NOTPOSUPTO:
1393 
1394     case OP_NOTEXACTI:
1395     case OP_NOTPOSSTARI:
1396     case OP_NOTPOSPLUSI:
1397     case OP_NOTPOSQUERYI:
1398     case OP_NOTPOSUPTOI:
1399 
1400     case OP_TYPEEXACT:
1401     case OP_TYPEPOSSTAR:
1402     case OP_TYPEPOSPLUS:
1403     case OP_TYPEPOSQUERY:
1404     case OP_TYPEPOSUPTO:
1405 
1406     case OP_CLASS:
1407     case OP_NCLASS:
1408     case OP_XCLASS:
1409 
1410     cc = next_opcode(common, cc);
1411     SLJIT_ASSERT(cc != NULL);
1412     break;
1413     }
1414 
1415 /* Possessive quantifiers can use a special case. */
1416 if (SLJIT_UNLIKELY(possessive == length))
1417   return stack_restore ? no_frame : no_stack;
1418 
1419 if (length > 0)
1420   return length + 1;
1421 return stack_restore ? no_frame : no_stack;
1422 }
1423 
init_frame(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,int stackpos,int stacktop,BOOL recursive)1424 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1425 {
1426 DEFINE_COMPILER;
1427 BOOL setsom_found = recursive;
1428 BOOL setmark_found = recursive;
1429 /* The last capture is a local variable even for recursions. */
1430 BOOL capture_last_found = FALSE;
1431 int offset;
1432 
1433 /* >= 1 + shortest item size (2) */
1434 SLJIT_UNUSED_ARG(stacktop);
1435 SLJIT_ASSERT(stackpos >= stacktop + 2);
1436 
1437 stackpos = STACK(stackpos);
1438 if (ccend == NULL)
1439   {
1440   ccend = bracketend(cc) - (1 + LINK_SIZE);
1441   if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1442     cc = next_opcode(common, cc);
1443   }
1444 
1445 SLJIT_ASSERT(cc != NULL);
1446 while (cc < ccend)
1447   switch(*cc)
1448     {
1449     case OP_SET_SOM:
1450     SLJIT_ASSERT(common->has_set_som);
1451     if (!setsom_found)
1452       {
1453       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1454       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1455       stackpos += (int)sizeof(sljit_sw);
1456       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1457       stackpos += (int)sizeof(sljit_sw);
1458       setsom_found = TRUE;
1459       }
1460     cc += 1;
1461     break;
1462 
1463     case OP_MARK:
1464     case OP_PRUNE_ARG:
1465     case OP_THEN_ARG:
1466     SLJIT_ASSERT(common->mark_ptr != 0);
1467     if (!setmark_found)
1468       {
1469       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1470       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1471       stackpos += (int)sizeof(sljit_sw);
1472       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473       stackpos += (int)sizeof(sljit_sw);
1474       setmark_found = TRUE;
1475       }
1476     cc += 1 + 2 + cc[1];
1477     break;
1478 
1479     case OP_RECURSE:
1480     if (common->has_set_som && !setsom_found)
1481       {
1482       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1483       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1484       stackpos += (int)sizeof(sljit_sw);
1485       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486       stackpos += (int)sizeof(sljit_sw);
1487       setsom_found = TRUE;
1488       }
1489     if (common->mark_ptr != 0 && !setmark_found)
1490       {
1491       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1492       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1493       stackpos += (int)sizeof(sljit_sw);
1494       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495       stackpos += (int)sizeof(sljit_sw);
1496       setmark_found = TRUE;
1497       }
1498     if (common->capture_last_ptr != 0 && !capture_last_found)
1499       {
1500       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1501       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1502       stackpos += (int)sizeof(sljit_sw);
1503       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1504       stackpos += (int)sizeof(sljit_sw);
1505       capture_last_found = TRUE;
1506       }
1507     cc += 1 + LINK_SIZE;
1508     break;
1509 
1510     case OP_CBRA:
1511     case OP_CBRAPOS:
1512     case OP_SCBRA:
1513     case OP_SCBRAPOS:
1514     if (common->capture_last_ptr != 0 && !capture_last_found)
1515       {
1516       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1517       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1518       stackpos += (int)sizeof(sljit_sw);
1519       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520       stackpos += (int)sizeof(sljit_sw);
1521       capture_last_found = TRUE;
1522       }
1523     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1524     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1525     stackpos += (int)sizeof(sljit_sw);
1526     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1527     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1528     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1529     stackpos += (int)sizeof(sljit_sw);
1530     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1531     stackpos += (int)sizeof(sljit_sw);
1532 
1533     cc += 1 + LINK_SIZE + IMM2_SIZE;
1534     break;
1535 
1536     default:
1537     cc = next_opcode(common, cc);
1538     SLJIT_ASSERT(cc != NULL);
1539     break;
1540     }
1541 
1542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1543 SLJIT_ASSERT(stackpos == STACK(stacktop));
1544 }
1545 
get_private_data_copy_length(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,BOOL needs_control_head)1546 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1547 {
1548 int private_data_length = needs_control_head ? 3 : 2;
1549 int size;
1550 pcre_uchar *alternative;
1551 /* Calculate the sum of the private machine words. */
1552 while (cc < ccend)
1553   {
1554   size = 0;
1555   switch(*cc)
1556     {
1557     case OP_KET:
1558     if (PRIVATE_DATA(cc) != 0)
1559       {
1560       private_data_length++;
1561       SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1562       cc += PRIVATE_DATA(cc + 1);
1563       }
1564     cc += 1 + LINK_SIZE;
1565     break;
1566 
1567     case OP_ASSERT:
1568     case OP_ASSERT_NOT:
1569     case OP_ASSERTBACK:
1570     case OP_ASSERTBACK_NOT:
1571     case OP_ONCE:
1572     case OP_ONCE_NC:
1573     case OP_BRAPOS:
1574     case OP_SBRA:
1575     case OP_SBRAPOS:
1576     case OP_SCOND:
1577     private_data_length++;
1578     SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1579     cc += 1 + LINK_SIZE;
1580     break;
1581 
1582     case OP_CBRA:
1583     case OP_SCBRA:
1584     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1585       private_data_length++;
1586     cc += 1 + LINK_SIZE + IMM2_SIZE;
1587     break;
1588 
1589     case OP_CBRAPOS:
1590     case OP_SCBRAPOS:
1591     private_data_length += 2;
1592     cc += 1 + LINK_SIZE + IMM2_SIZE;
1593     break;
1594 
1595     case OP_COND:
1596     /* Might be a hidden SCOND. */
1597     alternative = cc + GET(cc, 1);
1598     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599       private_data_length++;
1600     cc += 1 + LINK_SIZE;
1601     break;
1602 
1603     CASE_ITERATOR_PRIVATE_DATA_1
1604     if (PRIVATE_DATA(cc))
1605       private_data_length++;
1606     cc += 2;
1607 #ifdef SUPPORT_UTF
1608     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1609 #endif
1610     break;
1611 
1612     CASE_ITERATOR_PRIVATE_DATA_2A
1613     if (PRIVATE_DATA(cc))
1614       private_data_length += 2;
1615     cc += 2;
1616 #ifdef SUPPORT_UTF
1617     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1618 #endif
1619     break;
1620 
1621     CASE_ITERATOR_PRIVATE_DATA_2B
1622     if (PRIVATE_DATA(cc))
1623       private_data_length += 2;
1624     cc += 2 + IMM2_SIZE;
1625 #ifdef SUPPORT_UTF
1626     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1627 #endif
1628     break;
1629 
1630     CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1631     if (PRIVATE_DATA(cc))
1632       private_data_length++;
1633     cc += 1;
1634     break;
1635 
1636     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1637     if (PRIVATE_DATA(cc))
1638       private_data_length += 2;
1639     cc += 1;
1640     break;
1641 
1642     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1643     if (PRIVATE_DATA(cc))
1644       private_data_length += 2;
1645     cc += 1 + IMM2_SIZE;
1646     break;
1647 
1648     case OP_CLASS:
1649     case OP_NCLASS:
1650 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1651     case OP_XCLASS:
1652     size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1653 #else
1654     size = 1 + 32 / (int)sizeof(pcre_uchar);
1655 #endif
1656     if (PRIVATE_DATA(cc))
1657       private_data_length += get_class_iterator_size(cc + size);
1658     cc += size;
1659     break;
1660 
1661     default:
1662     cc = next_opcode(common, cc);
1663     SLJIT_ASSERT(cc != NULL);
1664     break;
1665     }
1666   }
1667 SLJIT_ASSERT(cc == ccend);
1668 return private_data_length;
1669 }
1670 
/* Emits the code which copies the private data words of the byte code range
[cc, ccend) between their SLJIT_SP based slots and the STACK_TOP based area
which ranges from STACK(stackptr - 2) up to STACK(stacktop - 1). When save is
TRUE the slots are written to the stack, starting with
common->recursive_head_ptr (and common->control_head_ptr if
needs_control_head is set). Otherwise the slots of the byte code range are
loaded back from the stack and the head words are stepped over. TMP1 and TMP2
carry the values alternately. The opcodes handled here mirror
get_private_data_copy_length(), which presumably supplied the size of the
area. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Slots collected from the current opcode and their count. */
int slots[2];
int pending = 0;
int class_len, reg, pass;
BOOL use_tmp1 = TRUE;
BOOL tmp1_free = TRUE;
BOOL tmp2_free = TRUE;
BOOL *reg_free;
pcre_uchar *next_part;

stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (save)
  {
  /* The head words are transferred before any pattern related slot. */
  SLJIT_ASSERT(common->recursive_head_ptr != 0);
  slots[0] = common->recursive_head_ptr;
  pending = 1;
  if (needs_control_head)
    {
    SLJIT_ASSERT(common->control_head_ptr != 0);
    slots[1] = common->control_head_ptr;
    pending = 2;
    }
  }
else
  {
  /* Step over the head words, then preload both registers if possible. */
  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1_free = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2_free = FALSE;
    }
  /* TMP1 must be the next register in either way. */
  }

for (;;)
  {
  /* Transfer the collected slots, the last one first. */
  while (pending > 0)
    {
    pending--;
    reg = use_tmp1 ? TMP1 : TMP2;
    reg_free = use_tmp1 ? &tmp1_free : &tmp2_free;
    use_tmp1 = !use_tmp1;

    if (save)
      {
      /* Write out the value loaded two steps earlier, then reload. */
      if (!*reg_free)
        {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, reg, 0);
        stackptr += sizeof(sljit_sw);
        }
      OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(SLJIT_SP), slots[pending]);
      *reg_free = FALSE;
      }
    else
      {
      /* Store the preloaded value, then fetch the next one if any. */
      SLJIT_ASSERT(!*reg_free);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), slots[pending], reg, 0);
      *reg_free = stackptr >= stacktop;
      if (!*reg_free)
        {
        OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(STACK_TOP), stackptr);
        stackptr += sizeof(sljit_sw);
        }
      }
    }

  if (cc >= ccend)
    break;

  /* Collect the slots of the next opcode (pending is zero here). */
  switch(*cc)
    {
    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      pending = 1;
      slots[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    pending = 1;
    slots[0] = PRIVATE_DATA(cc);
    SLJIT_ASSERT(slots[0] != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      {
      pending = 1;
      slots[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    pending = 2;
    slots[0] = PRIVATE_DATA(cc);
    slots[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
    SLJIT_ASSERT(slots[0] != 0 && slots[1] != 0);
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    next_part = cc + GET(cc, 1);
    if (*next_part == OP_KETRMAX || *next_part == OP_KETRMIN)
      {
      pending = 1;
      slots[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(slots[0] != 0);
      }
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      {
      pending = 1;
      slots[0] = PRIVATE_DATA(cc);
      }
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      {
      pending = 2;
      slots[0] = PRIVATE_DATA(cc);
      slots[1] = slots[0] + sizeof(sljit_sw);
      }
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      {
      pending = 2;
      slots[0] = PRIVATE_DATA(cc);
      slots[1] = slots[0] + sizeof(sljit_sw);
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      {
      pending = 1;
      slots[0] = PRIVATE_DATA(cc);
      }
    cc++;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      {
      pending = 2;
      slots[0] = PRIVATE_DATA(cc);
      slots[1] = slots[0] + sizeof(sljit_sw);
      }
    cc++;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      {
      pending = 2;
      slots[0] = PRIVATE_DATA(cc);
      slots[1] = slots[0] + sizeof(sljit_sw);
      }
    cc += 1 + IMM2_SIZE;
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    case OP_CLASS:
    case OP_NCLASS:
    /* Length of the class itself; the iterator (if any) follows it. */
    class_len = 1 + 32 / (int)sizeof(pcre_uchar);
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    if (*cc == OP_XCLASS)
      class_len = GET(cc, 1);
#endif
    if (PRIVATE_DATA(cc) != 0)
      switch(get_class_iterator_size(cc + class_len))
        {
        case 1:
        pending = 1;
        slots[0] = PRIVATE_DATA(cc);
        break;

        case 2:
        pending = 2;
        slots[0] = PRIVATE_DATA(cc);
        slots[1] = slots[0] + sizeof(sljit_sw);
        break;

        default:
        SLJIT_ASSERT_STOP();
        break;
        }
    cc += class_len;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }

if (save)
  {
  /* Write out what is still kept in the registers: the register
  which would be used next holds the older value. */
  for (pass = 0; pass < 2; pass++)
    {
    reg = use_tmp1 ? TMP1 : TMP2;
    if (!(use_tmp1 ? tmp1_free : tmp2_free))
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, reg, 0);
      stackptr += sizeof(sljit_sw);
      }
    use_tmp1 = !use_tmp1;
    }
  }

SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1_free && tmp2_free)));
}
1992 
set_then_offsets(compiler_common * common,pcre_uchar * cc,pcre_uint8 * current_offset)1993 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1994 {
1995 pcre_uchar *end = bracketend(cc);
1996 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1997 
1998 /* Assert captures then. */
1999 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2000   current_offset = NULL;
2001 /* Conditional block does not. */
2002 if (*cc == OP_COND || *cc == OP_SCOND)
2003   has_alternatives = FALSE;
2004 
2005 cc = next_opcode(common, cc);
2006 if (has_alternatives)
2007   current_offset = common->then_offsets + (cc - common->start);
2008 
2009 while (cc < end)
2010   {
2011   if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2012     cc = set_then_offsets(common, cc, current_offset);
2013   else
2014     {
2015     if (*cc == OP_ALT && has_alternatives)
2016       current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2017     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2018       *current_offset = 1;
2019     cc = next_opcode(common, cc);
2020     }
2021   }
2022 
2023 return end;
2024 }
2025 
2026 #undef CASE_ITERATOR_PRIVATE_DATA_1
2027 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2028 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2029 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2030 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2031 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2032 
is_powerof2(unsigned int value)2033 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2034 {
2035 return (value & (value - 1)) == 0;
2036 }
2037 
set_jumps(jump_list * list,struct sljit_label * label)2038 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2039 {
2040 while (list)
2041   {
2042   /* sljit_set_label is clever enough to do nothing
2043   if either the jump or the label is NULL. */
2044   SET_LABEL(list->jump, label);
2045   list = list->next;
2046   }
2047 }
2048 
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)2049 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2050 {
2051 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2052 if (list_item)
2053   {
2054   list_item->next = *list;
2055   list_item->jump = jump;
2056   *list = list_item;
2057   }
2058 }
2059 
/* Records a stub at the head of common->stubs: start is the jump which
leads to the stub, and a label emitted here is stored as the point where
the stub returns to. Nothing is recorded (and no label is emitted) if the
allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
2073 
/* Emits the body of every stub recorded in common->stubs and empties the
list. Each stub is the target of its start jump; it consists of a fast call
(collected in common->stackalloc) followed by a jump back to the quit label
of the stub. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
2088 
/* Emits a label at the current position and remembers it together with
update_addr on common->label_addrs. Silently does nothing if the list node
cannot be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
2102 
count_match(compiler_common * common)2103 static SLJIT_INLINE void count_match(compiler_common *common)
2104 {
2105 DEFINE_COMPILER;
2106 
2107 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2108 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2109 }
2110 
allocate_stack(compiler_common * common,int size)2111 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2112 {
2113 /* May destroy all locals and registers except TMP2. */
2114 DEFINE_COMPILER;
2115 
2116 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2117 #ifdef DESTROY_REGISTERS
2118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2119 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2120 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2123 #endif
2124 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2125 }
2126 
free_stack(compiler_common * common,int size)2127 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2128 {
2129 DEFINE_COMPILER;
2130 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2131 }
2132 
/* Allocates size bytes of data that is chained onto
common->read_only_data_head so it can be released later with
free_read_only_data. One extra sljit_uw is allocated in front of the payload
to hold the link to the previous chunk.

Returns a pointer to the payload, or NULL if the compiler is already in an
error state or the allocation fails (in the latter case the compiler memory
error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word links to the previous head; the payload follows it. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;
return &chunk[1];
}
2152 
free_read_only_data(void * current,void * allocator_data)2153 static void free_read_only_data(void *current, void *allocator_data)
2154 {
2155 void *next;
2156 
2157 SLJIT_UNUSED_ARG(allocator_data);
2158 
2159 while (current != NULL)
2160   {
2161   next = *(void**)current;
2162   SLJIT_FREE(current, allocator_data);
2163   current = next;
2164   }
2165 }
2166 
reset_ovector(compiler_common * common,int length)2167 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2168 {
2169 DEFINE_COMPILER;
2170 struct sljit_label *loop;
2171 int i;
2172 
2173 /* At this point we can freely use all temporary registers. */
2174 SLJIT_ASSERT(length > 1);
2175 /* TMP1 returns with begin - 1. */
2176 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2177 if (length < 8)
2178   {
2179   for (i = 1; i < length; i++)
2180     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2181   }
2182 else
2183   {
2184   GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2185   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2186   loop = LABEL();
2187   OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2188   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2189   JUMPTO(SLJIT_NOT_ZERO, loop);
2190   }
2191 }
2192 
do_reset_match(compiler_common * common,int length)2193 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2194 {
2195 DEFINE_COMPILER;
2196 struct sljit_label *loop;
2197 int i;
2198 
2199 SLJIT_ASSERT(length > 1);
2200 /* OVECTOR(1) contains the "string begin - 1" constant. */
2201 if (length > 2)
2202   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2203 if (length < 8)
2204   {
2205   for (i = 2; i < length; i++)
2206     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2207   }
2208 else
2209   {
2210   GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2211   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2212   loop = LABEL();
2213   OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2214   OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2215   JUMPTO(SLJIT_NOT_ZERO, loop);
2216   }
2217 
2218 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2219 if (common->mark_ptr != 0)
2220   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2221 if (common->control_head_ptr != 0)
2222   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2223 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2225 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2226 }
2227 
do_search_mark(sljit_sw * current,const pcre_uchar * skip_arg)2228 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2229 {
2230 while (current != NULL)
2231   {
2232   switch (current[-2])
2233     {
2234     case type_then_trap:
2235     break;
2236 
2237     case type_mark:
2238     if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2239       return current[-4];
2240     break;
2241 
2242     default:
2243     SLJIT_ASSERT_STOP();
2244     break;
2245     }
2246   SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2247   current = (sljit_sw*)current[-1];
2248   }
2249 return -1;
2250 }
2251 
copy_ovector(compiler_common * common,int topbracket)2252 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2253 {
2254 DEFINE_COMPILER;
2255 struct sljit_label *loop;
2256 struct sljit_jump *early_quit;
2257 
2258 /* At this point we can freely use all registers. */
2259 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2261 
2262 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2263 if (common->mark_ptr != 0)
2264   OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2265 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2266 if (common->mark_ptr != 0)
2267   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2268 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2269 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2270 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2271 /* Unlikely, but possible */
2272 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2273 loop = LABEL();
2274 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2275 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2276 /* Copy the integer value to the output buffer */
2277 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2278 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2279 #endif
2280 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2282 JUMPTO(SLJIT_NOT_ZERO, loop);
2283 JUMPHERE(early_quit);
2284 
2285 /* Calculate the return value, which is the maximum ovector value. */
2286 if (topbracket > 1)
2287   {
2288   GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2289   OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2290 
2291   /* OVECTOR(0) is never equal to SLJIT_S2. */
2292   loop = LABEL();
2293   OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2294   OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2295   CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2296   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2297   }
2298 else
2299   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2300 }
2301 
return_with_partial_match(compiler_common * common,struct sljit_label * quit)2302 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2303 {
2304 DEFINE_COMPILER;
2305 struct sljit_jump *jump;
2306 
2307 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2308 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2309   && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2310 
2311 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2312 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2313 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2314 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2315 
2316 /* Store match begin and end. */
2317 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2318 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2319 
2320 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2321 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2322 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2323 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2324 #endif
2325 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2326 JUMPHERE(jump);
2327 
2328 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2329 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2330 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2331 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2332 #endif
2333 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2334 
2335 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2336 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2337 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2338 #endif
2339 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2340 
2341 JUMPTO(SLJIT_JUMP, quit);
2342 }
2343 
check_start_used_ptr(compiler_common * common)2344 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2345 {
2346 /* May destroy TMP1. */
2347 DEFINE_COMPILER;
2348 struct sljit_jump *jump;
2349 
2350 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2351   {
2352   /* The value of -1 must be kept for start_used_ptr! */
2353   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2354   /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2355   is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2356   jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2357   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2358   JUMPHERE(jump);
2359   }
2360 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2361   {
2362   jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2364   JUMPHERE(jump);
2365   }
2366 }
2367 
char_has_othercase(compiler_common * common,pcre_uchar * cc)2368 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2369 {
2370 /* Detects if the character has an othercase. */
2371 unsigned int c;
2372 
2373 #ifdef SUPPORT_UTF
2374 if (common->utf)
2375   {
2376   GETCHAR(c, cc);
2377   if (c > 127)
2378     {
2379 #ifdef SUPPORT_UCP
2380     return c != UCD_OTHERCASE(c);
2381 #else
2382     return FALSE;
2383 #endif
2384     }
2385 #ifndef COMPILE_PCRE8
2386   return common->fcc[c] != c;
2387 #endif
2388   }
2389 else
2390 #endif
2391   c = *cc;
2392 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2393 }
2394 
char_othercase(compiler_common * common,unsigned int c)2395 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2396 {
2397 /* Returns with the othercase. */
2398 #ifdef SUPPORT_UTF
2399 if (common->utf && c > 127)
2400   {
2401 #ifdef SUPPORT_UCP
2402   return UCD_OTHERCASE(c);
2403 #else
2404   return c;
2405 #endif
2406   }
2407 #endif
2408 return TABLE_GET(c, common->fcc, c);
2409 }
2410 
/* Detects whether the character at cc and its other case differ in exactly
one bit.

Returns 0 when they differ in more than one bit. Otherwise returns
(offset << 8) | mask, where mask is the differing bit within one byte and
offset selects the byte that holds it:
  - 8-bit: the index of the byte within the UTF-8 sequence (0 for single
    byte characters);
  - 16/32-bit: 0 or 1 for the low/high byte of the first code unit, and in
    UTF-16 mode 2 or 3 for the low/high byte of the second (low surrogate)
    code unit.

The caller must only pass characters that do have a different other case
(this is asserted). */
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
unsigned int c, oc, bit;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int n;
#endif

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
#ifdef SUPPORT_UCP
    oc = UCD_OTHERCASE(c);
#else
    oc = c;
#endif
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference, so bit is
non-zero and is_powerof2 really means "exactly one bit" here. */
if (!is_powerof2(bit))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && c > 127)
  {
  /* Start at the last byte of the sequence and move towards the first
  one: every trailing byte carries six bits of the code point. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | bit;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

/* Only UTF-16 splits characters above 0xffff into two code units (a
surrogate pair with ten payload bits each). In 32-bit mode such a character
is still a single code unit, so the surrogate layout must not be applied
there. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && c > 65535)
  {
  if (bit >= (1 << 10))
    /* The bit lives in the high surrogate (first code unit). */
    bit >>= 10;
  else
    /* The bit lives in the low surrogate (second code unit). */
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2486 
/* Emits the check for a partial match at the current position; does
nothing in normal mode. Does not modify registers.

Without force the action is skipped when start_used_ptr >= STR_PTR. With
force it is skipped only in soft mode, and only when start_used_ptr is -1.
The action itself is clearing hit_start in soft mode, and leaving through
the partial match exit in hard mode. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip_action = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (force)
  {
  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
    skip_action = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip_action = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip_action != NULL)
  JUMPHERE(skip_action);
}
2516 
/* Emits a check for the end of the subject. Does not affect registers;
usually used in a tight spot.

Normal mode: jumps to end_reached when STR_PTR >= STR_END. Partial modes:
when the end is reached and start_used_ptr >= STR_PTR the code also goes to
end_reached; otherwise soft mode clears hit_start before going to
end_reached, while hard mode leaves through the partial match exit. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
/* This comparison is emitted first in both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
JUMPHERE(not_at_end);
}
2546 
/* Emits the end-of-subject check used before reading a character. In
normal mode it simply backtracks when STR_PTR >= STR_END. In the partial
modes, reaching the end backtracks when start_used_ptr >= STR_PTR; otherwise
soft mode clears hit_start and backtracks, and hard mode leaves through the
partial match exit. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *in_subject;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
in_subject = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
JUMPHERE(in_subject);
}
2575 
static void peek_char(compiler_common *common, pcre_uint32 max)
{
/* Emits code that reads the character at STR_PTR into TMP1 and keeps
STR_PTR unchanged. Does not check STR_END. TMP2 is destroyed.

max is the largest character value the caller is interested in: when it is
below the first value that needs decoding (128 for UTF-8, 0xd800 for
UTF-16), the plain code unit is returned and no decoding code is emitted. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  /* Bytes below 0xc0 are not decoded. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* The decoder is entered with STR_PTR past the first byte and leaves the
  character length in TMP2, which is used to restore STR_PTR. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. STR_PTR has not been advanced and
  still addresses it, so the low surrogate is the next code unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  /* (high + 0x40) << 10 yields the upper bits including the 0x10000
  offset; the low surrogate supplies the lower ten bits. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif
}
2618 
2619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2620 
/* Tells whether the character codes below 128 are enough to determine a
match: this is the case when every byte in the upper half of the 32 byte
class bitmap (bytes 16..31) is 0xff for a negated class, or 0 otherwise. */
static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
{
const pcre_uint8 expected = nclass ? 0xff : 0;
int index;

for (index = 16; index < 32; index++)
  {
  if (bitset[index] != expected)
    return FALSE;
  }
return TRUE;
}
2637 
/* UTF-8 only. Emits code that loads the ctypes entry of the byte at STR_PTR
into TMP1 and advances STR_PTR by one byte. The original description states
that the precise type is returned for characters below 128 and zero
otherwise. Does not check STR_END.

With full_read, a first byte of 0xc0 or above additionally advances STR_PTR
by the value found in utf8_table4 for that byte (presumably the number of
remaining bytes of the sequence - confirm against the table definition).
TMP2 is destroyed. */
static void read_char7_type(compiler_common *common, BOOL full_read)
{
DEFINE_COMPILER;
struct sljit_jump *no_extra_bytes;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

no_extra_bytes = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(no_extra_bytes);
}
2661 
2662 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2663 
/* Emits code that reads the precise value of a character into TMP1 if the
character is between min and max (c >= min && c <= max). Otherwise TMP1
receives some value outside the range. Does not check STR_END.

STR_PTR is always advanced past the first code unit. When update_str_ptr is
set, the emitted code skips the whole character even if it is not decoded;
when it is clear, STR_PTR is only moved past the units that were actually
decoded. TMP2 is used as scratch, and the UTF-8 paths that keep STR_PTR up
to date also use RETURN_ADDR. */
static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *single_unit;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *other_length;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* Only single byte values are of interest and nothing has to be
  skipped. */
  if (max < 128 && !update_str_ptr) return;

  /* First bytes below 0xc0 need no further work. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences (first byte 0xf0..0xf7) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    other_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(other_length);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences (first byte 0xe0..0xef) are decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    other_length = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(other_length);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* Mixed lengths: use one of the shared out-of-line decoders. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* Nothing is decoded (update_str_ptr is set here); just skip the
    remaining bytes of the character. */
    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* 128 <= max < 0x800: combine the first two bytes as a two byte
    sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(single_unit);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Characters above the BMP are wanted: decode surrogate pairs. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. STR_PTR already addresses the low
    surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(single_unit);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. If the range reaches the surrogate
  area, TMP1 is replaced by 0x10000, which lies above max. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  single_unit = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(single_unit);
  }
#endif
}
2791 
read_char(compiler_common * common)2792 static SLJIT_INLINE void read_char(compiler_common *common)
2793 {
2794 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2795 }
2796 
/* Emits code that reads the character type (the ctypes table entry) of the
character at STR_PTR into TMP1 and updates STR_PTR. Does not check STR_END.
TMP2 is destroyed. Values above 255 that reach the inline range checks get
type zero, since the ctypes array contains only 256 values. */
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *skip;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *above_255;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  skip = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Combine the first two bytes as a two byte sequence and look the
    result up when it fits into the table. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(above_255);
    }
  else
    /* The shared out-of-line reader handles this case. */
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(skip);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(skip);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  skip = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(skip);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2861 
/* Emits code that moves STR_PTR one character back. Affects STR_PTR and, in
the UTF modes, TMP1. Does not check the beginning of the subject. */
static void skip_char_back(compiler_common *common)
{
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *step_again;

if (common->utf)
  {
  /* Keep stepping back while the byte just passed has the form 10xxxxxx,
  i.e. (byte & 0xc0) == 0x80. */
  step_again = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, step_again);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary: TMP1 becomes 1 when the unit just
  passed is in 0xdc00..0xdfff, and is then doubled to give the size of one
  more code unit to step over. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed width case: one code unit is one character. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
2896 
/* Emits a newline test for the character in TMP1. When jumpifmatch is set,
a jump is added to backtracks if the character is a newline; otherwise the
jump is added if it is not one. TMP2 may be destroyed.

nltype selects the newline convention: NLTYPE_ANY uses the shared
anynewline routine, NLTYPE_ANYCRLF accepts CR and NL, and NLTYPE_FIXED
compares against the single character in common->newline. */
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The result of the shared routine is tested through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF */
if (!jumpifmatch)
  {
  /* Fail unless the character is CR or NL. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
2928 
2929 #ifdef SUPPORT_UTF
2930 
2931 #if defined COMPILE_PCRE8
/* Emits the fast-call routine that decodes a multi-byte UTF-8 character.
The routine is entered through sljit_emit_fast_enter and has a separate
sljit_emit_fast_return exit for two, three and four byte sequences. */
do_utfreadchar(compiler_common * common)2932 static void do_utfreadchar(compiler_common *common)
2933 {
2934 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2.
The continuation bytes are read starting at STR_PTR, and STR_PTR is advanced
past each continuation byte that gets consumed (one, two or three). */
2936 DEFINE_COMPILER;
2937 struct sljit_jump *jump;
2938
2939 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the lead byte with the low six bits of the
first continuation byte. */
2940 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2941 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2942 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2943 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2944 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2945
2946 /* Searching for the first zero. After the shift by six, 0x800 is the 0x20
bit of the lead byte: when it is clear this is a two byte sequence. */
2947 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2948 jump = JUMP(SLJIT_NOT_ZERO);
2949 /* Two byte sequence. */
2950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2952 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2953
2954 JUMPHERE(jump);
/* At least three bytes. The XOR clears the 0x800 bit that was just found to
be set, so it does not end up in the decoded value. */
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960
/* After the second shift, 0x10000 is the 0x10 bit of the lead byte: when it
is clear this is a three byte sequence. */
2961 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2962 jump = JUMP(SLJIT_NOT_ZERO);
2963 /* Three byte sequence. */
2964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2965 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2966 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2967
2968 /* Four byte sequence. The 0x10000 marker bit is cleared the same way as
0x800 was above. */
2969 JUMPHERE(jump);
2970 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2971 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2972 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2974 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2975 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2976 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2977 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2978 }
2979 
/* Emits a reduced variant of the UTF-8 decoding fast-call routine: it has
exits for two byte sequences and for everything longer, and it does not
return a length.
NOTE(review): the name suggests that callers only need values that fit in
16 bits, which would explain why four byte sequences are not decoded
exactly - confirm against the callers. */
do_utfreadchar16(compiler_common * common)2980 static void do_utfreadchar16(compiler_common *common)
2981 {
2982 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1. Continuation bytes are
read starting at STR_PTR, which is advanced past the sequence. TMP2 is
used as scratch. */
2984 DEFINE_COMPILER;
2985 struct sljit_jump *jump;
2986
2987 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the lead byte with the low six bits of the
first continuation byte. */
2988 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2989 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2990 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2991 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2992 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2993
2994 /* Searching for the first zero. After the shift by six, 0x800 is the 0x20
bit of the lead byte: when it is clear this is a two byte sequence. */
2995 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2996 jump = JUMP(SLJIT_NOT_ZERO);
2997 /* Two byte sequence. */
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2999 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3000
3001 JUMPHERE(jump);
/* 0x400 is the 0x10 bit of the lead byte. TMP2 becomes 1 when it is set
(the lead byte of a four byte sequence) and 0 otherwise, and STR_PTR is
advanced by that amount before the next byte is loaded. */
3002 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3003 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3004 /* This code runs only in 8 bit mode. No need to shift the value. */
3005 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3006 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Clear the 0x800 marker bit, then append six more bits. */
3007 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3008 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3009 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3010 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3011 /* Three byte sequence. */
3012 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3013 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3014 }
3015 
/* Emits the fast-call routine that fetches the character type (the entry of
the common->ctypes table) for a multi-byte UTF-8 character without decoding
longer sequences. */
do_utfreadtype8(compiler_common * common)3016 static void do_utfreadtype8(compiler_common *common)
3017 {
3018 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1; it is 0 for every
character whose value is 256 or above. STR_PTR is advanced by one for a two
byte sequence and by the PRIV(utf8_table4) entry of the lead byte otherwise. */
3020 DEFINE_COMPILER;
3021 struct sljit_jump *jump;
3022 struct sljit_jump *compare;
3023
3024 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3025
/* The 0x20 bit of the lead byte is set for sequences longer than two bytes. */
3026 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3027 jump = JUMP(SLJIT_NOT_ZERO);
3028 /* Two byte sequence. */
3029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3031 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3032 /* The upper 5 bits are known at this point. A value above 3 means that
the character is 256 or greater (4 << 6 == 256). */
3033 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3034 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3035 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3036 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* TMP2 now holds the character (below 256); use it as the table index. */
3037 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3038 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3039
/* Two byte character of 256 or above: no type information. */
3040 JUMPHERE(compare);
3041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3042 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3043
3044 /* We only have types for characters less than 256. The table address is
biased by 0xc0 so that the lead byte itself can be used as the index. */
3045 JUMPHERE(jump);
3046 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3049 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3050 }
3051 
3052 #endif /* COMPILE_PCRE8 */
3053 
3054 #endif /* SUPPORT_UTF */
3055 
3056 #ifdef SUPPORT_UCP
3057 
3058 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3059 #define UCD_BLOCK_MASK 127
3060 #define UCD_BLOCK_SHIFT 7
3061 
/* Emits the fast-call routine that performs the two-stage Unicode property
table lookup (PRIV(ucd_stage1), PRIV(ucd_stage2), PRIV(ucd_records)). */
do_getucd(compiler_common * common)3062 static void do_getucd(compiler_common *common)
3063 {
3064 /* Looks up the UCD record of the character that arrives in TMP1.
Returns chartype in TMP1 and UCD offset (the value read from the stage 2
table) in TMP2. */
3066 DEFINE_COMPILER;
3067
/* The shift and mask constants below, and the index scaling used for the
record access, rely on these sizes. */
3068 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3069
3070 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: the block number of the character selects a one byte entry. */
3071 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3072 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index: stage 1 value * block size + offset inside the block. */
3073 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3074 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3075 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* Stage 2 entries are loaded as unsigned 16 bit values; the trailing 1 of
the two register address scales the index accordingly. */
3076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3077 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Read the chartype byte of the selected record; the trailing 3 scales the
record index by the 8 byte record size asserted above. */
3078 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3079 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3080 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3081 }
3082 #endif
3083 
/* Emits the code that moves STR_PTR forward by one character, and returns the
label placed at the beginning of that code. Control that simply falls into
the emitted code jumps over the advance (see `start`), so STR_PTR is only
moved when the returned label is jumped to.
  hascrorlf - when set, the code that steps over a two character newline as
              one unit is not emitted
  firstline - when set, the end of the first line is located first and
              stored in the stack slot common->first_line_end; it also
              disables the two character newline stepping
TMP1 and TMP2 are used as scratch by the emitted code, and TMP3 as well when
firstline is set. */
mainloop_entry(compiler_common * common,BOOL hascrorlf,BOOL firstline)3084 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3085 {
3086 DEFINE_COMPILER;
3087 struct sljit_label *mainloop;
3088 struct sljit_label *newlinelabel = NULL;
3089 struct sljit_jump *start;
3090 struct sljit_jump *end = NULL;
3091 struct sljit_jump *nl = NULL;
3092 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3093 struct sljit_jump *singlechar;
3094 #endif
3095 jump_list *newline = NULL;
3096 BOOL newlinecheck = FALSE;
3097 BOOL readuchar = FALSE;
3098
/* The two character newline stepping is emitted for the "any" newline types
and for fixed newlines that do not fit in one byte (value above 255). */
3099 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3100     common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3101   newlinecheck = TRUE;
3102
3103 if (firstline)
3104   {
3105   /* Search for the end of the first line. STR_PTR is saved in TMP3 and
  restored once the scan is finished. */
3106   SLJIT_ASSERT(common->first_line_end != 0);
3107   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3108
3109   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3110     {
    /* Two character fixed newline: compare the previous and the current
    character with the high and low byte of common->newline. The stored
    value is STR_PTR minus one character. */
3111     mainloop = LABEL();
3112     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3113     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3114     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3115     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3116     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3117     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3118     JUMPHERE(end);
3119     OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120     }
3121   else
3122     {
3123     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3124     mainloop = LABEL();
3125     /* Continual stores do not cause data dependency. The position is
    stored before each character is read, so when a newline is found the
    slot holds the start of that newline. */
3126     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3127     read_char_range(common, common->nlmin, common->nlmax, TRUE);
3128     check_newlinechar(common, common->nltype, &newline, TRUE);
3129     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3130     JUMPHERE(end);
    /* No newline was found: the first line ends where the scan stopped.
    The newline jumps land after this store and skip it. */
3131     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3132     set_jumps(newline, LABEL());
3133     }
3134
3135   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3136   }
3137
/* Entry path: skip the advance code below. */
3138 start = JUMP(SLJIT_JUMP);
3139
3140 if (newlinecheck)
3141   {
  /* Reached from the main loop when the current character equals the high
  byte of common->newline: advance one character, then one more if the
  next character equals the low byte of common->newline. */
3142   newlinelabel = LABEL();
3143   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3144   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3145   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3146   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3147   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3149   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3150 #endif
3151   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3152   nl = JUMP(SLJIT_JUMP);
3153   }
3154
/* This is the label handed back to the caller. */
3155 mainloop = LABEL();
3156
3157 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3158 #ifdef SUPPORT_UTF
3159 if (common->utf) readuchar = TRUE;
3160 #endif
3161 if (newlinecheck) readuchar = TRUE;
3162
/* The current character is only loaded when one of the checks below needs it. */
3163 if (readuchar)
3164   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3165
3166 if (newlinecheck)
3167   CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3168
3169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3170 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3171 #if defined COMPILE_PCRE8
3172 if (common->utf)
3173   {
  /* Lead bytes of 0xc0 and above: add the PRIV(utf8_table4) entry of the
  lead byte to STR_PTR (the table address is biased by 0xc0). */
3174   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3175   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3176   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3177   JUMPHERE(singlechar);
3178   }
3179 #elif defined COMPILE_PCRE16
3180 if (common->utf)
3181   {
  /* A unit in the 0xd800-0xdbff range (high surrogate) is followed by a
  second unit: add two more bytes (1 << 1) to STR_PTR in that case. */
3182   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3183   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3184   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3185   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3186   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3187   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3188   JUMPHERE(singlechar);
3189   }
3190 #endif /* COMPILE_PCRE[8|16] */
3191 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* The entry path and both exits of the newline stepping code continue here. */
3192 JUMPHERE(start);
3193
3194 if (newlinecheck)
3195   {
3196   JUMPHERE(end);
3197   JUMPHERE(nl);
3198   }
3199
3200 return mainloop;
3201 }
3202 
3203 #define MAX_N_CHARS 16
3204 #define MAX_N_BYTES 8
3205 
add_prefix_byte(pcre_uint8 byte,pcre_uint8 * bytes)3206 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3207 {
3208 pcre_uint8 len = bytes[0];
3209 int i;
3210 
3211 if (len == 255)
3212   return;
3213 
3214 if (len == 0)
3215   {
3216   bytes[0] = 1;
3217   bytes[1] = byte;
3218   return;
3219   }
3220 
3221 for (i = len; i > 0; i--)
3222   if (bytes[i] == byte)
3223     return;
3224 
3225 if (len >= MAX_N_BYTES - 1)
3226   {
3227   bytes[0] = 255;
3228   return;
3229   }
3230 
3231 len++;
3232 bytes[len] = byte;
3233 bytes[0] = len;
3234 }
3235 
/* Collects information about the characters a match has to start with.
  cc        - current position in the compiled pattern
  chars     - two entries per prefix position: chars[0] holds the character
              value (with the mask bits set) and chars[1] a mask of the bits
              in which the possible characters at that position differ;
              both are set to the all-ones mask when any character is
              accepted at the position
  bytes     - MAX_N_BYTES entries per prefix position, maintained through
              add_prefix_byte(); bytes[0] is set to 255 when any character
              is accepted at the position
  max_chars - number of prefix positions that may still be filled
  rec_count - budget shared by all nested calls; it is decremented once per
              loop iteration and the scan returns 0 when it is used up
The function calls itself for the pattern following an optional character
and for every further alternative of a bracket; the result of those calls is
stored back into max_chars. Apart from the rec_count case it returns the
number of positions it has filled itself. */
scan_prefix(compiler_common * common,pcre_uchar * cc,pcre_uint32 * chars,pcre_uint8 * bytes,int max_chars,pcre_uint32 * rec_count)3236 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3237 {
3238 /* Recursive function, which scans prefix literals. */
3239 BOOL last, any, caseless;
3240 int len, repeat, len_save, consumed = 0;
3241 pcre_uint32 chr, mask;
3242 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the encoded other case of a caseless character. */
3243 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3244 pcre_uchar othercase[8];
3245 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3246 pcre_uchar othercase[2];
3247 #else
3248 pcre_uchar othercase[1];
3249 #endif
3250
3251 repeat = 1;
3252 while (TRUE)
3253   {
3254   if (*rec_count == 0)
3255     return 0;
3256   (*rec_count)--;
3257
  /* last:     stop scanning after the current item
  any:      the item accepts an unspecified character
  caseless: the item is a literal matched without regard to case */
3258   last = TRUE;
3259   any = FALSE;
3260   caseless = FALSE;
3261
  /* Cases that end with "break" fall into the common processing below the
  switch; cases that end with "continue" only move cc (or set repeat). */
3262   switch (*cc)
3263     {
3264     case OP_CHARI:
3265     caseless = TRUE;
3266     case OP_CHAR:
3267     last = FALSE;
3268     cc++;
3269     break;
3270
3271     case OP_SOD:
3272     case OP_SOM:
3273     case OP_SET_SOM:
3274     case OP_NOT_WORD_BOUNDARY:
3275     case OP_WORD_BOUNDARY:
3276     case OP_EODN:
3277     case OP_EOD:
3278     case OP_CIRC:
3279     case OP_CIRCM:
3280     case OP_DOLL:
3281     case OP_DOLLM:
3282     /* Zero width assertions. */
3283     cc++;
3284     continue;
3285
3286     case OP_ASSERT:
3287     case OP_ASSERT_NOT:
3288     case OP_ASSERTBACK:
3289     case OP_ASSERTBACK_NOT:
    /* Assertions are stepped over as a whole. */
3290     cc = bracketend(cc);
3291     continue;
3292
3293     case OP_PLUSI:
3294     case OP_MINPLUSI:
3295     case OP_POSPLUSI:
3296     caseless = TRUE;
3297     case OP_PLUS:
3298     case OP_MINPLUS:
3299     case OP_POSPLUS:
    /* The character is recorded once; last stays TRUE, so the scan stops
    after it. */
3300     cc++;
3301     break;
3302
3303     case OP_EXACTI:
3304     caseless = TRUE;
3305     case OP_EXACT:
3306     repeat = GET2(cc, 1);
3307     last = FALSE;
3308     cc += 1 + IMM2_SIZE;
3309     break;
3310
3311     case OP_QUERYI:
3312     case OP_MINQUERYI:
3313     case OP_POSQUERYI:
3314     caseless = TRUE;
3315     case OP_QUERY:
3316     case OP_MINQUERY:
3317     case OP_POSQUERY:
    /* Optional character: first scan the pattern that follows it (the
    character being absent), then record the character itself below. */
3318     len = 1;
3319     cc++;
3320 #ifdef SUPPORT_UTF
3321     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3322 #endif
3323     max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3324     if (max_chars == 0)
3325       return consumed;
3326     last = FALSE;
3327     break;
3328
3329     case OP_KET:
3330     cc += 1 + LINK_SIZE;
3331     continue;
3332
3333     case OP_ALT:
    /* Jump over the alternative using its link value. */
3334     cc += GET(cc, 1);
3335     continue;
3336
3337     case OP_ONCE:
3338     case OP_ONCE_NC:
3339     case OP_BRA:
3340     case OP_BRAPOS:
3341     case OP_CBRA:
3342     case OP_CBRAPOS:
    /* Every further alternative is scanned by a nested call into the same
    chars/bytes arrays; the first one is then scanned by this loop. */
3343     alternative = cc + GET(cc, 1);
3344     while (*alternative == OP_ALT)
3345       {
3346       max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3347       if (max_chars == 0)
3348         return consumed;
3349       alternative += GET(alternative, 1);
3350       }
3351
3352     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3353       cc += IMM2_SIZE;
3354     cc += 1 + LINK_SIZE;
3355     continue;
3356
3357     case OP_CLASS:
3358 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3359     if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3360 #endif
3361     any = TRUE;
3362     cc += 1 + 32 / sizeof(pcre_uchar);
3363     break;
3364
3365     case OP_NCLASS:
3366 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3367     if (common->utf) return consumed;
3368 #endif
3369     any = TRUE;
3370     cc += 1 + 32 / sizeof(pcre_uchar);
3371     break;
3372
3373 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3374     case OP_XCLASS:
3375 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3376     if (common->utf) return consumed;
3377 #endif
3378     any = TRUE;
3379     cc += GET(cc, 1);
3380     break;
3381 #endif
3382
3383     case OP_DIGIT:
3384 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3385     if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3386       return consumed;
3387 #endif
3388     any = TRUE;
3389     cc++;
3390     break;
3391
3392     case OP_WHITESPACE:
3393 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3394     if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3395       return consumed;
3396 #endif
3397     any = TRUE;
3398     cc++;
3399     break;
3400
3401     case OP_WORDCHAR:
3402 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3403     if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3404       return consumed;
3405 #endif
3406     any = TRUE;
3407     cc++;
3408     break;
3409
3410     case OP_NOT:
3411     case OP_NOTI:
    /* Step over the opcode here; the shared code below steps over the
    character that follows it. */
3412     cc++;
3413     /* Fall through. */
3414     case OP_NOT_DIGIT:
3415     case OP_NOT_WHITESPACE:
3416     case OP_NOT_WORDCHAR:
3417     case OP_ANY:
3418     case OP_ALLANY:
3419 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3420     if (common->utf) return consumed;
3421 #endif
3422     any = TRUE;
3423     cc++;
3424     break;
3425
3426 #ifdef SUPPORT_UCP
3427     case OP_NOTPROP:
3428     case OP_PROP:
3429 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3430     if (common->utf) return consumed;
3431 #endif
3432     any = TRUE;
3433     cc += 1 + 2;
3434     break;
3435 #endif
3436
3437     case OP_TYPEEXACT:
    /* Only the repeat count is taken here; the type opcode that follows is
    handled by the next iteration. */
3438     repeat = GET2(cc, 1);
3439     cc += 1 + IMM2_SIZE;
3440     continue;
3441
3442     case OP_NOTEXACT:
3443     case OP_NOTEXACTI:
3444 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3445     if (common->utf) return consumed;
3446 #endif
3447     any = TRUE;
3448     repeat = GET2(cc, 1);
3449     cc += 1 + IMM2_SIZE + 1;
3450     break;
3451
3452     default:
    /* Anything else ends the prefix. */
3453     return consumed;
3454     }
3455
3456   if (any)
3457     {
    /* Unspecified character: mark `repeat` positions as "anything". */
3458 #if defined COMPILE_PCRE8
3459     mask = 0xff;
3460 #elif defined COMPILE_PCRE16
3461     mask = 0xffff;
3462 #elif defined COMPILE_PCRE32
3463     mask = 0xffffffff;
3464 #else
3465     SLJIT_ASSERT_STOP();
3466 #endif
3467
3468     do
3469       {
3470       chars[0] = mask;
3471       chars[1] = mask;
3472       bytes[0] = 255;
3473
3474       consumed++;
3475       if (--max_chars == 0)
3476         return consumed;
3477       chars += 2;
3478       bytes += MAX_N_BYTES;
3479       }
3480     while (--repeat > 0);
3481
3482     repeat = 1;
3483     continue;
3484     }
3485
  /* Literal character: cc points at its first code unit, len is the number
  of code units it occupies. */
3486   len = 1;
3487 #ifdef SUPPORT_UTF
3488   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3489 #endif
3490
3491   if (caseless && char_has_othercase(common, cc))
3492     {
3493 #ifdef SUPPORT_UTF
3494     if (common->utf)
3495       {
      /* The scan stops here unless the other case is encoded in the same
      number of code units. */
3496       GETCHAR(chr, cc);
3497       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3498         return consumed;
3499       }
3500     else
3501 #endif
3502       {
3503       chr = *cc;
3504       othercase[0] = TABLE_GET(chr, common->fcc, chr);
3505       }
3506     }
3507   else
3508     caseless = FALSE;
3509
  /* Record the code units of the character, `repeat` times in a row. */
3510   len_save = len;
3511   cc_save = cc;
3512   while (TRUE)
3513     {
3514     oc = othercase;
3515     do
3516       {
3517       chr = *cc;
3518 #ifdef COMPILE_PCRE32
3519       if (SLJIT_UNLIKELY(chr == NOTACHAR))
3520         return consumed;
3521 #endif
3522       add_prefix_byte((pcre_uint8)chr, bytes);
3523
3524       mask = 0;
3525       if (caseless)
3526         {
        /* mask receives the bits in which the two cases differ. */
3527         add_prefix_byte((pcre_uint8)*oc, bytes);
3528         mask = *cc ^ *oc;
3529         chr |= mask;
3530         }
3531
      /* A position still holding NOTACHAR takes the value as it is;
      otherwise the mask is widened by the bits that differ from the value
      already stored there. */
3532 #ifdef COMPILE_PCRE32
3533       if (chars[0] == NOTACHAR && chars[1] == 0)
3534 #else
3535       if (chars[0] == NOTACHAR)
3536 #endif
3537         {
3538         chars[0] = chr;
3539         chars[1] = mask;
3540         }
3541       else
3542         {
3543         mask |= chars[0] ^ chr;
3544         chr |= mask;
3545         chars[0] = chr;
3546         chars[1] |= mask;
3547         }
3548
3549       len--;
3550       consumed++;
3551       if (--max_chars == 0)
3552         return consumed;
3553       chars += 2;
3554       bytes += MAX_N_BYTES;
3555       cc++;
3556       oc++;
3557       }
3558     while (len > 0);
3559
3560     if (--repeat == 0)
3561       break;
3562
    /* Rewind to the start of the character for the next repetition. */
3563     len = len_save;
3564     cc = cc_save;
3565     }
3566
3567   repeat = 1;
3568   if (last)
3569     return consumed;
3570   }
3571 }
3572 
/* Emits a fast-forward loop that is based on the first few characters of the
pattern, as collected by scan_prefix().
Returns FALSE, without emitting any code, when fewer than two prefix
positions are available or when no usable position is found. Returns TRUE
otherwise; this includes the case where the skip table cannot be allocated,
in which no code is emitted either.
When firstline is set, the value stored at common->first_line_end is used to
limit the search. TMP1, TMP2 and (with firstline) TMP3 are used by the
emitted code; RETURN_ADDR holds the skip table address except on x86-32. */
fast_forward_first_n_chars(compiler_common * common,BOOL firstline)3573 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3574 {
3575 DEFINE_COMPILER;
3576 struct sljit_label *start;
3577 struct sljit_jump *quit;
/* chars: (value, mask) pair per prefix position; bytes: byte set per
position; ones: number of set mask bits per position. */
3578 pcre_uint32 chars[MAX_N_CHARS * 2];
3579 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3580 pcre_uint8 ones[MAX_N_CHARS];
/* Positions selected for direct comparison: [0] first, [1] last,
[2] middle; -1 when not used. */
3581 int offsets[3];
3582 pcre_uint32 mask;
3583 pcre_uint8 *byte_set, *byte_set_end;
3584 int i, max, from;
/* A range is only accepted when it is longer than the initial range_len. */
3585 int range_right = -1, range_len = 3 - 1;
3586 sljit_ub *update_table = NULL;
3587 BOOL in_range;
3588 pcre_uint32 rec_count;
3589
/* Mark every position as empty. */
3590 for (i = 0; i < MAX_N_CHARS; i++)
3591   {
3592   chars[i << 1] = NOTACHAR;
3593   chars[(i << 1) + 1] = 0;
3594   bytes[i * MAX_N_BYTES] = 0;
3595   }
3596
3597 rec_count = 10000;
3598 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3599
3600 if (max <= 1)
3601   return FALSE;
3602
/* Count the set bits of each mask, four bits at a time. */
3603 for (i = 0; i < max; i++)
3604   {
3605   mask = chars[(i << 1) + 1];
3606   ones[i] = ones_in_half_byte[mask & 0xf];
3607   mask >>= 4;
3608   while (mask != 0)
3609     {
3610     ones[i] += ones_in_half_byte[mask & 0xf];
3611     mask >>= 4;
3612     }
3613   }
3614
/* Search for the longest run of positions whose byte set has not
overflowed (count below 255). A run is recorded when it is longer than the
best one so far and its last position has at most four distinct bytes;
range_right is that last position. */
3615 in_range = FALSE;
3616 from = 0;   /* Prevent compiler "uninitialized" warning */
3617 for (i = 0; i <= max; i++)
3618   {
3619   if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3620     {
3621     range_len = i - from;
3622     range_right = i - 1;
3623     }
3624
3625   if (i < max && bytes[i * MAX_N_BYTES] < 255)
3626     {
3627     if (!in_range)
3628       {
3629       in_range = TRUE;
3630       from = i;
3631       }
3632     }
3633   else if (in_range)
3634     in_range = FALSE;
3635   }
3636
3637 if (range_right >= 0)
3638   {
  /* Build the skip table: for every byte value, the number of bytes
  STR_PTR can be advanced by when that byte is read at position
  range_right. The default is the whole range; a byte that occurs i
  positions to the left of range_right limits the skip to i characters. */
3639   update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3640   if (update_table == NULL)
3641     return TRUE;
3642   memset(update_table, IN_UCHARS(range_len), 256);
3643
3644   for (i = 0; i < range_len; i++)
3645     {
3646     byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3647     SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3648     byte_set_end = byte_set + byte_set[0];
3649     byte_set++;
3650     while (byte_set <= byte_set_end)
3651       {
3652       if (update_table[*byte_set] > IN_UCHARS(i))
3653         update_table[*byte_set] = IN_UCHARS(i);
3654       byte_set++;
3655       }
3656     }
3657   }
3658
/* The first position whose mask has at most two set bits. */
3659 offsets[0] = -1;
3660 /* Scan forward. */
3661 for (i = 0; i < max; i++)
3662   if (ones[i] <= 2) {
3663     offsets[0] = i;
3664     break;
3665   }
3666
3667 if (offsets[0] < 0 && range_right < 0)
3668   return FALSE;
3669
3670 if (offsets[0] >= 0)
3671   {
3672   /* Scan backward. The position checked through the skip table is not
  selected again. */
3673   offsets[1] = -1;
3674   for (i = max - 1; i > offsets[0]; i--)
3675     if (ones[i] <= 2 && i != range_right)
3676       {
3677       offsets[1] = i;
3678       break;
3679       }
3680
3681   /* This case is handled better by fast_forward_first_char. */
3682   if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3683     return FALSE;
3684
3685   offsets[2] = -1;
3686   /* We only search for a middle character if there is no range check. */
3687   if (offsets[1] >= 0 && range_right == -1)
3688     {
3689     /* Scan from middle. */
3690     for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3691       if (ones[i] <= 2)
3692         {
3693         offsets[2] = i;
3694         break;
3695         }
3696
3697     if (offsets[2] == -1)
3698       {
3699       for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3700         if (ones[i] <= 2)
3701           {
3702           offsets[2] = i;
3703           break;
3704           }
3705       }
3706     }
3707
3708   SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3709   SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3710
  /* Move the selected (value, mask) pairs to the front of chars:
  [0],[1] first position, [2],[3] middle position, [4],[5] last position. */
3711   chars[0] = chars[offsets[0] << 1];
3712   chars[1] = chars[(offsets[0] << 1) + 1];
3713   if (offsets[2] >= 0)
3714     {
3715     chars[2] = chars[offsets[2] << 1];
3716     chars[3] = chars[(offsets[2] << 1) + 1];
3717     }
3718   if (offsets[1] >= 0)
3719     {
3720     chars[4] = chars[offsets[1] << 1];
3721     chars[5] = chars[(offsets[1] << 1) + 1];
3722     }
3723   }
3724
/* From here on code is emitted. STR_END is lowered by max characters for
the duration of the loop and restored afterwards. */
3725 max -= 1;
3726 if (firstline)
3727   {
  /* The original STR_END is kept in TMP3. The loop limit becomes the
  smaller of the lowered STR_END and the stored first line end. */
3728   SLJIT_ASSERT(common->first_line_end != 0);
3729   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3730   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3731   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3732   quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3733   OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3734   JUMPHERE(quit);
3735   }
3736 else
3737   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3738
/* Except on x86-32, the table address is kept in RETURN_ADDR. */
3739 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3740 if (range_right >= 0)
3741   OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3742 #endif
3743
3744 start = LABEL();
3745 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3746
3747 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3748
3749 if (range_right >= 0)
3750   {
  /* Load one byte of the character at position range_right. In the second
  variant the last byte of the code unit is addressed instead of the first. */
3751 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3752   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3753 #else
3754   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3755 #endif
3756
  /* Advance by the table entry; restart the loop unless the entry is zero. */
3757 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3758   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3759 #else
3760   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3761 #endif
3762   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3763   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3764   }
3765
3766 if (offsets[0] >= 0)
3767   {
  /* Compare the selected positions directly. STR_PTR is advanced by one
  character before the comparisons, so a failed comparison restarts the
  loop at the next position; this is also why the middle character is
  loaded from offsets[2] - 1. */
3768   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3769   if (offsets[1] >= 0)
3770     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3771   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3772
  /* OR-ing with the mask makes the differing bits irrelevant; the stored
  value already has those bits set. */
3773   if (chars[1] != 0)
3774     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3775   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3776   if (offsets[2] >= 0)
3777     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3778
3779   if (offsets[1] >= 0)
3780     {
3781     if (chars[5] != 0)
3782       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3783     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3784     }
3785
3786   if (offsets[2] >= 0)
3787     {
3788     if (chars[3] != 0)
3789       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3790     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3791     }
  /* All comparisons succeeded: undo the advance made above. */
3792   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3793   }
3794
3795 JUMPHERE(quit);
3796
/* Restore STR_END. With firstline and a skip table, STR_PTR is also
clamped to the stored first line end. */
3797 if (firstline)
3798   {
3799   if (range_right >= 0)
3800     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3801   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3802   if (range_right >= 0)
3803     {
3804     quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3805     OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3806     JUMPHERE(quit);
3807     }
3808   }
3809 else
3810   OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3811 return TRUE;
3812 }
3813 
3814 #undef MAX_N_CHARS
3815 #undef MAX_N_BYTES
3816 
fast_forward_first_char(compiler_common * common,pcre_uchar first_char,BOOL caseless,BOOL firstline)3817 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3818 {
3819 DEFINE_COMPILER;
3820 struct sljit_label *start;
3821 struct sljit_jump *quit;
3822 struct sljit_jump *found;
3823 pcre_uchar oc, bit;
3824 
3825 if (firstline)
3826   {
3827   SLJIT_ASSERT(common->first_line_end != 0);
3828   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3829   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3830   }
3831 
3832 start = LABEL();
3833 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3834 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3835 
3836 oc = first_char;
3837 if (caseless)
3838   {
3839   oc = TABLE_GET(first_char, common->fcc, first_char);
3840 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3841   if (first_char > 127 && common->utf)
3842     oc = UCD_OTHERCASE(first_char);
3843 #endif
3844   }
3845 if (first_char == oc)
3846   found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3847 else
3848   {
3849   bit = first_char ^ oc;
3850   if (is_powerof2(bit))
3851     {
3852     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3853     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3854     }
3855   else
3856     {
3857     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3858     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3859     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3860     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3861     found = JUMP(SLJIT_NOT_ZERO);
3862     }
3863   }
3864 
3865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3866 JUMPTO(SLJIT_JUMP, start);
3867 JUMPHERE(found);
3868 JUMPHERE(quit);
3869 
3870 if (firstline)
3871   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3872 }
3873 
fast_forward_newline(compiler_common * common,BOOL firstline)3874 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3875 {
3876 DEFINE_COMPILER;
3877 struct sljit_label *loop;
3878 struct sljit_jump *lastchar;
3879 struct sljit_jump *firstchar;
3880 struct sljit_jump *quit;
3881 struct sljit_jump *foundcr = NULL;
3882 struct sljit_jump *notfoundnl;
3883 jump_list *newline = NULL;
3884 
3885 if (firstline)
3886   {
3887   SLJIT_ASSERT(common->first_line_end != 0);
3888   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3889   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3890   }
3891 
3892 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3893   {
3894   lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3895   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3896   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3897   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3898   firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3899 
3900   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3901   OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3902   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3903 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3904   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3905 #endif
3906   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3907 
3908   loop = LABEL();
3909   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3910   quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3911   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3912   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3913   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3914   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3915 
3916   JUMPHERE(quit);
3917   JUMPHERE(firstchar);
3918   JUMPHERE(lastchar);
3919 
3920   if (firstline)
3921     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3922   return;
3923   }
3924 
3925 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3927 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3928 skip_char_back(common);
3929 
3930 loop = LABEL();
3931 common->ff_newline_shortcut = loop;
3932 
3933 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3934 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3935 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3936   foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3937 check_newlinechar(common, common->nltype, &newline, FALSE);
3938 set_jumps(newline, loop);
3939 
3940 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3941   {
3942   quit = JUMP(SLJIT_JUMP);
3943   JUMPHERE(foundcr);
3944   notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3945   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3946   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3947   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3948 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3949   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3950 #endif
3951   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3952   JUMPHERE(notfoundnl);
3953   JUMPHERE(quit);
3954   }
3955 JUMPHERE(lastchar);
3956 JUMPHERE(firstchar);
3957 
3958 if (firstline)
3959   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3960 }
3961 
3962 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3963 
fast_forward_start_bits(compiler_common * common,pcre_uint8 * start_bits,BOOL firstline)3964 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3965 {
3966 DEFINE_COMPILER;
3967 struct sljit_label *start;
3968 struct sljit_jump *quit;
3969 struct sljit_jump *found = NULL;
3970 jump_list *matches = NULL;
3971 #ifndef COMPILE_PCRE8
3972 struct sljit_jump *jump;
3973 #endif
3974 
3975 if (firstline)
3976   {
3977   SLJIT_ASSERT(common->first_line_end != 0);
3978   OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3979   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3980   }
3981 
3982 start = LABEL();
3983 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3984 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3985 #ifdef SUPPORT_UTF
3986 if (common->utf)
3987   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3988 #endif
3989 
3990 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3991   {
3992 #ifndef COMPILE_PCRE8
3993   jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3994   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3995   JUMPHERE(jump);
3996 #endif
3997   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3998   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3999   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4000   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4001   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4002   found = JUMP(SLJIT_NOT_ZERO);
4003   }
4004 
4005 #ifdef SUPPORT_UTF
4006 if (common->utf)
4007   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4008 #endif
4009 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4010 #ifdef SUPPORT_UTF
4011 #if defined COMPILE_PCRE8
4012 if (common->utf)
4013   {
4014   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4015   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4016   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4017   }
4018 #elif defined COMPILE_PCRE16
4019 if (common->utf)
4020   {
4021   CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4022   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4023   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4024   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4025   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4026   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4027   }
4028 #endif /* COMPILE_PCRE[8|16] */
4029 #endif /* SUPPORT_UTF */
4030 JUMPTO(SLJIT_JUMP, start);
4031 if (found != NULL)
4032   JUMPHERE(found);
4033 if (matches != NULL)
4034   set_jumps(matches, LABEL());
4035 JUMPHERE(quit);
4036 
4037 if (firstline)
4038   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4039 }
4040 
search_requested_char(compiler_common * common,pcre_uchar req_char,BOOL caseless,BOOL has_firstchar)4041 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4042 {
4043 DEFINE_COMPILER;
4044 struct sljit_label *loop;
4045 struct sljit_jump *toolong;
4046 struct sljit_jump *alreadyfound;
4047 struct sljit_jump *found;
4048 struct sljit_jump *foundoc = NULL;
4049 struct sljit_jump *notfound;
4050 pcre_uint32 oc, bit;
4051 
4052 SLJIT_ASSERT(common->req_char_ptr != 0);
4053 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4054 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4055 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4056 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4057 
4058 if (has_firstchar)
4059   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4060 else
4061   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4062 
4063 loop = LABEL();
4064 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4065 
4066 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4067 oc = req_char;
4068 if (caseless)
4069   {
4070   oc = TABLE_GET(req_char, common->fcc, req_char);
4071 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4072   if (req_char > 127 && common->utf)
4073     oc = UCD_OTHERCASE(req_char);
4074 #endif
4075   }
4076 if (req_char == oc)
4077   found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4078 else
4079   {
4080   bit = req_char ^ oc;
4081   if (is_powerof2(bit))
4082     {
4083     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4084     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4085     }
4086   else
4087     {
4088     found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4089     foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4090     }
4091   }
4092 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4093 JUMPTO(SLJIT_JUMP, loop);
4094 
4095 JUMPHERE(found);
4096 if (foundoc)
4097   JUMPHERE(foundoc);
4098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4099 JUMPHERE(alreadyfound);
4100 JUMPHERE(toolong);
4101 return notfound;
4102 }
4103 
/* Emits the fast-call routine that walks the frame data starting at STACK_TOP
and copies saved values back to the local area addressed through TMP3.

Each entry starts with one machine word:
  > 0   offset into the local area; the next two words are copied there
        and the cursor moves on by three words
  < 0   negated offset into the local area; the next single word is copied
        there and the cursor moves on by two words
  == 0  end of the data; the routine returns

TMP1 is the cursor and TMP2 holds the current entry word. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *not_positive;
struct sljit_jump *negative;
struct sljit_label *next_entry;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
not_positive = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive entry: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_entry);

/* Zero or negative; the flags from the comparison above are reused. */
JUMPHERE(not_positive);
negative = JUMP(SLJIT_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative entry: restore one word. */
JUMPHERE(negative);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_entry);
}
4138 
/* Emits the fast-call routine that tests for a word boundary at STR_PTR.

The "word" flag (0 or 1) of the character before STR_PTR is stored in
LOCALS1, and the flag of the character at STR_PTR is computed in TMP2. A
missing neighbour (STR_PTR at jit_arguments.begin, or the end-of-subject
exits collected by check_str_end) counts as 0. The last instruction XORs the
two flags with SLJIT_SET_E, so the caller reads the answer from the flags.

The return address is kept in LOCALS0. TMP1 and TMP2 are used as scratch. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *no_prev;
jump_list *no_next = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *skip_lookup;
#endif

/* The shift by 4 further down depends on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* ---- Character before STR_PTR: flag goes to LOCALS1 (default 0). ---- */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
no_prev = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Underscore is a word character outright; otherwise the value fetched
  by the getucd routine is tested against the ucp_Ll..ucp_Lu and
  ucp_Nd..ucp_No ranges. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  skip_lookup = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(skip_lookup);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Table lookup; values above 255 skip it and keep the zero in LOCALS1. */
#ifndef COMPILE_PCRE8
  skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  skip_lookup = NULL;
  if (common->utf)
    skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(skip_lookup);
#elif defined SUPPORT_UTF
  if (skip_lookup != NULL)
    JUMPHERE(skip_lookup);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(no_prev);

/* ---- Character at STR_PTR: flag goes to TMP2 (default 0). ---- */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &no_next);
peek_char(common, READ_CHAR_MAX);

/* Same classification as above, duplicated for the second character. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  skip_lookup = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(skip_lookup);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  skip_lookup = NULL;
  if (common->utf)
    skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(skip_lookup);
#elif defined SUPPORT_UTF
  if (skip_lookup != NULL)
    JUMPHERE(skip_lookup);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(no_next, LABEL());

/* The flags are set from (current flag XOR previous flag). */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4247 
/* Tries to replace a 256-bit class bitmap lookup with a few comparisons on
the character held in TMP1.

The bitmap is scanned for the positions where membership flips. If at most
four such positions are found, compare-and-jump code is emitted and the jumps
are appended to *backtracks.

Arguments:
  common      compiler state
  bits        class bitmap, 32 bytes, bit (c & 7) of bits[c >> 3]
  nclass      decides, together with the membership of the last bitmap
              position, whether an extra flip at 256 is recorded
  invert      flips the sense of the emitted tests
  backtracks  list that receives the emitted jumps

Returns TRUE when code was emitted (TMP1 may have been modified by it), or
FALSE when the bitmap is too irregular; nothing is emitted in that case. */
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
DEFINE_COMPILER;
int edges[MAX_RANGE_SIZE];
pcre_uint8 state, probe, uniform;
int pos, idx, count = 0;
int gap, rebase;

state = bits[0] & 0x1;
/* A whole byte in the current state is 0x00 or 0xff (state is 0 or 1). */
uniform = -state;

/* Record every position where the membership bit changes. */
pos = 0;
while (pos < 256)
  {
  idx = pos >> 3;
  if ((pos & 0x7) == 0 && bits[idx] == uniform)
    {
    /* Eight unchanged positions can be skipped at once. */
    pos += 8;
    continue;
    }

  probe = (bits[idx] >> (pos & 0x7)) & 0x1;
  if (probe != state)
    {
    if (count >= MAX_RANGE_SIZE)
      return FALSE;
    edges[count++] = pos;
    state = probe;
    uniform = -probe;
    }
  pos++;
  }

/* Optionally add a final flip at 256. */
if (state == 0 ? nclass : !nclass)
  {
  if (count >= MAX_RANGE_SIZE)
    return FALSE;
  edges[count++] = 256;
  }

if (count < 0 || count > 4)
  return FALSE;

/* From here on "state" is the (possibly inverted) membership of value 0. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

switch(count)
  {
  case 0:
  /* No flip at all: either no character is accepted (unconditional jump)
  or all characters are accepted (nothing to emit). */
  if (state == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  return TRUE;

  case 2:
  if (edges[0] + 1 == edges[1])
    {
    /* The range is a single value. */
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    }
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
    }
  return TRUE;

  case 3:
  if (state != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      }
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[0]));
    if (edges[1] + 1 == edges[2])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
      }
    }
  return TRUE;

  case 4:
  gap = edges[2] - edges[0];
  /* Two ranges of equal length whose starts differ in exactly one bit:
  set that bit in TMP1 and test the upper range only. */
  if ((edges[1] - edges[0]) == (edges[3] - edges[2])
      && (edges[0] | gap) == edges[2]
      && (edges[1] & gap) == 0
      && is_powerof2(gap))
    {
    SLJIT_ASSERT((edges[0] & gap) == 0 && (edges[2] & edges[3] & gap) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, gap);
    if (edges[2] + 1 == edges[3])
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2]);
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    return TRUE;
    }

  if (state != 0)
    {
    /* rebase is the amount already subtracted from TMP1 by the first test. */
    rebase = 0;
    if (edges[0] + 1 == edges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
      rebase = edges[0];
      }

    if (edges[2] + 1 == edges[3])
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[2] - rebase));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[2] - rebase);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[3] - edges[2]));
      }
    return TRUE;
    }

  /* state == 0: first test the outer bounds, then the hole in the middle. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[0]));
  if (edges[1] + 1 == edges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[1] - edges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[2] - edges[1]));
    }
  return TRUE;

  default:
  SLJIT_ASSERT_STOP();
  return FALSE;
  }
}
4398 
/* Emits the fast-call routine that tests whether TMP1 holds a newline
character: 0x0a..0x0d, 0x85 and, in the wide or UTF case, 0x2028/0x2029.

The result is accumulated in TMP2 and the final OR sets the flags
(SLJIT_SET_E). TMP1 is modified and TMP2 is overwritten. */
static void check_anynewline(compiler_common *common)
{
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
#endif
  {
  /* Fold in the 0x85 result, then test 0x2028 and 0x2029 together by
  forcing the lowest bit on. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags for the caller. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4425 
/* Emits the fast-call routine that tests whether TMP1 holds a horizontal
space character: 0x09, 0x20, 0xa0 and, in the wide or UTF case, 0x1680,
0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000.

The result is accumulated in TMP2 and the final OR sets the flags
(SLJIT_SET_E). TMP2 is overwritten; TMP1 is modified in the wide/UTF case. */
static void check_hspace(compiler_common *common)
{
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each comparison result is OR-ed into TMP2 by the OP_FLAGS that follows. */
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
#endif
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Rebase to 0x2000; the remaining constants are relative to it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags for the caller. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4464 
/* Emits the fast-call routine that tests whether TMP1 holds a vertical space
character: 0x0a..0x0d, 0x85 and, in the wide or UTF case, 0x2028/0x2029.

The result is accumulated in TMP2 and the final OR sets the flags
(SLJIT_SET_E). TMP1 is modified and TMP2 is overwritten. */
static void check_vspace(compiler_common *common)
{
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
#endif
  {
  /* Fold in the 0x85 result, then test 0x2028 and 0x2029 together by
  forcing the lowest bit on. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags for the caller. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
4492 
4493 #define CHAR1 STR_END
4494 #define CHAR2 STACK_TOP
4495 
do_casefulcmp(compiler_common * common)4496 static void do_casefulcmp(compiler_common *common)
4497 {
4498 DEFINE_COMPILER;
4499 struct sljit_jump *jump;
4500 struct sljit_label *label;
4501 
4502 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4503 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4504 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
4506 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4508 
4509 label = LABEL();
4510 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4511 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4512 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4513 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4514 JUMPTO(SLJIT_NOT_ZERO, label);
4515 
4516 JUMPHERE(jump);
4517 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4518 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4519 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4520 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4521 }
4522 
4523 #define LCC_TABLE STACK_LIMIT
4524 
do_caselesscmp(compiler_common * common)4525 static void do_caselesscmp(compiler_common *common)
4526 {
4527 DEFINE_COMPILER;
4528 struct sljit_jump *jump;
4529 struct sljit_label *label;
4530 
4531 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4532 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4533 
4534 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4536 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4537 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4538 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4539 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4540 
4541 label = LABEL();
4542 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4543 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4544 #ifndef COMPILE_PCRE8
4545 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4546 #endif
4547 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4548 #ifndef COMPILE_PCRE8
4549 JUMPHERE(jump);
4550 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4551 #endif
4552 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4553 #ifndef COMPILE_PCRE8
4554 JUMPHERE(jump);
4555 #endif
4556 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4557 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4558 JUMPTO(SLJIT_NOT_ZERO, label);
4559 
4560 JUMPHERE(jump);
4561 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4562 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4563 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4564 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4565 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4566 }
4567 
4568 #undef LCC_TABLE
4569 #undef CHAR1
4570 #undef CHAR2
4571 
4572 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4573 
do_utf_caselesscmp(pcre_uchar * src1,jit_arguments * args,pcre_uchar * end1)4574 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4575 {
4576 /* This function would be ineffective to do in JIT level. */
4577 pcre_uint32 c1, c2;
4578 const pcre_uchar *src2 = args->uchar_ptr;
4579 const pcre_uchar *end2 = args->end;
4580 const ucd_record *ur;
4581 const pcre_uint32 *pp;
4582 
4583 while (src1 < end1)
4584   {
4585   if (src2 >= end2)
4586     return (pcre_uchar*)1;
4587   GETCHARINC(c1, src1);
4588   GETCHARINC(c2, src2);
4589   ur = GET_UCD(c2);
4590   if (c1 != c2 && c1 != c2 + ur->other_case)
4591     {
4592     pp = PRIV(ucd_caseless_sets) + ur->caseset;
4593     for (;;)
4594       {
4595       if (c1 < *pp) return NULL;
4596       if (c1 == *pp++) break;
4597       }
4598     }
4599   }
4600 return src2;
4601 }
4602 
4603 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4604 
/* Emits the comparison of one pattern character (one or more code units in
UTF mode) against the subject, as one step of a longer literal sequence.

Arguments:
  common      compiler state
  caseless    TRUE when the character is to be matched caselessly
  cc          first code unit of the pattern character
  context     state shared by the steps of the sequence: length is the
              number of bytes still to compare (subject data is addressed at
              STR_PTR - length), sourcereg is the register that receives the
              next load (-1 before the first step), and ucharptr with the
              c/oc unions batch units for the unaligned-read variant
  backtracks  list that receives the mismatch jumps

Loads are issued one step ahead: the data for the next comparison goes into
sourcereg, then the register choice is swapped between TMP1 and TMP2 and the
comparison runs on the register that was loaded earlier.

Returns the pattern pointer after the units that were consumed. */
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int case_bit = 0;
pcre_uchar *case_unit = NULL;
BOOL fold_here;
#ifdef SUPPORT_UTF
int units_left;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  case_bit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(case_bit);
  /* Split the packed value into the unit that differs between the two
  cases (case_unit) and the differing bit within it (case_bit). */
#if defined COMPILE_PCRE8
  case_unit = cc + (case_bit >> 8);
  case_bit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  case_unit = cc + (case_bit >> 9);
  case_bit = ((case_bit & 0x100) != 0) ? (case_bit & 0xff) << 8 : (case_bit & 0xff);
#endif /* COMPILE_PCRE[8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First step of the sequence: issue the initial load into TMP1. */
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
units_left = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  units_left += GET_EXTRALEN(*cc);

do
  {
#endif

  /* Does the case bit apply to the unit handled in this pass? */
  fold_here = (case_bit != 0 && case_unit == cc);

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported: collect units into c/oc and compare
  several of them with a single instruction. */
  if (fold_here)
    {
    context->c.asuchars[context->ucharptr] = *cc | case_bit;
    context->oc.asuchars[context->ucharptr] = case_bit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Flush when the batch is full or nothing more can be added to it. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Issue the load for the following batch first. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* Compare the batch with the width that matches its unit count. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode: one unit per
  comparison. The load for the following unit is issued first. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (fold_here)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, case_bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | case_bit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  units_left--;
  }
while (units_left > 0);
#endif

return cc;
}
4757 
4758 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4759 
/* Helper macros for compile_xclass_matchingpath(). They rely on the locals
typereg / typeoffset and charoffset of that function.

typeoffset (resp. charoffset) is the compile-time record of the value that
has already been subtracted from typereg (resp. TMP1) in the generated code.
SET_xxx_OFFSET(value) emits a single ADD or SUB that moves the register from
the old bias to the new one (nothing is emitted when the bias is unchanged)
and then updates the record.

The bodies are wrapped in do { } while (0) so that each macro expands to
exactly one statement; without the wrapper the trailing assignment would
escape an unbraced if / else. The argument is evaluated more than once, so
it must not have side effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4779 
/* Generates the matching path of an extended character class.

Arguments:
  common      the compiler state (DEFINE_COMPILER obtains the sljit compiler
              from it)
  cc          points to the flag unit of the class (XCL_NOT, XCL_MAP and
              XCL_HASPROP bits). When XCL_MAP is set, a 32 byte bitmap
              follows. After that comes the list of XCL_SINGLE, XCL_RANGE,
              XCL_PROP and XCL_NOTPROP items, terminated by XCL_END.
  backtracks  the jump list which receives the "does not match" jumps

The function works in two passes. The first pass scans the item list to find
the smallest / largest character that has to be distinguished and (with
SUPPORT_UCP) whether the character type, the script or the character value
itself is needed by the property checks. The second pass emits the compare
sequence. Jumps taken on a successful match are collected in the local
"found" list, which is bound to the end of the generated code. When XCL_NOT
is set, "list" aliases backtracks, so a hit on any item fails the match. */

static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
BOOL utf = common->utf;
#endif

#ifdef SUPPORT_UCP
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1, scriptreg = TMP1;
const pcre_uint32 *other_cases;
sljit_uw typeoffset;
#endif

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;
if (cc[-1] & XCL_MAP)
  {
  /* The bitmap covers the low characters, so the range starts at zero. */
  min = 0;
  cc += 32 / sizeof(pcre_uchar);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    /* Lower bound of the range. */
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    /* Upper bound of the range. */
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
#ifdef SUPPORT_UCP
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* A caseless set is an explicit, NOTACHAR terminated character list. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property can apply to any character. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    cc += 2;
    }
#endif
  }

/* We are not necessarily in utf mode, even in 8 bit mode. */
cc = ccbegin;
detect_partial_match(common, backtracks);
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 are not described by the bitmap: they skip the
    bitmap test and continue with the item list. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is TMP1 >> 3, bit is 1 << (TMP1 & 7). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    /* A character <= 255 which was not accepted above goes to backtracks. */
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(pcre_uchar);
    }
  else
    {
    /* No bitmap and no property: reject everything outside [min, max]
    with a single unsigned compare. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test below destroys TMP1, so it is saved in TMP3 and
  restored afterwards for the item / property checks. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#ifdef SUPPORT_UCP
  charsaved = TRUE;
#endif
  if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
    {
#ifdef COMPILE_PCRE8
    jump = NULL;
    if (common->utf)
#endif
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#ifdef COMPILE_PCRE8
    if (common->utf)
#endif
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  cc += 32 / sizeof(pcre_uchar);
  }

#ifdef SUPPORT_UCP
/* Simple register allocation. TMP1 is preferred if possible. */
if (needstype || needsscript)
  {
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  if (needschar)
    {
    if (needstype)
      {
      /* TMP1 is needed for the character again, so the type moves away. */
      OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
      typereg = RETURN_ADDR;
      }

    if (needsscript)
      scriptreg = TMP3;
    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
    }
  else if (needstype && needsscript)
    scriptreg = TMP3;
  /* In all other cases only one of them was specified, and that can go to TMP1. */

  if (needsscript)
    {
    if (scriptreg == TMP1)
      {
      OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
      }
    else
      {
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
      }
    }
  }
#endif

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UCP
typeoffset = 0;
#endif

while (*cc != XCL_END)
  {
  compares--;
  /* The last item of a non-negated class jumps to backtracks when it does
  not match (see the add_jump at the end of the loop), hence the inverted
  condition. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      /* Collect up to a few consecutive compares in TMP2 and branch once. */
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    /* Bias TMP1 by the lower bound, so one unsigned compare against
    (upper - lower) checks the whole range. */
    GETCHARINCTEST(c, cc);
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UCP
  else
    {
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      /* NOTE(review): the "continue" paths below skip the "cc += 2" at the
      end of this block, so the loop condition is re-evaluated with cc still
      pointing at the PT_ANY unit. Confirm that this is intended. */
      if (list != backtracks)
        {
        if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
          continue;
        }
      else if (cc[-1] == XCL_NOTPROP)
        continue;
      jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      /* ucp_typerange holds a (first, last) type pair for each category. */
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD the underscore result is already in TMP2: OR into it. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          /* The bit trick needs the unbiased character value. */
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The differing bit of the [1] / [2] pair must be set here, exactly
          as in the charoffset == 0 branch above (this used to OR in the bit
          of the [0] / [1] pair, which is not a power of 2 in this branch). */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
        }

      /* The remaining characters are compared one by one. Only the last
      OR sets the flags which are tested by the jump below. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);

      /* The character bias must be changed before the conditional jump:
      SET_CHAR_OFFSET updates the compile-time charoffset unconditionally,
      so the adjustment of TMP1 must be executed on both paths as well.
      Otherwise the items following this one would use a wrong bias
      whenever the type is not ucp_Cf. */
      SET_CHAR_OFFSET(0x2066);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      /* TMP2 is non-zero for the characters which are NOT in the class. */
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);

      /* Must be emitted on both paths, see the comment in PT_PXGRAPH. */
      SET_CHAR_OFFSET(0x2066);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      /* Every property type was already validated by the scanning pass. */
      SLJIT_ASSERT_STOP();
      break;
      }
    cc += 2;
    }
#endif

  /* Intermediate items jump to "list" on a hit; the last item jumps to
  backtracks (its condition was inverted above when list is "found"). */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
5279 
5280 #undef SET_TYPE_OFFSET
5281 #undef SET_CHAR_OFFSET
5282 
5283 #endif
5284 
compile_char1_matchingpath(compiler_common * common,pcre_uchar type,pcre_uchar * cc,jump_list ** backtracks)5285 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5286 {
5287 DEFINE_COMPILER;
5288 int length;
5289 unsigned int c, oc, bit;
5290 compare_context context;
5291 struct sljit_jump *jump[4];
5292 jump_list *end_list;
5293 #ifdef SUPPORT_UTF
5294 struct sljit_label *label;
5295 #ifdef SUPPORT_UCP
5296 pcre_uchar propdata[5];
5297 #endif
5298 #endif /* SUPPORT_UTF */
5299 
5300 switch(type)
5301   {
5302   case OP_SOD:
5303   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5304   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5305   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5306   return cc;
5307 
5308   case OP_SOM:
5309   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5310   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5311   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5312   return cc;
5313 
5314   case OP_NOT_WORD_BOUNDARY:
5315   case OP_WORD_BOUNDARY:
5316   add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5317   add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5318   return cc;
5319 
5320   case OP_NOT_DIGIT:
5321   case OP_DIGIT:
5322   /* Digits are usually 0-9, so it is worth to optimize them. */
5323   detect_partial_match(common, backtracks);
5324 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5325   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5326     read_char7_type(common, type == OP_NOT_DIGIT);
5327   else
5328 #endif
5329     read_char8_type(common, type == OP_NOT_DIGIT);
5330     /* Flip the starting bit in the negative case. */
5331   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5332   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5333   return cc;
5334 
5335   case OP_NOT_WHITESPACE:
5336   case OP_WHITESPACE:
5337   detect_partial_match(common, backtracks);
5338 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5339   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5340     read_char7_type(common, type == OP_NOT_WHITESPACE);
5341   else
5342 #endif
5343     read_char8_type(common, type == OP_NOT_WHITESPACE);
5344   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5345   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5346   return cc;
5347 
5348   case OP_NOT_WORDCHAR:
5349   case OP_WORDCHAR:
5350   detect_partial_match(common, backtracks);
5351 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5352   if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5353     read_char7_type(common, type == OP_NOT_WORDCHAR);
5354   else
5355 #endif
5356     read_char8_type(common, type == OP_NOT_WORDCHAR);
5357   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5358   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5359   return cc;
5360 
5361   case OP_ANY:
5362   detect_partial_match(common, backtracks);
5363   read_char_range(common, common->nlmin, common->nlmax, TRUE);
5364   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5365     {
5366     jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5367     end_list = NULL;
5368     if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5369       add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5370     else
5371       check_str_end(common, &end_list);
5372 
5373     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5374     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5375     set_jumps(end_list, LABEL());
5376     JUMPHERE(jump[0]);
5377     }
5378   else
5379     check_newlinechar(common, common->nltype, backtracks, TRUE);
5380   return cc;
5381 
5382   case OP_ALLANY:
5383   detect_partial_match(common, backtracks);
5384 #ifdef SUPPORT_UTF
5385   if (common->utf)
5386     {
5387     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5388     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5389 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5390 #if defined COMPILE_PCRE8
5391     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5392     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5393     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5394 #elif defined COMPILE_PCRE16
5395     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5396     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5397     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5398     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5399     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5400     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5401 #endif
5402     JUMPHERE(jump[0]);
5403 #endif /* COMPILE_PCRE[8|16] */
5404     return cc;
5405     }
5406 #endif
5407   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5408   return cc;
5409 
5410   case OP_ANYBYTE:
5411   detect_partial_match(common, backtracks);
5412   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5413   return cc;
5414 
5415 #ifdef SUPPORT_UTF
5416 #ifdef SUPPORT_UCP
5417   case OP_NOTPROP:
5418   case OP_PROP:
5419   propdata[0] = XCL_HASPROP;
5420   propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5421   propdata[2] = cc[0];
5422   propdata[3] = cc[1];
5423   propdata[4] = XCL_END;
5424   compile_xclass_matchingpath(common, propdata, backtracks);
5425   return cc + 2;
5426 #endif
5427 #endif
5428 
5429   case OP_ANYNL:
5430   detect_partial_match(common, backtracks);
5431   read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5432   jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5433   /* We don't need to handle soft partial matching case. */
5434   end_list = NULL;
5435   if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5436     add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5437   else
5438     check_str_end(common, &end_list);
5439   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5440   jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5441   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5442   jump[2] = JUMP(SLJIT_JUMP);
5443   JUMPHERE(jump[0]);
5444   check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5445   set_jumps(end_list, LABEL());
5446   JUMPHERE(jump[1]);
5447   JUMPHERE(jump[2]);
5448   return cc;
5449 
5450   case OP_NOT_HSPACE:
5451   case OP_HSPACE:
5452   detect_partial_match(common, backtracks);
5453   read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5454   add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5455   add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5456   return cc;
5457 
5458   case OP_NOT_VSPACE:
5459   case OP_VSPACE:
5460   detect_partial_match(common, backtracks);
5461   read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5462   add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5463   add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5464   return cc;
5465 
5466 #ifdef SUPPORT_UCP
5467   case OP_EXTUNI:
5468   detect_partial_match(common, backtracks);
5469   read_char(common);
5470   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5471   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5472   /* Optimize register allocation: use a real register. */
5473   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5474   OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5475 
5476   label = LABEL();
5477   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5478   OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5479   read_char(common);
5480   add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5481   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5482   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5483 
5484   OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5485   OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5486   OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5487   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5488   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5489   JUMPTO(SLJIT_NOT_ZERO, label);
5490 
5491   OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5492   JUMPHERE(jump[0]);
5493   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5494 
5495   if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5496     {
5497     jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5498     /* Since we successfully read a char above, partial matching must occure. */
5499     check_partial(common, TRUE);
5500     JUMPHERE(jump[0]);
5501     }
5502   return cc;
5503 #endif
5504 
5505   case OP_EODN:
5506   /* Requires rather complex checks. */
5507   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5508   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5509     {
5510     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5511     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5512     if (common->mode == JIT_COMPILE)
5513       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5514     else
5515       {
5516       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5517       OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5518       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5519       OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5520       OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5521       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5522       check_partial(common, TRUE);
5523       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5524       JUMPHERE(jump[1]);
5525       }
5526     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5527     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5528     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5529     }
5530   else if (common->nltype == NLTYPE_FIXED)
5531     {
5532     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5533     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5534     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5535     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5536     }
5537   else
5538     {
5539     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5540     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5541     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5542     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5543     jump[2] = JUMP(SLJIT_GREATER);
5544     add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5545     /* Equal. */
5546     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5547     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5548     add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5549 
5550     JUMPHERE(jump[1]);
5551     if (common->nltype == NLTYPE_ANYCRLF)
5552       {
5553       OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5554       add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5555       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5556       }
5557     else
5558       {
5559       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5560       read_char_range(common, common->nlmin, common->nlmax, TRUE);
5561       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5562       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5563       add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5564       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5565       }
5566     JUMPHERE(jump[2]);
5567     JUMPHERE(jump[3]);
5568     }
5569   JUMPHERE(jump[0]);
5570   check_partial(common, FALSE);
5571   return cc;
5572 
5573   case OP_EOD:
5574   add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5575   check_partial(common, FALSE);
5576   return cc;
5577 
5578   case OP_CIRC:
5579   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5580   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5581   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5582   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5583   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5584   return cc;
5585 
5586   case OP_CIRCM:
5587   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5588   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5589   jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5590   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5591   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5592   jump[0] = JUMP(SLJIT_JUMP);
5593   JUMPHERE(jump[1]);
5594 
5595   add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5596   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5597     {
5598     OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5599     add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5600     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5601     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5602     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5603     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5604     }
5605   else
5606     {
5607     skip_char_back(common);
5608     read_char_range(common, common->nlmin, common->nlmax, TRUE);
5609     check_newlinechar(common, common->nltype, backtracks, FALSE);
5610     }
5611   JUMPHERE(jump[0]);
5612   return cc;
5613 
5614   case OP_DOLL:
5615   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5616   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5617   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5618 
5619   if (!common->endonly)
5620     compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5621   else
5622     {
5623     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5624     check_partial(common, FALSE);
5625     }
5626   return cc;
5627 
5628   case OP_DOLLM:
5629   jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5630   OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5631   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5632   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5633   check_partial(common, FALSE);
5634   jump[0] = JUMP(SLJIT_JUMP);
5635   JUMPHERE(jump[1]);
5636 
5637   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5638     {
5639     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5640     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5641     if (common->mode == JIT_COMPILE)
5642       add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5643     else
5644       {
5645       jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5646       /* STR_PTR = STR_END - IN_UCHARS(1) */
5647       add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5648       check_partial(common, TRUE);
5649       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5650       JUMPHERE(jump[1]);
5651       }
5652 
5653     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5654     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5655     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5656     }
5657   else
5658     {
5659     peek_char(common, common->nlmax);
5660     check_newlinechar(common, common->nltype, backtracks, FALSE);
5661     }
5662   JUMPHERE(jump[0]);
5663   return cc;
5664 
5665   case OP_CHAR:
5666   case OP_CHARI:
5667   length = 1;
5668 #ifdef SUPPORT_UTF
5669   if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5670 #endif
5671   if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5672     {
5673     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5674     add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5675 
5676     context.length = IN_UCHARS(length);
5677     context.sourcereg = -1;
5678 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5679     context.ucharptr = 0;
5680 #endif
5681     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5682     }
5683 
5684   detect_partial_match(common, backtracks);
5685 #ifdef SUPPORT_UTF
5686   if (common->utf)
5687     {
5688     GETCHAR(c, cc);
5689     }
5690   else
5691 #endif
5692     c = *cc;
5693 
5694   if (type == OP_CHAR || !char_has_othercase(common, cc))
5695     {
5696     read_char_range(common, c, c, FALSE);
5697     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5698     return cc + length;
5699     }
5700   oc = char_othercase(common, c);
5701   read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5702   bit = c ^ oc;
5703   if (is_powerof2(bit))
5704     {
5705     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5706     add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5707     return cc + length;
5708     }
5709   jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5710   add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5711   JUMPHERE(jump[0]);
5712   return cc + length;
5713 
5714   case OP_NOT:
5715   case OP_NOTI:
5716   detect_partial_match(common, backtracks);
5717   length = 1;
5718 #ifdef SUPPORT_UTF
5719   if (common->utf)
5720     {
5721 #ifdef COMPILE_PCRE8
5722     c = *cc;
5723     if (c < 128)
5724       {
5725       OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5726       if (type == OP_NOT || !char_has_othercase(common, cc))
5727         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5728       else
5729         {
5730         /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5731         OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5732         add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5733         }
5734       /* Skip the variable-length character. */
5735       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5736       jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5737       OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5738       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5739       JUMPHERE(jump[0]);
5740       return cc + 1;
5741       }
5742     else
5743 #endif /* COMPILE_PCRE8 */
5744       {
5745       GETCHARLEN(c, cc, length);
5746       }
5747     }
5748   else
5749 #endif /* SUPPORT_UTF */
5750     c = *cc;
5751 
5752   if (type == OP_NOT || !char_has_othercase(common, cc))
5753     {
5754     read_char_range(common, c, c, TRUE);
5755     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5756     }
5757   else
5758     {
5759     oc = char_othercase(common, c);
5760     read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5761     bit = c ^ oc;
5762     if (is_powerof2(bit))
5763       {
5764       OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5765       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5766       }
5767     else
5768       {
5769       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5770       add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5771       }
5772     }
5773   return cc + length;
5774 
5775   case OP_CLASS:
5776   case OP_NCLASS:
5777   detect_partial_match(common, backtracks);
5778 
5779 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5780   bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5781   read_char_range(common, 0, bit, type == OP_NCLASS);
5782 #else
5783   read_char_range(common, 0, 255, type == OP_NCLASS);
5784 #endif
5785 
5786   if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5787     return cc + 32 / sizeof(pcre_uchar);
5788 
5789 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5790   jump[0] = NULL;
5791   if (common->utf)
5792     {
5793     jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5794     if (type == OP_CLASS)
5795       {
5796       add_jump(compiler, backtracks, jump[0]);
5797       jump[0] = NULL;
5798       }
5799     }
5800 #elif !defined COMPILE_PCRE8
5801   jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5802   if (type == OP_CLASS)
5803     {
5804     add_jump(compiler, backtracks, jump[0]);
5805     jump[0] = NULL;
5806     }
5807 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5808 
5809   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5810   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5811   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5812   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5813   OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5814   add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5815 
5816 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5817   if (jump[0] != NULL)
5818     JUMPHERE(jump[0]);
5819 #endif
5820 
5821   return cc + 32 / sizeof(pcre_uchar);
5822 
5823 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5824   case OP_XCLASS:
5825   compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5826   return cc + GET(cc, 0) - 1;
5827 #endif
5828 
5829   case OP_REVERSE:
5830   length = GET(cc, 0);
5831   if (length == 0)
5832     return cc + LINK_SIZE;
5833   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5834 #ifdef SUPPORT_UTF
5835   if (common->utf)
5836     {
5837     OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5838     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5839     label = LABEL();
5840     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5841     skip_char_back(common);
5842     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5843     JUMPTO(SLJIT_NOT_ZERO, label);
5844     }
5845   else
5846 #endif
5847     {
5848     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5849     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5850     add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5851     }
5852   check_start_used_ptr(common);
5853   return cc + LINK_SIZE;
5854   }
5855 SLJIT_ASSERT_STOP();
5856 return cc;
5857 }
5858 
compile_charn_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,jump_list ** backtracks)5859 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5860 {
5861 /* This function consumes at least one input character. */
5862 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5863 DEFINE_COMPILER;
5864 pcre_uchar *ccbegin = cc;
5865 compare_context context;
5866 int size;
5867 
5868 context.length = 0;
5869 do
5870   {
5871   if (cc >= ccend)
5872     break;
5873 
5874   if (*cc == OP_CHAR)
5875     {
5876     size = 1;
5877 #ifdef SUPPORT_UTF
5878     if (common->utf && HAS_EXTRALEN(cc[1]))
5879       size += GET_EXTRALEN(cc[1]);
5880 #endif
5881     }
5882   else if (*cc == OP_CHARI)
5883     {
5884     size = 1;
5885 #ifdef SUPPORT_UTF
5886     if (common->utf)
5887       {
5888       if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5889         size = 0;
5890       else if (HAS_EXTRALEN(cc[1]))
5891         size += GET_EXTRALEN(cc[1]);
5892       }
5893     else
5894 #endif
5895     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5896       size = 0;
5897     }
5898   else
5899     size = 0;
5900 
5901   cc += 1 + size;
5902   context.length += IN_UCHARS(size);
5903   }
5904 while (size > 0 && context.length <= 128);
5905 
5906 cc = ccbegin;
5907 if (context.length > 0)
5908   {
5909   /* We have a fixed-length byte sequence. */
5910   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5911   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5912 
5913   context.sourcereg = -1;
5914 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5915   context.ucharptr = 0;
5916 #endif
5917   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5918   return cc;
5919   }
5920 
5921 /* A non-fixed length character will be checked if length == 0. */
5922 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5923 }
5924 
/* Forward definitions. */
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);

/* Allocates a zero-filled backtrack record of `size` bytes with
sljit_alloc_memory(), stores `ccstart` in its cc member and links it in front
of parent->top. The macro relies on three names being in scope where it is
expanded: `compiler`, `backtrack` (assigned here) and `parent`. If the
compiler reports an error after the allocation, the enclosing function
returns `error`. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for enclosing functions that return void: on a
compiler error the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the in-scope `backtrack` pointer as a pointer to the given, more
specific backtrack record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5956 
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
/* Emits the run-time search among the groups listed for an OP_DNREF /
OP_DNREFI item. The address of the selected OVECTOR slot is left in TMP2.

Every candidate except the last is compared against OVECTOR(1); the first one
that differs from it is selected. The last candidate is selected
unconditionally. When backtracks is not NULL and jscript_compat is off, a
last candidate that is still equal to OVECTOR(1) jumps to backtracks. */
DEFINE_COMPILER;
int total = GET2(cc, 1 + IMM2_SIZE);
int entry_size = common->name_entry_size;
pcre_uchar *slot = common->name_table + GET2(cc, 1) * entry_size;
unsigned int offset;
jump_list *found = NULL;
int i;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 holds the value every candidate is compared against. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries but the last one. */
for (i = 1; i < total; i++)
  {
  offset = GET2(slot, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  slot += entry_size;
  }

/* The last entry. */
offset = GET2(slot, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

/* All the early exits from the loop land here. */
set_jumps(found, LABEL());
}
5986 
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
/* Emits the code that matches the subject at STR_PTR against a previously
captured substring.

cc points to the back reference opcode. For OP_REF / OP_REFI the capture
bounds are read from OVECTOR(offset) and OVECTOR(offset + 1). For the other
opcodes (OP_DNREF / OP_DNREFI) TMP2 must already hold the address of the
selected ovector pair.

withchecks: also emit the test for an unset group (OP_REF / OP_REFI only,
  and not in jscript_compat mode) and the test for an empty capture.
emptyfail: when the empty capture test was emitted, an empty capture jumps to
  backtracks; otherwise it continues after the emitted code.

Case sensitivity follows the opcode for both the numbered and the duplicate
name forms: the ...I opcodes are caseless, the others are caseful. */
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured substring. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && caseless)
  {
  /* Caseless comparison in UTF mode is done by calling do_utf_caselesscmp. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
  /* TMP2 = end of the captured substring. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Empty capture: start == end. */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  /* Return value handling: 0 and 1 never continue the match. In the partial
  matching modes 1 first goes through check_partial(). Any other value becomes
  the new STR_PTR. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = length of the captured substring (end - start); the flags are
  set for the empty capture test below. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  /* Advance over the whole reference, then check against the subject end. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    /* Partial matching: the reference runs past the subject end, so only
    the part that fits into the subject is compared. */
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Target of the empty capture test, if one was emitted. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
6082 
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
/* Emits the matching path of a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE). cc points to the back reference opcode.
When it is OP_REF / OP_REFI the capture is addressed through OVECTOR(offset);
otherwise the ovector pair is located with compile_dnref_search().

An iterator_backtrack record is pushed onto parent. Returns the pattern
position after the repeat opcode and its arguments, or NULL when the
backtrack record could not be allocated. */
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
pcre_uchar type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);

/* The non-ref form carries one more IMM2 argument; after skipping it the
repeat opcode is at the same relative position in both forms. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The low bit of the repeat opcode tells the minimizing variants apart. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc over the repeat opcode. A max of
zero is treated below as "no upper limit". */
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* Non-minimizing repeat: match as many times as allowed, saving STR_PTR on
the stack after the iterations. */
if (!minimize)
  {
  if (min == 0)
    {
    /* Two stack words: STACK(0) = current STR_PTR, STACK(1) = 0. */
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat,
    is zero the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* The located ovector pair address (TMP2) is kept in POSSESSIVE1 so
      that it can be reloaded at the start of every iteration. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    /* At least one repeat is required: one stack word, set to 0. */
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* A start value equal to OVECTOR(1) goes to the backtrack list. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is the iteration counter; it is only maintained when a
  limit above one has to be checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of one iteration. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    /* Increase the counter, then loop while it is below min. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below max: save STR_PTR on the stack and try another iteration. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. Stack words: STACK(0) is set to 0 here and later
receives STR_PTR, STACK(1) is the repeat counter (not initialized for
OP_CRMINSTAR), and in the non-ref case STACK(2) keeps the ovector pair
address located by compile_dnref_search(). */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat,
  is zero the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* With min == 0 the first pass skips the iteration code below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The label recorded here starts one more iteration; the min > 1 loop
below also jumps back to it. */
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Give up once the counter has reached max. */
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Increase the counter and repeat while it is below min. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
6289 
compile_recurse_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6290 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6291 {
6292 DEFINE_COMPILER;
6293 backtrack_common *backtrack;
6294 recurse_entry *entry = common->entries;
6295 recurse_entry *prev = NULL;
6296 sljit_sw start = GET(cc, 1);
6297 pcre_uchar *start_cc;
6298 BOOL needs_control_head;
6299 
6300 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6301 
6302 /* Inlining simple patterns. */
6303 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6304   {
6305   start_cc = common->start + start;
6306   compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6307   BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6308   return cc + 1 + LINK_SIZE;
6309   }
6310 
6311 while (entry != NULL)
6312   {
6313   if (entry->start == start)
6314     break;
6315   prev = entry;
6316   entry = entry->next;
6317   }
6318 
6319 if (entry == NULL)
6320   {
6321   entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6322   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6323     return NULL;
6324   entry->next = NULL;
6325   entry->entry = NULL;
6326   entry->calls = NULL;
6327   entry->start = start;
6328 
6329   if (prev != NULL)
6330     prev->next = entry;
6331   else
6332     common->entries = entry;
6333   }
6334 
6335 if (common->has_set_som && common->mark_ptr != 0)
6336   {
6337   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6338   allocate_stack(common, 2);
6339   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6340   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6341   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6342   }
6343 else if (common->has_set_som || common->mark_ptr != 0)
6344   {
6345   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6346   allocate_stack(common, 1);
6347   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6348   }
6349 
6350 if (entry->entry == NULL)
6351   add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6352 else
6353   JUMPTO(SLJIT_FAST_CALL, entry->entry);
6354 /* Leave if the match is failed. */
6355 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6356 return cc + 1 + LINK_SIZE;
6357 }
6358 
do_callout(struct jit_arguments * arguments,PUBL (callout_block)* callout_block,pcre_uchar ** jit_ovector)6359 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6360 {
6361 const pcre_uchar *begin = arguments->begin;
6362 int *offset_vector = arguments->offsets;
6363 int offset_count = arguments->offset_count;
6364 int i;
6365 
6366 if (PUBL(callout) == NULL)
6367   return 0;
6368 
6369 callout_block->version = 2;
6370 callout_block->callout_data = arguments->callout_data;
6371 
6372 /* Offsets in subject. */
6373 callout_block->subject_length = arguments->end - arguments->begin;
6374 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6375 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6376 #if defined COMPILE_PCRE8
6377 callout_block->subject = (PCRE_SPTR)begin;
6378 #elif defined COMPILE_PCRE16
6379 callout_block->subject = (PCRE_SPTR16)begin;
6380 #elif defined COMPILE_PCRE32
6381 callout_block->subject = (PCRE_SPTR32)begin;
6382 #endif
6383 
6384 /* Convert and copy the JIT offset vector to the offset_vector array. */
6385 callout_block->capture_top = 0;
6386 callout_block->offset_vector = offset_vector;
6387 for (i = 2; i < offset_count; i += 2)
6388   {
6389   offset_vector[i] = jit_ovector[i] - begin;
6390   offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6391   if (jit_ovector[i] >= begin)
6392     callout_block->capture_top = i;
6393   }
6394 
6395 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6396 if (offset_count > 0)
6397   offset_vector[0] = -1;
6398 if (offset_count > 1)
6399   offset_vector[1] = -1;
6400 return (*PUBL(callout))(callout_block);
6401 }
6402 
/* Size of the callout block, rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of the callout block field 'arg' relative to STACK_TOP. The block
starts CALLOUT_ARG_SIZE bytes below STACK_TOP, hence the negative base. */
#define CALLOUT_ARG_OFFSET(arg) \
    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))

/* Emits the matching path of a callout item: builds a callout block addressed
relative to STACK_TOP, calls do_callout() and dispatches on its return value.

Arguments:
  common      the compiler state
  cc          points to the callout opcode; cc[1] is the callout number,
              GET(cc, 2) the pattern position and GET(cc, 2 + LINK_SIZE) the
              length of the next item
  parent      the parent backtrack structure, used by PUSH_BACKTRACK

Returns:      pointer to the byte code following the callout item
*/
static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Reserve room for the callout block, counted in sljit_sw sized slots. */
allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);

/* These pointer sized fields temporarily store internal variables: subject
receives the value of OVECTOR(0) and offset_vector receives STR_PTR. They are
converted to start_match and current_position by do_callout(). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);

/* The mark field is loaded from the argument block (TMP1 holds ARGUMENTS)
when marks are in use; otherwise an immediate 0 is stored. */
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

/* Needed to save important temporary registers: STACK_TOP is kept in LOCALS0
across the call and reloaded afterwards. The second call argument (SLJIT_R1) is
the address of the callout block, the third (SLJIT_R2) the address of the
local offset vector. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): presumably widens the int return value to a full register - confirm. */
OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

/* Check return value: a positive value takes the backtrack path, a negative
value jumps to the forced quit target, and zero continues on the matching path. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
if (common->forced_quit_label == NULL)
  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
else
  JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
return cc + 2 + 2 * LINK_SIZE;
}

#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
6457 
/* Emits the code of an assertion (OP_ASSERT, OP_ASSERT_NOT, OP_ASSERTBACK or
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

Every alternative of the assertion is compiled with compile_matchingpath()
followed by compile_backtrackingpath(). Several fields of 'common' (local_exit,
positive_assert, then_trap, quit_label, accept_label, quit,
positive_assert_quit and accept) are saved on entry and restored on every
exit path.

Arguments:
  common       the compiler state
  cc           points to the assertion opcode, or to a preceding
               OP_BRAZERO / OP_BRAMINZERO
  backtrack    the assert backtrack structure; its framesize, private_data_ptr
               and (for OP_BRAZERO / OP_BRAMINZERO) jump lists are updated here
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns:       pointer to the byte code following the assertion, or NULL
               when the sljit compiler reports an error
*/
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Jumps taken when an alternative matches: positive asserts collect them in
the local list 'tmp' (resolved at "Assert is successful" below), negative
asserts send them directly to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is created: only STR_PTR (slot 0) and, when required, the
  control head (slot 1) are pushed. */
  extrasize = needs_control_head ? 2 : 1;
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
  allocate_stack(common, extrasize);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame is created: STR_PTR (slot 0), optionally the control head
  (slot 1) and the previous value of private_data_ptr (last extra slot) are
  pushed, followed by the frame written by init_frame(). */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* Compile the alternatives one by one; the loop ends at the first opcode
which is not OP_ALT. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Alternatives after the first one restart from the STR_PTR saved in slot 0. */
  if (*ccbegin == OP_ALT)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before giving up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Accept jumps recorded while compiling this alternative land here. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else
      free_stack(common, extrasize);
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    }
  /* The alternative matched: leave through the 'found' list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before giving up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* The normal fall-through path skips the stack repair code below, which is
  only reached through the positive_assert_quit jump list. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* Reload the control head saved in slot 1. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* The assert failed. */
  if (conditional || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* For OP_BRAZERO the failure continues at the matching path label set
  below; otherwise it goes to the failure target list. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* The assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    /* Both the success and the failure path continue from here. */
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* The collected failure jumps are resolved here and the list is emptied. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the compiler state saved on entry. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
6855 
match_once_common(compiler_common * common,pcre_uchar ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)6856 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6857 {
6858 DEFINE_COMPILER;
6859 int stacksize;
6860 
6861 if (framesize < 0)
6862   {
6863   if (framesize == no_frame)
6864     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6865   else
6866     {
6867     stacksize = needs_control_head ? 1 : 0;
6868     if (ket != OP_KET || has_alternatives)
6869       stacksize++;
6870     free_stack(common, stacksize);
6871     }
6872 
6873   if (needs_control_head)
6874     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6875 
6876   /* TMP2 which is set here used by OP_KETRMAX below. */
6877   if (ket == OP_KETRMAX)
6878     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6879   else if (ket == OP_KETRMIN)
6880     {
6881     /* Move the STR_PTR to the private_data_ptr. */
6882     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6883     }
6884   }
6885 else
6886   {
6887   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6888   OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6889   if (needs_control_head)
6890     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6891 
6892   if (ket == OP_KETRMAX)
6893     {
6894     /* TMP2 which is set here used by OP_KETRMAX below. */
6895     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6896     }
6897   }
6898 if (needs_control_head)
6899   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
6900 }
6901 
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)6902 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6903 {
6904 DEFINE_COMPILER;
6905 
6906 if (common->capture_last_ptr != 0)
6907   {
6908   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6909   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6910   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6911   stacksize++;
6912   }
6913 if (common->optimized_cbracket[offset >> 1] == 0)
6914   {
6915   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6916   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6917   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6918   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6919   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6920   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
6921   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
6922   stacksize += 2;
6923   }
6924 return stacksize;
6925 }
6926 
6927 /*
6928   Handling bracketed expressions is probably the most complex part.
6929 
6930   Stack layout naming characters:
6931     S - Push the current STR_PTR
6932     0 - Push a 0 (NULL)
6933     A - Push the current STR_PTR. Needed for restoring the STR_PTR
6934         before the next alternative. Not pushed if there are no alternatives.
6935     M - Any values pushed by the current alternative. Can be empty, or anything.
6936     C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6937     L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
6940 
6941   The following list shows the regular expression templates, their PCRE byte codes
6942   and stack layout supported by pcre-sljit.
6943 
6944   (?:)                     OP_BRA     | OP_KET                A M
6945   ()                       OP_CBRA    | OP_KET                C M
6946   (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
6947                            OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
6948   (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
6949                            OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
6950   ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
6951                            OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
6952   ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
6953                            OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
6954   (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
6955   (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
6956   ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
6957   ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
6958   (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
6959            OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
6960   (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
6961            OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
6962   ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
6963            OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
6964   ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
6965            OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
6966 
6967 
6968   Stack layout naming characters:
6969     A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
6971     M - Any values pushed by the current alternative. Can be empty, or anything.
6972 
6973   The next list shows the possible content of a bracket:
6974   (|)     OP_*BRA    | OP_ALT ...         M A
6975   (?()|)  OP_*COND   | OP_ALT             M A
6976   (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
6977   (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
6978                                           Or nothing, if trace is unnecessary
6979 */
6980 
compile_bracket_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)6981 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6982 {
6983 DEFINE_COMPILER;
6984 backtrack_common *backtrack;
6985 pcre_uchar opcode;
6986 int private_data_ptr = 0;
6987 int offset = 0;
6988 int i, stacksize;
6989 int repeat_ptr = 0, repeat_length = 0;
6990 int repeat_type = 0, repeat_count = 0;
6991 pcre_uchar *ccbegin;
6992 pcre_uchar *matchingpath;
6993 pcre_uchar *slot;
6994 pcre_uchar bra = OP_BRA;
6995 pcre_uchar ket;
6996 assert_backtrack *assert;
6997 BOOL has_alternatives;
6998 BOOL needs_control_head = FALSE;
6999 struct sljit_jump *jump;
7000 struct sljit_jump *skip;
7001 struct sljit_label *rmax_label = NULL;
7002 struct sljit_jump *braminzero = NULL;
7003 
7004 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
7005 
7006 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7007   {
7008   bra = *cc;
7009   cc++;
7010   opcode = *cc;
7011   }
7012 
7013 opcode = *cc;
7014 ccbegin = cc;
7015 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
7016 ket = *matchingpath;
7017 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
7018   {
7019   repeat_ptr = PRIVATE_DATA(matchingpath);
7020   repeat_length = PRIVATE_DATA(matchingpath + 1);
7021   repeat_type = PRIVATE_DATA(matchingpath + 2);
7022   repeat_count = PRIVATE_DATA(matchingpath + 3);
7023   SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
7024   if (repeat_type == OP_UPTO)
7025     ket = OP_KETRMAX;
7026   if (repeat_type == OP_MINUPTO)
7027     ket = OP_KETRMIN;
7028   }
7029 
7030 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
7031   {
7032   /* Drop this bracket_backtrack. */
7033   parent->top = backtrack->prev;
7034   return matchingpath + 1 + LINK_SIZE + repeat_length;
7035   }
7036 
7037 matchingpath = ccbegin + 1 + LINK_SIZE;
7038 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
7039 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
7040 cc += GET(cc, 1);
7041 
7042 has_alternatives = *cc == OP_ALT;
7043 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
7044   has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
7045 
7046 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
7047   opcode = OP_SCOND;
7048 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
7049   opcode = OP_ONCE;
7050 
7051 if (opcode == OP_CBRA || opcode == OP_SCBRA)
7052   {
7053   /* Capturing brackets has a pre-allocated space. */
7054   offset = GET2(ccbegin, 1 + LINK_SIZE);
7055   if (common->optimized_cbracket[offset] == 0)
7056     {
7057     private_data_ptr = OVECTOR_PRIV(offset);
7058     offset <<= 1;
7059     }
7060   else
7061     {
7062     offset <<= 1;
7063     private_data_ptr = OVECTOR(offset);
7064     }
7065   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7066   matchingpath += IMM2_SIZE;
7067   }
7068 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7069   {
7070   /* Other brackets simply allocate the next entry. */
7071   private_data_ptr = PRIVATE_DATA(ccbegin);
7072   SLJIT_ASSERT(private_data_ptr != 0);
7073   BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7074   if (opcode == OP_ONCE)
7075     BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7076   }
7077 
7078 /* Instructions before the first alternative. */
7079 stacksize = 0;
7080 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7081   stacksize++;
7082 if (bra == OP_BRAZERO)
7083   stacksize++;
7084 
7085 if (stacksize > 0)
7086   allocate_stack(common, stacksize);
7087 
7088 stacksize = 0;
7089 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7090   {
7091   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7092   stacksize++;
7093   }
7094 
7095 if (bra == OP_BRAZERO)
7096   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7097 
7098 if (bra == OP_BRAMINZERO)
7099   {
7100   /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7101   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7102   if (ket != OP_KETRMIN)
7103     {
7104     free_stack(common, 1);
7105     braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7106     }
7107   else
7108     {
7109     if (opcode == OP_ONCE || opcode >= OP_SBRA)
7110       {
7111       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7112       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7113       /* Nothing stored during the first run. */
7114       skip = JUMP(SLJIT_JUMP);
7115       JUMPHERE(jump);
7116       /* Checking zero-length iteration. */
7117       if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7118         {
7119         /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7120         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7121         }
7122       else
7123         {
7124         /* Except when the whole stack frame must be saved. */
7125         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7126         braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
7127         }
7128       JUMPHERE(skip);
7129       }
7130     else
7131       {
7132       jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7133       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7134       JUMPHERE(jump);
7135       }
7136     }
7137   }
7138 
7139 if (repeat_type != 0)
7140   {
7141   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
7142   if (repeat_type == OP_EXACT)
7143     rmax_label = LABEL();
7144   }
7145 
7146 if (ket == OP_KETRMIN)
7147   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7148 
7149 if (ket == OP_KETRMAX)
7150   {
7151   rmax_label = LABEL();
7152   if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7153     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7154   }
7155 
7156 /* Handling capturing brackets and alternatives. */
7157 if (opcode == OP_ONCE)
7158   {
7159   stacksize = 0;
7160   if (needs_control_head)
7161     {
7162     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7163     stacksize++;
7164     }
7165 
7166   if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7167     {
7168     /* Neither capturing brackets nor recursions are found in the block. */
7169     if (ket == OP_KETRMIN)
7170       {
7171       stacksize += 2;
7172       if (!needs_control_head)
7173         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7174       }
7175     else
7176       {
7177       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7178         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7179       if (ket == OP_KETRMAX || has_alternatives)
7180         stacksize++;
7181       }
7182 
7183     if (stacksize > 0)
7184       allocate_stack(common, stacksize);
7185 
7186     stacksize = 0;
7187     if (needs_control_head)
7188       {
7189       stacksize++;
7190       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7191       }
7192 
7193     if (ket == OP_KETRMIN)
7194       {
7195       if (needs_control_head)
7196         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7197       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7198       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7199         OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
7200       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7201       }
7202     else if (ket == OP_KETRMAX || has_alternatives)
7203       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7204     }
7205   else
7206     {
7207     if (ket != OP_KET || has_alternatives)
7208       stacksize++;
7209 
7210     stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
7211     allocate_stack(common, stacksize);
7212 
7213     if (needs_control_head)
7214       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7215 
7216     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7217     OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7218 
7219     stacksize = needs_control_head ? 1 : 0;
7220     if (ket != OP_KET || has_alternatives)
7221       {
7222       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7223       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7224       stacksize++;
7225       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7226       }
7227     else
7228       {
7229       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7230       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7231       }
7232     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
7233     }
7234   }
7235 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
7236   {
7237   /* Saving the previous values. */
7238   if (common->optimized_cbracket[offset >> 1] != 0)
7239     {
7240     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
7241     allocate_stack(common, 2);
7242     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7243     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
7244     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7245     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7246     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7247     }
7248   else
7249     {
7250     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7251     allocate_stack(common, 1);
7252     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7253     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7254     }
7255   }
7256 else if (opcode == OP_SBRA || opcode == OP_SCOND)
7257   {
7258   /* Saving the previous value. */
7259   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7260   allocate_stack(common, 1);
7261   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7262   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7263   }
7264 else if (has_alternatives)
7265   {
7266   /* Pushing the starting string pointer. */
7267   allocate_stack(common, 1);
7268   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7269   }
7270 
7271 /* Generating code for the first alternative. */
7272 if (opcode == OP_COND || opcode == OP_SCOND)
7273   {
7274   if (*matchingpath == OP_CREF)
7275     {
7276     SLJIT_ASSERT(has_alternatives);
7277     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
7278       CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7279     matchingpath += 1 + IMM2_SIZE;
7280     }
7281   else if (*matchingpath == OP_DNCREF)
7282     {
7283     SLJIT_ASSERT(has_alternatives);
7284 
7285     i = GET2(matchingpath, 1 + IMM2_SIZE);
7286     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7287     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
7288     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
7289     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7290     slot += common->name_entry_size;
7291     i--;
7292     while (i-- > 0)
7293       {
7294       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7295       OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
7296       slot += common->name_entry_size;
7297       }
7298     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
7299     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
7300     matchingpath += 1 + 2 * IMM2_SIZE;
7301     }
7302   else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
7303     {
7304     /* Never has other case. */
7305     BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
7306     SLJIT_ASSERT(!has_alternatives);
7307 
7308     if (*matchingpath == OP_FAIL)
7309       stacksize = 0;
7310     if (*matchingpath == OP_RREF)
7311       {
7312       stacksize = GET2(matchingpath, 1);
7313       if (common->currententry == NULL)
7314         stacksize = 0;
7315       else if (stacksize == RREF_ANY)
7316         stacksize = 1;
7317       else if (common->currententry->start == 0)
7318         stacksize = stacksize == 0;
7319       else
7320         stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7321 
7322       if (stacksize != 0)
7323         matchingpath += 1 + IMM2_SIZE;
7324       }
7325     else
7326       {
7327       if (common->currententry == NULL || common->currententry->start == 0)
7328         stacksize = 0;
7329       else
7330         {
7331         stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
7332         slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7333         i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7334         while (stacksize > 0)
7335           {
7336           if ((int)GET2(slot, 0) == i)
7337             break;
7338           slot += common->name_entry_size;
7339           stacksize--;
7340           }
7341         }
7342 
7343       if (stacksize != 0)
7344         matchingpath += 1 + 2 * IMM2_SIZE;
7345       }
7346 
7347       /* The stacksize == 0 is a common "else" case. */
7348       if (stacksize == 0)
7349         {
7350         if (*cc == OP_ALT)
7351           {
7352           matchingpath = cc + 1 + LINK_SIZE;
7353           cc += GET(cc, 1);
7354           }
7355         else
7356           matchingpath = cc;
7357         }
7358     }
7359   else
7360     {
7361     SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
7362     /* Similar code as PUSH_BACKTRACK macro. */
7363     assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
7364     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7365       return NULL;
7366     memset(assert, 0, sizeof(assert_backtrack));
7367     assert->common.cc = matchingpath;
7368     BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
7369     matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
7370     }
7371   }
7372 
7373 compile_matchingpath(common, matchingpath, cc, backtrack);
7374 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7375   return NULL;
7376 
7377 if (opcode == OP_ONCE)
7378   match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
7379 
7380 stacksize = 0;
7381 if (repeat_type == OP_MINUPTO)
7382   {
7383   /* We need to preserve the counter. TMP2 will be used below. */
7384   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
7385   stacksize++;
7386   }
7387 if (ket != OP_KET || bra != OP_BRA)
7388   stacksize++;
7389 if (offset != 0)
7390   {
7391   if (common->capture_last_ptr != 0)
7392     stacksize++;
7393   if (common->optimized_cbracket[offset >> 1] == 0)
7394     stacksize += 2;
7395   }
7396 if (has_alternatives && opcode != OP_ONCE)
7397   stacksize++;
7398 
7399 if (stacksize > 0)
7400   allocate_stack(common, stacksize);
7401 
7402 stacksize = 0;
7403 if (repeat_type == OP_MINUPTO)
7404   {
7405   /* TMP2 was set above. */
7406   OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
7407   stacksize++;
7408   }
7409 
7410 if (ket != OP_KET || bra != OP_BRA)
7411   {
7412   if (ket != OP_KET)
7413     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7414   else
7415     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7416   stacksize++;
7417   }
7418 
7419 if (offset != 0)
7420   stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
7421 
7422 if (has_alternatives)
7423   {
7424   if (opcode != OP_ONCE)
7425     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7426   if (ket != OP_KETRMAX)
7427     BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7428   }
7429 
7430 /* Must be after the matchingpath label. */
7431 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
7432   {
7433   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
7434   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7435   }
7436 
7437 if (ket == OP_KETRMAX)
7438   {
7439   if (repeat_type != 0)
7440     {
7441     if (has_alternatives)
7442       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7443     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
7444     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
7445     /* Drop STR_PTR for greedy plus quantifier. */
7446     if (opcode != OP_ONCE)
7447       free_stack(common, 1);
7448     }
7449   else if (opcode == OP_ONCE || opcode >= OP_SBRA)
7450     {
7451     if (has_alternatives)
7452       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7453     /* Checking zero-length iteration. */
7454     if (opcode != OP_ONCE)
7455       {
7456       CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
7457       /* Drop STR_PTR for greedy plus quantifier. */
7458       if (bra != OP_BRAZERO)
7459         free_stack(common, 1);
7460       }
7461     else
7462       /* TMP2 must contain the starting STR_PTR. */
7463       CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
7464     }
7465   else
7466     JUMPTO(SLJIT_JUMP, rmax_label);
7467   BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7468   }
7469 
7470 if (repeat_type == OP_EXACT)
7471   {
7472   count_match(common);
7473   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
7474   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
7475   }
7476 else if (repeat_type == OP_UPTO)
7477   {
7478   /* We need to preserve the counter. */
7479   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
7480   allocate_stack(common, 1);
7481   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7482   }
7483 
7484 if (bra == OP_BRAZERO)
7485   BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
7486 
7487 if (bra == OP_BRAMINZERO)
7488   {
7489   /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
7490   JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
7491   if (braminzero != NULL)
7492     {
7493     JUMPHERE(braminzero);
7494     /* We need to release the end pointer to perform the
7495     backtrack for the zero-length iteration. When
7496     framesize is < 0, OP_ONCE will do the release itself. */
7497     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
7498       {
7499       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7500       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7501       }
7502     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
7503       free_stack(common, 1);
7504     }
7505   /* Continue to the normal backtrack. */
7506   }
7507 
7508 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
7509   count_match(common);
7510 
7511 /* Skip the other alternatives. */
7512 while (*cc == OP_ALT)
7513   cc += GET(cc, 1);
7514 cc += 1 + LINK_SIZE;
7515 
7516 /* Temporarily encoding the needs_control_head in framesize. */
7517 if (opcode == OP_ONCE)
7518   BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
7519 return cc + repeat_length;
7520 }
7521 
/* Emits the matching path for a repeated bracket that starts with OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS (optionally preceded by OP_BRAPOSZERO)
and is closed by OP_KETRPOS. (Presumably these are the possessive bracket
forms - inferred from the opcode names only, confirm.)

Arguments:
  common   the compiler state
  cc       points to OP_BRAPOSZERO or to one of the four bracket opcodes
  parent   not referenced directly in this body; presumably consumed by the
           PUSH_BACKTRACK macro (defined elsewhere) - TODO confirm

Returns:   pointer to the byte code after OP_KETRPOS (cc + 1 + LINK_SIZE),
           or NULL when sljit_get_compiler_error() reports an error

Every alternative is compiled in turn. After an alternative matches, the
generated code jumps back to "loop"; the backtracking path of an alternative
falls through to the next alternative. */
compile_bracketpos_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7522 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7523 {
7524 DEFINE_COMPILER;
7525 backtrack_common *backtrack;
7526 pcre_uchar opcode;
7527 int private_data_ptr;
7528 int cbraprivptr = 0;
7529 BOOL needs_control_head;
7530 int framesize;
7531 int stacksize;
7532 int offset = 0;
7533 BOOL zero = FALSE;
7534 pcre_uchar *ccbegin = NULL;
7535 int stack; /* Also contains the offset of control head. */
7536 struct sljit_label *loop = NULL;
7537 struct jump_list *emptymatch = NULL;
7538 
     /* NOTE(review): "backtrack" is never assigned explicitly in this function,
     so it is presumably set by PUSH_BACKTRACK - confirm against the macro. */
7539 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
     /* A leading OP_BRAPOSZERO is remembered in "zero" and skipped. */
7540 if (*cc == OP_BRAPOSZERO)
7541   {
7542   zero = TRUE;
7543   cc++;
7544   }
7545 
7546 opcode = *cc;
7547 private_data_ptr = PRIVATE_DATA(cc);
7548 SLJIT_ASSERT(private_data_ptr != 0);
7549 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
     /* Locate the first alternative (ccbegin). For the capturing forms "offset"
     becomes twice the bracket number read from the byte code and is non-zero
     from here on; offset == 0 therefore means "non-capturing" below. */
7550 switch(opcode)
7551   {
7552   case OP_BRAPOS:
7553   case OP_SBRAPOS:
7554   ccbegin = cc + 1 + LINK_SIZE;
7555   break;
7556 
7557   case OP_CBRAPOS:
7558   case OP_SCBRAPOS:
7559   offset = GET2(cc, 1 + LINK_SIZE);
7560   /* This case cannot be optimized in the same way as
7561   normal capturing brackets. */
7562   SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
7563   cbraprivptr = OVECTOR_PRIV(offset);
7564   offset <<= 1;
7565   ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
7566   break;
7567 
7568   default:
7569   SLJIT_ASSERT_STOP();
7570   break;
7571   }
7572 
7573 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
7574 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
7575 if (framesize < 0)
7576   {
       /* No frame is initialized in this branch. Slots, in STACK() index order:
            capturing:     OVECTOR(offset), OVECTOR(offset + 1),
                           [capture_last_ptr value if capture_last_ptr != 0]
            non-capturing: STR_PTR
          followed by [control head if needs_control_head] and, as the last
          slot (index stacksize - 1), [the constant 1 if !zero]. */
7577   if (offset != 0)
7578     {
7579     stacksize = 2;
7580     if (common->capture_last_ptr != 0)
7581       stacksize++;
7582     }
7583   else
7584     stacksize = 1;
7585 
7586   if (needs_control_head)
7587     stacksize++;
7588   if (!zero)
7589     stacksize++;
7590 
7591   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7592   allocate_stack(common, stacksize);
7593   if (framesize == no_frame)
7594     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7595 
7596   stack = 0;
7597   if (offset != 0)
7598     {
7599     stack = 2;
7600     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7601     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7602     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7603     if (common->capture_last_ptr != 0)
7604       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7605     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7606     if (needs_control_head)
7607       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7608     if (common->capture_last_ptr != 0)
7609       {
7610       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
7611       stack = 3;
7612       }
7613     }
7614   else
7615     {
7616     if (needs_control_head)
7617       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7618     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7619     stack = 1;
7620     }
7621 
       /* "stack" is bumped only to address the slot after the control head for
       the !zero marker, then moved back so that it stays the index of the
       control head slot (it is read again inside the loop below). */
7622   if (needs_control_head)
7623     stack++;
7624   if (!zero)
7625     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
7626   if (needs_control_head)
7627     {
7628     stack--;
7629     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7630     }
7631   }
7632 else
7633   {
       /* framesize >= 0. Slots, in STACK() index order:
            [the constant 1 if !zero], [control head if needs_control_head],
            [STR_PTR if offset == 0], previous value of private_data_ptr,
          and init_frame() is called for the remaining part. */
7634   stacksize = framesize + 1;
7635   if (!zero)
7636     stacksize++;
7637   if (needs_control_head)
7638     stacksize++;
7639   if (offset == 0)
7640     stacksize++;
7641   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7642 
7643   allocate_stack(common, stacksize);
7644   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7645   if (needs_control_head)
7646     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7647   OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
7648 
7649   stack = 0;
7650   if (!zero)
7651     {
7652     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
7653     stack = 1;
7654     }
7655   if (needs_control_head)
7656     {
7657     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7658     stack++;
7659     }
7660   if (offset == 0)
7661     {
7662     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
7663     stack++;
7664     }
7665   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
7666   init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
       /* Step back over the slots written after the control head, so that
       "stack" is the control head slot index when needs_control_head is set
       (it is only read in that case). */
7667   stack -= 1 + (offset == 0);
7668   }
7669 
     /* Capturing forms keep the start of the current iteration in cbraprivptr. */
7670 if (offset != 0)
7671   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7672 
     /* Each pass of this C loop compiles one alternative: cc is advanced to the
     next OP_ALT / OP_KETRPOS and [ccbegin, cc) is compiled. The generated code
     jumps back to "loop" after a successful alternative. */
7673 loop = LABEL();
7674 while (*cc != OP_KETRPOS)
7675   {
7676   backtrack->top = NULL;
7677   backtrack->topbacktracks = NULL;
7678   cc += GET(cc, 1);
7679 
7680   compile_matchingpath(common, ccbegin, cc, backtrack);
7681   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7682     return NULL;
7683 
7684   if (framesize < 0)
7685     {
7686     if (framesize == no_frame)
7687       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7688 
7689     if (offset != 0)
7690       {
         /* TMP1 = start of this iteration; it becomes OVECTOR(offset), the
         current STR_PTR becomes OVECTOR(offset + 1) and the next start. */
7691       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7692       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7693       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7694       if (common->capture_last_ptr != 0)
7695         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7696       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7697       }
7698     else
7699       {
         /* The previous position is only needed (in TMP1) for the OP_SBRAPOS
         comparison below; STACK(0) is then updated to the current STR_PTR. */
7700       if (opcode == OP_SBRAPOS)
7701         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7702       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7703       }
7704 
7705     /* Even if the match is empty, we need to reset the control head. */
7706     if (needs_control_head)
7707       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
7708 
       /* For the S forms leave the loop when the iteration did not move
       STR_PTR (TMP1 holds the position the iteration started from). */
7709     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7710       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
7711 
       /* Clear the marker that was initialized to 1: an iteration matched. */
7712     if (!zero)
7713       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7714     }
7715   else
7716     {
7717     if (offset != 0)
7718       {
7719       OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7720       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7721       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7722       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7723       if (common->capture_last_ptr != 0)
7724         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7725       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7726       }
7727     else
7728       {
7729       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7730       OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7731       if (opcode == OP_SBRAPOS)
7732         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7733       OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
7734       }
7735 
7736     /* Even if the match is empty, we need to reset the control head. */
7737     if (needs_control_head)
7738       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
7739 
7740     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7741       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
7742 
7743     if (!zero)
7744       {
         /* NOTE(review): this is the framesize >= 0 branch of the enclosing
         if, so the "framesize < 0" arm below is never taken; only the
         STACK(0) store is ever emitted here. */
7745       if (framesize < 0)
7746         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7747       else
7748         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7749       }
7750     }
7751 
7752   JUMPTO(SLJIT_JUMP, loop);
7753   flush_stubs(common);
7754 
       /* Backtracking path of this alternative: emitted right after the jump
       back to "loop"; its failure jumps are bound to the label that follows. */
7755   compile_backtrackingpath(common, backtrack->top);
7756   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7757     return NULL;
7758   set_jumps(backtrack->topbacktracks, LABEL());
7759 
       /* Reload STR_PTR from where the current iteration start was saved. */
7760   if (framesize < 0)
7761     {
7762     if (offset != 0)
7763       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7764     else
7765       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7766     }
7767   else
7768     {
7769     if (offset != 0)
7770       {
7771       /* Last alternative. */
7772       if (*cc == OP_KETRPOS)
7773         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7774       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7775       }
7776     else
7777       {
7778       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7779       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7780       }
7781     }
7782 
7783   if (*cc == OP_KETRPOS)
7784     break;
7785   ccbegin = cc + 1 + LINK_SIZE;
7786   }
7787 
7788 /* We don't have to restore the control head in case of a failed match. */
7789 
     /* Without OP_BRAPOSZERO the marker slot must have been cleared by at least
     one successful iteration; if it is still non-zero, take the backtrack
     jump. */
7790 backtrack->topbacktracks = NULL;
7791 if (!zero)
7792   {
7793   if (framesize < 0)
7794     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
7795   else /* TMP2 is set to [private_data_ptr] above. */
7796     add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
7797   }
7798 
7799 /* None of them matched. */
7800 set_jumps(emptymatch, LABEL());
7801 count_match(common);
7802 return cc + 1 + LINK_SIZE;
7803 }
7804 
/* Decodes the repeat item at cc into a normalized (opcode, type) pair.

Arguments:
  common   the compiler state (used here for next_opcode() and, under
           SUPPORT_UTF, for common->utf)
  cc       points to the repeat opcode, or to OP_CLASS / OP_NCLASS /
           OP_XCLASS when a class is followed by its repeat opcode
  opcode   output: the repeat opcode shifted into the OP_STAR based range
           (or OP_EXACT / OP_CRRANGE / OP_CRMINRANGE / OP_CRPOSRANGE)
  type     output: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, the class opcode, or
           the character type opcode read from the byte code
  max      output: written only for counted repeats
  min      output: written only for class ranges (OP_CR*RANGE)
  end      output, may be NULL: first byte code position after the item

Returns:   pointer to the data the repeat applies to (the character, the
           character type + 1, or the class data)

*max and *min are left untouched on the paths that do not write them, so the
caller's initial values remain in place there. */
get_iterator_parameters(compiler_common * common,pcre_uchar * cc,pcre_uchar * opcode,pcre_uchar * type,int * max,int * min,pcre_uchar ** end)7805 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7806 {
7807 int class_len;
7808 
     /* Each opcode family is rebased onto the OP_STAR family by subtracting the
     distance between the first opcodes of the two families. */
7809 *opcode = *cc;
7810 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
7811   {
7812   cc++;
7813   *type = OP_CHAR;
7814   }
7815 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
7816   {
7817   cc++;
7818   *type = OP_CHARI;
7819   *opcode -= OP_STARI - OP_STAR;
7820   }
7821 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
7822   {
7823   cc++;
7824   *type = OP_NOT;
7825   *opcode -= OP_NOTSTAR - OP_STAR;
7826   }
7827 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
7828   {
7829   cc++;
7830   *type = OP_NOTI;
7831   *opcode -= OP_NOTSTARI - OP_STAR;
7832   }
7833 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
7834   {
       /* *type == 0 is a temporary marker: the real type is read from the
       byte code near the end of this function. */
7835   cc++;
7836   *opcode -= OP_TYPESTAR - OP_STAR;
7837   *type = 0;
7838   }
7839 else
7840   {
       /* Character class: the repeat opcode is the last code unit of the
       class_len units starting at cc, i.e. cc[class_len - 1]. This branch
       returns on its own and never reaches the code after the if chain. */
7841   SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7842   *type = *opcode;
7843   cc++;
7844   class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
7845   *opcode = cc[class_len - 1];
7846   if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7847     {
7848     *opcode -= OP_CRSTAR - OP_STAR;
7849     if (end != NULL)
7850       *end = cc + class_len;
7851     }
7852   else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7853     {
7854     *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7855     if (end != NULL)
7856       *end = cc + class_len;
7857     }
7858   else
7859     {
7860     SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7861     *max = GET2(cc, (class_len + IMM2_SIZE));
7862     *min = GET2(cc, class_len);
7863 
         /* A zero minimum turns the range into the matching UPTO variant; equal
         bounds turn it into OP_EXACT. Otherwise the OP_CR*RANGE opcode is
         returned unchanged. */
7864     if (*min == 0)
7865       {
7866       SLJIT_ASSERT(*max != 0);
7867       *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7868       }
7869     if (*max == *min)
7870       *opcode = OP_EXACT;
7871 
7872     if (end != NULL)
7873       *end = cc + class_len + 2 * IMM2_SIZE;
7874     }
7875   return cc;
7876   }
7877 
     /* Counted repeats carry their count right after the opcode. */
7878 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7879   {
7880   *max = GET2(cc, 0);
7881   cc += IMM2_SIZE;
7882   }
7883 
     /* Type repeat: the character type opcode is at cc; the returned pointer
     is the position after it. */
7884 if (*type == 0)
7885   {
7886   *type = *cc;
7887   if (end != NULL)
7888     *end = next_opcode(common, cc);
7889   cc++;
7890   return cc;
7891   }
7892 
     /* Single character repeat: the item ends after the character at cc,
     including its extra code units when SUPPORT_UTF is defined and
     common->utf is set. */
7893 if (end != NULL)
7894   {
7895   *end = cc + 1;
7896 #ifdef SUPPORT_UTF
7897   if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
7898 #endif
7899   }
7900 return cc;
7901 }
7902 
compile_iterator_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)7903 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7904 {
7905 DEFINE_COMPILER;
7906 backtrack_common *backtrack;
7907 pcre_uchar opcode;
7908 pcre_uchar type;
7909 int max = -1, min = -1;
7910 pcre_uchar *end;
7911 jump_list *nomatch = NULL;
7912 struct sljit_jump *jump = NULL;
7913 struct sljit_label *label;
7914 int private_data_ptr = PRIVATE_DATA(cc);
7915 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
7916 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7917 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
7918 int tmp_base, tmp_offset;
7919 
7920 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
7921 
7922 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
7923 
7924 switch(type)
7925   {
7926   case OP_NOT_DIGIT:
7927   case OP_DIGIT:
7928   case OP_NOT_WHITESPACE:
7929   case OP_WHITESPACE:
7930   case OP_NOT_WORDCHAR:
7931   case OP_WORDCHAR:
7932   case OP_ANY:
7933   case OP_ALLANY:
7934   case OP_ANYBYTE:
7935   case OP_ANYNL:
7936   case OP_NOT_HSPACE:
7937   case OP_HSPACE:
7938   case OP_NOT_VSPACE:
7939   case OP_VSPACE:
7940   case OP_CHAR:
7941   case OP_CHARI:
7942   case OP_NOT:
7943   case OP_NOTI:
7944   case OP_CLASS:
7945   case OP_NCLASS:
7946   tmp_base = TMP3;
7947   tmp_offset = 0;
7948   break;
7949 
7950   default:
7951   SLJIT_ASSERT_STOP();
7952   /* Fall through. */
7953 
7954   case OP_EXTUNI:
7955   case OP_XCLASS:
7956   case OP_NOTPROP:
7957   case OP_PROP:
7958   tmp_base = SLJIT_MEM1(SLJIT_SP);
7959   tmp_offset = POSSESSIVE0;
7960   break;
7961   }
7962 
7963 switch(opcode)
7964   {
7965   case OP_STAR:
7966   case OP_PLUS:
7967   case OP_UPTO:
7968   case OP_CRRANGE:
7969   if (type == OP_ANYNL || type == OP_EXTUNI)
7970     {
7971     SLJIT_ASSERT(private_data_ptr == 0);
7972     if (opcode == OP_STAR || opcode == OP_UPTO)
7973       {
7974       allocate_stack(common, 2);
7975       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7976       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7977       }
7978     else
7979       {
7980       allocate_stack(common, 1);
7981       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7982       }
7983 
7984     if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7985       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
7986 
7987     label = LABEL();
7988     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7989     if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7990       {
7991       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
7992       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7993       if (opcode == OP_CRRANGE && min > 0)
7994         CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
7995       if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
7996         jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7997       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
7998       }
7999 
8000     /* We cannot use TMP3 because of this allocate_stack. */
8001     allocate_stack(common, 1);
8002     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8003     JUMPTO(SLJIT_JUMP, label);
8004     if (jump != NULL)
8005       JUMPHERE(jump);
8006     }
8007   else
8008     {
8009     if (opcode == OP_PLUS)
8010       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8011     if (private_data_ptr == 0)
8012       allocate_stack(common, 2);
8013     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8014     if (opcode <= OP_PLUS)
8015       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
8016     else
8017       OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
8018     label = LABEL();
8019     compile_char1_matchingpath(common, type, cc, &nomatch);
8020     OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8021     if (opcode <= OP_PLUS)
8022       JUMPTO(SLJIT_JUMP, label);
8023     else if (opcode == OP_CRRANGE && max == 0)
8024       {
8025       OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
8026       JUMPTO(SLJIT_JUMP, label);
8027       }
8028     else
8029       {
8030       OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8031       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8032       OP1(SLJIT_MOV, base, offset1, TMP1, 0);
8033       CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
8034       }
8035     set_jumps(nomatch, LABEL());
8036     if (opcode == OP_CRRANGE)
8037       add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
8038     OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8039     }
8040   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8041   break;
8042 
8043   case OP_MINSTAR:
8044   case OP_MINPLUS:
8045   if (opcode == OP_MINPLUS)
8046     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8047   if (private_data_ptr == 0)
8048     allocate_stack(common, 1);
8049   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8050   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8051   break;
8052 
8053   case OP_MINUPTO:
8054   case OP_CRMINRANGE:
8055   if (private_data_ptr == 0)
8056     allocate_stack(common, 2);
8057   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8058   OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
8059   if (opcode == OP_CRMINRANGE)
8060     add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8061   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8062   break;
8063 
8064   case OP_QUERY:
8065   case OP_MINQUERY:
8066   if (private_data_ptr == 0)
8067     allocate_stack(common, 1);
8068   OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8069   if (opcode == OP_QUERY)
8070     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8071   BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8072   break;
8073 
8074   case OP_EXACT:
8075   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
8076   label = LABEL();
8077   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8078   OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8079   JUMPTO(SLJIT_NOT_ZERO, label);
8080   break;
8081 
8082   case OP_POSSTAR:
8083   case OP_POSPLUS:
8084   case OP_POSUPTO:
8085   if (opcode == OP_POSPLUS)
8086     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8087   if (opcode == OP_POSUPTO)
8088     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
8089   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8090   label = LABEL();
8091   compile_char1_matchingpath(common, type, cc, &nomatch);
8092   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8093   if (opcode != OP_POSUPTO)
8094     JUMPTO(SLJIT_JUMP, label);
8095   else
8096     {
8097     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
8098     JUMPTO(SLJIT_NOT_ZERO, label);
8099     }
8100   set_jumps(nomatch, LABEL());
8101   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8102   break;
8103 
8104   case OP_POSQUERY:
8105   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8106   compile_char1_matchingpath(common, type, cc, &nomatch);
8107   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8108   set_jumps(nomatch, LABEL());
8109   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8110   break;
8111 
8112   case OP_CRPOSRANGE:
8113   /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
8114   OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
8115   label = LABEL();
8116   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8117   OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8118   JUMPTO(SLJIT_NOT_ZERO, label);
8119 
8120   if (max != 0)
8121     {
8122     SLJIT_ASSERT(max - min > 0);
8123     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
8124     }
8125   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8126   label = LABEL();
8127   compile_char1_matchingpath(common, type, cc, &nomatch);
8128   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8129   if (max == 0)
8130     JUMPTO(SLJIT_JUMP, label);
8131   else
8132     {
8133     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
8134     JUMPTO(SLJIT_NOT_ZERO, label);
8135     }
8136   set_jumps(nomatch, LABEL());
8137   OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8138   break;
8139 
8140   default:
8141   SLJIT_ASSERT_STOP();
8142   break;
8143   }
8144 
8145 count_match(common);
8146 return end;
8147 }
8148 
compile_fail_accept_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8149 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8150 {
8151 DEFINE_COMPILER;
8152 backtrack_common *backtrack;
8153 
8154 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8155 
8156 if (*cc == OP_FAIL)
8157   {
8158   add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8159   return cc + 1;
8160   }
8161 
8162 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8163   {
8164   /* No need to check notempty conditions. */
8165   if (common->accept_label == NULL)
8166     add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8167   else
8168     JUMPTO(SLJIT_JUMP, common->accept_label);
8169   return cc + 1;
8170   }
8171 
8172 if (common->accept_label == NULL)
8173   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
8174 else
8175   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
8176 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8177 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
8178 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8179 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
8180 if (common->accept_label == NULL)
8181   add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8182 else
8183   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
8184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8185 if (common->accept_label == NULL)
8186   add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
8187 else
8188   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
8189 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8190 return cc + 1;
8191 }
8192 
compile_close_matchingpath(compiler_common * common,pcre_uchar * cc)8193 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
8194 {
8195 DEFINE_COMPILER;
8196 int offset = GET2(cc, 1);
8197 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
8198 
8199 /* Data will be discarded anyway... */
8200 if (common->currententry != NULL)
8201   return cc + 1 + IMM2_SIZE;
8202 
8203 if (!optimized_cbracket)
8204   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
8205 offset <<= 1;
8206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8207 if (!optimized_cbracket)
8208   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8209 return cc + 1 + IMM2_SIZE;
8210 }
8211 
compile_control_verb_matchingpath(compiler_common * common,pcre_uchar * cc,backtrack_common * parent)8212 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8213 {
8214 DEFINE_COMPILER;
8215 backtrack_common *backtrack;
8216 pcre_uchar opcode = *cc;
8217 pcre_uchar *ccend = cc + 1;
8218 
8219 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
8220   ccend += 2 + cc[1];
8221 
8222 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8223 
8224 if (opcode == OP_SKIP)
8225   {
8226   allocate_stack(common, 1);
8227   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8228   return ccend;
8229   }
8230 
8231 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
8232   {
8233   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8234   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8235   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
8236   OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8237   }
8238 
8239 return ccend;
8240 }
8241 
/* One-element opcode buffer holding OP_THEN_TRAP. The then_trap_backtrack
records created in this file point their common.cc field at it. */
static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
8243 
compile_then_trap_matchingpath(compiler_common * common,pcre_uchar * cc,pcre_uchar * ccend,backtrack_common * parent)8244 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8245 {
8246 DEFINE_COMPILER;
8247 backtrack_common *backtrack;
8248 BOOL needs_control_head;
8249 int size;
8250 
8251 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8252 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
8253 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8254 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
8255 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
8256 
8257 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8258 size = 3 + (size < 0 ? 0 : size);
8259 
8260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8261 allocate_stack(common, size);
8262 if (size > 3)
8263   OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
8264 else
8265   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
8266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
8267 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
8268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
8269 
8270 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8271 if (size >= 0)
8272   init_frame(common, cc, ccend, size - 1, 0, FALSE);
8273 }
8274 
/* Generates the matching path for the opcode sequence [cc, ccend).

Each opcode is handed to the emitter for its family. Every emitter returns the
address of the opcode that follows; generation stops early when that address
is NULL. ccend must point at OP_END or at an opcode in the OP_ALT..OP_KETRPOS
range (asserted below).

When common->has_then is set and common->then_offsets has a non-zero entry for
cc, a then-trap entry is emitted before the sequence and a second
then_trap_backtrack record is pushed after it; common->then_trap is restored
to its previous value at the end.

Emitters that take a jump list receive the nextbacktracks list of the record
at parent->top when there is one, otherwise parent->topbacktracks. */
static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Opcodes handled directly by the single-item emitter. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT:
    case OP_NOTI:
    case OP_REVERSE:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* The current OVECTOR(0) value is saved on the stack and then replaced
    by STR_PTR. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* In JIT_COMPILE mode compile_charn_matchingpath (which also receives
    ccend) is used; the other modes use the single-item emitter. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == JIT_COMPILE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Repeated single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* The opcode found 32 bytes after the first operand unit (presumably
    just past the class bitmap) tells whether the class is repeated. */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    /* Same test as above; here the distance to the following opcode is read
    from the operand with GET(cc, 1). */
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;
#endif

    /* A back reference followed by a repeat opcode goes to the reference
    iterator emitter, a single one to compile_ref_matchingpath. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    /* As above with two IMM2 operands; the single case runs
    compile_dnref_search before the reference match. */
    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    /* Assertions get their own assert_backtrack record. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* The bracket body is stepped over here (cc moves to the bracket end).
    One stack slot holding STR_PTR is pushed, or two slots (0 and STR_PTR)
    when the bracket ends with OP_KETRMIN. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* OP_BRAZERO is followed either by a bracket (opcode above
    OP_ASSERTBACK_NOT) or by an assertion. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* The previous mark value is saved on the stack and cc + 2 becomes the
    new mark, both in the local slot and in jit_arguments. With has_skip_arg
    five slots are used instead of one: the extra four form an entry holding
    the previous control head, type_mark, cc + 2 and STR_PTR, and the control
    head is pointed at it. */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* No code is emitted; the bracket that follows is stepped over. */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    /* Any other opcode is unexpected here. */
    default:
    SLJIT_ASSERT_STOP();
    return;
    }
  /* An emitter returned NULL: give up on this sequence. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
8582 
/* The helper macros of the matching path generators end here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of the given record and leaves the enclosing
function with a plain "return;" when sljit reports a compiler error. It
expects the names "common" and "compiler" to be in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the backtrack record named "current" as the given record type. */
#define CURRENT_AS(type) ((type *)current)
8597 
/* Generates the backtracking path of a repeated single item whose backtrack
record is "current".

The repeat opcode, the item type and the min / max counts are decoded from
current->cc by get_iterator_parameters(). The state of the iterator is read
through (base, offset0) and (base, offset1): when PRIVATE_DATA(cc) is zero
these are the two topmost slots of the stack (STACK_TOP), otherwise they are
located at private_data_ptr relative to SLJIT_SP. Stack slots are released
only in the former case.

Most cases either jump back to the matchingpath label stored in the
iterator_backtrack record to try again, or fall through after binding the
jumps collected in current->topbacktracks. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
int max = -1, min = -1;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);

switch(opcode)
  {
  case OP_STAR:
  case OP_PLUS:
  case OP_UPTO:
  case OP_CRRANGE:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* For these two item types the positions are kept one per stack slot:
    pop the topmost one and resume at matchingpath unless it is zero. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(current->topbacktracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
    }
  else
    {
    if (opcode == OP_UPTO)
      min = 0;
    if (opcode <= OP_PLUS)
      {
      /* OP_STAR / OP_PLUS: stop once the position in offset0 is not above
      the value kept in offset1. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      }
    else
      {
      /* OP_UPTO / OP_CRRANGE: offset1 is a counter. Stop when it is not
      above min + 1, otherwise store it decremented by one. */
      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
      OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
      }
    /* Step back over one item, remember the new position and retry. */
    skip_char_back(common);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
    /* For OP_CRRANGE the collected jumps are bound before the two slots are
    released, for OP_PLUS after. */
    if (opcode == OP_CRRANGE)
      set_jumps(current->topbacktracks, LABEL());
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    if (opcode == OP_PLUS)
      set_jumps(current->topbacktracks, LABEL());
    }
  break;

  case OP_MINSTAR:
  case OP_MINPLUS:
  /* Try to match one more item from the saved position. On success the new
  position is stored and the matching path is resumed; on failure the slot is
  released. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  if (opcode == OP_MINPLUS)
    set_jumps(current->topbacktracks, LABEL());
  break;

  case OP_MINUPTO:
  case OP_CRMINRANGE:
  if (opcode == OP_CRMINRANGE)
    {
    /* The collected jumps land on the label that is also the head of the
    loop used while the counter is below min + 1. */
    label = LABEL();
    set_jumps(current->topbacktracks, label);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist);

  /* One more item matched: store the position and the incremented counter. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, base, offset1, TMP1, 0);

  if (opcode == OP_CRMINRANGE)
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);

  /* With max == 0 (OP_CRMINRANGE only) the matching path is always resumed;
  otherwise only while the counter is below max + 2. */
  if (opcode == OP_CRMINRANGE && max == 0)
    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  else
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved position is loaded and the slot is cleared. A non-zero
  position resumes the matching path; a zero one skips to the release of the
  slot. The collected jumps enter a second copy of the load / clear sequence
  that always resumes the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  set_jumps(current->topbacktracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* The saved position is loaded and the slot is cleared. A zero position
  skips the attempt; otherwise one item is tried and, on success, the matching
  path is resumed. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* Only the collected jumps need a landing point. */
  case OP_EXACT:
  case OP_POSPLUS:
  case OP_CRPOSRANGE:
  set_jumps(current->topbacktracks, LABEL());
  break;

  /* Nothing is emitted for these. */
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }
}
8741 
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)8742 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8743 {
8744 DEFINE_COMPILER;
8745 pcre_uchar *cc = current->cc;
8746 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8747 pcre_uchar type;
8748 
8749 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
8750 
8751 if ((type & 0x1) == 0)
8752   {
8753   /* Maximize case. */
8754   set_jumps(current->topbacktracks, LABEL());
8755   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8756   free_stack(common, 1);
8757   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8758   return;
8759   }
8760 
8761 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8762 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8763 set_jumps(current->topbacktracks, LABEL());
8764 free_stack(common, ref ? 2 : 3);
8765 }
8766 
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)8767 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8768 {
8769 DEFINE_COMPILER;
8770 
8771 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8772   compile_backtrackingpath(common, current->top);
8773 set_jumps(current->topbacktracks, LABEL());
8774 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8775   return;
8776 
8777 if (common->has_set_som && common->mark_ptr != 0)
8778   {
8779   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8780   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8781   free_stack(common, 2);
8782   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
8783   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
8784   }
8785 else if (common->has_set_som || common->mark_ptr != 0)
8786   {
8787   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8788   free_stack(common, 1);
8789   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
8790   }
8791 }
8792 
/* Generates the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is not expected here).

With OP_BRAZERO the saved position is first reloaded from STACK(0); a
non-zero value means the assertion can still be tried through the matchingpath
label of the record, after the slot has been overwritten with zero.

When the record's framesize is negative, or the assertion is a negative one
preceded by OP_BRAZERO, only that retry logic is emitted. Otherwise, for
OP_ASSERT / OP_ASSERTBACK the frame is reverted first: STACK_TOP is reloaded
from the private data slot, the revertframes routine is called, and the
private data slot is refilled from the stack. The jumps collected in
current->topbacktracks are bound in every path that gets that far. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
assert_backtrack *record = CURRENT_AS(assert_backtrack);
BOOL zero_wrapped;
struct sljit_jump *skipped = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
zero_wrapped = (*cc == OP_BRAZERO);
if (zero_wrapped)
  {
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (record->framesize < 0)
  {
  set_jumps(current->topbacktracks, LABEL());
  if (!zero_wrapped)
    return;

  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, record->matchingpath);
  free_stack(common, 1);
  return;
  }

if (zero_wrapped)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, record->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  /* A zero position skips everything emitted below. */
  skipped = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), record->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), record->private_data_ptr, SLJIT_MEM1(STACK_TOP), record->framesize * sizeof(sljit_sw));
  }
set_jumps(current->topbacktracks, LABEL());

if (!zero_wrapped)
  return;

/* We know there is enough place on the stack. */
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, record->matchingpath);
JUMPHERE(skipped);
}
8859 
/* Emits the backtracking path of a bracketed group.

   common   compiler state shared by the whole JIT compilation.
   current  backtrack record of the group; read through
            CURRENT_AS(bracket_backtrack).

   current->cc may begin with OP_BRAZERO / OP_BRAMINZERO. The opcode that
   follows is tested below against OP_CBRA, OP_SCBRA, OP_SBRA, OP_COND,
   OP_SCOND, OP_ONCE and OP_ONCE_NC.

   Outline of the generated code:
     1. restore the repeat counter, the zero-repeat flag and the capture
        values from the backtrack stack;
     2. dispatch on the alternative index read from the backtrack stack
        (compare chain up to 4 alternatives, table jump above that);
     3. backtracking code of current->top, then for every OP_ALT its
        matching path followed by its backtracking path;
     4. restore the remaining saved values and either re-enter the group
        (repeating kets, OP_EXACT) or fall through.

   The function returns early, leaving the path unfinished, when
   allocate_read_only_data() returns NULL or when
   sljit_get_compiler_error() reports an error after compile_matchingpath().
   NOTE(review): the stack layout consumed here presumably mirrors what
   compile_bracket_matchingpath pushed - confirm against that function. */
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)8860 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8861 {
8862 DEFINE_COMPILER;
8863 int opcode, stacksize, alt_count, alt_max;
8864 int offset = 0;
8865 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8866 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
8867 pcre_uchar *cc = current->cc;
8868 pcre_uchar *ccbegin;
8869 pcre_uchar *ccprev;
8870 pcre_uchar bra = OP_BRA;
8871 pcre_uchar ket;
8872 assert_backtrack *assert;
8873 sljit_uw *next_update_addr = NULL;
8874 BOOL has_alternatives;
8875 BOOL needs_control_head = FALSE;
8876 struct sljit_jump *brazero = NULL;
8877 struct sljit_jump *alt1 = NULL;
8878 struct sljit_jump *alt2 = NULL;
8879 struct sljit_jump *once = NULL;
8880 struct sljit_jump *cond = NULL;
8881 struct sljit_label *rmin_label = NULL;
8882 struct sljit_label *exact_label = NULL;
8883
     /* Step over an optional zero-repeat prefix; bra remembers which one it was. */
8884 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8885   {
8886   bra = *cc;
8887   cc++;
8888   }
8889
     /* ccbegin temporarily addresses the closing ket opcode of the group. */
8890 opcode = *cc;
8891 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
8892 ket = *ccbegin;
     /* A plain OP_KET with non-zero private data describes a counted repeat:
        counter location at +0, repeat type at +2, repeat count at +3. */
8893 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
8894   {
8895   repeat_ptr = PRIVATE_DATA(ccbegin);
8896   repeat_type = PRIVATE_DATA(ccbegin + 2);
8897   repeat_count = PRIVATE_DATA(ccbegin + 3);
8898   SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
       /* From here on UPTO / MINUPTO repeats take the KETRMAX / KETRMIN paths. */
8899   if (repeat_type == OP_UPTO)
8900     ket = OP_KETRMAX;
8901   if (repeat_type == OP_MINUPTO)
8902     ket = OP_KETRMIN;
8903   }
     /* ccbegin now addresses the bracket opcode, cc the end of its first branch. */
8904 ccbegin = cc;
8905 cc += GET(cc, 1);
8906 has_alternatives = *cc == OP_ALT;
     /* Conditional groups: alternatives exist when the condition is an assertion
        or when condfailed jumps were recorded for the condition. */
8907 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8908   has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
     /* Capturing groups: the 2-byte value after the link, doubled, indexes OVECTOR. */
8909 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8910   offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
     /* A COND closed by a repeating ket is handled as SCOND. */
8911 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8912   opcode = OP_SCOND;
     /* ONCE_NC shares every code path of ONCE in this function. */
8913 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8914   opcode = OP_ONCE;
8915
8916 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
8917
8918 /* Decoding the needs_control_head in framesize. */
     /* Bit 0 of u.framesize is the flag; the remaining bits are the frame size.
        Note that the stored field itself is shifted in place. */
8919 if (opcode == OP_ONCE)
8920   {
8921   needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
8922   CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
8923   }
8924
     /* Counted repeat handled as KETRMAX / KETRMIN: pop the saved counter.
        For OP_UPTO the counter is written back incremented by one. */
8925 if (ket != OP_KET && repeat_type != 0)
8926   {
8927   /* TMP1 is used in OP_KETRMIN below. */
8928   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8929   free_stack(common, 1);
8930   if (repeat_type == OP_UPTO)
8931     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
8932   else
8933     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
8934   }
8935
8936 if (ket == OP_KETRMAX)
8937   {
8938   if (bra == OP_BRAZERO)
8939     {
         /* Pop the top word; a zero value skips forward to the brazero label
            placed in the KETRMAX tail at the end of this function. */
8940     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8941     free_stack(common, 1);
8942     brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8943     }
8944   }
8945 else if (ket == OP_KETRMIN)
8946   {
8947   if (bra != OP_BRAMINZERO)
8948     {
8949     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8950     if (repeat_type != 0)
8951       {
8952       /* TMP1 was set a few lines above. */
           /* A non-zero popped counter re-enters the group. */
8953       CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8954       /* Drop STR_PTR for non-greedy plus quantifier. */
8955       if (opcode != OP_ONCE)
8956         free_stack(common, 1);
8957       }
8958     else if (opcode >= OP_SBRA || opcode == OP_ONCE)
8959       {
8960       /* Checking zero-length iteration. */
           /* Re-enter the group only when STR_PTR differs from the stored start;
              with a saved ONCE frame the start is read through the frame pointer. */
8961       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
8962         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8963       else
8964         {
8965         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8966         CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8967         }
8968       /* Drop STR_PTR for non-greedy plus quantifier. */
8969       if (opcode != OP_ONCE)
8970         free_stack(common, 1);
8971       }
8972     else
8973       JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8974     }
       /* Target of the "more saved iterations" jump in the KETRMIN tail below. */
8975   rmin_label = LABEL();
8976   if (repeat_type != 0)
8977     OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8978   }
8979 else if (bra == OP_BRAZERO)
8980   {
       /* Non-repeating group with BRAZERO: pop the top word; a non-zero value
          jumps to the brazero label at the very end of this function. */
8981   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8982   free_stack(common, 1);
8983   brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8984   }
8985 else if (repeat_type == OP_EXACT)
8986   {
       /* Restart the counter at 1; exact_label is the loop head used by the
          OP_EXACT tail at the end of this function. */
8987   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8988   exact_label = LABEL();
8989   }
8990
     /* Capturing group: restore the values saved on the backtrack stack. */
8991 if (offset != 0)
8992   {
8993   if (common->capture_last_ptr != 0)
8994     {
         /* Three words: STACK(0) -> capture_last, STACK(1) -> OVECTOR(offset),
            STACK(2) -> OVECTOR(offset + 1). */
8995     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
8996     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8997     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8998     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
8999     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9000     free_stack(common, 3);
9001     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
9002     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9003     }
9004   else if (common->optimized_cbracket[offset >> 1] == 0)
9005     {
         /* Two words: STACK(0) -> OVECTOR(offset), STACK(1) -> OVECTOR(offset + 1). */
9006     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9007     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9008     free_stack(common, 2);
9009     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9010     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9011     }
9012   }
9013
9014 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
9015   {
       /* With a saved frame, reload STACK_TOP from the private data slot and
          call the shared revertframes routine. */
9016   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9017     {
9018     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9019     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9020     }
       /* Unconditional jump, bound by JUMPHERE(once) in the OP_ONCE tail below. */
9021   once = JUMP(SLJIT_JUMP);
9022   }
9023 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9024   {
9025   if (has_alternatives)
9026     {
9027     /* Always exactly one alternative. */
         /* Pop the selector word; alt1 is taken when it equals sizeof(sljit_uw). */
9028     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9029     free_stack(common, 1);
9030
9031     alt_max = 2;
9032     alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
9033     }
9034   }
9035 else if (has_alternatives)
9036   {
       /* Pop the selector word. The alternative loop below stores it as a
          multiple of sizeof(sljit_uw) (see alt_count). */
9037   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9038   free_stack(common, 1);
9039
9040   if (alt_max > 4)
9041     {
9042     /* Table jump if alt_max is greater than 4. */
         /* The indirect jump reads its target from table + TMP1; one table
            slot per alternative is registered through add_label_addr(). */
9043     next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
9044     if (SLJIT_UNLIKELY(next_update_addr == NULL))
9045       return;
9046     sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
9047     add_label_addr(common, next_update_addr++);
9048     }
9049   else
9050     {
         /* Compare chain; alt1 / alt2 are bound and re-armed inside the loop. */
9051     if (alt_max == 4)
9052       alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9053     alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
9054     }
9055   }
9056
     /* Backtracking code for the records chained at current->top. */
9057 COMPILE_BACKTRACKINGPATH(current->top);
9058 if (current->topbacktracks)
9059   set_jumps(current->topbacktracks, LABEL());
9060
9061 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9062   {
9063   /* Conditional block always has at most one alternative. */
9064   if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
9065     {
9066     SLJIT_ASSERT(has_alternatives);
9067     assert = CURRENT_AS(bracket_backtrack)->u.assert;
         /* Positive assertion with a frame: revert it and reload the private
            data slot from the word at framesize * sizeof(sljit_sw). */
9068     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
9069       {
9070       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9071       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9072       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9073       }
         /* cond jumps over the alternative code; it is bound after the loop. */
9074     cond = JUMP(SLJIT_JUMP);
9075     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
9076     }
9077   else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
9078     {
9079     SLJIT_ASSERT(has_alternatives);
9080     cond = JUMP(SLJIT_JUMP);
9081     set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
9082     }
9083   else
9084     SLJIT_ASSERT(!has_alternatives);
9085   }
9086
9087 if (has_alternatives)
9088   {
       /* alt_count is the selector value stored for the alternative being
          compiled; it advances by sizeof(sljit_uw) per iteration. */
9089   alt_count = sizeof(sljit_uw);
9090   do
9091     {
         /* The record is reused for every alternative, so its lists are cleared. */
9092     current->top = NULL;
9093     current->topbacktracks = NULL;
9094     current->nextbacktracks = NULL;
9095     /* Conditional blocks always have an additional alternative, even if it is empty. */
9096     if (*cc == OP_ALT)
9097       {
9098       ccprev = cc + 1 + LINK_SIZE;
9099       cc += GET(cc, 1);
           /* Reload STR_PTR before trying the next alternative; the location
              depends on the bracket type. Conditional groups skip the reload. */
9100       if (opcode != OP_COND && opcode != OP_SCOND)
9101         {
9102         if (opcode != OP_ONCE)
9103           {
9104           if (private_data_ptr != 0)
9105             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9106           else
9107             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9108           }
9109         else
9110           OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
9111         }
9112       compile_matchingpath(common, ccprev, cc, current);
9113       if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9114         return;
9115       }
9116
9117     /* Instructions after the current alternative is successfully matched. */
9118     /* There is a similar code in compile_bracket_matchingpath. */
9119     if (opcode == OP_ONCE)
9120       match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9121
         /* First pass: count the stack words needed, then allocate them at once. */
9122     stacksize = 0;
9123     if (repeat_type == OP_MINUPTO)
9124       {
9125       /* We need to preserve the counter. TMP2 will be used below. */
9126       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
9127       stacksize++;
9128       }
9129     if (ket != OP_KET || bra != OP_BRA)
9130       stacksize++;
9131     if (offset != 0)
9132       {
9133       if (common->capture_last_ptr != 0)
9134         stacksize++;
9135       if (common->optimized_cbracket[offset >> 1] == 0)
9136         stacksize += 2;
9137       }
9138     if (opcode != OP_ONCE)
9139       stacksize++;
9140
9141     if (stacksize > 0)
9142       allocate_stack(common, stacksize);
9143
         /* Second pass: fill the words; stacksize is reused as the slot index. */
9144     stacksize = 0;
9145     if (repeat_type == OP_MINUPTO)
9146       {
9147       /* TMP2 was set above. */
9148       OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9149       stacksize++;
9150       }
9151
9152     if (ket != OP_KET || bra != OP_BRA)
9153       {
           /* Repeating kets store STR_PTR; a plain ket with a BRAZERO /
              BRAMINZERO prefix stores 0. */
9154       if (ket != OP_KET)
9155         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9156       else
9157         OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9158       stacksize++;
9159       }
9160
9161     if (offset != 0)
9162       stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9163
         /* Store the selector that the dispatch code above reads back. */
9164     if (opcode != OP_ONCE)
9165       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
9166
9167     if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
9168       {
9169       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
9170       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9171       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9172       }
9173
9174     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
9175
         /* Bind the dispatch entry for this alternative's backtracking code and
            arm the compare for the next one (or register the next table slot). */
9176     if (opcode != OP_ONCE)
9177       {
9178       if (alt_max > 4)
9179         add_label_addr(common, next_update_addr++);
9180       else
9181         {
9182         if (alt_count != 2 * sizeof(sljit_uw))
9183           {
9184           JUMPHERE(alt1);
9185           if (alt_max == 3 && alt_count == sizeof(sljit_uw))
9186             alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9187           }
9188         else
9189           {
9190           JUMPHERE(alt2);
9191           if (alt_max == 4)
9192             alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
9193           }
9194         }
9195       alt_count += sizeof(sljit_uw);
9196       }
9197
9198     COMPILE_BACKTRACKINGPATH(current->top);
9199     if (current->topbacktracks)
9200       set_jumps(current->topbacktracks, LABEL());
9201     SLJIT_ASSERT(!current->nextbacktracks);
9202     }
9203   while (*cc == OP_ALT);
9204
9205   if (cond != NULL)
9206     {
9207     SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
9208     assert = CURRENT_AS(bracket_backtrack)->u.assert;
         /* Negative assertion condition with a frame: revert it here, before
            the cond jump is bound. */
9209     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
9210       {
9211       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9212       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9213       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9214       }
9215     JUMPHERE(cond);
9216     }
9217
9218   /* Free the STR_PTR. */
9219   if (private_data_ptr == 0)
9220     free_stack(common, 1);
9221   }
9222
     /* Restore what is still saved on the backtrack stack for this bracket type. */
9223 if (offset != 0)
9224   {
9225   /* Using both tmp register is better for instruction scheduling. */
9226   if (common->optimized_cbracket[offset >> 1] != 0)
9227     {
9228     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9229     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9230     free_stack(common, 2);
9231     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9232     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9233     }
9234   else
9235     {
9236     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9237     free_stack(common, 1);
9238     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9239     }
9240   }
9241 else if (opcode == OP_SBRA || opcode == OP_SCOND)
9242   {
9243   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
9244   free_stack(common, 1);
9245   }
9246 else if (opcode == OP_ONCE)
9247   {
       /* cc is reset to the end of the first branch so *cc == OP_ALT tells
          whether the group has alternatives. */
9248   cc = ccbegin + GET(ccbegin, 1);
9249   stacksize = needs_control_head ? 1 : 0;
9250
9251   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9252     {
9253     /* Reset head and drop saved frame. */
9254     stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
9255     }
9256   else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
9257     {
9258     /* The STR_PTR must be released. */
9259     stacksize++;
9260     }
       /* NOTE(review): stacksize can be 0 here (no control head, no frame, no
          STR_PTR word); the call is still made unconditionally. */
9261   free_stack(common, stacksize);
9262
       /* Landing point of the jump emitted right after revertframes above. */
9263   JUMPHERE(once);
9264   /* Restore previous private_data_ptr */
9265   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9266     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
9267   else if (ket == OP_KETRMIN)
9268     {
9269     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9270     /* See the comment below. */
9271     free_stack(common, 2);
9272     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9273     }
9274   }
9275
     /* Tail: decide whether to re-enter the group or fall through. */
9276 if (repeat_type == OP_EXACT)
9277   {
       /* Increment the counter and loop back to exact_label while it is
          <= repeat_count. */
9278   OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9279   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
9280   CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
9281   }
9282 else if (ket == OP_KETRMAX)
9283   {
       /* A non-zero saved STR_PTR resumes at recursive_matchingpath. */
9284   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9285   if (bra != OP_BRAZERO)
9286     free_stack(common, 1);
9287
9288   CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9289   if (bra == OP_BRAZERO)
9290     {
         /* Otherwise take the zero-iteration path with the STR_PTR from
            STACK(1); the brazero jump from the top of the function lands here. */
9291     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9292     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9293     JUMPHERE(brazero);
9294     free_stack(common, 1);
9295     }
9296   }
9297 else if (ket == OP_KETRMIN)
9298   {
9299   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9300
9301   /* OP_ONCE removes everything in case of a backtrack, so we don't
9302   need to explicitly release the STR_PTR. The extra release would
9303   affect badly the free_stack(2) above. */
9304   if (opcode != OP_ONCE)
9305     free_stack(common, 1);
       /* A non-zero word loops back to rmin_label. */
9306   CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
9307   if (opcode == OP_ONCE)
9308     free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
9309   else if (bra == OP_BRAMINZERO)
9310     free_stack(common, 1);
9311   }
9312 else if (bra == OP_BRAZERO)
9313   {
       /* Non-repeating BRAZERO: try the zero-iteration path; the brazero jump
          emitted at the top of the function is bound after it. */
9314   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9315   JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9316   JUMPHERE(brazero);
9317   }
9318 }
9319 
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)9320 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9321 {
9322 DEFINE_COMPILER;
9323 int offset;
9324 struct sljit_jump *jump;
9325 
9326 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
9327   {
9328   if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
9329     {
9330     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
9331     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9332     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9333     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9334     if (common->capture_last_ptr != 0)
9335       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9336     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9337     if (common->capture_last_ptr != 0)
9338       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
9339     }
9340   set_jumps(current->topbacktracks, LABEL());
9341   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9342   return;
9343   }
9344 
9345 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
9346 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9347 
9348 if (current->topbacktracks)
9349   {
9350   jump = JUMP(SLJIT_JUMP);
9351   set_jumps(current->topbacktracks, LABEL());
9352   /* Drop the stack frame. */
9353   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9354   JUMPHERE(jump);
9355   }
9356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
9357 }
9358 
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)9359 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9360 {
9361 assert_backtrack backtrack;
9362 
9363 current->top = NULL;
9364 current->topbacktracks = NULL;
9365 current->nextbacktracks = NULL;
9366 if (current->cc[1] > OP_ASSERTBACK_NOT)
9367   {
9368   /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
9369   compile_bracket_matchingpath(common, current->cc, current);
9370   compile_bracket_backtrackingpath(common, current->top);
9371   }
9372 else
9373   {
9374   memset(&backtrack, 0, sizeof(backtrack));
9375   backtrack.common.cc = current->cc;
9376   backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
9377   /* Manual call of compile_assert_matchingpath. */
9378   compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
9379   }
9380 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
9381 }
9382 
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)9383 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9384 {
9385 DEFINE_COMPILER;
9386 pcre_uchar opcode = *current->cc;
9387 struct sljit_label *loop;
9388 struct sljit_jump *jump;
9389 
9390 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
9391   {
9392   if (common->then_trap != NULL)
9393     {
9394     SLJIT_ASSERT(common->control_head_ptr != 0);
9395 
9396     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9397     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
9398     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
9399     jump = JUMP(SLJIT_JUMP);
9400 
9401     loop = LABEL();
9402     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
9403     JUMPHERE(jump);
9404     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
9405     CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
9406     add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
9407     return;
9408     }
9409   else if (common->positive_assert)
9410     {
9411     add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
9412     return;
9413     }
9414   }
9415 
9416 if (common->local_exit)
9417   {
9418   if (common->quit_label == NULL)
9419     add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9420   else
9421     JUMPTO(SLJIT_JUMP, common->quit_label);
9422   return;
9423   }
9424 
9425 if (opcode == OP_SKIP_ARG)
9426   {
9427   SLJIT_ASSERT(common->control_head_ptr != 0);
9428   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9429   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
9430   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
9431   sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
9432   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9433 
9434   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9435   add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
9436   return;
9437   }
9438 
9439 if (opcode == OP_SKIP)
9440   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9441 else
9442   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
9443 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
9444 }
9445 
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)9446 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9447 {
9448 DEFINE_COMPILER;
9449 struct sljit_jump *jump;
9450 int size;
9451 
9452 if (CURRENT_AS(then_trap_backtrack)->then_trap)
9453   {
9454   common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
9455   return;
9456   }
9457 
9458 size = CURRENT_AS(then_trap_backtrack)->framesize;
9459 size = 3 + (size < 0 ? 0 : size);
9460 
9461 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
9462 free_stack(common, size);
9463 jump = JUMP(SLJIT_JUMP);
9464 
9465 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
9466 /* STACK_TOP is set by THEN. */
9467 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
9468   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9470 free_stack(common, 3);
9471 
9472 JUMPHERE(jump);
9473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
9474 }
9475 
/* Emits the backtracking code for a chain of backtrack records.

   common   shared compiler state.
   current  first record to process; the chain is followed through ->prev
            until NULL.

   For each record the pending nextbacktracks jumps are bound to the current
   position, then the opcode at record->cc selects the handler. The value of
   common->then_trap on entry is put back before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *entry_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Put the saved word back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character repeats. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Caseless variants. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated character repeats. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Negated, caseless variants. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type repeats. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Character classes. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after BRAZERO tells assertions and brackets apart. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five words are released: the mark value is read from STACK(4) and
         the control head from STACK(0). */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the mark value was saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* Without a local exit the return value is set to "no match". */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_ASSERT_STOP();
    break;
    }
  }

common->then_trap = entry_then_trap;
}
9662 
compile_recurse(compiler_common * common)9663 static SLJIT_INLINE void compile_recurse(compiler_common *common)
9664 {
9665 DEFINE_COMPILER;
9666 pcre_uchar *cc = common->start + common->currententry->start;
9667 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
9668 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
9669 BOOL needs_control_head;
9670 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
9671 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
9672 int alternativesize;
9673 BOOL needs_frame;
9674 backtrack_common altbacktrack;
9675 struct sljit_jump *jump;
9676 
9677 /* Recurse captures then. */
9678 common->then_trap = NULL;
9679 
9680 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
9681 needs_frame = framesize >= 0;
9682 if (!needs_frame)
9683   framesize = 0;
9684 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
9685 
9686 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
9687 common->currententry->entry = LABEL();
9688 set_jumps(common->currententry->calls, common->currententry->entry);
9689 
9690 sljit_emit_fast_enter(compiler, TMP2, 0);
9691 allocate_stack(common, private_data_size + framesize + alternativesize);
9692 count_match(common);
9693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
9694 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9695 if (needs_control_head)
9696   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9697 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
9698 if (needs_frame)
9699   init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
9700 
9701 if (alternativesize > 0)
9702   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9703 
9704 memset(&altbacktrack, 0, sizeof(backtrack_common));
9705 common->quit_label = NULL;
9706 common->accept_label = NULL;
9707 common->quit = NULL;
9708 common->accept = NULL;
9709 altbacktrack.cc = ccbegin;
9710 cc += GET(cc, 1);
9711 while (1)
9712   {
9713   altbacktrack.top = NULL;
9714   altbacktrack.topbacktracks = NULL;
9715 
9716   if (altbacktrack.cc != ccbegin)
9717     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9718 
9719   compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
9720   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9721     return;
9722 
9723   add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9724 
9725   compile_backtrackingpath(common, altbacktrack.top);
9726   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9727     return;
9728   set_jumps(altbacktrack.topbacktracks, LABEL());
9729 
9730   if (*cc != OP_ALT)
9731     break;
9732 
9733   altbacktrack.cc = cc + 1 + LINK_SIZE;
9734   cc += GET(cc, 1);
9735   }
9736 
9737 /* None of them matched. */
9738 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9739 jump = JUMP(SLJIT_JUMP);
9740 
9741 if (common->quit != NULL)
9742   {
9743   set_jumps(common->quit, LABEL());
9744   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9745   if (needs_frame)
9746     {
9747     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9748     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9749     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9750     }
9751   OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9752   common->quit = NULL;
9753   add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9754   }
9755 
9756 set_jumps(common->accept, LABEL());
9757 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9758 if (needs_frame)
9759   {
9760   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9761   add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9762   OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9763   }
9764 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
9765 
9766 JUMPHERE(jump);
9767 if (common->quit != NULL)
9768   set_jumps(common->quit, LABEL());
9769 copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9770 free_stack(common, private_data_size + framesize + alternativesize);
9771 if (needs_control_head)
9772   {
9773   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
9774   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9775   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
9776   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9777   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
9778   }
9779 else
9780   {
9781   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9782   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9783   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
9784   }
9785 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
9786 }
9787 
9788 #undef COMPILE_BACKTRACKINGPATH
9789 #undef CURRENT_AS
9790 
9791 void
PRIV(jit_compile)9792 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
9793 {
9794 struct sljit_compiler *compiler;
9795 backtrack_common rootbacktrack;
9796 compiler_common common_data;
9797 compiler_common *common = &common_data;
9798 const pcre_uint8 *tables = re->tables;
9799 pcre_study_data *study;
9800 int private_data_size;
9801 pcre_uchar *ccend;
9802 executable_functions *functions;
9803 void *executable_func;
9804 sljit_uw executable_size;
9805 sljit_uw total_length;
9806 label_addr_list *label_addr;
9807 struct sljit_label *mainloop_label = NULL;
9808 struct sljit_label *continue_match_label;
9809 struct sljit_label *empty_match_found_label = NULL;
9810 struct sljit_label *empty_match_backtrack_label = NULL;
9811 struct sljit_label *reset_match_label;
9812 struct sljit_label *quit_label;
9813 struct sljit_jump *jump;
9814 struct sljit_jump *minlength_check_failed = NULL;
9815 struct sljit_jump *reqbyte_notfound = NULL;
9816 struct sljit_jump *empty_match = NULL;
9817 
9818 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9819 study = extra->study_data;
9820 
9821 if (!tables)
9822   tables = PRIV(default_tables);
9823 
9824 memset(&rootbacktrack, 0, sizeof(backtrack_common));
9825 memset(common, 0, sizeof(compiler_common));
9826 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9827 
9828 common->start = rootbacktrack.cc;
9829 common->read_only_data_head = NULL;
9830 common->fcc = tables + fcc_offset;
9831 common->lcc = (sljit_sw)(tables + lcc_offset);
9832 common->mode = mode;
9833 common->might_be_empty = study->minlength == 0;
9834 common->nltype = NLTYPE_FIXED;
9835 switch(re->options & PCRE_NEWLINE_BITS)
9836   {
9837   case 0:
9838   /* Compile-time default */
9839   switch(NEWLINE)
9840     {
9841     case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9842     case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9843     default: common->newline = NEWLINE; break;
9844     }
9845   break;
9846   case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
9847   case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
9848   case PCRE_NEWLINE_CR+
9849        PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
9850   case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9851   case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9852   default: return;
9853   }
9854 common->nlmax = READ_CHAR_MAX;
9855 common->nlmin = 0;
9856 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9857   common->bsr_nltype = NLTYPE_ANYCRLF;
9858 else if ((re->options & PCRE_BSR_UNICODE) != 0)
9859   common->bsr_nltype = NLTYPE_ANY;
9860 else
9861   {
9862 #ifdef BSR_ANYCRLF
9863   common->bsr_nltype = NLTYPE_ANYCRLF;
9864 #else
9865   common->bsr_nltype = NLTYPE_ANY;
9866 #endif
9867   }
9868 common->bsr_nlmax = READ_CHAR_MAX;
9869 common->bsr_nlmin = 0;
9870 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9871 common->ctypes = (sljit_sw)(tables + ctypes_offset);
9872 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9873 common->name_count = re->name_count;
9874 common->name_entry_size = re->name_entry_size;
9875 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
9876 #ifdef SUPPORT_UTF
9877 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
9878 common->utf = (re->options & PCRE_UTF8) != 0;
9879 #ifdef SUPPORT_UCP
9880 common->use_ucp = (re->options & PCRE_UCP) != 0;
9881 #endif
9882 if (common->utf)
9883   {
9884   if (common->nltype == NLTYPE_ANY)
9885     common->nlmax = 0x2029;
9886   else if (common->nltype == NLTYPE_ANYCRLF)
9887     common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9888   else
9889     {
9890     /* We only care about the first newline character. */
9891     common->nlmax = common->newline & 0xff;
9892     }
9893 
9894   if (common->nltype == NLTYPE_FIXED)
9895     common->nlmin = common->newline & 0xff;
9896   else
9897     common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9898 
9899   if (common->bsr_nltype == NLTYPE_ANY)
9900     common->bsr_nlmax = 0x2029;
9901   else
9902     common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9903   common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9904   }
9905 #endif /* SUPPORT_UTF */
9906 ccend = bracketend(common->start);
9907 
9908 /* Calculate the local space size on the stack. */
9909 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
9910 common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
9911 if (!common->optimized_cbracket)
9912   return;
9913 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
9914 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9915 #else
9916 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9917 #endif
9918 
9919 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9920 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9921 common->capture_last_ptr = common->ovector_start;
9922 common->ovector_start += sizeof(sljit_sw);
9923 #endif
9924 if (!check_opcode_types(common, common->start, ccend))
9925   {
9926   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9927   return;
9928   }
9929 
9930 /* Checking flags and updating ovector_start. */
9931 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9932   {
9933   common->req_char_ptr = common->ovector_start;
9934   common->ovector_start += sizeof(sljit_sw);
9935   }
9936 if (mode != JIT_COMPILE)
9937   {
9938   common->start_used_ptr = common->ovector_start;
9939   common->ovector_start += sizeof(sljit_sw);
9940   if (mode == JIT_PARTIAL_SOFT_COMPILE)
9941     {
9942     common->hit_start = common->ovector_start;
9943     common->ovector_start += 2 * sizeof(sljit_sw);
9944     }
9945   else
9946     {
9947     SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9948     common->needs_start_ptr = TRUE;
9949     }
9950   }
9951 if ((re->options & PCRE_FIRSTLINE) != 0)
9952   {
9953   common->first_line_end = common->ovector_start;
9954   common->ovector_start += sizeof(sljit_sw);
9955   }
9956 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9957 common->control_head_ptr = 1;
9958 #endif
9959 if (common->control_head_ptr != 0)
9960   {
9961   common->control_head_ptr = common->ovector_start;
9962   common->ovector_start += sizeof(sljit_sw);
9963   }
9964 if (common->needs_start_ptr && common->has_set_som)
9965   {
9966   /* Saving the real start pointer is necessary. */
9967   common->start_ptr = common->ovector_start;
9968   common->ovector_start += sizeof(sljit_sw);
9969   }
9970 else
9971   common->needs_start_ptr = FALSE;
9972 
9973 /* Aligning ovector to even number of sljit words. */
9974 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9975   common->ovector_start += sizeof(sljit_sw);
9976 
9977 if (common->start_ptr == 0)
9978   common->start_ptr = OVECTOR(0);
9979 
9980 /* Capturing brackets cannot be optimized if callouts are allowed. */
9981 if (common->capture_last_ptr != 0)
9982   memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9983 
9984 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9985 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9986 
9987 total_length = ccend - common->start;
9988 common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)), compiler->allocator_data);
9989 if (!common->private_data_ptrs)
9990   {
9991   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9992   return;
9993   }
9994 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9995 
9996 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9997 set_private_data_ptrs(common, &private_data_size, ccend);
9998 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
9999   {
10000   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10001   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10002   return;
10003   }
10004 
10005 if (common->has_then)
10006   {
10007   common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
10008   memset(common->then_offsets, 0, total_length);
10009   set_then_offsets(common, common->start, NULL);
10010   }
10011 
10012 compiler = sljit_create_compiler(NULL);
10013 if (!compiler)
10014   {
10015   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10016   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10017   return;
10018   }
10019 common->compiler = compiler;
10020 
10021 /* Main pcre_jit_exec entry. */
10022 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
10023 
10024 /* Register init. */
10025 reset_ovector(common, (re->top_bracket + 1) * 2);
10026 if (common->req_char_ptr != 0)
10027   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
10028 
10029 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
10030 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
10031 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10032 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
10033 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10034 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
10035 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
10036 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
10037 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
10038 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
10039 
10040 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10041   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10042 if (common->mark_ptr != 0)
10043   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
10044 if (common->control_head_ptr != 0)
10045   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10046 
10047 /* Main part of the matching */
10048 if ((re->options & PCRE_ANCHORED) == 0)
10049   {
10050   mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10051   continue_match_label = LABEL();
10052   /* Forward search if possible. */
10053   if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
10054     {
10055     if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
10056       ;
10057     else if ((re->flags & PCRE_FIRSTSET) != 0)
10058       fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10059     else if ((re->flags & PCRE_STARTLINE) != 0)
10060       fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
10061     else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
10062       fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
10063     }
10064   }
10065 else
10066   continue_match_label = LABEL();
10067 
10068 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10069   {
10070   OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10071   OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
10072   minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
10073   }
10074 if (common->req_char_ptr != 0)
10075   reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
10076 
10077 /* Store the current STR_PTR in OVECTOR(0). */
10078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
10079 /* Copy the limit of allowed recursions. */
10080 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
10081 if (common->capture_last_ptr != 0)
10082   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
10083 
10084 if (common->needs_start_ptr)
10085   {
10086   SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
10087   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
10088   }
10089 else
10090   SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
10091 
10092 /* Copy the beginning of the string. */
10093 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10094   {
10095   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10096   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10097   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
10098   JUMPHERE(jump);
10099   }
10100 else if (mode == JIT_PARTIAL_HARD_COMPILE)
10101   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10102 
10103 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
10104 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10105   {
10106   sljit_free_compiler(compiler);
10107   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10108   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10109   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10110   return;
10111   }
10112 
10113 if (common->might_be_empty)
10114   {
10115   empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
10116   empty_match_found_label = LABEL();
10117   }
10118 
10119 common->accept_label = LABEL();
10120 if (common->accept != NULL)
10121   set_jumps(common->accept, common->accept_label);
10122 
10123 /* This means we have a match. Update the ovector. */
10124 copy_ovector(common, re->top_bracket + 1);
10125 common->quit_label = common->forced_quit_label = LABEL();
10126 if (common->quit != NULL)
10127   set_jumps(common->quit, common->quit_label);
10128 if (common->forced_quit != NULL)
10129   set_jumps(common->forced_quit, common->forced_quit_label);
10130 if (minlength_check_failed != NULL)
10131   SET_LABEL(minlength_check_failed, common->forced_quit_label);
10132 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
10133 
10134 if (mode != JIT_COMPILE)
10135   {
10136   common->partialmatchlabel = LABEL();
10137   set_jumps(common->partialmatch, common->partialmatchlabel);
10138   return_with_partial_match(common, common->quit_label);
10139   }
10140 
10141 if (common->might_be_empty)
10142   empty_match_backtrack_label = LABEL();
10143 compile_backtrackingpath(common, rootbacktrack.top);
10144 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10145   {
10146   sljit_free_compiler(compiler);
10147   SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10148   SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10149   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10150   return;
10151   }
10152 
10153 SLJIT_ASSERT(rootbacktrack.prev == NULL);
10154 reset_match_label = LABEL();
10155 
10156 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10157   {
10158   /* Update hit_start only in the first time. */
10159   jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
10160   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
10161   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
10162   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
10163   JUMPHERE(jump);
10164   }
10165 
10166 /* Check we have remaining characters. */
10167 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
10168   {
10169   SLJIT_ASSERT(common->first_line_end != 0);
10170   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
10171   }
10172 
10173 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
10174 
10175 if ((re->options & PCRE_ANCHORED) == 0)
10176   {
10177   if (common->ff_newline_shortcut != NULL)
10178     {
10179     if ((re->options & PCRE_FIRSTLINE) == 0)
10180       CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
10181     /* There cannot be more newlines here. */
10182     }
10183   else
10184     {
10185     if ((re->options & PCRE_FIRSTLINE) == 0)
10186       CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
10187     else
10188       CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
10189     }
10190   }
10191 
10192 /* No more remaining characters. */
10193 if (reqbyte_notfound != NULL)
10194   JUMPHERE(reqbyte_notfound);
10195 
10196 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10197   CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
10198 
10199 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10200 JUMPTO(SLJIT_JUMP, common->quit_label);
10201 
10202 flush_stubs(common);
10203 
10204 if (common->might_be_empty)
10205   {
10206   JUMPHERE(empty_match);
10207   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10208   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10209   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10210   OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10211   CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10212   OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10213   CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10214   JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
10215   }
10216 
10217 common->currententry = common->entries;
10218 common->local_exit = TRUE;
10219 quit_label = common->quit_label;
10220 while (common->currententry != NULL)
10221   {
10222   /* Might add new entries. */
10223   compile_recurse(common);
10224   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10225     {
10226     sljit_free_compiler(compiler);
10227     SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10228     SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10229     free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10230     return;
10231     }
10232   flush_stubs(common);
10233   common->currententry = common->currententry->next;
10234   }
10235 common->local_exit = FALSE;
10236 common->quit_label = quit_label;
10237 
10238 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
10239 /* This is a (really) rare case. */
10240 set_jumps(common->stackalloc, LABEL());
10241 /* RETURN_ADDR is not a saved register. */
10242 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
10244 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10246 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
10247 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
10248 
10249 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
10250 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10251 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10253 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
10254 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
10255 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
10256 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10257 
10258 /* Allocation failed. */
10259 JUMPHERE(jump);
10260 /* We break the return address cache here, but this is a really rare case. */
10261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
10262 JUMPTO(SLJIT_JUMP, common->quit_label);
10263 
10264 /* Call limit reached. */
10265 set_jumps(common->calllimit, LABEL());
10266 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
10267 JUMPTO(SLJIT_JUMP, common->quit_label);
10268 
10269 if (common->revertframes != NULL)
10270   {
10271   set_jumps(common->revertframes, LABEL());
10272   do_revertframes(common);
10273   }
10274 if (common->wordboundary != NULL)
10275   {
10276   set_jumps(common->wordboundary, LABEL());
10277   check_wordboundary(common);
10278   }
10279 if (common->anynewline != NULL)
10280   {
10281   set_jumps(common->anynewline, LABEL());
10282   check_anynewline(common);
10283   }
10284 if (common->hspace != NULL)
10285   {
10286   set_jumps(common->hspace, LABEL());
10287   check_hspace(common);
10288   }
10289 if (common->vspace != NULL)
10290   {
10291   set_jumps(common->vspace, LABEL());
10292   check_vspace(common);
10293   }
10294 if (common->casefulcmp != NULL)
10295   {
10296   set_jumps(common->casefulcmp, LABEL());
10297   do_casefulcmp(common);
10298   }
10299 if (common->caselesscmp != NULL)
10300   {
10301   set_jumps(common->caselesscmp, LABEL());
10302   do_caselesscmp(common);
10303   }
10304 if (common->reset_match != NULL)
10305   {
10306   set_jumps(common->reset_match, LABEL());
10307   do_reset_match(common, (re->top_bracket + 1) * 2);
10308   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
10309   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10310   JUMPTO(SLJIT_JUMP, reset_match_label);
10311   }
10312 #ifdef SUPPORT_UTF
10313 #ifdef COMPILE_PCRE8
10314 if (common->utfreadchar != NULL)
10315   {
10316   set_jumps(common->utfreadchar, LABEL());
10317   do_utfreadchar(common);
10318   }
10319 if (common->utfreadchar16 != NULL)
10320   {
10321   set_jumps(common->utfreadchar16, LABEL());
10322   do_utfreadchar16(common);
10323   }
10324 if (common->utfreadtype8 != NULL)
10325   {
10326   set_jumps(common->utfreadtype8, LABEL());
10327   do_utfreadtype8(common);
10328   }
10329 #endif /* COMPILE_PCRE8 */
10330 #endif /* SUPPORT_UTF */
10331 #ifdef SUPPORT_UCP
10332 if (common->getucd != NULL)
10333   {
10334   set_jumps(common->getucd, LABEL());
10335   do_getucd(common);
10336   }
10337 #endif
10338 
10339 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10340 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10341 
10342 executable_func = sljit_generate_code(compiler);
10343 executable_size = sljit_get_generated_code_size(compiler);
10344 label_addr = common->label_addrs;
10345 while (label_addr != NULL)
10346   {
10347   *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
10348   label_addr = label_addr->next;
10349   }
10350 sljit_free_compiler(compiler);
10351 if (executable_func == NULL)
10352   {
10353   free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10354   return;
10355   }
10356 
10357 /* Reuse the function descriptor if possible. */
10358 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
10359   functions = (executable_functions *)extra->executable_jit;
10360 else
10361   {
10362   /* Note: If your memory-checker has flagged the allocation below as a
10363    * memory leak, it is probably because you either forgot to call
10364    * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
10365    * pcre16_extra) object, or you called said function after having
10366    * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
10367    * of the object. (The function will only free the JIT data if the
10368    * bit remains set, as the bit indicates that the pointer to the data
10369    * is valid.)
10370    */
10371   functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
10372   if (functions == NULL)
10373     {
10374     /* This case is highly unlikely since we just recently
10375     freed a lot of memory. Not impossible though. */
10376     sljit_free_code(executable_func);
10377     free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10378     return;
10379     }
10380   memset(functions, 0, sizeof(executable_functions));
10381   functions->top_bracket = (re->top_bracket + 1) * 2;
10382   functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
10383   extra->executable_jit = functions;
10384   extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
10385   }
10386 
10387 functions->executable_funcs[mode] = executable_func;
10388 functions->read_only_data_heads[mode] = common->read_only_data_head;
10389 functions->executable_sizes[mode] = executable_size;
10390 }
10391 
jit_machine_stack_exec(jit_arguments * arguments,void * executable_func)10392 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
10393 {
10394 union {
10395    void *executable_func;
10396    jit_function call_executable_func;
10397 } convert_executable_func;
10398 pcre_uint8 local_space[MACHINE_STACK_SIZE];
10399 struct sljit_stack local_stack;
10400 
10401 local_stack.top = (sljit_sw)&local_space;
10402 local_stack.base = local_stack.top;
10403 local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
10404 local_stack.max_limit = local_stack.limit;
10405 arguments->stack = &local_stack;
10406 convert_executable_func.executable_func = executable_func;
10407 return convert_executable_func.call_executable_func(arguments);
10408 }
10409 
10410 int
PRIV(jit_exec)10411 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
10412   int length, int start_offset, int options, int *offsets, int offset_count)
10413 {
10414 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10415 union {
10416    void *executable_func;
10417    jit_function call_executable_func;
10418 } convert_executable_func;
10419 jit_arguments arguments;
10420 int max_offset_count;
10421 int retval;
10422 int mode = JIT_COMPILE;
10423 
10424 if ((options & PCRE_PARTIAL_HARD) != 0)
10425   mode = JIT_PARTIAL_HARD_COMPILE;
10426 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10427   mode = JIT_PARTIAL_SOFT_COMPILE;
10428 
10429 if (functions->executable_funcs[mode] == NULL)
10430   return PCRE_ERROR_JIT_BADOPTION;
10431 
10432 /* Sanity checks should be handled by pcre_exec. */
10433 arguments.str = subject + start_offset;
10434 arguments.begin = subject;
10435 arguments.end = subject + length;
10436 arguments.mark_ptr = NULL;
10437 /* JIT decreases this value less frequently than the interpreter. */
10438 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10439 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10440   arguments.limit_match = functions->limit_match;
10441 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10442 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10443 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10444 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10445 arguments.offsets = offsets;
10446 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10447 arguments.real_offset_count = offset_count;
10448 
10449 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10450 the output vector for storing captured strings, with the remainder used as
10451 workspace. We don't need the workspace here. For compatibility, we limit the
10452 number of captured strings in the same way as pcre_exec(), so that the user
10453 gets the same result with and without JIT. */
10454 
10455 if (offset_count != 2)
10456   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10457 max_offset_count = functions->top_bracket;
10458 if (offset_count > max_offset_count)
10459   offset_count = max_offset_count;
10460 arguments.offset_count = offset_count;
10461 
10462 if (functions->callback)
10463   arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
10464 else
10465   arguments.stack = (struct sljit_stack *)functions->userdata;
10466 
10467 if (arguments.stack == NULL)
10468   retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
10469 else
10470   {
10471   convert_executable_func.executable_func = functions->executable_funcs[mode];
10472   retval = convert_executable_func.call_executable_func(&arguments);
10473   }
10474 
10475 if (retval * 2 > offset_count)
10476   retval = 0;
10477 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10478   *(extra_data->mark) = arguments.mark_ptr;
10479 
10480 return retval;
10481 }
10482 
10483 #if defined COMPILE_PCRE8
10484 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_jit_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offset_count,pcre_jit_stack * stack)10485 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
10486   PCRE_SPTR subject, int length, int start_offset, int options,
10487   int *offsets, int offset_count, pcre_jit_stack *stack)
10488 #elif defined COMPILE_PCRE16
10489 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10490 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
10491   PCRE_SPTR16 subject, int length, int start_offset, int options,
10492   int *offsets, int offset_count, pcre16_jit_stack *stack)
10493 #elif defined COMPILE_PCRE32
10494 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10495 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
10496   PCRE_SPTR32 subject, int length, int start_offset, int options,
10497   int *offsets, int offset_count, pcre32_jit_stack *stack)
10498 #endif
10499 {
10500 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
10501 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10502 union {
10503    void *executable_func;
10504    jit_function call_executable_func;
10505 } convert_executable_func;
10506 jit_arguments arguments;
10507 int max_offset_count;
10508 int retval;
10509 int mode = JIT_COMPILE;
10510 
10511 SLJIT_UNUSED_ARG(argument_re);
10512 
10513 /* Plausibility checks */
10514 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
10515 
10516 if ((options & PCRE_PARTIAL_HARD) != 0)
10517   mode = JIT_PARTIAL_HARD_COMPILE;
10518 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10519   mode = JIT_PARTIAL_SOFT_COMPILE;
10520 
10521 if (functions->executable_funcs[mode] == NULL)
10522   return PCRE_ERROR_JIT_BADOPTION;
10523 
10524 /* Sanity checks should be handled by pcre_exec. */
10525 arguments.stack = (struct sljit_stack *)stack;
10526 arguments.str = subject_ptr + start_offset;
10527 arguments.begin = subject_ptr;
10528 arguments.end = subject_ptr + length;
10529 arguments.mark_ptr = NULL;
10530 /* JIT decreases this value less frequently than the interpreter. */
10531 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10532 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10533   arguments.limit_match = functions->limit_match;
10534 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10535 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10536 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10537 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10538 arguments.offsets = offsets;
10539 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10540 arguments.real_offset_count = offset_count;
10541 
10542 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10543 the output vector for storing captured strings, with the remainder used as
10544 workspace. We don't need the workspace here. For compatibility, we limit the
10545 number of captured strings in the same way as pcre_exec(), so that the user
10546 gets the same result with and without JIT. */
10547 
10548 if (offset_count != 2)
10549   offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10550 max_offset_count = functions->top_bracket;
10551 if (offset_count > max_offset_count)
10552   offset_count = max_offset_count;
10553 arguments.offset_count = offset_count;
10554 
10555 convert_executable_func.executable_func = functions->executable_funcs[mode];
10556 retval = convert_executable_func.call_executable_func(&arguments);
10557 
10558 if (retval * 2 > offset_count)
10559   retval = 0;
10560 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10561   *(extra_data->mark) = arguments.mark_ptr;
10562 
10563 return retval;
10564 }
10565 
10566 void
PRIV(jit_free)10567 PRIV(jit_free)(void *executable_funcs)
10568 {
10569 int i;
10570 executable_functions *functions = (executable_functions *)executable_funcs;
10571 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10572   {
10573   if (functions->executable_funcs[i] != NULL)
10574     sljit_free_code(functions->executable_funcs[i]);
10575   free_read_only_data(functions->read_only_data_heads[i], NULL);
10576   }
10577 SLJIT_FREE(functions, compiler->allocator_data);
10578 }
10579 
10580 int
PRIV(jit_get_size)10581 PRIV(jit_get_size)(void *executable_funcs)
10582 {
10583 int i;
10584 sljit_uw size = 0;
10585 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
10586 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10587   size += executable_sizes[i];
10588 return (int)size;
10589 }
10590 
/* Returns the string supplied by sljit_get_platform_name(), unchanged. */

const char*
PRIV(jit_get_target)(void)
{
return sljit_get_platform_name();
}
10596 
10597 #if defined COMPILE_PCRE8
10598 PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize,int maxsize)10599 pcre_jit_stack_alloc(int startsize, int maxsize)
10600 #elif defined COMPILE_PCRE16
10601 PCRE_EXP_DECL pcre16_jit_stack *
10602 pcre16_jit_stack_alloc(int startsize, int maxsize)
10603 #elif defined COMPILE_PCRE32
10604 PCRE_EXP_DECL pcre32_jit_stack *
10605 pcre32_jit_stack_alloc(int startsize, int maxsize)
10606 #endif
10607 {
10608 if (startsize < 1 || maxsize < 1)
10609   return NULL;
10610 if (startsize > maxsize)
10611   startsize = maxsize;
10612 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10613 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10614 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
10615 }
10616 
/* Releases a JIT stack by handing it to sljit_free_stack() with NULL
allocator data.

Argument:
  stack   the JIT stack to release

NOTE(review): stack is passed on without a NULL check; whether
sljit_free_stack() accepts NULL is not visible here - confirm before relying
on it.
*/

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack *stack)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_stack_free(pcre16_jit_stack *stack)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_jit_stack_free(pcre32_jit_stack *stack)
#endif
{
sljit_free_stack((struct sljit_stack *)stack, NULL);
}
10630 
10631 #if defined COMPILE_PCRE8
10632 PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra * extra,pcre_jit_callback callback,void * userdata)10633 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10634 #elif defined COMPILE_PCRE16
10635 PCRE_EXP_DECL void
10636 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10637 #elif defined COMPILE_PCRE32
10638 PCRE_EXP_DECL void
10639 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10640 #endif
10641 {
10642 executable_functions *functions;
10643 if (extra != NULL &&
10644     (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
10645     extra->executable_jit != NULL)
10646   {
10647   functions = (executable_functions *)extra->executable_jit;
10648   functions->callback = callback;
10649   functions->userdata = userdata;
10650   }
10651 }
10652 
/* Forwards to sljit_free_unused_memory_exec(). Takes no arguments and
returns nothing. */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_free_unused_memory(void)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_jit_free_unused_memory(void)
#endif
{
sljit_free_unused_memory_exec();
}
10666 
10667 #else  /* SUPPORT_JIT */
10668 
10669 /* These are dummy functions to avoid linking errors when JIT support is not
10670 being compiled. */
10671 
/* Stub used when JIT support is not compiled in: both arguments are ignored
and NULL is always returned. */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize, int maxsize)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL pcre16_jit_stack *
pcre16_jit_stack_alloc(int startsize, int maxsize)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL pcre32_jit_stack *
pcre32_jit_stack_alloc(int startsize, int maxsize)
#endif
{
/* The casts to void only mark the parameters as deliberately unused. */
(void)startsize;
(void)maxsize;
return NULL;
}
10687 
/* Stub used when JIT support is not compiled in: the argument is ignored
and nothing is done. */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack *stack)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_stack_free(pcre16_jit_stack *stack)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_jit_stack_free(pcre32_jit_stack *stack)
#endif
{
/* The cast to void only marks the parameter as deliberately unused. */
(void)stack;
}
10701 
/* Stub used when JIT support is not compiled in: all three arguments are
ignored and nothing is stored. */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
#endif
{
/* The casts to void only mark the parameters as deliberately unused. */
(void)extra;
(void)callback;
(void)userdata;
}
10717 
/* Stub used when JIT support is not compiled in: the body is empty, so a
call has no effect. */

#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_free_unused_memory(void)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_free_unused_memory(void)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL void
pcre32_jit_free_unused_memory(void)
#endif
{
}
10730 
10731 #endif
10732 
10733 /* End of pcre_jit_compile.c */
10734