1 /**********************************************************************
2   regcomp.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2013  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include "regparse.h"
33 
34 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35 
36 extern OnigCaseFoldType
onig_get_default_case_fold_flag(void)37 onig_get_default_case_fold_flag(void)
38 {
39   return OnigDefaultCaseFoldFlag;
40 }
41 
42 extern int
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)43 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 {
45   OnigDefaultCaseFoldFlag = case_fold_flag;
46   return 0;
47 }
48 
49 
50 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
51 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
52 #endif
53 
54 static UChar*
str_dup(UChar * s,UChar * end)55 str_dup(UChar* s, UChar* end)
56 {
57   int len = (int)(end - s);
58 
59   if (len > 0) {
60     UChar* r = (UChar* )xmalloc(len + 1);
61     CHECK_NULL_RETURN(r);
62     xmemcpy(r, s, len);
63     r[len] = (UChar )0;
64     return r;
65   }
66   else return NULL;
67 }
68 
69 static void
swap_node(Node * a,Node * b)70 swap_node(Node* a, Node* b)
71 {
72   Node c;
73   CopyMem (&c, a, sizeof (Node));
74   CopyMem (a, b, sizeof (Node));
75   CopyMem (b, &c, sizeof (Node));
76 
77   if (NTYPE(a) == NT_STR) {
78     StrNode* sn = NSTR(a);
79     if (sn->capa == 0) {
80       int len = (int)(sn->end - sn->s);
81       sn->s   = sn->buf;
82       sn->end = sn->s + len;
83     }
84   }
85 
86   if (NTYPE(b) == NT_STR) {
87     StrNode* sn = NSTR(b);
88     if (sn->capa == 0) {
89       int len = (int)(sn->end - sn->s);
90       sn->s   = sn->buf;
91       sn->end = sn->s + len;
92     }
93   }
94 }
95 
96 static OnigDistance
distance_add(OnigDistance d1,OnigDistance d2)97 distance_add(OnigDistance d1, OnigDistance d2)
98 {
99   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
100     return ONIG_INFINITE_DISTANCE;
101   else {
102     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
103     else return ONIG_INFINITE_DISTANCE;
104   }
105 }
106 
107 static OnigDistance
distance_multiply(OnigDistance d,int m)108 distance_multiply(OnigDistance d, int m)
109 {
110   if (m == 0) return 0;
111 
112   if (d < ONIG_INFINITE_DISTANCE / m)
113     return d * m;
114   else
115     return ONIG_INFINITE_DISTANCE;
116 }
117 
118 static int
bitset_is_empty(BitSetRef bs)119 bitset_is_empty(BitSetRef bs)
120 {
121   int i;
122   for (i = 0; i < (int )BITSET_SIZE; i++) {
123     if (bs[i] != 0) return 0;
124   }
125   return 1;
126 }
127 
128 #ifdef ONIG_DEBUG
129 static int
bitset_on_num(BitSetRef bs)130 bitset_on_num(BitSetRef bs)
131 {
132   int i, n;
133 
134   n = 0;
135   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
136     if (BITSET_AT(bs, i)) n++;
137   }
138   return n;
139 }
140 #endif
141 
142 extern int
onig_bbuf_init(BBuf * buf,int size)143 onig_bbuf_init(BBuf* buf, int size)
144 {
145   if (size <= 0) {
146     size   = 0;
147     buf->p = NULL;
148   }
149   else {
150     buf->p = (UChar* )xmalloc(size);
151     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
152   }
153 
154   buf->alloc = size;
155   buf->used  = 0;
156   return 0;
157 }
158 
159 
160 #ifdef USE_SUBEXP_CALL
161 
162 static int
unset_addr_list_init(UnsetAddrList * uslist,int size)163 unset_addr_list_init(UnsetAddrList* uslist, int size)
164 {
165   UnsetAddr* p;
166 
167   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
168   CHECK_NULL_RETURN_MEMERR(p);
169   uslist->num   = 0;
170   uslist->alloc = size;
171   uslist->us    = p;
172   return 0;
173 }
174 
175 static void
unset_addr_list_end(UnsetAddrList * uslist)176 unset_addr_list_end(UnsetAddrList* uslist)
177 {
178   if (IS_NOT_NULL(uslist->us))
179     xfree(uslist->us);
180 }
181 
182 static int
unset_addr_list_add(UnsetAddrList * uslist,int offset,struct _Node * node)183 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
184 {
185   UnsetAddr* p;
186   int size;
187 
188   if (uslist->num >= uslist->alloc) {
189     size = uslist->alloc * 2;
190     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr) * uslist->alloc);
191     CHECK_NULL_RETURN_MEMERR(p);
192     uslist->alloc = size;
193     uslist->us    = p;
194   }
195 
196   uslist->us[uslist->num].offset = offset;
197   uslist->us[uslist->num].target = node;
198   uslist->num++;
199   return 0;
200 }
201 #endif /* USE_SUBEXP_CALL */
202 
203 
204 static int
add_opcode(regex_t * reg,int opcode)205 add_opcode(regex_t* reg, int opcode)
206 {
207   BBUF_ADD1(reg, ((unsigned char)opcode));
208   return 0;
209 }
210 
211 #ifdef USE_COMBINATION_EXPLOSION_CHECK
212 static int
add_state_check_num(regex_t * reg,int num)213 add_state_check_num(regex_t* reg, int num)
214 {
215   StateCheckNumType n = (StateCheckNumType )num;
216 
217   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
218   return 0;
219 }
220 #endif
221 
222 static int
add_rel_addr(regex_t * reg,int addr)223 add_rel_addr(regex_t* reg, int addr)
224 {
225   RelAddrType ra = (RelAddrType )addr;
226 
227   BBUF_ADD(reg, &ra, SIZE_RELADDR);
228   return 0;
229 }
230 
231 static int
add_abs_addr(regex_t * reg,int addr)232 add_abs_addr(regex_t* reg, int addr)
233 {
234   AbsAddrType ra = (AbsAddrType )addr;
235 
236   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
237   return 0;
238 }
239 
240 static int
add_length(regex_t * reg,int len)241 add_length(regex_t* reg, int len)
242 {
243   LengthType l = (LengthType )len;
244 
245   BBUF_ADD(reg, &l, SIZE_LENGTH);
246   return 0;
247 }
248 
249 static int
add_mem_num(regex_t * reg,int num)250 add_mem_num(regex_t* reg, int num)
251 {
252   MemNumType n = (MemNumType )num;
253 
254   BBUF_ADD(reg, &n, SIZE_MEMNUM);
255   return 0;
256 }
257 
258 static int
add_pointer(regex_t * reg,void * addr)259 add_pointer(regex_t* reg, void* addr)
260 {
261   PointerType ptr = (PointerType )addr;
262 
263   BBUF_ADD(reg, &ptr, SIZE_POINTER);
264   return 0;
265 }
266 
267 static int
add_option(regex_t * reg,OnigOptionType option)268 add_option(regex_t* reg, OnigOptionType option)
269 {
270   BBUF_ADD(reg, &option, SIZE_OPTION);
271   return 0;
272 }
273 
274 static int
add_opcode_rel_addr(regex_t * reg,int opcode,int addr)275 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
276 {
277   int r;
278 
279   r = add_opcode(reg, opcode);
280   if (r) return r;
281   r = add_rel_addr(reg, addr);
282   return r;
283 }
284 
285 static int
add_bytes(regex_t * reg,UChar * bytes,int len)286 add_bytes(regex_t* reg, UChar* bytes, int len)
287 {
288   BBUF_ADD(reg, bytes, len);
289   return 0;
290 }
291 
292 static int
add_bitset(regex_t * reg,BitSetRef bs)293 add_bitset(regex_t* reg, BitSetRef bs)
294 {
295   BBUF_ADD(reg, bs, SIZE_BITSET);
296   return 0;
297 }
298 
299 static int
add_opcode_option(regex_t * reg,int opcode,OnigOptionType option)300 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
301 {
302   int r;
303 
304   r = add_opcode(reg, opcode);
305   if (r) return r;
306   r = add_option(reg, option);
307   return r;
308 }
309 
310 static int compile_length_tree(Node* node, regex_t* reg);
311 static int compile_tree(Node* node, regex_t* reg);
312 
313 
314 #define IS_NEED_STR_LEN_OP_EXACT(op) \
315    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
316     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
317 
318 static int
select_str_opcode(int mb_len,int str_len,int ignore_case)319 select_str_opcode(int mb_len, int str_len, int ignore_case)
320 {
321   int op;
322 
323   if (ignore_case) {
324     switch (str_len) {
325     case 1:  op = OP_EXACT1_IC; break;
326     default: op = OP_EXACTN_IC; break;
327     }
328   }
329   else {
330     switch (mb_len) {
331     case 1:
332       switch (str_len) {
333       case 1:  op = OP_EXACT1; break;
334       case 2:  op = OP_EXACT2; break;
335       case 3:  op = OP_EXACT3; break;
336       case 4:  op = OP_EXACT4; break;
337       case 5:  op = OP_EXACT5; break;
338       default: op = OP_EXACTN; break;
339       }
340       break;
341 
342     case 2:
343       switch (str_len) {
344       case 1:  op = OP_EXACTMB2N1; break;
345       case 2:  op = OP_EXACTMB2N2; break;
346       case 3:  op = OP_EXACTMB2N3; break;
347       default: op = OP_EXACTMB2N;  break;
348       }
349       break;
350 
351     case 3:
352       op = OP_EXACTMB3N;
353       break;
354 
355     default:
356       op = OP_EXACTMBN;
357       break;
358     }
359   }
360   return op;
361 }
362 
363 static int
compile_tree_empty_check(Node * node,regex_t * reg,int empty_info)364 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
365 {
366   int r;
367   int saved_num_null_check = reg->num_null_check;
368 
369   if (empty_info != 0) {
370     r = add_opcode(reg, OP_NULL_CHECK_START);
371     if (r) return r;
372     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
373     if (r) return r;
374     reg->num_null_check++;
375   }
376 
377   r = compile_tree(node, reg);
378   if (r) return r;
379 
380   if (empty_info != 0) {
381     if (empty_info == NQ_TARGET_IS_EMPTY)
382       r = add_opcode(reg, OP_NULL_CHECK_END);
383     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
384       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
385     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
386       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
387 
388     if (r) return r;
389     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
390   }
391   return r;
392 }
393 
394 #ifdef USE_SUBEXP_CALL
395 static int
compile_call(CallNode * node,regex_t * reg)396 compile_call(CallNode* node, regex_t* reg)
397 {
398   int r;
399 
400   r = add_opcode(reg, OP_CALL);
401   if (r) return r;
402   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
403                           node->target);
404   if (r) return r;
405   r = add_abs_addr(reg, 0 /*dummy addr.*/);
406   return r;
407 }
408 #endif
409 
410 static int
compile_tree_n_times(Node * node,int n,regex_t * reg)411 compile_tree_n_times(Node* node, int n, regex_t* reg)
412 {
413   int i, r;
414 
415   for (i = 0; i < n; i++) {
416     r = compile_tree(node, reg);
417     if (r) return r;
418   }
419   return 0;
420 }
421 
422 static int
add_compile_string_length(UChar * s ARG_UNUSED,int mb_len,int str_len,regex_t * reg ARG_UNUSED,int ignore_case)423 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
424                           regex_t* reg ARG_UNUSED, int ignore_case)
425 {
426   int len;
427   int op = select_str_opcode(mb_len, str_len, ignore_case);
428 
429   len = SIZE_OPCODE;
430 
431   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
432   if (IS_NEED_STR_LEN_OP_EXACT(op))
433     len += SIZE_LENGTH;
434 
435   len += mb_len * str_len;
436   return len;
437 }
438 
439 static int
add_compile_string(UChar * s,int mb_len,int str_len,regex_t * reg,int ignore_case)440 add_compile_string(UChar* s, int mb_len, int str_len,
441                    regex_t* reg, int ignore_case)
442 {
443   int op = select_str_opcode(mb_len, str_len, ignore_case);
444   add_opcode(reg, op);
445 
446   if (op == OP_EXACTMBN)
447     add_length(reg, mb_len);
448 
449   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
450     if (op == OP_EXACTN_IC)
451       add_length(reg, mb_len * str_len);
452     else
453       add_length(reg, str_len);
454   }
455 
456   add_bytes(reg, s, mb_len * str_len);
457   return 0;
458 }
459 
460 
461 static int
compile_length_string_node(Node * node,regex_t * reg)462 compile_length_string_node(Node* node, regex_t* reg)
463 {
464   int rlen, r, len, prev_len, slen, ambig;
465   OnigEncoding enc = reg->enc;
466   UChar *p, *prev;
467   StrNode* sn;
468 
469   sn = NSTR(node);
470   if (sn->end <= sn->s)
471     return 0;
472 
473   ambig = NSTRING_IS_AMBIG(node);
474 
475   p = prev = sn->s;
476   prev_len = enclen(enc, p);
477   p += prev_len;
478   slen = 1;
479   rlen = 0;
480 
481   for (; p < sn->end; ) {
482     len = enclen(enc, p);
483     if (len == prev_len) {
484       slen++;
485     }
486     else {
487       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
488       rlen += r;
489       prev = p;
490       slen = 1;
491       prev_len = len;
492     }
493     p += len;
494   }
495   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
496   rlen += r;
497   return rlen;
498 }
499 
500 static int
compile_length_string_raw_node(StrNode * sn,regex_t * reg)501 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
502 {
503   if (sn->end <= sn->s)
504     return 0;
505 
506   return add_compile_string_length(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);
507 }
508 
509 static int
compile_string_node(Node * node,regex_t * reg)510 compile_string_node(Node* node, regex_t* reg)
511 {
512   int r, len, prev_len, slen, ambig;
513   OnigEncoding enc = reg->enc;
514   UChar *p, *prev, *end;
515   StrNode* sn;
516 
517   sn = NSTR(node);
518   if (sn->end <= sn->s)
519     return 0;
520 
521   end = sn->end;
522   ambig = NSTRING_IS_AMBIG(node);
523 
524   p = prev = sn->s;
525   prev_len = enclen(enc, p);
526   p += prev_len;
527   slen = 1;
528 
529   for (; p < end; ) {
530     len = enclen(enc, p);
531     if (len == prev_len) {
532       slen++;
533     }
534     else {
535       r = add_compile_string(prev, prev_len, slen, reg, ambig);
536       if (r) return r;
537 
538       prev  = p;
539       slen  = 1;
540       prev_len = len;
541     }
542 
543     p += len;
544   }
545   return add_compile_string(prev, prev_len, slen, reg, ambig);
546 }
547 
548 static int
compile_string_raw_node(StrNode * sn,regex_t * reg)549 compile_string_raw_node(StrNode* sn, regex_t* reg)
550 {
551   if (sn->end <= sn->s)
552     return 0;
553 
554   return add_compile_string(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);
555 }
556 
557 static int
add_multi_byte_cclass(BBuf * mbuf,regex_t * reg)558 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
559 {
560 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
561   add_length(reg, mbuf->used);
562   return add_bytes(reg, mbuf->p, mbuf->used);
563 #else
564   int r, pad_size;
565   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
566 
567   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
568   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
569   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
570 
571   r = add_bytes(reg, mbuf->p, mbuf->used);
572 
573   /* padding for return value from compile_length_cclass_node() to be fix. */
574   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
575   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
576   return r;
577 #endif
578 }
579 
580 static int
compile_length_cclass_node(CClassNode * cc,regex_t * reg)581 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
582 {
583   int len;
584 
585   if (IS_NCCLASS_SHARE(cc)) {
586     len = SIZE_OPCODE + SIZE_POINTER;
587     return len;
588   }
589 
590   if (IS_NULL(cc->mbuf)) {
591     len = SIZE_OPCODE + SIZE_BITSET;
592   }
593   else {
594     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
595       len = SIZE_OPCODE;
596     }
597     else {
598       len = SIZE_OPCODE + SIZE_BITSET;
599     }
600 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601     len += SIZE_LENGTH + cc->mbuf->used;
602 #else
603     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
604 #endif
605   }
606 
607   return len;
608 }
609 
610 static int
compile_cclass_node(CClassNode * cc,regex_t * reg)611 compile_cclass_node(CClassNode* cc, regex_t* reg)
612 {
613   int r;
614 
615   if (IS_NCCLASS_SHARE(cc)) {
616     add_opcode(reg, OP_CCLASS_NODE);
617     r = add_pointer(reg, cc);
618     return r;
619   }
620 
621   if (IS_NULL(cc->mbuf)) {
622     if (IS_NCCLASS_NOT(cc))
623       add_opcode(reg, OP_CCLASS_NOT);
624     else
625       add_opcode(reg, OP_CCLASS);
626 
627     r = add_bitset(reg, cc->bs);
628   }
629   else {
630     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
631       if (IS_NCCLASS_NOT(cc))
632         add_opcode(reg, OP_CCLASS_MB_NOT);
633       else
634         add_opcode(reg, OP_CCLASS_MB);
635 
636       r = add_multi_byte_cclass(cc->mbuf, reg);
637     }
638     else {
639       if (IS_NCCLASS_NOT(cc))
640         add_opcode(reg, OP_CCLASS_MIX_NOT);
641       else
642         add_opcode(reg, OP_CCLASS_MIX);
643 
644       r = add_bitset(reg, cc->bs);
645       if (r) return r;
646       r = add_multi_byte_cclass(cc->mbuf, reg);
647     }
648   }
649 
650   return r;
651 }
652 
653 static int
entry_repeat_range(regex_t * reg,int id,int lower,int upper)654 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
655 {
656 #define REPEAT_RANGE_ALLOC  4
657 
658   OnigRepeatRange* p;
659 
660   if (reg->repeat_range_alloc == 0) {
661     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
662     CHECK_NULL_RETURN_MEMERR(p);
663     reg->repeat_range = p;
664     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
665   }
666   else if (reg->repeat_range_alloc <= id) {
667     int n;
668     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
669     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
670                                     sizeof(OnigRepeatRange) * n,
671                                     sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
672     CHECK_NULL_RETURN_MEMERR(p);
673     reg->repeat_range = p;
674     reg->repeat_range_alloc = n;
675   }
676   else {
677     p = reg->repeat_range;
678   }
679 
680   p[id].lower = lower;
681   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
682   return 0;
683 }
684 
685 static int
compile_range_repeat_node(QtfrNode * qn,int target_len,int empty_info,regex_t * reg)686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
687                           regex_t* reg)
688 {
689   int r;
690   int num_repeat = reg->num_repeat;
691 
692   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
693   if (r) return r;
694   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
695   reg->num_repeat++;
696   if (r) return r;
697   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
698   if (r) return r;
699 
700   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
701   if (r) return r;
702 
703   r = compile_tree_empty_check(qn->target, reg, empty_info);
704   if (r) return r;
705 
706   if (
707 #ifdef USE_SUBEXP_CALL
708       reg->num_call > 0 ||
709 #endif
710       IS_QUANTIFIER_IN_REPEAT(qn)) {
711     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
712   }
713   else {
714     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
715   }
716   if (r) return r;
717   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
718   return r;
719 }
720 
721 static int
is_anychar_star_quantifier(QtfrNode * qn)722 is_anychar_star_quantifier(QtfrNode* qn)
723 {
724   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
725       NTYPE(qn->target) == NT_CANY)
726     return 1;
727   else
728     return 0;
729 }
730 
731 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
732 #define CKN_ON   (ckn > 0)
733 
734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
735 
736 static int
compile_length_quantifier_node(QtfrNode * qn,regex_t * reg)737 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
738 {
739   int len, mod_tlen, cklen;
740   int ckn;
741   int infinite = IS_REPEAT_INFINITE(qn->upper);
742   int empty_info = qn->target_empty_info;
743   int tlen = compile_length_tree(qn->target, reg);
744 
745   if (tlen < 0) return tlen;
746 
747   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
748 
749   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
750 
751   /* anychar repeat */
752   if (NTYPE(qn->target) == NT_CANY) {
753     if (qn->greedy && infinite) {
754       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
755         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
756       else
757         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
758     }
759   }
760 
761   if (empty_info != 0)
762     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
763   else
764     mod_tlen = tlen;
765 
766   if (infinite && qn->lower <= 1) {
767     if (qn->greedy) {
768       if (qn->lower == 1)
769 	len = SIZE_OP_JUMP;
770       else
771 	len = 0;
772 
773       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
774     }
775     else {
776       if (qn->lower == 0)
777 	len = SIZE_OP_JUMP;
778       else
779 	len = 0;
780 
781       len += mod_tlen + SIZE_OP_PUSH + cklen;
782     }
783   }
784   else if (qn->upper == 0) {
785     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
786       len = SIZE_OP_JUMP + tlen;
787     else
788       len = 0;
789   }
790   else if (qn->upper == 1 && qn->greedy) {
791     if (qn->lower == 0) {
792       if (CKN_ON) {
793 	len = SIZE_OP_STATE_CHECK_PUSH + tlen;
794       }
795       else {
796 	len = SIZE_OP_PUSH + tlen;
797       }
798     }
799     else {
800       len = tlen;
801     }
802   }
803   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
804     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
805   }
806   else {
807     len = SIZE_OP_REPEAT_INC
808         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
809     if (CKN_ON)
810       len += SIZE_OP_STATE_CHECK;
811   }
812 
813   return len;
814 }
815 
816 static int
compile_quantifier_node(QtfrNode * qn,regex_t * reg)817 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
818 {
819   int r, mod_tlen;
820   int ckn;
821   int infinite = IS_REPEAT_INFINITE(qn->upper);
822   int empty_info = qn->target_empty_info;
823   int tlen = compile_length_tree(qn->target, reg);
824 
825   if (tlen < 0) return tlen;
826 
827   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
828 
829   if (is_anychar_star_quantifier(qn)) {
830     r = compile_tree_n_times(qn->target, qn->lower, reg);
831     if (r) return r;
832     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
833       if (IS_MULTILINE(reg->options))
834 	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
835       else
836 	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
837       if (r) return r;
838       if (CKN_ON) {
839 	r = add_state_check_num(reg, ckn);
840 	if (r) return r;
841       }
842 
843       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
844     }
845     else {
846       if (IS_MULTILINE(reg->options)) {
847 	r = add_opcode(reg, (CKN_ON ?
848 			       OP_STATE_CHECK_ANYCHAR_ML_STAR
849 			     : OP_ANYCHAR_ML_STAR));
850       }
851       else {
852 	r = add_opcode(reg, (CKN_ON ?
853 			       OP_STATE_CHECK_ANYCHAR_STAR
854 			     : OP_ANYCHAR_STAR));
855       }
856       if (r) return r;
857       if (CKN_ON)
858 	r = add_state_check_num(reg, ckn);
859 
860       return r;
861     }
862   }
863 
864   if (empty_info != 0)
865     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
866   else
867     mod_tlen = tlen;
868 
869   if (infinite && qn->lower <= 1) {
870     if (qn->greedy) {
871       if (qn->lower == 1) {
872 	r = add_opcode_rel_addr(reg, OP_JUMP,
873 			(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
874 	if (r) return r;
875       }
876 
877       if (CKN_ON) {
878 	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
879 	if (r) return r;
880 	r = add_state_check_num(reg, ckn);
881 	if (r) return r;
882 	r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
883       }
884       else {
885 	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
886       }
887       if (r) return r;
888       r = compile_tree_empty_check(qn->target, reg, empty_info);
889       if (r) return r;
890       r = add_opcode_rel_addr(reg, OP_JUMP,
891 	      -(mod_tlen + (int )SIZE_OP_JUMP
892 		+ (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
893     }
894     else {
895       if (qn->lower == 0) {
896 	r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
897 	if (r) return r;
898       }
899       r = compile_tree_empty_check(qn->target, reg, empty_info);
900       if (r) return r;
901       if (CKN_ON) {
902 	r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
903 	if (r) return r;
904 	r = add_state_check_num(reg, ckn);
905 	if (r) return r;
906 	r = add_rel_addr(reg,
907 		 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
908       }
909       else
910 	r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
911     }
912   }
913   else if (qn->upper == 0) {
914     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
915       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
916       if (r) return r;
917       r = compile_tree(qn->target, reg);
918     }
919     else
920       r = 0;
921   }
922   else if (qn->upper == 1 && qn->greedy) {
923     if (qn->lower == 0) {
924       if (CKN_ON) {
925 	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
926 	if (r) return r;
927 	r = add_state_check_num(reg, ckn);
928 	if (r) return r;
929 	r = add_rel_addr(reg, tlen);
930       }
931       else {
932 	r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
933       }
934       if (r) return r;
935     }
936 
937     r = compile_tree(qn->target, reg);
938   }
939   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
940     if (CKN_ON) {
941       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
942       if (r) return r;
943       r = add_state_check_num(reg, ckn);
944       if (r) return r;
945       r = add_rel_addr(reg, SIZE_OP_JUMP);
946     }
947     else {
948       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
949     }
950 
951     if (r) return r;
952     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
953     if (r) return r;
954     r = compile_tree(qn->target, reg);
955   }
956   else {
957     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
958     if (CKN_ON) {
959       if (r) return r;
960       r = add_opcode(reg, OP_STATE_CHECK);
961       if (r) return r;
962       r = add_state_check_num(reg, ckn);
963     }
964   }
965   return r;
966 }
967 
968 #else /* USE_COMBINATION_EXPLOSION_CHECK */
969 
970 static int
compile_length_quantifier_node(QtfrNode * qn,regex_t * reg)971 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
972 {
973   int len, mod_tlen;
974   int infinite = IS_REPEAT_INFINITE(qn->upper);
975   int empty_info = qn->target_empty_info;
976   int tlen = compile_length_tree(qn->target, reg);
977 
978   if (tlen < 0) return tlen;
979 
980   /* anychar repeat */
981   if (NTYPE(qn->target) == NT_CANY) {
982     if (qn->greedy && infinite) {
983       if (IS_NOT_NULL(qn->next_head_exact))
984         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
985       else
986         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
987     }
988   }
989 
990   if (empty_info != 0)
991     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
992   else
993     mod_tlen = tlen;
994 
995   if (infinite &&
996       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
997     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
998       len = SIZE_OP_JUMP;
999     }
1000     else {
1001       len = tlen * qn->lower;
1002     }
1003 
1004     if (qn->greedy) {
1005       if (IS_NOT_NULL(qn->head_exact))
1006 	len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1007       else if (IS_NOT_NULL(qn->next_head_exact))
1008 	len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1009       else
1010 	len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1011     }
1012     else
1013       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1014   }
1015   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1016     len = SIZE_OP_JUMP + tlen;
1017   }
1018   else if (!infinite && qn->greedy &&
1019            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1020                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1021     len = tlen * qn->lower;
1022     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1023   }
1024   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1025     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1026   }
1027   else {
1028     len = SIZE_OP_REPEAT_INC
1029         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1030   }
1031 
1032   return len;
1033 }
1034 
1035 static int
compile_quantifier_node(QtfrNode * qn,regex_t * reg)1036 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1037 {
1038   int i, r, mod_tlen;
1039   int infinite = IS_REPEAT_INFINITE(qn->upper);
1040   int empty_info = qn->target_empty_info;
1041   int tlen = compile_length_tree(qn->target, reg);
1042 
1043   if (tlen < 0) return tlen;
1044 
1045   if (is_anychar_star_quantifier(qn)) {
1046     r = compile_tree_n_times(qn->target, qn->lower, reg);
1047     if (r) return r;
1048     if (IS_NOT_NULL(qn->next_head_exact)) {
1049       if (IS_MULTILINE(reg->options))
1050 	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1051       else
1052 	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1053       if (r) return r;
1054       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1055     }
1056     else {
1057       if (IS_MULTILINE(reg->options))
1058 	return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1059       else
1060 	return add_opcode(reg, OP_ANYCHAR_STAR);
1061     }
1062   }
1063 
1064   if (empty_info != 0)
1065     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1066   else
1067     mod_tlen = tlen;
1068 
1069   if (infinite &&
1070       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1071     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1072       if (qn->greedy) {
1073 	if (IS_NOT_NULL(qn->head_exact))
1074 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1075 	else if (IS_NOT_NULL(qn->next_head_exact))
1076 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1077 	else
1078 	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1079       }
1080       else {
1081 	r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1082       }
1083       if (r) return r;
1084     }
1085     else {
1086       r = compile_tree_n_times(qn->target, qn->lower, reg);
1087       if (r) return r;
1088     }
1089 
1090     if (qn->greedy) {
1091       if (IS_NOT_NULL(qn->head_exact)) {
1092 	r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1093 			     mod_tlen + SIZE_OP_JUMP);
1094 	if (r) return r;
1095 	add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1097 	if (r) return r;
1098 	r = add_opcode_rel_addr(reg, OP_JUMP,
1099 	-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100       }
1101       else if (IS_NOT_NULL(qn->next_head_exact)) {
1102 	r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1103 				mod_tlen + SIZE_OP_JUMP);
1104 	if (r) return r;
1105 	add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1106 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1107 	if (r) return r;
1108 	r = add_opcode_rel_addr(reg, OP_JUMP,
1109           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1110       }
1111       else {
1112 	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1113 	if (r) return r;
1114 	r = compile_tree_empty_check(qn->target, reg, empty_info);
1115 	if (r) return r;
1116 	r = add_opcode_rel_addr(reg, OP_JUMP,
1117 		     -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1118       }
1119     }
1120     else {
1121       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1122       if (r) return r;
1123       r = compile_tree_empty_check(qn->target, reg, empty_info);
1124       if (r) return r;
1125       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1126     }
1127   }
1128   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1129     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1130     if (r) return r;
1131     r = compile_tree(qn->target, reg);
1132   }
1133   else if (!infinite && qn->greedy &&
1134            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1135                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1136     int n = qn->upper - qn->lower;
1137 
1138     r = compile_tree_n_times(qn->target, qn->lower, reg);
1139     if (r) return r;
1140 
1141     for (i = 0; i < n; i++) {
1142       r = add_opcode_rel_addr(reg, OP_PUSH,
1143 			   (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1144       if (r) return r;
1145       r = compile_tree(qn->target, reg);
1146       if (r) return r;
1147     }
1148   }
1149   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1150     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1151     if (r) return r;
1152     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1153     if (r) return r;
1154     r = compile_tree(qn->target, reg);
1155   }
1156   else {
1157     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1158   }
1159   return r;
1160 }
1161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1162 
1163 static int
compile_length_option_node(EncloseNode * node,regex_t * reg)1164 compile_length_option_node(EncloseNode* node, regex_t* reg)
1165 {
1166   int tlen;
1167   OnigOptionType prev = reg->options;
1168 
1169   reg->options = node->option;
1170   tlen = compile_length_tree(node->target, reg);
1171   reg->options = prev;
1172 
1173   if (tlen < 0) return tlen;
1174 
1175   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1176     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1177            + tlen + SIZE_OP_SET_OPTION;
1178   }
1179   else
1180     return tlen;
1181 }
1182 
1183 static int
compile_option_node(EncloseNode * node,regex_t * reg)1184 compile_option_node(EncloseNode* node, regex_t* reg)
1185 {
1186   int r;
1187   OnigOptionType prev = reg->options;
1188 
1189   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1190     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1191     if (r) return r;
1192     r = add_opcode_option(reg, OP_SET_OPTION, prev);
1193     if (r) return r;
1194     r = add_opcode(reg, OP_FAIL);
1195     if (r) return r;
1196   }
1197 
1198   reg->options = node->option;
1199   r = compile_tree(node->target, reg);
1200   reg->options = prev;
1201 
1202   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1203     if (r) return r;
1204     r = add_opcode_option(reg, OP_SET_OPTION, prev);
1205   }
1206   return r;
1207 }
1208 
1209 static int
compile_length_enclose_node(EncloseNode * node,regex_t * reg)1210 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1211 {
1212   int len;
1213   int tlen;
1214   QtfrNode* qn;
1215 
1216   qn = NULL;
1217 
1218   if (node->type == ENCLOSE_OPTION)
1219     return compile_length_option_node(node, reg);
1220 
1221   if (node->target) {
1222     tlen = compile_length_tree(node->target, reg);
1223     if (tlen < 0) return tlen;
1224   }
1225   else
1226     tlen = 0;
1227 
1228   switch (node->type) {
1229   case ENCLOSE_MEMORY:
1230 #ifdef USE_SUBEXP_CALL
1231     if (IS_ENCLOSE_CALLED(node)) {
1232       len = SIZE_OP_MEMORY_START_PUSH + tlen
1233 	  + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1234       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1235 	len += (IS_ENCLOSE_RECURSION(node)
1236 		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1237       else
1238 	len += (IS_ENCLOSE_RECURSION(node)
1239 		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1240     }
1241     else
1242 #endif
1243     {
1244       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1245 	len = SIZE_OP_MEMORY_START_PUSH;
1246       else
1247 	len = SIZE_OP_MEMORY_START;
1248 
1249       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1250 		     ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1251     }
1252     break;
1253 
1254   case ENCLOSE_STOP_BACKTRACK:
1255     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1256       if (node->target == NULL) {
1257         CHECK_NULL_RETURN_MEMERR(node->target);
1258       }
1259       qn = NQTFR(node->target);
1260       tlen = compile_length_tree(qn->target, reg);
1261       if (tlen < 0) return tlen;
1262 
1263       len = tlen * qn->lower
1264 	  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1265     }
1266     else {
1267       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1268     }
1269     break;
1270 
1271   default:
1272     return ONIGERR_TYPE_BUG;
1273     break;
1274   }
1275 
1276   return len;
1277 }
1278 
1279 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1280 
1281 static int
compile_enclose_node(EncloseNode * node,regex_t * reg)1282 compile_enclose_node(EncloseNode* node, regex_t* reg)
1283 {
1284   int r, len;
1285 
1286   if (node->type == ENCLOSE_OPTION)
1287     return compile_option_node(node, reg);
1288 
1289   switch (node->type) {
1290   case ENCLOSE_MEMORY:
1291 #ifdef USE_SUBEXP_CALL
1292     if (IS_ENCLOSE_CALLED(node)) {
1293       r = add_opcode(reg, OP_CALL);
1294       if (r) return r;
1295       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1296       node->state |= NST_ADDR_FIXED;
1297       r = add_abs_addr(reg, (int )node->call_addr);
1298       if (r) return r;
1299       len = compile_length_tree(node->target, reg);
1300       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1301       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1302 	len += (IS_ENCLOSE_RECURSION(node)
1303 		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1304       else
1305 	len += (IS_ENCLOSE_RECURSION(node)
1306 		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1307 
1308       r = add_opcode_rel_addr(reg, OP_JUMP, len);
1309       if (r) return r;
1310     }
1311 #endif
1312     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1313       r = add_opcode(reg, OP_MEMORY_START_PUSH);
1314     else
1315       r = add_opcode(reg, OP_MEMORY_START);
1316     if (r) return r;
1317     r = add_mem_num(reg, node->regnum);
1318     if (r) return r;
1319     r = compile_tree(node->target, reg);
1320     if (r) return r;
1321 #ifdef USE_SUBEXP_CALL
1322     if (IS_ENCLOSE_CALLED(node)) {
1323       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1324 	r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1325 			     ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1326       else
1327 	r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1328 			     ? OP_MEMORY_END_REC : OP_MEMORY_END));
1329 
1330       if (r) return r;
1331       r = add_mem_num(reg, node->regnum);
1332       if (r) return r;
1333       r = add_opcode(reg, OP_RETURN);
1334     }
1335     else
1336 #endif
1337     {
1338       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1339 	r = add_opcode(reg, OP_MEMORY_END_PUSH);
1340       else
1341 	r = add_opcode(reg, OP_MEMORY_END);
1342       if (r) return r;
1343       r = add_mem_num(reg, node->regnum);
1344     }
1345     break;
1346 
1347   case ENCLOSE_STOP_BACKTRACK:
1348     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1349       QtfrNode* qn = NQTFR(node->target);
1350       r = compile_tree_n_times(qn->target, qn->lower, reg);
1351       if (r) return r;
1352 
1353       len = compile_length_tree(qn->target, reg);
1354       if (len < 0) return len;
1355 
1356       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1357       if (r) return r;
1358       r = compile_tree(qn->target, reg);
1359       if (r) return r;
1360       r = add_opcode(reg, OP_POP);
1361       if (r) return r;
1362       r = add_opcode_rel_addr(reg, OP_JUMP,
1363 	 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1364     }
1365     else {
1366       r = add_opcode(reg, OP_PUSH_STOP_BT);
1367       if (r) return r;
1368       r = compile_tree(node->target, reg);
1369       if (r) return r;
1370       r = add_opcode(reg, OP_POP_STOP_BT);
1371     }
1372     break;
1373 
1374   default:
1375     return ONIGERR_TYPE_BUG;
1376     break;
1377   }
1378 
1379   return r;
1380 }
1381 
1382 static int
compile_length_anchor_node(AnchorNode * node,regex_t * reg)1383 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1384 {
1385   int len;
1386   int tlen = 0;
1387 
1388   if (node->target) {
1389     tlen = compile_length_tree(node->target, reg);
1390     if (tlen < 0) return tlen;
1391   }
1392 
1393   switch (node->type) {
1394   case ANCHOR_PREC_READ:
1395     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1396     break;
1397   case ANCHOR_PREC_READ_NOT:
1398     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1399     break;
1400   case ANCHOR_LOOK_BEHIND:
1401     len = SIZE_OP_LOOK_BEHIND + tlen;
1402     break;
1403   case ANCHOR_LOOK_BEHIND_NOT:
1404     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1405     break;
1406 
1407   default:
1408     len = SIZE_OPCODE;
1409     break;
1410   }
1411 
1412   return len;
1413 }
1414 
1415 static int
compile_anchor_node(AnchorNode * node,regex_t * reg)1416 compile_anchor_node(AnchorNode* node, regex_t* reg)
1417 {
1418   int r, len;
1419 
1420   switch (node->type) {
1421   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1422   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1423   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1424   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1425   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1426   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1427 
1428   case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
1429   case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1430 #ifdef USE_WORD_BEGIN_END
1431   case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
1432   case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
1433 #endif
1434 
1435   case ANCHOR_PREC_READ:
1436     r = add_opcode(reg, OP_PUSH_POS);
1437     if (r) return r;
1438     r = compile_tree(node->target, reg);
1439     if (r) return r;
1440     r = add_opcode(reg, OP_POP_POS);
1441     break;
1442 
1443   case ANCHOR_PREC_READ_NOT:
1444     len = compile_length_tree(node->target, reg);
1445     if (len < 0) return len;
1446     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1447     if (r) return r;
1448     r = compile_tree(node->target, reg);
1449     if (r) return r;
1450     r = add_opcode(reg, OP_FAIL_POS);
1451     break;
1452 
1453   case ANCHOR_LOOK_BEHIND:
1454     {
1455       int n;
1456       r = add_opcode(reg, OP_LOOK_BEHIND);
1457       if (r) return r;
1458       if (node->char_len < 0) {
1459 	r = get_char_length_tree(node->target, reg, &n);
1460 	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1461       }
1462       else
1463 	n = node->char_len;
1464       r = add_length(reg, n);
1465       if (r) return r;
1466       r = compile_tree(node->target, reg);
1467     }
1468     break;
1469 
1470   case ANCHOR_LOOK_BEHIND_NOT:
1471     {
1472       int n;
1473       len = compile_length_tree(node->target, reg);
1474       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1475 			   len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1476       if (r) return r;
1477       if (node->char_len < 0) {
1478 	r = get_char_length_tree(node->target, reg, &n);
1479 	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1480       }
1481       else
1482 	n = node->char_len;
1483       r = add_length(reg, n);
1484       if (r) return r;
1485       r = compile_tree(node->target, reg);
1486       if (r) return r;
1487       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1488     }
1489     break;
1490 
1491   default:
1492     return ONIGERR_TYPE_BUG;
1493     break;
1494   }
1495 
1496   return r;
1497 }
1498 
1499 static int
compile_length_tree(Node * node,regex_t * reg)1500 compile_length_tree(Node* node, regex_t* reg)
1501 {
1502   int len, type, r;
1503 
1504   type = NTYPE(node);
1505   switch (type) {
1506   case NT_LIST:
1507     len = 0;
1508     do {
1509       r = compile_length_tree(NCAR(node), reg);
1510       if (r < 0) return r;
1511       len += r;
1512     } while (IS_NOT_NULL(node = NCDR(node)));
1513     r = len;
1514     break;
1515 
1516   case NT_ALT:
1517     {
1518       int n;
1519 
1520       n = r = 0;
1521       do {
1522 	r += compile_length_tree(NCAR(node), reg);
1523 	n++;
1524       } while (IS_NOT_NULL(node = NCDR(node)));
1525       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1526     }
1527     break;
1528 
1529   case NT_STR:
1530     if (NSTRING_IS_RAW(node))
1531       r = compile_length_string_raw_node(NSTR(node), reg);
1532     else
1533       r = compile_length_string_node(node, reg);
1534     break;
1535 
1536   case NT_CCLASS:
1537     r = compile_length_cclass_node(NCCLASS(node), reg);
1538     break;
1539 
1540   case NT_CTYPE:
1541   case NT_CANY:
1542     r = SIZE_OPCODE;
1543     break;
1544 
1545   case NT_BREF:
1546     {
1547       BRefNode* br = NBREF(node);
1548 
1549 #ifdef USE_BACKREF_WITH_LEVEL
1550       if (IS_BACKREF_NEST_LEVEL(br)) {
1551         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1552             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1553       }
1554       else
1555 #endif
1556       if (br->back_num == 1) {
1557 	r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1558 	     ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1559       }
1560       else {
1561 	r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1562       }
1563     }
1564     break;
1565 
1566 #ifdef USE_SUBEXP_CALL
1567   case NT_CALL:
1568     r = SIZE_OP_CALL;
1569     break;
1570 #endif
1571 
1572   case NT_QTFR:
1573     r = compile_length_quantifier_node(NQTFR(node), reg);
1574     break;
1575 
1576   case NT_ENCLOSE:
1577     r = compile_length_enclose_node(NENCLOSE(node), reg);
1578     break;
1579 
1580   case NT_ANCHOR:
1581     r = compile_length_anchor_node(NANCHOR(node), reg);
1582     break;
1583 
1584   default:
1585     return ONIGERR_TYPE_BUG;
1586     break;
1587   }
1588 
1589   return r;
1590 }
1591 
1592 static int
compile_tree(Node * node,regex_t * reg)1593 compile_tree(Node* node, regex_t* reg)
1594 {
1595   int n, type, len, pos, r = 0;
1596 
1597   type = NTYPE(node);
1598   switch (type) {
1599   case NT_LIST:
1600     do {
1601       r = compile_tree(NCAR(node), reg);
1602     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1603     break;
1604 
1605   case NT_ALT:
1606     {
1607       Node* x = node;
1608       len = 0;
1609       do {
1610 	len += compile_length_tree(NCAR(x), reg);
1611 	if (NCDR(x) != NULL) {
1612 	  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1613 	}
1614       } while (IS_NOT_NULL(x = NCDR(x)));
1615       pos = reg->used + len;  /* goal position */
1616 
1617       do {
1618 	len = compile_length_tree(NCAR(node), reg);
1619 	if (IS_NOT_NULL(NCDR(node))) {
1620 	  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1621 	  if (r) break;
1622 	}
1623 	r = compile_tree(NCAR(node), reg);
1624 	if (r) break;
1625 	if (IS_NOT_NULL(NCDR(node))) {
1626 	  len = pos - (reg->used + SIZE_OP_JUMP);
1627 	  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1628 	  if (r) break;
1629 	}
1630       } while (IS_NOT_NULL(node = NCDR(node)));
1631     }
1632     break;
1633 
1634   case NT_STR:
1635     if (NSTRING_IS_RAW(node))
1636       r = compile_string_raw_node(NSTR(node), reg);
1637     else
1638       r = compile_string_node(node, reg);
1639     break;
1640 
1641   case NT_CCLASS:
1642     r = compile_cclass_node(NCCLASS(node), reg);
1643     break;
1644 
1645   case NT_CTYPE:
1646     {
1647       int op;
1648 
1649       switch (NCTYPE(node)->ctype) {
1650       case ONIGENC_CTYPE_WORD:
1651 	if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1652 	else                         op = OP_WORD;
1653 	break;
1654       default:
1655 	return ONIGERR_TYPE_BUG;
1656 	break;
1657       }
1658       r = add_opcode(reg, op);
1659     }
1660     break;
1661 
1662   case NT_CANY:
1663     if (IS_MULTILINE(reg->options))
1664       r = add_opcode(reg, OP_ANYCHAR_ML);
1665     else
1666       r = add_opcode(reg, OP_ANYCHAR);
1667     break;
1668 
1669   case NT_BREF:
1670     {
1671       BRefNode* br = NBREF(node);
1672 
1673 #ifdef USE_BACKREF_WITH_LEVEL
1674       if (IS_BACKREF_NEST_LEVEL(br)) {
1675 	r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1676 	if (r) return r;
1677 	r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1678 	if (r) return r;
1679 	r = add_length(reg, br->nest_level);
1680 	if (r) return r;
1681 
1682 	goto add_bacref_mems;
1683       }
1684       else
1685 #endif
1686       if (br->back_num == 1) {
1687 	n = br->back_static[0];
1688 	if (IS_IGNORECASE(reg->options)) {
1689 	  r = add_opcode(reg, OP_BACKREFN_IC);
1690 	  if (r) return r;
1691 	  r = add_mem_num(reg, n);
1692 	}
1693 	else {
1694 	  switch (n) {
1695 	  case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1696 	  case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1697 	  default:
1698 	    r = add_opcode(reg, OP_BACKREFN);
1699 	    if (r) return r;
1700 	    r = add_mem_num(reg, n);
1701 	    break;
1702 	  }
1703 	}
1704       }
1705       else {
1706 	int i;
1707 	int* p;
1708 
1709         if (IS_IGNORECASE(reg->options)) {
1710           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1711         }
1712         else {
1713           r = add_opcode(reg, OP_BACKREF_MULTI);
1714         }
1715 	if (r) return r;
1716 
1717 #ifdef USE_BACKREF_WITH_LEVEL
1718       add_bacref_mems:
1719 #endif
1720 	r = add_length(reg, br->back_num);
1721 	if (r) return r;
1722 	p = BACKREFS_P(br);
1723 	for (i = br->back_num - 1; i >= 0; i--) {
1724 	  r = add_mem_num(reg, p[i]);
1725 	  if (r) return r;
1726 	}
1727       }
1728     }
1729     break;
1730 
1731 #ifdef USE_SUBEXP_CALL
1732   case NT_CALL:
1733     r = compile_call(NCALL(node), reg);
1734     break;
1735 #endif
1736 
1737   case NT_QTFR:
1738     r = compile_quantifier_node(NQTFR(node), reg);
1739     break;
1740 
1741   case NT_ENCLOSE:
1742     r = compile_enclose_node(NENCLOSE(node), reg);
1743     break;
1744 
1745   case NT_ANCHOR:
1746     r = compile_anchor_node(NANCHOR(node), reg);
1747     break;
1748 
1749   default:
1750 #ifdef ONIG_DEBUG
1751     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1752 #endif
1753     break;
1754   }
1755 
1756   return r;
1757 }
1758 
1759 #ifdef USE_NAMED_GROUP
1760 
1761 static int
noname_disable_map(Node ** plink,GroupNumRemap * map,int * counter)1762 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1763 {
1764   int r = 0;
1765   Node* node = *plink;
1766 
1767   switch (NTYPE(node)) {
1768   case NT_LIST:
1769   case NT_ALT:
1770     do {
1771       r = noname_disable_map(&(NCAR(node)), map, counter);
1772     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1773     break;
1774 
1775   case NT_QTFR:
1776     {
1777       Node** ptarget = &(NQTFR(node)->target);
1778       Node*  old = *ptarget;
1779       r = noname_disable_map(ptarget, map, counter);
1780       if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1781 	onig_reduce_nested_quantifier(node, *ptarget);
1782       }
1783     }
1784     break;
1785 
1786   case NT_ENCLOSE:
1787     {
1788       EncloseNode* en = NENCLOSE(node);
1789       if (en->type == ENCLOSE_MEMORY) {
1790 	if (IS_ENCLOSE_NAMED_GROUP(en)) {
1791 	  (*counter)++;
1792 	  map[en->regnum].new_val = *counter;
1793 	  en->regnum = *counter;
1794 	  r = noname_disable_map(&(en->target), map, counter);
1795 	}
1796 	else {
1797 	  *plink = en->target;
1798 	  en->target = NULL_NODE;
1799 	  onig_node_free(node);
1800 	  r = noname_disable_map(plink, map, counter);
1801 	}
1802       }
1803       else
1804 	r = noname_disable_map(&(en->target), map, counter);
1805     }
1806     break;
1807 
1808   default:
1809     break;
1810   }
1811 
1812   return r;
1813 }
1814 
1815 static int
renumber_node_backref(Node * node,GroupNumRemap * map)1816 renumber_node_backref(Node* node, GroupNumRemap* map)
1817 {
1818   int i, pos, n, old_num;
1819   int *backs;
1820   BRefNode* bn = NBREF(node);
1821 
1822   if (! IS_BACKREF_NAME_REF(bn))
1823     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1824 
1825   old_num = bn->back_num;
1826   if (IS_NULL(bn->back_dynamic))
1827     backs = bn->back_static;
1828   else
1829     backs = bn->back_dynamic;
1830 
1831   for (i = 0, pos = 0; i < old_num; i++) {
1832     n = map[backs[i]].new_val;
1833     if (n > 0) {
1834       backs[pos] = n;
1835       pos++;
1836     }
1837   }
1838 
1839   bn->back_num = pos;
1840   return 0;
1841 }
1842 
1843 static int
renumber_by_map(Node * node,GroupNumRemap * map)1844 renumber_by_map(Node* node, GroupNumRemap* map)
1845 {
1846   int r = 0;
1847 
1848   switch (NTYPE(node)) {
1849   case NT_LIST:
1850   case NT_ALT:
1851     do {
1852       r = renumber_by_map(NCAR(node), map);
1853     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1854     break;
1855   case NT_QTFR:
1856     r = renumber_by_map(NQTFR(node)->target, map);
1857     break;
1858   case NT_ENCLOSE:
1859     r = renumber_by_map(NENCLOSE(node)->target, map);
1860     break;
1861 
1862   case NT_BREF:
1863     r = renumber_node_backref(node, map);
1864     break;
1865 
1866   default:
1867     break;
1868   }
1869 
1870   return r;
1871 }
1872 
1873 static int
numbered_ref_check(Node * node)1874 numbered_ref_check(Node* node)
1875 {
1876   int r = 0;
1877 
1878   switch (NTYPE(node)) {
1879   case NT_LIST:
1880   case NT_ALT:
1881     do {
1882       r = numbered_ref_check(NCAR(node));
1883     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1884     break;
1885   case NT_QTFR:
1886     r = numbered_ref_check(NQTFR(node)->target);
1887     break;
1888   case NT_ENCLOSE:
1889     r = numbered_ref_check(NENCLOSE(node)->target);
1890     break;
1891 
1892   case NT_BREF:
1893     if (! IS_BACKREF_NAME_REF(NBREF(node)))
1894       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1895     break;
1896 
1897   default:
1898     break;
1899   }
1900 
1901   return r;
1902 }
1903 
1904 static int
disable_noname_group_capture(Node ** root,regex_t * reg,ScanEnv * env)1905 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1906 {
1907   int r, i, pos, counter;
1908   int Result;
1909   BitStatusType loc;
1910   GroupNumRemap* map;
1911 
1912   map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
1913   CHECK_NULL_RETURN_MEMERR(map);
1914   for (i = 1; i <= env->num_mem; i++) {
1915     map[i].new_val = 0;
1916   }
1917   counter = 0;
1918   r = noname_disable_map(root, map, &counter);
1919   if (r != 0) return r;
1920 
1921   r = renumber_by_map(*root, map);
1922   if (r != 0) return r;
1923 
1924   for (i = 1, pos = 1; i <= env->num_mem; i++) {
1925     if (map[i].new_val > 0) {
1926       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1927       pos++;
1928     }
1929   }
1930 
1931   loc = env->capture_history;
1932   BIT_STATUS_CLEAR(env->capture_history);
1933   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1934     if (BIT_STATUS_AT(loc, i)) {
1935       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
1936     }
1937   }
1938 
1939   env->num_mem = env->num_named;
1940   reg->num_mem = env->num_named;
1941 
1942   Result = onig_renumber_name_table(reg, map);
1943   xfree(map);
1944   return Result;
1945 }
1946 #endif /* USE_NAMED_GROUP */
1947 
1948 #ifdef USE_SUBEXP_CALL
1949 static int
unset_addr_list_fix(UnsetAddrList * uslist,regex_t * reg)1950 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1951 {
1952   int i, offset;
1953   EncloseNode* en;
1954   AbsAddrType addr;
1955 
1956   for (i = 0; i < uslist->num; i++) {
1957     en = NENCLOSE(uslist->us[i].target);
1958     if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1959     addr = en->call_addr;
1960     offset = uslist->us[i].offset;
1961 
1962     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1963   }
1964   return 0;
1965 }
1966 #endif
1967 
1968 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1969 static int
quantifiers_memory_node_info(Node * node)1970 quantifiers_memory_node_info(Node* node)
1971 {
1972   int r = 0;
1973 
1974   switch (NTYPE(node)) {
1975   case NT_LIST:
1976   case NT_ALT:
1977     {
1978       int v;
1979       do {
1980 	v = quantifiers_memory_node_info(NCAR(node));
1981 	if (v > r) r = v;
1982       } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
1983     }
1984     break;
1985 
1986 #ifdef USE_SUBEXP_CALL
1987   case NT_CALL:
1988     if (IS_CALL_RECURSION(NCALL(node))) {
1989       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
1990     }
1991     else
1992       r = quantifiers_memory_node_info(NCALL(node)->target);
1993     break;
1994 #endif
1995 
1996   case NT_QTFR:
1997     {
1998       QtfrNode* qn = NQTFR(node);
1999       if (qn->upper != 0) {
2000 	r = quantifiers_memory_node_info(qn->target);
2001       }
2002     }
2003     break;
2004 
2005   case NT_ENCLOSE:
2006     {
2007       EncloseNode* en = NENCLOSE(node);
2008       switch (en->type) {
2009       case ENCLOSE_MEMORY:
2010 	return NQ_TARGET_IS_EMPTY_MEM;
2011 	break;
2012 
2013       case ENCLOSE_OPTION:
2014       case ENCLOSE_STOP_BACKTRACK:
2015 	r = quantifiers_memory_node_info(en->target);
2016 	break;
2017       default:
2018 	break;
2019       }
2020     }
2021     break;
2022 
2023   case NT_BREF:
2024   case NT_STR:
2025   case NT_CTYPE:
2026   case NT_CCLASS:
2027   case NT_CANY:
2028   case NT_ANCHOR:
2029   default:
2030     break;
2031   }
2032 
2033   return r;
2034 }
2035 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2036 
2037 static int
get_min_match_length(Node * node,OnigDistance * min,ScanEnv * env)2038 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2039 {
2040   OnigDistance tmin;
2041   int r = 0;
2042 
2043   *min = 0;
2044   switch (NTYPE(node)) {
2045   case NT_BREF:
2046     {
2047       int i;
2048       int* backs;
2049       Node** nodes = SCANENV_MEM_NODES(env);
2050       BRefNode* br = NBREF(node);
2051       if (br->state & NST_RECURSION) break;
2052 
2053       backs = BACKREFS_P(br);
2054       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2055       r = get_min_match_length(nodes[backs[0]], min, env);
2056       if (r != 0) break;
2057       for (i = 1; i < br->back_num; i++) {
2058 	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2059 	r = get_min_match_length(nodes[backs[i]], &tmin, env);
2060 	if (r != 0) break;
2061 	if (*min > tmin) *min = tmin;
2062       }
2063     }
2064     break;
2065 
2066 #ifdef USE_SUBEXP_CALL
2067   case NT_CALL:
2068     if (IS_CALL_RECURSION(NCALL(node))) {
2069       EncloseNode* en = NENCLOSE(NCALL(node)->target);
2070       if (IS_ENCLOSE_MIN_FIXED(en))
2071 	*min = en->min_len;
2072     }
2073     else
2074       r = get_min_match_length(NCALL(node)->target, min, env);
2075     break;
2076 #endif
2077 
2078   case NT_LIST:
2079     do {
2080       r = get_min_match_length(NCAR(node), &tmin, env);
2081       if (r == 0) *min += tmin;
2082     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2083     break;
2084 
2085   case NT_ALT:
2086     {
2087       Node *x, *y;
2088       y = node;
2089       do {
2090 	x = NCAR(y);
2091 	r = get_min_match_length(x, &tmin, env);
2092 	if (r != 0) break;
2093 	if (y == node) *min = tmin;
2094 	else if (*min > tmin) *min = tmin;
2095       } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2096     }
2097     break;
2098 
2099   case NT_STR:
2100     {
2101       StrNode* sn = NSTR(node);
2102       *min = (OnigDistance)(sn->end - sn->s);
2103     }
2104     break;
2105 
2106   case NT_CTYPE:
2107     *min = 1;
2108     break;
2109 
2110   case NT_CCLASS:
2111   case NT_CANY:
2112     *min = 1;
2113     break;
2114 
2115   case NT_QTFR:
2116     {
2117       QtfrNode* qn = NQTFR(node);
2118 
2119       if (qn->lower > 0) {
2120 	r = get_min_match_length(qn->target, min, env);
2121 	if (r == 0)
2122 	  *min = distance_multiply(*min, qn->lower);
2123       }
2124     }
2125     break;
2126 
2127   case NT_ENCLOSE:
2128     {
2129       EncloseNode* en = NENCLOSE(node);
2130       switch (en->type) {
2131       case ENCLOSE_MEMORY:
2132 #ifdef USE_SUBEXP_CALL
2133 	if (IS_ENCLOSE_MIN_FIXED(en))
2134 	  *min = en->min_len;
2135 	else {
2136 	  r = get_min_match_length(en->target, min, env);
2137 	  if (r == 0) {
2138 	    en->min_len = *min;
2139 	    SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2140 	  }
2141 	}
2142 	break;
2143 #endif
2144       case ENCLOSE_OPTION:
2145       case ENCLOSE_STOP_BACKTRACK:
2146 	r = get_min_match_length(en->target, min, env);
2147 	break;
2148       }
2149     }
2150     break;
2151 
2152   case NT_ANCHOR:
2153   default:
2154     break;
2155   }
2156 
2157   return r;
2158 }
2159 
2160 static int
get_max_match_length(Node * node,OnigDistance * max,ScanEnv * env)2161 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2162 {
2163   OnigDistance tmax;
2164   int r = 0;
2165 
2166   *max = 0;
2167   switch (NTYPE(node)) {
2168   case NT_LIST:
2169     do {
2170       r = get_max_match_length(NCAR(node), &tmax, env);
2171       if (r == 0)
2172 	*max = distance_add(*max, tmax);
2173     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2174     break;
2175 
2176   case NT_ALT:
2177     do {
2178       r = get_max_match_length(NCAR(node), &tmax, env);
2179       if (r == 0 && *max < tmax) *max = tmax;
2180     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2181     break;
2182 
2183   case NT_STR:
2184     {
2185       StrNode* sn = NSTR(node);
2186       *max = (OnigDistance)(sn->end - sn->s);
2187     }
2188     break;
2189 
2190   case NT_CTYPE:
2191     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2192     break;
2193 
2194   case NT_CCLASS:
2195   case NT_CANY:
2196     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2197     break;
2198 
2199   case NT_BREF:
2200     {
2201       int i;
2202       int* backs;
2203       Node** nodes = SCANENV_MEM_NODES(env);
2204       BRefNode* br = NBREF(node);
2205       if (br->state & NST_RECURSION) {
2206 	*max = ONIG_INFINITE_DISTANCE;
2207 	break;
2208       }
2209       backs = BACKREFS_P(br);
2210       for (i = 0; i < br->back_num; i++) {
2211 	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2212 	r = get_max_match_length(nodes[backs[i]], &tmax, env);
2213 	if (r != 0) break;
2214 	if (*max < tmax) *max = tmax;
2215       }
2216     }
2217     break;
2218 
2219 #ifdef USE_SUBEXP_CALL
2220   case NT_CALL:
2221     if (! IS_CALL_RECURSION(NCALL(node)))
2222       r = get_max_match_length(NCALL(node)->target, max, env);
2223     else
2224       *max = ONIG_INFINITE_DISTANCE;
2225     break;
2226 #endif
2227 
2228   case NT_QTFR:
2229     {
2230       QtfrNode* qn = NQTFR(node);
2231 
2232       if (qn->upper != 0) {
2233 	r = get_max_match_length(qn->target, max, env);
2234 	if (r == 0 && *max != 0) {
2235 	  if (! IS_REPEAT_INFINITE(qn->upper))
2236 	    *max = distance_multiply(*max, qn->upper);
2237 	  else
2238 	    *max = ONIG_INFINITE_DISTANCE;
2239 	}
2240       }
2241     }
2242     break;
2243 
2244   case NT_ENCLOSE:
2245     {
2246       EncloseNode* en = NENCLOSE(node);
2247       switch (en->type) {
2248       case ENCLOSE_MEMORY:
2249 #ifdef USE_SUBEXP_CALL
2250 	if (IS_ENCLOSE_MAX_FIXED(en))
2251 	  *max = en->max_len;
2252 	else {
2253 	  r = get_max_match_length(en->target, max, env);
2254 	  if (r == 0) {
2255 	    en->max_len = *max;
2256 	    SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2257 	  }
2258 	}
2259 	break;
2260 #endif
2261       case ENCLOSE_OPTION:
2262       case ENCLOSE_STOP_BACKTRACK:
2263 	r = get_max_match_length(en->target, max, env);
2264 	break;
2265       }
2266     }
2267     break;
2268 
2269   case NT_ANCHOR:
2270   default:
2271     break;
2272   }
2273 
2274   return r;
2275 }
2276 
2277 #define GET_CHAR_LEN_VARLEN           -1
2278 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2279 
2280 /* fixed size pattern node only */
2281 static int
get_char_length_tree1(Node * node,regex_t * reg,int * len,int level)2282 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2283 {
2284   int tlen;
2285   int r = 0;
2286 
2287   level++;
2288   *len = 0;
2289   switch (NTYPE(node)) {
2290   case NT_LIST:
2291     do {
2292       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2293       if (r == 0)
2294 	*len = distance_add(*len, tlen);
2295     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2296     break;
2297 
2298   case NT_ALT:
2299     {
2300       int tlen2;
2301       int varlen = 0;
2302 
2303       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2304       while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2305 	r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2306 	if (r == 0) {
2307 	  if (tlen != tlen2)
2308 	    varlen = 1;
2309 	}
2310       }
2311       if (r == 0) {
2312 	if (varlen != 0) {
2313 	  if (level == 1)
2314 	    r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2315 	  else
2316 	    r = GET_CHAR_LEN_VARLEN;
2317 	}
2318 	else
2319 	  *len = tlen;
2320       }
2321     }
2322     break;
2323 
2324   case NT_STR:
2325     {
2326       StrNode* sn = NSTR(node);
2327       UChar *s = sn->s;
2328       while (s < sn->end) {
2329 	s += enclen(reg->enc, s);
2330 	(*len)++;
2331       }
2332     }
2333     break;
2334 
2335   case NT_QTFR:
2336     {
2337       QtfrNode* qn = NQTFR(node);
2338       if (qn->lower == qn->upper) {
2339 	r = get_char_length_tree1(qn->target, reg, &tlen, level);
2340 	if (r == 0)
2341 	  *len = distance_multiply(tlen, qn->lower);
2342       }
2343       else
2344 	r = GET_CHAR_LEN_VARLEN;
2345     }
2346     break;
2347 
2348 #ifdef USE_SUBEXP_CALL
2349   case NT_CALL:
2350     if (! IS_CALL_RECURSION(NCALL(node)))
2351       r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2352     else
2353       r = GET_CHAR_LEN_VARLEN;
2354     break;
2355 #endif
2356 
2357   case NT_CTYPE:
2358     *len = 1;
2359     break;
2360 
2361   case NT_CCLASS:
2362   case NT_CANY:
2363     *len = 1;
2364     break;
2365 
2366   case NT_ENCLOSE:
2367     {
2368       EncloseNode* en = NENCLOSE(node);
2369       switch (en->type) {
2370       case ENCLOSE_MEMORY:
2371 #ifdef USE_SUBEXP_CALL
2372 	if (IS_ENCLOSE_CLEN_FIXED(en))
2373 	  *len = en->char_len;
2374 	else {
2375 	  r = get_char_length_tree1(en->target, reg, len, level);
2376 	  if (r == 0) {
2377 	    en->char_len = *len;
2378 	    SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2379 	  }
2380 	}
2381 	break;
2382 #endif
2383       case ENCLOSE_OPTION:
2384       case ENCLOSE_STOP_BACKTRACK:
2385 	r = get_char_length_tree1(en->target, reg, len, level);
2386 	break;
2387       default:
2388 	break;
2389       }
2390     }
2391     break;
2392 
2393   case NT_ANCHOR:
2394     break;
2395 
2396   default:
2397     r = GET_CHAR_LEN_VARLEN;
2398     break;
2399   }
2400 
2401   return r;
2402 }
2403 
2404 static int
get_char_length_tree(Node * node,regex_t * reg,int * len)2405 get_char_length_tree(Node* node, regex_t* reg, int* len)
2406 {
2407   return get_char_length_tree1(node, reg, len, 0);
2408 }
2409 
2410 /* x is not included y ==>  1 : 0 */
2411 static int
is_not_included(Node * x,Node * y,regex_t * reg)2412 is_not_included(Node* x, Node* y, regex_t* reg)
2413 {
2414   int i, len;
2415   OnigCodePoint code;
2416   UChar *p;
2417   int ytype;
2418 
2419  retry:
2420   ytype = NTYPE(y);
2421   switch (NTYPE(x)) {
2422   case NT_CTYPE:
2423     {
2424       switch (ytype) {
2425       case NT_CTYPE:
2426 	if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2427 	    NCTYPE(y)->not   != NCTYPE(x)->not)
2428 	  return 1;
2429 	else
2430 	  return 0;
2431 	break;
2432 
2433       case NT_CCLASS:
2434       swap:
2435 	{
2436 	  Node* tmp;
2437 	  tmp = x; x = y; y = tmp;
2438 	  goto retry;
2439 	}
2440 	break;
2441 
2442       case NT_STR:
2443 	goto swap;
2444 	break;
2445 
2446       default:
2447 	break;
2448       }
2449     }
2450     break;
2451 
2452   case NT_CCLASS:
2453     {
2454       CClassNode* xc = NCCLASS(x);
2455       switch (ytype) {
2456       case NT_CTYPE:
2457 	switch (NCTYPE(y)->ctype) {
2458 	case ONIGENC_CTYPE_WORD:
2459 	  if (NCTYPE(y)->not == 0) {
2460 	    if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2461 	      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2462 		if (BITSET_AT(xc->bs, i)) {
2463 		  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2464 		}
2465 	      }
2466 	      return 1;
2467 	    }
2468 	    return 0;
2469 	  }
2470 	  else {
2471 	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2472 	      if (! IS_CODE_SB_WORD(reg->enc, i)) {
2473 		if (!IS_NCCLASS_NOT(xc)) {
2474 		  if (BITSET_AT(xc->bs, i))
2475 		    return 0;
2476 		}
2477 		else {
2478 		  if (! BITSET_AT(xc->bs, i))
2479 		    return 0;
2480 		}
2481 	      }
2482 	    }
2483 	    return 1;
2484 	  }
2485 	  break;
2486 
2487 	default:
2488 	  break;
2489 	}
2490 	break;
2491 
2492       case NT_CCLASS:
2493 	{
2494 	  int v;
2495 	  CClassNode* yc = NCCLASS(y);
2496 
2497 	  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2498 	    v = BITSET_AT(xc->bs, i);
2499 	    if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2500                 (v == 0 && IS_NCCLASS_NOT(xc))) {
2501 	      v = BITSET_AT(yc->bs, i);
2502 	      if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2503                   (v == 0 && IS_NCCLASS_NOT(yc)))
2504 		return 0;
2505 	    }
2506 	  }
2507 	  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2508 	      (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2509 	    return 1;
2510 	  return 0;
2511 	}
2512 	break;
2513 
2514       case NT_STR:
2515 	goto swap;
2516 	break;
2517 
2518       default:
2519 	break;
2520       }
2521     }
2522     break;
2523 
2524   case NT_STR:
2525     {
2526       StrNode* xs = NSTR(x);
2527       if (NSTRING_LEN(x) == 0)
2528 	break;
2529 
2530       //c = *(xs->s);
2531       switch (ytype) {
2532       case NT_CTYPE:
2533         switch (NCTYPE(y)->ctype) {
2534         case ONIGENC_CTYPE_WORD:
2535           if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2536             return NCTYPE(y)->not;
2537           else
2538             return !(NCTYPE(y)->not);
2539           break;
2540         default:
2541           break;
2542         }
2543         break;
2544 
2545       case NT_CCLASS:
2546         {
2547           CClassNode* cc = NCCLASS(y);
2548 
2549           code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2550                                      xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2551           return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2552         }
2553         break;
2554 
2555       case NT_STR:
2556         {
2557           UChar *q;
2558           StrNode* ys = NSTR(y);
2559           len = NSTRING_LEN(x);
2560           if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2561           if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2562             /* tiny version */
2563             return 0;
2564           }
2565           else {
2566             for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2567               if (*p != *q) return 1;
2568             }
2569           }
2570         }
2571         break;
2572 
2573       default:
2574         break;
2575       }
2576     }
2577     break;
2578 
2579   default:
2580     break;
2581   }
2582 
2583   return 0;
2584 }
2585 
2586 static Node*
get_head_value_node(Node * node,int exact,regex_t * reg)2587 get_head_value_node(Node* node, int exact, regex_t* reg)
2588 {
2589   Node* n = NULL_NODE;
2590 
2591   switch (NTYPE(node)) {
2592   case NT_BREF:
2593   case NT_ALT:
2594   case NT_CANY:
2595 #ifdef USE_SUBEXP_CALL
2596   case NT_CALL:
2597 #endif
2598     break;
2599 
2600   case NT_CTYPE:
2601   case NT_CCLASS:
2602     if (exact == 0) {
2603       n = node;
2604     }
2605     break;
2606 
2607   case NT_LIST:
2608     n = get_head_value_node(NCAR(node), exact, reg);
2609     break;
2610 
2611   case NT_STR:
2612     {
2613       StrNode* sn = NSTR(node);
2614 
2615       if (sn->end <= sn->s)
2616 	break;
2617 
2618       if (exact != 0 &&
2619 	  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2620       }
2621       else {
2622 	n = node;
2623       }
2624     }
2625     break;
2626 
2627   case NT_QTFR:
2628     {
2629       QtfrNode* qn = NQTFR(node);
2630       if (qn->lower > 0) {
2631 	if (IS_NOT_NULL(qn->head_exact))
2632 	  n = qn->head_exact;
2633 	else
2634 	  n = get_head_value_node(qn->target, exact, reg);
2635       }
2636     }
2637     break;
2638 
2639   case NT_ENCLOSE:
2640     {
2641       EncloseNode* en = NENCLOSE(node);
2642       switch (en->type) {
2643       case ENCLOSE_OPTION:
2644 	{
2645 	  OnigOptionType options = reg->options;
2646 
2647 	  reg->options = NENCLOSE(node)->option;
2648 	  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2649 	  reg->options = options;
2650 	}
2651 	break;
2652 
2653       case ENCLOSE_MEMORY:
2654       case ENCLOSE_STOP_BACKTRACK:
2655 	n = get_head_value_node(en->target, exact, reg);
2656 	break;
2657       }
2658     }
2659     break;
2660 
2661   case NT_ANCHOR:
2662     if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2663       n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2664     break;
2665 
2666   default:
2667     break;
2668   }
2669 
2670   return n;
2671 }
2672 
2673 static int
check_type_tree(Node * node,int type_mask,int enclose_mask,int anchor_mask)2674 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2675 {
2676   int type, r = 0;
2677 
2678   type = NTYPE(node);
2679   if ((NTYPE2BIT(type) & type_mask) == 0)
2680     return 1;
2681 
2682   switch (type) {
2683   case NT_LIST:
2684   case NT_ALT:
2685     do {
2686       r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2687 			  anchor_mask);
2688     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2689     break;
2690 
2691   case NT_QTFR:
2692     r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2693 			anchor_mask);
2694     break;
2695 
2696   case NT_ENCLOSE:
2697     {
2698       EncloseNode* en = NENCLOSE(node);
2699       if ((en->type & enclose_mask) == 0)
2700 	return 1;
2701 
2702       r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2703     }
2704     break;
2705 
2706   case NT_ANCHOR:
2707     type = NANCHOR(node)->type;
2708     if ((type & anchor_mask) == 0)
2709       return 1;
2710 
2711     if (NANCHOR(node)->target)
2712       r = check_type_tree(NANCHOR(node)->target,
2713 			  type_mask, enclose_mask, anchor_mask);
2714     break;
2715 
2716   default:
2717     break;
2718   }
2719   return r;
2720 }
2721 
2722 #ifdef USE_SUBEXP_CALL
2723 
2724 #define RECURSION_EXIST       1
2725 #define RECURSION_INFINITE    2
2726 
2727 static int
subexp_inf_recursive_check(Node * node,ScanEnv * env,int head)2728 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2729 {
2730   int type;
2731   int r = 0;
2732 
2733   type = NTYPE(node);
2734   switch (type) {
2735   case NT_LIST:
2736     {
2737       Node *x;
2738       OnigDistance min;
2739       int ret;
2740 
2741       x = node;
2742       do {
2743 	ret = subexp_inf_recursive_check(NCAR(x), env, head);
2744 	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2745 	r |= ret;
2746 	if (head) {
2747 	  ret = get_min_match_length(NCAR(x), &min, env);
2748 	  if (ret != 0) return ret;
2749 	  if (min != 0) head = 0;
2750 	}
2751       } while (IS_NOT_NULL(x = NCDR(x)));
2752     }
2753     break;
2754 
2755   case NT_ALT:
2756     {
2757       int ret;
2758       r = RECURSION_EXIST;
2759       do {
2760 	ret = subexp_inf_recursive_check(NCAR(node), env, head);
2761 	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2762 	r &= ret;
2763       } while (IS_NOT_NULL(node = NCDR(node)));
2764     }
2765     break;
2766 
2767   case NT_QTFR:
2768     r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2769     if (r == RECURSION_EXIST) {
2770       if (NQTFR(node)->lower == 0) r = 0;
2771     }
2772     break;
2773 
2774   case NT_ANCHOR:
2775     {
2776       AnchorNode* an = NANCHOR(node);
2777       switch (an->type) {
2778       case ANCHOR_PREC_READ:
2779       case ANCHOR_PREC_READ_NOT:
2780       case ANCHOR_LOOK_BEHIND:
2781       case ANCHOR_LOOK_BEHIND_NOT:
2782 	r = subexp_inf_recursive_check(an->target, env, head);
2783 	break;
2784       }
2785     }
2786     break;
2787 
2788   case NT_CALL:
2789     r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2790     break;
2791 
2792   case NT_ENCLOSE:
2793     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2794       return 0;
2795     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2796       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2797     else {
2798       SET_ENCLOSE_STATUS(node, NST_MARK2);
2799       r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2800       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2801     }
2802     break;
2803 
2804   default:
2805     break;
2806   }
2807 
2808   return r;
2809 }
2810 
2811 static int
subexp_inf_recursive_check_trav(Node * node,ScanEnv * env)2812 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2813 {
2814   int type;
2815   int r = 0;
2816 
2817   type = NTYPE(node);
2818   switch (type) {
2819   case NT_LIST:
2820   case NT_ALT:
2821     do {
2822       r = subexp_inf_recursive_check_trav(NCAR(node), env);
2823     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2824     break;
2825 
2826   case NT_QTFR:
2827     r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2828     break;
2829 
2830   case NT_ANCHOR:
2831     {
2832       AnchorNode* an = NANCHOR(node);
2833       switch (an->type) {
2834       case ANCHOR_PREC_READ:
2835       case ANCHOR_PREC_READ_NOT:
2836       case ANCHOR_LOOK_BEHIND:
2837       case ANCHOR_LOOK_BEHIND_NOT:
2838 	r = subexp_inf_recursive_check_trav(an->target, env);
2839 	break;
2840       }
2841     }
2842     break;
2843 
2844   case NT_ENCLOSE:
2845     {
2846       EncloseNode* en = NENCLOSE(node);
2847 
2848       if (IS_ENCLOSE_RECURSION(en)) {
2849 	SET_ENCLOSE_STATUS(node, NST_MARK1);
2850 	r = subexp_inf_recursive_check(en->target, env, 1);
2851 	if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2852 	CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2853       }
2854       r = subexp_inf_recursive_check_trav(en->target, env);
2855     }
2856 
2857     break;
2858 
2859   default:
2860     break;
2861   }
2862 
2863   return r;
2864 }
2865 
2866 static int
subexp_recursive_check(Node * node)2867 subexp_recursive_check(Node* node)
2868 {
2869   int r = 0;
2870 
2871   switch (NTYPE(node)) {
2872   case NT_LIST:
2873   case NT_ALT:
2874     do {
2875       r |= subexp_recursive_check(NCAR(node));
2876     } while (IS_NOT_NULL(node = NCDR(node)));
2877     break;
2878 
2879   case NT_QTFR:
2880     r = subexp_recursive_check(NQTFR(node)->target);
2881     break;
2882 
2883   case NT_ANCHOR:
2884     {
2885       AnchorNode* an = NANCHOR(node);
2886       switch (an->type) {
2887       case ANCHOR_PREC_READ:
2888       case ANCHOR_PREC_READ_NOT:
2889       case ANCHOR_LOOK_BEHIND:
2890       case ANCHOR_LOOK_BEHIND_NOT:
2891 	r = subexp_recursive_check(an->target);
2892 	break;
2893       }
2894     }
2895     break;
2896 
2897   case NT_CALL:
2898     r = subexp_recursive_check(NCALL(node)->target);
2899     if (r != 0) SET_CALL_RECURSION(node);
2900     break;
2901 
2902   case NT_ENCLOSE:
2903     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2904       return 0;
2905     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2906       return 1; /* recursion */
2907     else {
2908       SET_ENCLOSE_STATUS(node, NST_MARK2);
2909       r = subexp_recursive_check(NENCLOSE(node)->target);
2910       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2911     }
2912     break;
2913 
2914   default:
2915     break;
2916   }
2917 
2918   return r;
2919 }
2920 
2921 
2922 static int
subexp_recursive_check_trav(Node * node,ScanEnv * env)2923 subexp_recursive_check_trav(Node* node, ScanEnv* env)
2924 {
2925 #define FOUND_CALLED_NODE    1
2926 
2927   int type;
2928   int r = 0;
2929 
2930   type = NTYPE(node);
2931   switch (type) {
2932   case NT_LIST:
2933   case NT_ALT:
2934     {
2935       int ret;
2936       do {
2937 	ret = subexp_recursive_check_trav(NCAR(node), env);
2938 	if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2939 	else if (ret < 0) return ret;
2940       } while (IS_NOT_NULL(node = NCDR(node)));
2941     }
2942     break;
2943 
2944   case NT_QTFR:
2945     r = subexp_recursive_check_trav(NQTFR(node)->target, env);
2946     if (NQTFR(node)->upper == 0) {
2947       if (r == FOUND_CALLED_NODE)
2948 	NQTFR(node)->is_refered = 1;
2949     }
2950     break;
2951 
2952   case NT_ANCHOR:
2953     {
2954       AnchorNode* an = NANCHOR(node);
2955       switch (an->type) {
2956       case ANCHOR_PREC_READ:
2957       case ANCHOR_PREC_READ_NOT:
2958       case ANCHOR_LOOK_BEHIND:
2959       case ANCHOR_LOOK_BEHIND_NOT:
2960 	r = subexp_recursive_check_trav(an->target, env);
2961 	break;
2962       }
2963     }
2964     break;
2965 
2966   case NT_ENCLOSE:
2967     {
2968       EncloseNode* en = NENCLOSE(node);
2969 
2970       if (! IS_ENCLOSE_RECURSION(en)) {
2971 	if (IS_ENCLOSE_CALLED(en)) {
2972 	  SET_ENCLOSE_STATUS(node, NST_MARK1);
2973 	  r = subexp_recursive_check(en->target);
2974 	  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
2975 	  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2976 	}
2977       }
2978       r = subexp_recursive_check_trav(en->target, env);
2979       if (IS_ENCLOSE_CALLED(en))
2980 	r |= FOUND_CALLED_NODE;
2981     }
2982     break;
2983 
2984   default:
2985     break;
2986   }
2987 
2988   return r;
2989 }
2990 
2991 static int
setup_subexp_call(Node * node,ScanEnv * env)2992 setup_subexp_call(Node* node, ScanEnv* env)
2993 {
2994   int type;
2995   int r = 0;
2996 
2997   type = NTYPE(node);
2998   switch (type) {
2999   case NT_LIST:
3000     do {
3001       r = setup_subexp_call(NCAR(node), env);
3002     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3003     break;
3004 
3005   case NT_ALT:
3006     do {
3007       r = setup_subexp_call(NCAR(node), env);
3008     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3009     break;
3010 
3011   case NT_QTFR:
3012     r = setup_subexp_call(NQTFR(node)->target, env);
3013     break;
3014   case NT_ENCLOSE:
3015     r = setup_subexp_call(NENCLOSE(node)->target, env);
3016     break;
3017 
3018   case NT_CALL:
3019     {
3020       CallNode* cn = NCALL(node);
3021       Node** nodes = SCANENV_MEM_NODES(env);
3022 
3023       if (cn->group_num != 0) {
3024 	int gnum = cn->group_num;
3025 
3026 #ifdef USE_NAMED_GROUP
3027 	if (env->num_named > 0 &&
3028 	    IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3029 	    !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3030 	  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3031 	}
3032 #endif
3033 	if (gnum > env->num_mem) {
3034 	  onig_scan_env_set_error_string(env,
3035 		 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3036 	  return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3037 	}
3038 
3039 #ifdef USE_NAMED_GROUP
3040       set_call_attr:
3041 #endif
3042 	cn->target = nodes[cn->group_num];
3043 	if (IS_NULL(cn->target)) {
3044 	  onig_scan_env_set_error_string(env,
3045 		 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3046 	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3047 	}
3048 	SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3049 	BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3050 	cn->unset_addr_list = env->unset_addr_list;
3051       }
3052 #ifdef USE_NAMED_GROUP
3053       else {
3054 	int *refs;
3055 
3056 	int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3057 					   &refs);
3058 	if (n <= 0) {
3059 	  onig_scan_env_set_error_string(env,
3060 		 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3061 	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3062 	}
3063 	else if (n > 1) {
3064 	  onig_scan_env_set_error_string(env,
3065 	    ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3066 	  return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3067 	}
3068 	else {
3069 	  cn->group_num = refs[0];
3070 	  goto set_call_attr;
3071 	}
3072       }
3073 #endif
3074     }
3075     break;
3076 
3077   case NT_ANCHOR:
3078     {
3079       AnchorNode* an = NANCHOR(node);
3080 
3081       switch (an->type) {
3082       case ANCHOR_PREC_READ:
3083       case ANCHOR_PREC_READ_NOT:
3084       case ANCHOR_LOOK_BEHIND:
3085       case ANCHOR_LOOK_BEHIND_NOT:
3086 	r = setup_subexp_call(an->target, env);
3087 	break;
3088       }
3089     }
3090     break;
3091 
3092   default:
3093     break;
3094   }
3095 
3096   return r;
3097 }
3098 #endif
3099 
3100 /* divide different length alternatives in look-behind.
3101   (?<=A|B) ==> (?<=A)|(?<=B)
3102   (?<!A|B) ==> (?<!A)(?<!B)
3103 */
3104 static int
divide_look_behind_alternatives(Node * node)3105 divide_look_behind_alternatives(Node* node)
3106 {
3107   Node *head, *np, *insert_node;
3108   AnchorNode* an = NANCHOR(node);
3109   int anc_type = an->type;
3110 
3111   head = an->target;
3112   np = NCAR(head);
3113   swap_node(node, head);
3114   NCAR(node) = head;
3115   NANCHOR(head)->target = np;
3116 
3117   np = node;
3118   while ((np = NCDR(np)) != NULL_NODE) {
3119     insert_node = onig_node_new_anchor(anc_type);
3120     CHECK_NULL_RETURN_MEMERR(insert_node);
3121     NANCHOR(insert_node)->target = NCAR(np);
3122     NCAR(np) = insert_node;
3123   }
3124 
3125   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3126     np = node;
3127     do {
3128       SET_NTYPE(np, NT_LIST);  /* alt -> list */
3129     } while ((np = NCDR(np)) != NULL_NODE);
3130   }
3131   return 0;
3132 }
3133 
3134 static int
setup_look_behind(Node * node,regex_t * reg,ScanEnv * env)3135 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3136 {
3137   int r, len;
3138   AnchorNode* an = NANCHOR(node);
3139 
3140   r = get_char_length_tree(an->target, reg, &len);
3141   if (r == 0)
3142     an->char_len = len;
3143   else if (r == GET_CHAR_LEN_VARLEN)
3144     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3145   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3146     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3147       r = divide_look_behind_alternatives(node);
3148     else
3149       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3150   }
3151 
3152   return r;
3153 }
3154 
3155 static int
next_setup(Node * node,Node * next_node,regex_t * reg)3156 next_setup(Node* node, Node* next_node, regex_t* reg)
3157 {
3158   int type;
3159 
3160  retry:
3161   type = NTYPE(node);
3162   if (type == NT_QTFR) {
3163     QtfrNode* qn = NQTFR(node);
3164     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3165 #ifdef USE_QTFR_PEEK_NEXT
3166       Node* n = get_head_value_node(next_node, 1, reg);
3167       /* '\0': for UTF-16BE etc... */
3168       if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3169 	qn->next_head_exact = n;
3170       }
3171 #endif
3172       /* automatic posseivation a*b ==> (?>a*)b */
3173       if (qn->lower <= 1) {
3174 	int ttype = NTYPE(qn->target);
3175 	if (IS_NODE_TYPE_SIMPLE(ttype)) {
3176 	  Node *x, *y;
3177 	  x = get_head_value_node(qn->target, 0, reg);
3178 	  if (IS_NOT_NULL(x)) {
3179 	    y = get_head_value_node(next_node,  0, reg);
3180 	    if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3181 	      Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3182 	      CHECK_NULL_RETURN_MEMERR(en);
3183 	      SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3184 	      swap_node(node, en);
3185 	      NENCLOSE(node)->target = en;
3186 	    }
3187 	  }
3188 	}
3189       }
3190     }
3191   }
3192   else if (type == NT_ENCLOSE) {
3193     EncloseNode* en = NENCLOSE(node);
3194     if (en->type == ENCLOSE_MEMORY) {
3195       node = en->target;
3196       goto retry;
3197     }
3198   }
3199   return 0;
3200 }
3201 
3202 
3203 static int
update_string_node_case_fold(regex_t * reg,Node * node)3204 update_string_node_case_fold(regex_t* reg, Node *node)
3205 {
3206   UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3207   UChar *sbuf, *ebuf, *sp;
3208   int r, i, len, sbuf_size;
3209   StrNode* sn = NSTR(node);
3210 
3211   end = sn->end;
3212   sbuf_size = (int)(end - sn->s) * 2;
3213   sbuf = (UChar* )xmalloc(sbuf_size);
3214   CHECK_NULL_RETURN_MEMERR(sbuf);
3215   ebuf = sbuf + sbuf_size;
3216 
3217   sp = sbuf;
3218   p = sn->s;
3219   while (p < end) {
3220     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3221     for (i = 0; i < len; i++) {
3222       if (sp >= ebuf) {
3223         sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
3224         CHECK_NULL_RETURN_MEMERR(sbuf);
3225         sp = sbuf + sbuf_size;
3226         sbuf_size *= 2;
3227         ebuf = sbuf + sbuf_size;
3228       }
3229 
3230       *sp++ = buf[i];
3231     }
3232   }
3233 
3234   r = onig_node_str_set(node, sbuf, sp);
3235   if (r != 0) {
3236     xfree(sbuf);
3237     return r;
3238   }
3239 
3240   xfree(sbuf);
3241   return 0;
3242 }
3243 
3244 static int
expand_case_fold_make_rem_string(Node ** rnode,UChar * s,UChar * end,regex_t * reg)3245 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3246 				 regex_t* reg)
3247 {
3248   int r;
3249   Node *node;
3250 
3251   node = onig_node_new_str(s, end);
3252   if (IS_NULL(node)) return ONIGERR_MEMORY;
3253 
3254   r = update_string_node_case_fold(reg, node);
3255   if (r != 0) {
3256     onig_node_free(node);
3257     return r;
3258   }
3259 
3260   NSTRING_SET_AMBIG(node);
3261   NSTRING_SET_DONT_GET_OPT_INFO(node);
3262   *rnode = node;
3263   return 0;
3264 }
3265 
3266 static int
expand_case_fold_string_alt(int item_num,OnigCaseFoldCodeItem items[],UChar * p,int slen,UChar * end,regex_t * reg,Node ** rnode)3267 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3268 			    UChar *p, int slen, UChar *end,
3269 			    regex_t* reg, Node **rnode)
3270 {
3271   int r, i, j, len, varlen;
3272   Node *anode, *var_anode, *snode, *xnode, *an;
3273   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3274   xnode = NULL_NODE;
3275 
3276   *rnode = var_anode = NULL_NODE;
3277 
3278   varlen = 0;
3279   for (i = 0; i < item_num; i++) {
3280     if (items[i].byte_len != slen) {
3281       varlen = 1;
3282       break;
3283     }
3284   }
3285 
3286   if (varlen != 0) {
3287     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3288     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3289 
3290     xnode = onig_node_new_list(NULL, NULL);
3291     if (IS_NULL(xnode)) goto mem_err;
3292     NCAR(var_anode) = xnode;
3293 
3294     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3295     if (IS_NULL(anode)) goto mem_err;
3296     NCAR(xnode) = anode;
3297   }
3298   else {
3299     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3300     if (IS_NULL(anode)) return ONIGERR_MEMORY;
3301   }
3302 
3303   snode = onig_node_new_str(p, p + slen);
3304   if (IS_NULL(snode)) goto mem_err;
3305 
3306   NCAR(anode) = snode;
3307 
3308   for (i = 0; i < item_num; i++) {
3309     snode = onig_node_new_str(NULL, NULL);
3310     if (IS_NULL(snode)) goto mem_err;
3311 
3312     for (j = 0; j < items[i].code_len; j++) {
3313       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3314       if (len < 0) {
3315 	r = len;
3316 	goto mem_err2;
3317       }
3318 
3319       r = onig_node_str_cat(snode, buf, buf + len);
3320       if (r != 0) goto mem_err2;
3321     }
3322 
3323     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3324     if (IS_NULL(an)) {
3325       goto mem_err2;
3326     }
3327 
3328     if (items[i].byte_len != slen) {
3329       Node *rem = NULL_NODE;
3330       UChar *q = p + items[i].byte_len;
3331 
3332       if (q < end) {
3333         r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3334         if (r != 0) {
3335           onig_node_free(an);
3336           goto mem_err2;
3337         }
3338 
3339         xnode = onig_node_list_add(NULL_NODE, snode);
3340         if (IS_NULL(xnode)) {
3341           onig_node_free(an);
3342           onig_node_free(rem);
3343           goto mem_err2;
3344         }
3345         if (IS_NULL(onig_node_list_add(xnode, rem))) {
3346           onig_node_free(an);
3347           onig_node_free(xnode);
3348           onig_node_free(rem);
3349           goto mem_err;
3350         }
3351 
3352         NCAR(an) = xnode;
3353       }
3354       else {
3355         NCAR(an) = snode;
3356       }
3357 
3358       if (var_anode == NULL) {
3359         onig_node_free(an);
3360         onig_node_free(xnode);
3361         onig_node_free(rem);
3362         goto mem_err2;
3363       }
3364       NCDR(var_anode) = an;
3365       var_anode = an;
3366     }
3367     else {
3368       NCAR(an)     = snode;
3369       NCDR(anode) = an;
3370       anode = an;
3371     }
3372   }
3373 
3374   return varlen;
3375 
3376  mem_err2:
3377   onig_node_free(snode);
3378 
3379  mem_err:
3380   onig_node_free(*rnode);
3381 
3382   return ONIGERR_MEMORY;
3383 }
3384 
3385 static int
expand_case_fold_string(Node * node,regex_t * reg)3386 expand_case_fold_string(Node* node, regex_t* reg)
3387 {
3388 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3389 
3390   int r, n, len, alt_num;
3391   UChar *start, *end, *p;
3392   Node *top_root, *root, *snode, *prev_node;
3393   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3394   StrNode* sn = NSTR(node);
3395 
3396   if (NSTRING_IS_AMBIG(node)) return 0;
3397 
3398   start = sn->s;
3399   end   = sn->end;
3400   if (start >= end) return 0;
3401 
3402   r = 0;
3403   top_root = root = prev_node = snode = NULL_NODE;
3404   alt_num = 1;
3405   p = start;
3406   while (p < end) {
3407     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3408 					   p, end, items);
3409     if (n < 0) {
3410       r = n;
3411       goto err;
3412     }
3413 
3414     len = enclen(reg->enc, p);
3415 
3416     if (n == 0) {
3417       if (IS_NULL(snode)) {
3418 	if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3419 	  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3420 	  if (IS_NULL(root)) {
3421 	    onig_node_free(prev_node);
3422 	    goto mem_err;
3423 	  }
3424 	}
3425 
3426 	prev_node = snode = onig_node_new_str(NULL, NULL);
3427 	if (IS_NULL(snode)) goto mem_err;
3428 	if (IS_NOT_NULL(root)) {
3429 	  if (IS_NULL(onig_node_list_add(root, snode))) {
3430 	    onig_node_free(snode);
3431 	    goto mem_err;
3432 	  }
3433 	}
3434       }
3435 
3436       r = onig_node_str_cat(snode, p, p + len);
3437       if (r != 0) goto err;
3438     }
3439     else {
3440       alt_num *= (n + 1);
3441       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3442 
3443       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3444 	top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3445 	if (IS_NULL(root)) {
3446 	  onig_node_free(prev_node);
3447 	  goto mem_err;
3448 	}
3449       }
3450 
3451       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3452       if (r < 0) goto mem_err;
3453       if (r == 1) {
3454 	if (IS_NULL(root)) {
3455 	  top_root = prev_node;
3456 	}
3457 	else {
3458 	  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3459 	    onig_node_free(prev_node);
3460 	    goto mem_err;
3461 	  }
3462 	}
3463 
3464 	root = NCAR(prev_node);
3465       }
3466       else { /* r == 0 */
3467 	if (IS_NOT_NULL(root)) {
3468 	  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3469 	    onig_node_free(prev_node);
3470 	    goto mem_err;
3471 	  }
3472 	}
3473       }
3474 
3475       snode = NULL_NODE;
3476     }
3477 
3478     p += len;
3479   }
3480 
3481   if (p < end) {
3482     Node *srem;
3483 
3484     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3485     if (r != 0) goto mem_err;
3486 
3487     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3488       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3489       if (IS_NULL(root)) {
3490 	onig_node_free(srem);
3491 	onig_node_free(prev_node);
3492 	goto mem_err;
3493       }
3494     }
3495 
3496     if (IS_NULL(root)) {
3497       prev_node = srem;
3498     }
3499     else {
3500       if (IS_NULL(onig_node_list_add(root, srem))) {
3501 	onig_node_free(srem);
3502 	goto mem_err;
3503       }
3504     }
3505   }
3506 
3507   /* ending */
3508   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3509   swap_node(node, top_root);
3510   onig_node_free(top_root);
3511   return 0;
3512 
3513  mem_err:
3514   r = ONIGERR_MEMORY;
3515 
3516  err:
3517   onig_node_free(top_root);
3518   return r;
3519 }
3520 
3521 
3522 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3523 
3524 #define CEC_THRES_NUM_BIG_REPEAT         512
3525 #define CEC_INFINITE_NUM          0x7fffffff
3526 
3527 #define CEC_IN_INFINITE_REPEAT    (1<<0)
3528 #define CEC_IN_FINITE_REPEAT      (1<<1)
3529 #define CEC_CONT_BIG_REPEAT       (1<<2)
3530 
3531 static int
setup_comb_exp_check(Node * node,int state,ScanEnv * env)3532 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3533 {
3534   int type;
3535   int r = state;
3536 
3537   type = NTYPE(node);
3538   switch (type) {
3539   case NT_LIST:
3540     {
3541       Node* prev = NULL_NODE;
3542       do {
3543 	r = setup_comb_exp_check(NCAR(node), r, env);
3544 	prev = NCAR(node);
3545       } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3546     }
3547     break;
3548 
3549   case NT_ALT:
3550     {
3551       int ret;
3552       do {
3553 	ret = setup_comb_exp_check(NCAR(node), state, env);
3554 	r |= ret;
3555       } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3556     }
3557     break;
3558 
3559   case NT_QTFR:
3560     {
3561       int child_state = state;
3562       int add_state = 0;
3563       QtfrNode* qn = NQTFR(node);
3564       Node* target = qn->target;
3565       int var_num;
3566 
3567       if (! IS_REPEAT_INFINITE(qn->upper)) {
3568 	if (qn->upper > 1) {
3569 	  /* {0,1}, {1,1} are allowed */
3570 	  child_state |= CEC_IN_FINITE_REPEAT;
3571 
3572 	  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3573 	  if (env->backrefed_mem == 0) {
3574 	    if (NTYPE(qn->target) == NT_ENCLOSE) {
3575 	      EncloseNode* en = NENCLOSE(qn->target);
3576 	      if (en->type == ENCLOSE_MEMORY) {
3577 		if (NTYPE(en->target) == NT_QTFR) {
3578 		  QtfrNode* q = NQTFR(en->target);
3579 		  if (IS_REPEAT_INFINITE(q->upper)
3580 		      && q->greedy == qn->greedy) {
3581 		    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3582 		    if (qn->upper == 1)
3583 		      child_state = state;
3584 		  }
3585 		}
3586 	      }
3587 	    }
3588 	  }
3589 	}
3590       }
3591 
3592       if (state & CEC_IN_FINITE_REPEAT) {
3593 	qn->comb_exp_check_num = -1;
3594       }
3595       else {
3596 	if (IS_REPEAT_INFINITE(qn->upper)) {
3597 	  var_num = CEC_INFINITE_NUM;
3598 	  child_state |= CEC_IN_INFINITE_REPEAT;
3599 	}
3600 	else {
3601 	  var_num = qn->upper - qn->lower;
3602 	}
3603 
3604 	if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3605 	  add_state |= CEC_CONT_BIG_REPEAT;
3606 
3607 	if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3608 	    ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3609 	     var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3610 	  if (qn->comb_exp_check_num == 0) {
3611 	    env->num_comb_exp_check++;
3612 	    qn->comb_exp_check_num = env->num_comb_exp_check;
3613 	    if (env->curr_max_regnum > env->comb_exp_max_regnum)
3614 	      env->comb_exp_max_regnum = env->curr_max_regnum;
3615 	  }
3616 	}
3617       }
3618 
3619       r = setup_comb_exp_check(target, child_state, env);
3620       r |= add_state;
3621     }
3622     break;
3623 
3624   case NT_ENCLOSE:
3625     {
3626       EncloseNode* en = NENCLOSE(node);
3627 
3628       switch (en->type) {
3629       case ENCLOSE_MEMORY:
3630 	{
3631 	  if (env->curr_max_regnum < en->regnum)
3632 	    env->curr_max_regnum = en->regnum;
3633 
3634 	  r = setup_comb_exp_check(en->target, state, env);
3635 	}
3636 	break;
3637 
3638       default:
3639 	r = setup_comb_exp_check(en->target, state, env);
3640 	break;
3641       }
3642     }
3643     break;
3644 
3645 #ifdef USE_SUBEXP_CALL
3646   case NT_CALL:
3647     if (IS_CALL_RECURSION(NCALL(node)))
3648       env->has_recursion = 1;
3649     else
3650       r = setup_comb_exp_check(NCALL(node)->target, state, env);
3651     break;
3652 #endif
3653 
3654   default:
3655     break;
3656   }
3657 
3658   return r;
3659 }
3660 #endif
3661 
3662 #define IN_ALT        (1<<0)
3663 #define IN_NOT        (1<<1)
3664 #define IN_REPEAT     (1<<2)
3665 #define IN_VAR_REPEAT (1<<3)
3666 
3667 /* setup_tree does the following work.
3668  1. check empty loop. (set qn->target_empty_info)
3669  2. expand ignore-case in char class.
3670  3. set memory status bit flags. (reg->mem_stats)
3671  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3672  5. find invalid patterns in look-behind.
3673  6. expand repeated string.
3674  */
3675 static int
setup_tree(Node * node,regex_t * reg,int state,ScanEnv * env)3676 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3677 {
3678   int type;
3679   int r = 0;
3680 
3681   type = NTYPE(node);
3682   switch (type) {
3683   case NT_LIST:
3684     {
3685       Node* prev = NULL_NODE;
3686       do {
3687 	r = setup_tree(NCAR(node), reg, state, env);
3688 	if (IS_NOT_NULL(prev) && r == 0) {
3689 	  r = next_setup(prev, NCAR(node), reg);
3690 	}
3691 	prev = NCAR(node);
3692       } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3693     }
3694     break;
3695 
3696   case NT_ALT:
3697     do {
3698       r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3699     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3700     break;
3701 
3702   case NT_CCLASS:
3703     break;
3704 
3705   case NT_STR:
3706     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3707       r = expand_case_fold_string(node, reg);
3708     }
3709     break;
3710 
3711   case NT_CTYPE:
3712   case NT_CANY:
3713     break;
3714 
3715 #ifdef USE_SUBEXP_CALL
3716   case NT_CALL:
3717     break;
3718 #endif
3719 
3720   case NT_BREF:
3721     {
3722       int i;
3723       int* p;
3724       Node** nodes = SCANENV_MEM_NODES(env);
3725       BRefNode* br = NBREF(node);
3726       p = BACKREFS_P(br);
3727       for (i = 0; i < br->back_num; i++) {
3728 	if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3729 	BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3730 	BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3731 #ifdef USE_BACKREF_WITH_LEVEL
3732 	if (IS_BACKREF_NEST_LEVEL(br)) {
3733 	  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3734 	}
3735 #endif
3736 	SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3737       }
3738     }
3739     break;
3740 
3741   case NT_QTFR:
3742     {
3743       OnigDistance d;
3744       QtfrNode* qn = NQTFR(node);
3745       Node* target = qn->target;
3746 
3747       if ((state & IN_REPEAT) != 0) {
3748         qn->state |= NST_IN_REPEAT;
3749       }
3750 
3751       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3752 	r = get_min_match_length(target, &d, env);
3753 	if (r) break;
3754 	if (d == 0) {
3755 	  qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3756 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3757 	  r = quantifiers_memory_node_info(target);
3758 	  if (r < 0) break;
3759 	  if (r > 0) {
3760 	    qn->target_empty_info = r;
3761 	  }
3762 #endif
3763 #if 0
3764 	  r = get_max_match_length(target, &d, env);
3765 	  if (r == 0 && d == 0) {
3766 	    /*  ()* ==> ()?, ()+ ==> ()  */
3767 	    qn->upper = 1;
3768 	    if (qn->lower > 1) qn->lower = 1;
3769 	    if (NTYPE(target) == NT_STR) {
3770 	      qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3771 	    }
3772 	  }
3773 #endif
3774 	}
3775       }
3776 
3777       state |= IN_REPEAT;
3778       if (qn->lower != qn->upper)
3779 	state |= IN_VAR_REPEAT;
3780       r = setup_tree(target, reg, state, env);
3781       if (r) break;
3782 
3783       /* expand string */
3784 #define EXPAND_STRING_MAX_LENGTH  100
3785       if (NTYPE(target) == NT_STR) {
3786 	if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
3787 	    qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3788 	  int len = NSTRING_LEN(target);
3789 	  StrNode* sn = NSTR(target);
3790 
3791 	  if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3792 	    int i, n = qn->lower;
3793 	    onig_node_conv_to_str_node(node, NSTR(target)->flag);
3794 	    for (i = 0; i < n; i++) {
3795 	      r = onig_node_str_cat(node, sn->s, sn->end);
3796 	      if (r) break;
3797 	    }
3798 	    onig_node_free(target);
3799 	    break; /* break case NT_QTFR: */
3800 	  }
3801 	}
3802       }
3803 
3804 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3805       if (qn->greedy && (qn->target_empty_info != 0)) {
3806 	if (NTYPE(target) == NT_QTFR) {
3807 	  QtfrNode* tqn = NQTFR(target);
3808 	  if (IS_NOT_NULL(tqn->head_exact)) {
3809 	    qn->head_exact  = tqn->head_exact;
3810 	    tqn->head_exact = NULL;
3811 	  }
3812 	}
3813 	else {
3814 	  qn->head_exact = get_head_value_node(qn->target, 1, reg);
3815 	}
3816       }
3817 #endif
3818     }
3819     break;
3820 
3821   case NT_ENCLOSE:
3822     {
3823       EncloseNode* en = NENCLOSE(node);
3824 
3825       switch (en->type) {
3826       case ENCLOSE_OPTION:
3827 	{
3828 	  OnigOptionType options = reg->options;
3829 	  reg->options = NENCLOSE(node)->option;
3830 	  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
3831 	  reg->options = options;
3832 	}
3833 	break;
3834 
3835       case ENCLOSE_MEMORY:
3836 	if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
3837 	  BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
3838 	  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3839 	}
3840         r = setup_tree(en->target, reg, state, env);
3841         break;
3842 
3843       case ENCLOSE_STOP_BACKTRACK:
3844 	{
3845 	  Node* target = en->target;
3846 	  r = setup_tree(target, reg, state, env);
3847 	  if (NTYPE(target) == NT_QTFR) {
3848 	    QtfrNode* tqn = NQTFR(target);
3849 	    if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
3850 		tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
3851 	      int qtype = NTYPE(tqn->target);
3852 	      if (IS_NODE_TYPE_SIMPLE(qtype))
3853 		SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
3854 	    }
3855 	  }
3856 	}
3857 	break;
3858       }
3859     }
3860     break;
3861 
3862   case NT_ANCHOR:
3863     {
3864       AnchorNode* an = NANCHOR(node);
3865 
3866       switch (an->type) {
3867       case ANCHOR_PREC_READ:
3868 	r = setup_tree(an->target, reg, state, env);
3869 	break;
3870       case ANCHOR_PREC_READ_NOT:
3871 	r = setup_tree(an->target, reg, (state | IN_NOT), env);
3872 	break;
3873 
3874 /* allowed node types in look-behind */
3875 #define ALLOWED_TYPE_IN_LB  \
3876   ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3877     BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3878 
3879 #define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY )
3880 #define ALLOWED_ENCLOSE_IN_LB_NOT   0
3881 
3882 #define ALLOWED_ANCHOR_IN_LB \
3883 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3884 #define ALLOWED_ANCHOR_IN_LB_NOT \
3885 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3886 
3887       case ANCHOR_LOOK_BEHIND:
3888 	{
3889 	  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3890 			      ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
3891 	  if (r < 0) return r;
3892 	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3893 	  r = setup_look_behind(node, reg, env);
3894 	  if (r != 0) return r;
3895 	  r = setup_tree(an->target, reg, state, env);
3896 	}
3897 	break;
3898 
3899       case ANCHOR_LOOK_BEHIND_NOT:
3900 	{
3901 	  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3902 		      ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
3903 	  if (r < 0) return r;
3904 	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3905 	  r = setup_look_behind(node, reg, env);
3906 	  if (r != 0) return r;
3907 	  r = setup_tree(an->target, reg, (state | IN_NOT), env);
3908 	}
3909 	break;
3910       }
3911     }
3912     break;
3913 
3914   default:
3915     break;
3916   }
3917 
3918   return r;
3919 }
3920 
3921 /* set skip map for Boyer-Moor search */
3922 static int
set_bm_skip(UChar * s,UChar * end,OnigEncoding enc ARG_UNUSED,UChar skip[],int ** int_skip)3923 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3924 	    UChar skip[], int** int_skip)
3925 {
3926   int i, len;
3927 
3928   len = (int)(end - s);
3929   if (len < ONIG_CHAR_TABLE_SIZE) {
3930     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar)len;
3931 
3932     for (i = 0; i < len - 1; i++)
3933       skip[s[i]] = (UChar)(len - 1 - i);
3934   }
3935   else {
3936     if (IS_NULL(*int_skip)) {
3937       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3938       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
3939     }
3940     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
3941 
3942     for (i = 0; i < len - 1; i++)
3943       (*int_skip)[s[i]] = len - 1 - i;
3944   }
3945   return 0;
3946 }
3947 
3948 #define OPT_EXACT_MAXLEN   24
3949 
3950 typedef struct {
3951   OnigDistance min;  /* min byte length */
3952   OnigDistance max;  /* max byte length */
3953 } MinMaxLen;
3954 
3955 typedef struct {
3956   MinMaxLen        mmd;
3957   OnigEncoding     enc;
3958   OnigOptionType   options;
3959   OnigCaseFoldType case_fold_flag;
3960   ScanEnv*         scan_env;
3961 } OptEnv;
3962 
3963 typedef struct {
3964   int left_anchor;
3965   int right_anchor;
3966 } OptAncInfo;
3967 
3968 typedef struct {
3969   MinMaxLen  mmd; /* info position */
3970   OptAncInfo anc;
3971 
3972   int   reach_end;
3973   int   ignore_case;
3974   int   len;
3975   UChar s[OPT_EXACT_MAXLEN];
3976 } OptExactInfo;
3977 
3978 typedef struct {
3979   MinMaxLen mmd; /* info position */
3980   OptAncInfo anc;
3981 
3982   int   value;      /* weighted value */
3983   UChar map[ONIG_CHAR_TABLE_SIZE];
3984 } OptMapInfo;
3985 
3986 typedef struct {
3987   MinMaxLen    len;
3988 
3989   OptAncInfo   anc;
3990   OptExactInfo exb;    /* boundary */
3991   OptExactInfo exm;    /* middle */
3992   OptExactInfo expr;   /* prec read (?=...) */
3993 
3994   OptMapInfo   map;   /* boundary */
3995 } NodeOptInfo;
3996 
3997 
3998 static int
map_position_value(OnigEncoding enc,int i)3999 map_position_value(OnigEncoding enc, int i)
4000 {
4001   static const short int ByteValTable[] = {
4002      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4003      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4004     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4005      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4006      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4007      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4008      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4009      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4010   };
4011 
4012   if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4013     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4014       return 20;
4015     else
4016       return (int )ByteValTable[i];
4017   }
4018   else
4019     return 4;   /* Take it easy. */
4020 }
4021 
4022 static int
distance_value(MinMaxLen * mm)4023 distance_value(MinMaxLen* mm)
4024 {
4025   /* 1000 / (min-max-dist + 1) */
4026   static const short int dist_vals[] = {
4027     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4028       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4029       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4030       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4031       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4032       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4033       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4034       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4035       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4036       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4037   };
4038 
4039   int d;
4040 
4041   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4042 
4043   d = mm->max - mm->min;
4044   if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
4045     /* return dist_vals[d] * 16 / (mm->min + 12); */
4046     return (int )dist_vals[d];
4047   else
4048     return 1;
4049 }
4050 
4051 static int
comp_distance_value(MinMaxLen * d1,MinMaxLen * d2,int v1,int v2)4052 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4053 {
4054   if (v2 <= 0) return -1;
4055   if (v1 <= 0) return  1;
4056 
4057   v1 *= distance_value(d1);
4058   v2 *= distance_value(d2);
4059 
4060   if (v2 > v1) return  1;
4061   if (v2 < v1) return -1;
4062 
4063   if (d2->min < d1->min) return  1;
4064   if (d2->min > d1->min) return -1;
4065   return 0;
4066 }
4067 
4068 static int
is_equal_mml(MinMaxLen * a,MinMaxLen * b)4069 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4070 {
4071   return (a->min == b->min && a->max == b->max) ? 1 : 0;
4072 }
4073 
4074 
4075 static void
set_mml(MinMaxLen * mml,OnigDistance min,OnigDistance max)4076 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4077 {
4078   mml->min = min;
4079   mml->max = max;
4080 }
4081 
4082 static void
clear_mml(MinMaxLen * mml)4083 clear_mml(MinMaxLen* mml)
4084 {
4085   mml->min = mml->max = 0;
4086 }
4087 
4088 static void
copy_mml(MinMaxLen * to,MinMaxLen * from)4089 copy_mml(MinMaxLen* to, MinMaxLen* from)
4090 {
4091   to->min = from->min;
4092   to->max = from->max;
4093 }
4094 
4095 static void
add_mml(MinMaxLen * to,MinMaxLen * from)4096 add_mml(MinMaxLen* to, MinMaxLen* from)
4097 {
4098   to->min = distance_add(to->min, from->min);
4099   to->max = distance_add(to->max, from->max);
4100 }
4101 
4102 #if 0
4103 static void
4104 add_len_mml(MinMaxLen* to, OnigDistance len)
4105 {
4106   to->min = distance_add(to->min, len);
4107   to->max = distance_add(to->max, len);
4108 }
4109 #endif
4110 
4111 static void
alt_merge_mml(MinMaxLen * to,MinMaxLen * from)4112 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4113 {
4114   if (to->min > from->min) to->min = from->min;
4115   if (to->max < from->max) to->max = from->max;
4116 }
4117 
4118 static void
copy_opt_env(OptEnv * to,OptEnv * from)4119 copy_opt_env(OptEnv* to, OptEnv* from)
4120 {
4121   CopyMem (to, from, sizeof (OptEnv));
4122 }
4123 
4124 static void
clear_opt_anc_info(OptAncInfo * anc)4125 clear_opt_anc_info(OptAncInfo* anc)
4126 {
4127   anc->left_anchor  = 0;
4128   anc->right_anchor = 0;
4129 }
4130 
4131 static void
copy_opt_anc_info(OptAncInfo * to,OptAncInfo * from)4132 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4133 {
4134   CopyMem (to, from, sizeof (OptAncInfo));
4135 }
4136 
4137 static void
concat_opt_anc_info(OptAncInfo * to,OptAncInfo * left,OptAncInfo * right,OnigDistance left_len,OnigDistance right_len)4138 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4139 		    OnigDistance left_len, OnigDistance right_len)
4140 {
4141   clear_opt_anc_info(to);
4142 
4143   to->left_anchor = left->left_anchor;
4144   if (left_len == 0) {
4145     to->left_anchor |= right->left_anchor;
4146   }
4147 
4148   to->right_anchor = right->right_anchor;
4149   if (right_len == 0) {
4150     to->right_anchor |= left->right_anchor;
4151   }
4152 }
4153 
4154 static int
is_left_anchor(int anc)4155 is_left_anchor(int anc)
4156 {
4157   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4158       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4159       anc == ANCHOR_PREC_READ_NOT)
4160     return 0;
4161 
4162   return 1;
4163 }
4164 
4165 static int
is_set_opt_anc_info(OptAncInfo * to,int anc)4166 is_set_opt_anc_info(OptAncInfo* to, int anc)
4167 {
4168   if ((to->left_anchor & anc) != 0) return 1;
4169 
4170   return ((to->right_anchor & anc) != 0 ? 1 : 0);
4171 }
4172 
4173 static void
add_opt_anc_info(OptAncInfo * to,int anc)4174 add_opt_anc_info(OptAncInfo* to, int anc)
4175 {
4176   if (is_left_anchor(anc))
4177     to->left_anchor |= anc;
4178   else
4179     to->right_anchor |= anc;
4180 }
4181 
4182 static void
remove_opt_anc_info(OptAncInfo * to,int anc)4183 remove_opt_anc_info(OptAncInfo* to, int anc)
4184 {
4185   if (is_left_anchor(anc))
4186     to->left_anchor &= ~anc;
4187   else
4188     to->right_anchor &= ~anc;
4189 }
4190 
4191 static void
alt_merge_opt_anc_info(OptAncInfo * to,OptAncInfo * add)4192 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4193 {
4194   to->left_anchor  &= add->left_anchor;
4195   to->right_anchor &= add->right_anchor;
4196 }
4197 
4198 static int
is_full_opt_exact_info(OptExactInfo * ex)4199 is_full_opt_exact_info(OptExactInfo* ex)
4200 {
4201   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4202 }
4203 
4204 static void
clear_opt_exact_info(OptExactInfo * ex)4205 clear_opt_exact_info(OptExactInfo* ex)
4206 {
4207   clear_mml(&ex->mmd);
4208   clear_opt_anc_info(&ex->anc);
4209   ex->reach_end   = 0;
4210   ex->ignore_case = 0;
4211   ex->len         = 0;
4212   ex->s[0]        = '\0';
4213 }
4214 
4215 static void
copy_opt_exact_info(OptExactInfo * to,OptExactInfo * from)4216 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4217 {
4218   CopyMem (to, from, sizeof (OptExactInfo));
4219 }
4220 
4221 static void
concat_opt_exact_info(OptExactInfo * to,OptExactInfo * add,OnigEncoding enc)4222 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4223 {
4224   int i, j, len;
4225   UChar *p, *end;
4226   OptAncInfo tanc;
4227 
4228   if (! to->ignore_case && add->ignore_case) {
4229     if (to->len >= add->len) return ;  /* avoid */
4230 
4231     to->ignore_case = 1;
4232   }
4233 
4234   p = add->s;
4235   end = p + add->len;
4236   for (i = to->len; p < end; ) {
4237     len = enclen(enc, p);
4238     if (i + len > OPT_EXACT_MAXLEN) break;
4239     for (j = 0; j < len && p < end; j++)
4240       to->s[i++] = *p++;
4241   }
4242 
4243   to->len = i;
4244   to->reach_end = (p == end ? add->reach_end : 0);
4245 
4246   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4247   if (! to->reach_end) tanc.right_anchor = 0;
4248   copy_opt_anc_info(&to->anc, &tanc);
4249 }
4250 
4251 static void
concat_opt_exact_info_str(OptExactInfo * to,UChar * s,UChar * end,int raw ARG_UNUSED,OnigEncoding enc)4252 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4253 			  int raw ARG_UNUSED, OnigEncoding enc)
4254 {
4255   int i, j, len;
4256   UChar *p;
4257 
4258   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4259     len = enclen(enc, p);
4260     if (i + len > OPT_EXACT_MAXLEN) break;
4261     for (j = 0; j < len && p < end; j++)
4262       to->s[i++] = *p++;
4263   }
4264 
4265   to->len = i;
4266 }
4267 
4268 static void
alt_merge_opt_exact_info(OptExactInfo * to,OptExactInfo * add,OptEnv * env)4269 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4270 {
4271   int i, j, len;
4272 
4273   if (add->len == 0 || to->len == 0) {
4274     clear_opt_exact_info(to);
4275     return ;
4276   }
4277 
4278   if (! is_equal_mml(&to->mmd, &add->mmd)) {
4279     clear_opt_exact_info(to);
4280     return ;
4281   }
4282 
4283   for (i = 0; i < to->len && i < add->len; ) {
4284     if (to->s[i] != add->s[i]) break;
4285     len = enclen(env->enc, to->s + i);
4286 
4287     for (j = 1; j < len; j++) {
4288       if (to->s[i+j] != add->s[i+j]) break;
4289     }
4290     if (j < len) break;
4291     i += len;
4292   }
4293 
4294   if (! add->reach_end || i < add->len || i < to->len) {
4295     to->reach_end = 0;
4296   }
4297   to->len = i;
4298   to->ignore_case |= add->ignore_case;
4299 
4300   alt_merge_opt_anc_info(&to->anc, &add->anc);
4301   if (! to->reach_end) to->anc.right_anchor = 0;
4302 }
4303 
4304 static void
select_opt_exact_info(OnigEncoding enc,OptExactInfo * now,OptExactInfo * alt)4305 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4306 {
4307   int v1, v2;
4308 
4309   v1 = now->len;
4310   v2 = alt->len;
4311 
4312   if (v2 == 0) {
4313     return ;
4314   }
4315   else if (v1 == 0) {
4316     copy_opt_exact_info(now, alt);
4317     return ;
4318   }
4319   else if (v1 <= 2 && v2 <= 2) {
4320     /* ByteValTable[x] is big value --> low price */
4321     v2 = map_position_value(enc, now->s[0]);
4322     v1 = map_position_value(enc, alt->s[0]);
4323 
4324     if (now->len > 1) v1 += 5;
4325     if (alt->len > 1) v2 += 5;
4326   }
4327 
4328   if (now->ignore_case == 0) v1 *= 2;
4329   if (alt->ignore_case == 0) v2 *= 2;
4330 
4331   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4332     copy_opt_exact_info(now, alt);
4333 }
4334 
4335 static void
clear_opt_map_info(OptMapInfo * map)4336 clear_opt_map_info(OptMapInfo* map)
4337 {
4338   static const OptMapInfo clean_info = {
4339     {0, 0}, {0, 0}, 0,
4340     {
4341       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4342       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4343       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4344       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4345       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4346       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4347       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4348       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4349       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4350       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4351       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4352       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4353       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4354       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4355       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4356       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4357     }
4358   };
4359 
4360   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4361 }
4362 
4363 static void
copy_opt_map_info(OptMapInfo * to,OptMapInfo * from)4364 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4365 {
4366   CopyMem (to, from, sizeof (OptMapInfo));
4367 }
4368 
4369 static void
add_char_opt_map_info(OptMapInfo * map,UChar c,OnigEncoding enc)4370 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4371 {
4372   if (map->map[c] == 0) {
4373     map->map[c] = 1;
4374     map->value += map_position_value(enc, c);
4375   }
4376 }
4377 
4378 static int
add_char_amb_opt_map_info(OptMapInfo * map,UChar * p,UChar * end,OnigEncoding enc,OnigCaseFoldType case_fold_flag)4379 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4380                           OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4381 {
4382   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4383   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4384   int i, n;
4385 
4386   add_char_opt_map_info(map, p[0], enc);
4387 
4388   case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4389   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4390   if (n < 0) return n;
4391 
4392   for (i = 0; i < n; i++) {
4393     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4394     add_char_opt_map_info(map, buf[0], enc);
4395   }
4396 
4397   return 0;
4398 }
4399 
4400 static void
select_opt_map_info(OptMapInfo * now,OptMapInfo * alt)4401 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4402 {
4403   static int z = 1<<15; /* 32768: something big value */
4404 
4405   int v1, v2;
4406 
4407   if (alt->value == 0) return ;
4408   if (now->value == 0) {
4409     copy_opt_map_info(now, alt);
4410     return ;
4411   }
4412 
4413   v1 = z / now->value;
4414   v2 = z / alt->value;
4415   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4416     copy_opt_map_info(now, alt);
4417 }
4418 
4419 static int
comp_opt_exact_or_map_info(OptExactInfo * e,OptMapInfo * m)4420 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4421 {
4422 #define COMP_EM_BASE  20
4423   int ve, vm;
4424 
4425   if (m->value <= 0) return -1;
4426 
4427   ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
4428   vm = COMP_EM_BASE * 5 * 2 / m->value;
4429   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4430 }
4431 
4432 static void
alt_merge_opt_map_info(OnigEncoding enc,OptMapInfo * to,OptMapInfo * add)4433 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4434 {
4435   int i, val;
4436 
4437   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4438   if (to->value == 0) return ;
4439   if (add->value == 0 || to->mmd.max < add->mmd.min) {
4440     clear_opt_map_info(to);
4441     return ;
4442   }
4443 
4444   alt_merge_mml(&to->mmd, &add->mmd);
4445 
4446   val = 0;
4447   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4448     if (add->map[i])
4449       to->map[i] = 1;
4450 
4451     if (to->map[i])
4452       val += map_position_value(enc, i);
4453   }
4454   to->value = val;
4455 
4456   alt_merge_opt_anc_info(&to->anc, &add->anc);
4457 }
4458 
4459 static void
set_bound_node_opt_info(NodeOptInfo * opt,MinMaxLen * mmd)4460 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4461 {
4462   copy_mml(&(opt->exb.mmd),  mmd);
4463   copy_mml(&(opt->expr.mmd), mmd);
4464   copy_mml(&(opt->map.mmd),  mmd);
4465 }
4466 
4467 static void
clear_node_opt_info(NodeOptInfo * opt)4468 clear_node_opt_info(NodeOptInfo* opt)
4469 {
4470   clear_mml(&opt->len);
4471   clear_opt_anc_info(&opt->anc);
4472   clear_opt_exact_info(&opt->exb);
4473   clear_opt_exact_info(&opt->exm);
4474   clear_opt_exact_info(&opt->expr);
4475   clear_opt_map_info(&opt->map);
4476 }
4477 
4478 static void
copy_node_opt_info(NodeOptInfo * to,NodeOptInfo * from)4479 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4480 {
4481   CopyMem (to, from, sizeof (NodeOptInfo));
4482 }
4483 
4484 static void
concat_left_node_opt_info(OnigEncoding enc,NodeOptInfo * to,NodeOptInfo * add)4485 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4486 {
4487   int exb_reach, exm_reach;
4488   OptAncInfo tanc;
4489 
4490   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4491   copy_opt_anc_info(&to->anc, &tanc);
4492 
4493   if (add->exb.len > 0 && to->len.max == 0) {
4494     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4495 			to->len.max, add->len.max);
4496     copy_opt_anc_info(&add->exb.anc, &tanc);
4497   }
4498 
4499   if (add->map.value > 0 && to->len.max == 0) {
4500     if (add->map.mmd.max == 0)
4501       add->map.anc.left_anchor |= to->anc.left_anchor;
4502   }
4503 
4504   exb_reach = to->exb.reach_end;
4505   exm_reach = to->exm.reach_end;
4506 
4507   if (add->len.max != 0)
4508     to->exb.reach_end = to->exm.reach_end = 0;
4509 
4510   if (add->exb.len > 0) {
4511     if (exb_reach) {
4512       concat_opt_exact_info(&to->exb, &add->exb, enc);
4513       clear_opt_exact_info(&add->exb);
4514     }
4515     else if (exm_reach) {
4516       concat_opt_exact_info(&to->exm, &add->exb, enc);
4517       clear_opt_exact_info(&add->exb);
4518     }
4519   }
4520   select_opt_exact_info(enc, &to->exm, &add->exb);
4521   select_opt_exact_info(enc, &to->exm, &add->exm);
4522 
4523   if (to->expr.len > 0) {
4524     if (add->len.max > 0) {
4525       if (to->expr.len > (int )add->len.max)
4526 	to->expr.len = add->len.max;
4527 
4528       if (to->expr.mmd.max == 0)
4529 	select_opt_exact_info(enc, &to->exb, &to->expr);
4530       else
4531 	select_opt_exact_info(enc, &to->exm, &to->expr);
4532     }
4533   }
4534   else if (add->expr.len > 0) {
4535     copy_opt_exact_info(&to->expr, &add->expr);
4536   }
4537 
4538   select_opt_map_info(&to->map, &add->map);
4539 
4540   add_mml(&to->len, &add->len);
4541 }
4542 
4543 static void
alt_merge_node_opt_info(NodeOptInfo * to,NodeOptInfo * add,OptEnv * env)4544 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4545 {
4546   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4547   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4548   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4549   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4550   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4551 
4552   alt_merge_mml(&to->len, &add->len);
4553 }
4554 
4555 
4556 #define MAX_NODE_OPT_INFO_REF_COUNT    5
4557 
4558 static int
optimize_node_left(Node * node,NodeOptInfo * opt,OptEnv * env)4559 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4560 {
4561   int type;
4562   int r = 0;
4563 
4564   clear_node_opt_info(opt);
4565   set_bound_node_opt_info(opt, &env->mmd);
4566 
4567   type = NTYPE(node);
4568   switch (type) {
4569   case NT_LIST:
4570     {
4571       OptEnv nenv;
4572       NodeOptInfo nopt;
4573       Node* nd = node;
4574 
4575       copy_opt_env(&nenv, env);
4576       do {
4577 	r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4578 	if (r == 0) {
4579 	  add_mml(&nenv.mmd, &nopt.len);
4580 	  concat_left_node_opt_info(env->enc, opt, &nopt);
4581 	}
4582       } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4583     }
4584     break;
4585 
4586   case NT_ALT:
4587     {
4588       NodeOptInfo nopt;
4589       Node* nd = node;
4590 
4591       do {
4592 	r = optimize_node_left(NCAR(nd), &nopt, env);
4593 	if (r == 0) {
4594 	  if (nd == node) copy_node_opt_info(opt, &nopt);
4595 	  else            alt_merge_node_opt_info(opt, &nopt, env);
4596 	}
4597       } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4598     }
4599     break;
4600 
4601   case NT_STR:
4602     {
4603       StrNode* sn = NSTR(node);
4604       int slen = (int)(sn->end - sn->s);
4605       int is_raw = NSTRING_IS_RAW(node);
4606 
4607       if (! NSTRING_IS_AMBIG(node)) {
4608 	concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4609 				  NSTRING_IS_RAW(node), env->enc);
4610 	if (slen > 0) {
4611 	  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4612 	}
4613         set_mml(&opt->len, slen, slen);
4614       }
4615       else {
4616         int max;
4617 
4618 	if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4619           int n = onigenc_strlen(env->enc, sn->s, sn->end);
4620           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4621 	}
4622 	else {
4623 	  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4624 				    is_raw, env->enc);
4625 	  opt->exb.ignore_case = 1;
4626 
4627 	  if (slen > 0) {
4628 	    r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4629 					  env->enc, env->case_fold_flag);
4630 	    if (r != 0) break;
4631 	  }
4632 
4633 	  max = slen;
4634 	}
4635 
4636         set_mml(&opt->len, slen, max);
4637       }
4638 
4639       if (opt->exb.len == slen)
4640 	opt->exb.reach_end = 1;
4641     }
4642     break;
4643 
4644   case NT_CCLASS:
4645     {
4646       int i, z;
4647       CClassNode* cc = NCCLASS(node);
4648 
4649       /* no need to check ignore case. (setted in setup_tree()) */
4650 
4651       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4652         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4653 	OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4654 
4655 	set_mml(&opt->len, min, max);
4656       }
4657       else {
4658         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4659           z = BITSET_AT(cc->bs, i);
4660           if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4661             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4662           }
4663         }
4664 	set_mml(&opt->len, 1, 1);
4665       }
4666     }
4667     break;
4668 
4669   case NT_CTYPE:
4670     {
4671       int i, min, max;
4672 
4673       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4674 
4675       if (max == 1) {
4676         min = 1;
4677 
4678 	switch (NCTYPE(node)->ctype) {
4679 	case ONIGENC_CTYPE_WORD:
4680 	  if (NCTYPE(node)->not != 0) {
4681 	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4682 	      if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
4683 		add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4684 	      }
4685 	    }
4686 	  }
4687 	  else {
4688 	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4689 	      if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4690 		add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4691 	      }
4692 	    }
4693 	  }
4694 	  break;
4695 	}
4696       }
4697       else {
4698         min = ONIGENC_MBC_MINLEN(env->enc);
4699       }
4700       set_mml(&opt->len, min, max);
4701     }
4702     break;
4703 
4704   case NT_CANY:
4705     {
4706       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4707       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4708       set_mml(&opt->len, min, max);
4709     }
4710     break;
4711 
4712   case NT_ANCHOR:
4713     switch (NANCHOR(node)->type) {
4714     case ANCHOR_BEGIN_BUF:
4715     case ANCHOR_BEGIN_POSITION:
4716     case ANCHOR_BEGIN_LINE:
4717     case ANCHOR_END_BUF:
4718     case ANCHOR_SEMI_END_BUF:
4719     case ANCHOR_END_LINE:
4720       add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
4721       break;
4722 
4723     case ANCHOR_PREC_READ:
4724       {
4725 	NodeOptInfo nopt;
4726 
4727 	r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
4728 	if (r == 0) {
4729 	  if (nopt.exb.len > 0)
4730 	    copy_opt_exact_info(&opt->expr, &nopt.exb);
4731 	  else if (nopt.exm.len > 0)
4732 	    copy_opt_exact_info(&opt->expr, &nopt.exm);
4733 
4734 	  opt->expr.reach_end = 0;
4735 
4736 	  if (nopt.map.value > 0)
4737 	    copy_opt_map_info(&opt->map, &nopt.map);
4738 	}
4739       }
4740       break;
4741 
4742     case ANCHOR_PREC_READ_NOT:
4743     case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
4744     case ANCHOR_LOOK_BEHIND_NOT:
4745       break;
4746     }
4747     break;
4748 
4749   case NT_BREF:
4750     {
4751       int i;
4752       int* backs;
4753       OnigDistance min, max, tmin, tmax;
4754       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
4755       BRefNode* br = NBREF(node);
4756 
4757       if (br->state & NST_RECURSION) {
4758 	set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4759 	break;
4760       }
4761       backs = BACKREFS_P(br);
4762       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
4763       if (r != 0) break;
4764       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
4765       if (r != 0) break;
4766       for (i = 1; i < br->back_num; i++) {
4767 	r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
4768 	if (r != 0) break;
4769 	r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
4770 	if (r != 0) break;
4771 	if (min > tmin) min = tmin;
4772 	if (max < tmax) max = tmax;
4773       }
4774       if (r == 0) set_mml(&opt->len, min, max);
4775     }
4776     break;
4777 
4778 #ifdef USE_SUBEXP_CALL
4779   case NT_CALL:
4780     if (IS_CALL_RECURSION(NCALL(node)))
4781       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4782     else {
4783       OnigOptionType save = env->options;
4784       env->options = NENCLOSE(NCALL(node)->target)->option;
4785       r = optimize_node_left(NCALL(node)->target, opt, env);
4786       env->options = save;
4787     }
4788     break;
4789 #endif
4790 
4791   case NT_QTFR:
4792     {
4793       int i;
4794       OnigDistance min, max;
4795       NodeOptInfo nopt;
4796       QtfrNode* qn = NQTFR(node);
4797 
4798       r = optimize_node_left(qn->target, &nopt, env);
4799       if (r) break;
4800 
4801       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
4802 	if (env->mmd.max == 0 &&
4803 	    NTYPE(qn->target) == NT_CANY && qn->greedy) {
4804 	  if (IS_MULTILINE(env->options))
4805 	    add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
4806 	  else
4807 	    add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
4808 	}
4809       }
4810       else {
4811 	if (qn->lower > 0) {
4812 	  copy_node_opt_info(opt, &nopt);
4813 	  if (nopt.exb.len > 0) {
4814 	    if (nopt.exb.reach_end) {
4815 	      for (i = 2; i <= qn->lower &&
4816 		          ! is_full_opt_exact_info(&opt->exb); i++) {
4817 		concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
4818 	      }
4819 	      if (i < qn->lower) {
4820 		opt->exb.reach_end = 0;
4821 	      }
4822 	    }
4823 	  }
4824 
4825 	  if (qn->lower != qn->upper) {
4826 	    opt->exb.reach_end = 0;
4827 	    opt->exm.reach_end = 0;
4828 	  }
4829 	  if (qn->lower > 1)
4830 	    opt->exm.reach_end = 0;
4831 	}
4832       }
4833 
4834       min = distance_multiply(nopt.len.min, qn->lower);
4835       if (IS_REPEAT_INFINITE(qn->upper))
4836 	max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
4837       else
4838 	max = distance_multiply(nopt.len.max, qn->upper);
4839 
4840       set_mml(&opt->len, min, max);
4841     }
4842     break;
4843 
4844   case NT_ENCLOSE:
4845     {
4846       EncloseNode* en = NENCLOSE(node);
4847 
4848       switch (en->type) {
4849       case ENCLOSE_OPTION:
4850 	{
4851 	  OnigOptionType save = env->options;
4852 
4853 	  env->options = en->option;
4854 	  r = optimize_node_left(en->target, opt, env);
4855 	  env->options = save;
4856 	}
4857 	break;
4858 
4859       case ENCLOSE_MEMORY:
4860 #ifdef USE_SUBEXP_CALL
4861 	en->opt_count++;
4862 	if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
4863 	  OnigDistance min, max;
4864 
4865 	  min = 0;
4866 	  max = ONIG_INFINITE_DISTANCE;
4867 	  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
4868 	  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
4869 	  set_mml(&opt->len, min, max);
4870 	}
4871 	else
4872 #endif
4873 	{
4874 	  r = optimize_node_left(en->target, opt, env);
4875 
4876 	  if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
4877 	    if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
4878 	      remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
4879 	  }
4880 	}
4881 	break;
4882 
4883       case ENCLOSE_STOP_BACKTRACK:
4884 	r = optimize_node_left(en->target, opt, env);
4885 	break;
4886       }
4887     }
4888     break;
4889 
4890   default:
4891 #ifdef ONIG_DEBUG
4892     fprintf(stderr, "optimize_node_left: undefined node type %d\n",
4893 	    NTYPE(node));
4894 #endif
4895     r = ONIGERR_TYPE_BUG;
4896     break;
4897   }
4898 
4899   return r;
4900 }
4901 
4902 static int
set_optimize_exact_info(regex_t * reg,OptExactInfo * e)4903 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
4904 {
4905   int r;
4906 
4907   if (e->len == 0) return 0;
4908 
4909   if (e->ignore_case) {
4910     reg->exact = (UChar* )xmalloc(e->len);
4911     CHECK_NULL_RETURN_MEMERR(reg->exact);
4912     xmemcpy(reg->exact, e->s, e->len);
4913     reg->exact_end = reg->exact + e->len;
4914     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
4915   }
4916   else {
4917     int allow_reverse;
4918 
4919     reg->exact = str_dup(e->s, e->s + e->len);
4920     CHECK_NULL_RETURN_MEMERR(reg->exact);
4921     reg->exact_end = reg->exact + e->len;
4922 
4923     allow_reverse =
4924 	ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
4925 
4926     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
4927       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
4928 	              reg->map, &(reg->int_map));
4929       if (r) return r;
4930 
4931       reg->optimize = (allow_reverse != 0
4932 		       ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
4933     }
4934     else {
4935       reg->optimize = ONIG_OPTIMIZE_EXACT;
4936     }
4937   }
4938 
4939   reg->dmin = e->mmd.min;
4940   reg->dmax = e->mmd.max;
4941 
4942   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4943     reg->threshold_len = reg->dmin + (int)(reg->exact_end - reg->exact);
4944   }
4945 
4946   return 0;
4947 }
4948 
4949 static void
set_optimize_map_info(regex_t * reg,OptMapInfo * m)4950 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
4951 {
4952   int i;
4953 
4954   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4955     reg->map[i] = m->map[i];
4956 
4957   reg->optimize   = ONIG_OPTIMIZE_MAP;
4958   reg->dmin       = m->mmd.min;
4959   reg->dmax       = m->mmd.max;
4960 
4961   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4962     reg->threshold_len = reg->dmin + 1;
4963   }
4964 }
4965 
4966 static void
set_sub_anchor(regex_t * reg,OptAncInfo * anc)4967 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
4968 {
4969   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
4970   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
4971 }
4972 
4973 #ifdef ONIG_DEBUG
4974 static void print_optimize_info(FILE* f, regex_t* reg);
4975 #endif
4976 
4977 static int
set_optimize_info_from_tree(Node * node,regex_t * reg,ScanEnv * scan_env)4978 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
4979 {
4980 
4981   int r;
4982   NodeOptInfo opt;
4983   OptEnv env;
4984 
4985   env.enc            = reg->enc;
4986   env.options        = reg->options;
4987   env.case_fold_flag = reg->case_fold_flag;
4988   env.scan_env   = scan_env;
4989   clear_mml(&env.mmd);
4990 
4991   r = optimize_node_left(node, &opt, &env);
4992   if (r) return r;
4993 
4994   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
4995         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
4996 
4997   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
4998 
4999   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5000     reg->anchor_dmin = opt.len.min;
5001     reg->anchor_dmax = opt.len.max;
5002   }
5003 
5004   if (opt.exb.len > 0 || opt.exm.len > 0) {
5005     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5006     if (opt.map.value > 0 &&
5007 	comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5008       goto set_map;
5009     }
5010     else {
5011       r = set_optimize_exact_info(reg, &opt.exb);
5012       set_sub_anchor(reg, &opt.exb.anc);
5013     }
5014   }
5015   else if (opt.map.value > 0) {
5016   set_map:
5017     set_optimize_map_info(reg, &opt.map);
5018     set_sub_anchor(reg, &opt.map.anc);
5019   }
5020   else {
5021     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5022     if (opt.len.max == 0)
5023       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5024   }
5025 
5026 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5027   print_optimize_info(stderr, reg);
5028 #endif
5029   return r;
5030 }
5031 
5032 static void
clear_optimize_info(regex_t * reg)5033 clear_optimize_info(regex_t* reg)
5034 {
5035   reg->optimize      = ONIG_OPTIMIZE_NONE;
5036   reg->anchor        = 0;
5037   reg->anchor_dmin   = 0;
5038   reg->anchor_dmax   = 0;
5039   reg->sub_anchor    = 0;
5040   reg->exact_end     = (UChar* )NULL;
5041   reg->threshold_len = 0;
5042   if (IS_NOT_NULL(reg->exact)) {
5043     xfree(reg->exact);
5044     reg->exact = (UChar* )NULL;
5045   }
5046 }
5047 
5048 #ifdef ONIG_DEBUG
5049 
print_enc_string(FILE * fp,OnigEncoding enc,const UChar * s,const UChar * end)5050 static void print_enc_string(FILE* fp, OnigEncoding enc,
5051 			     const UChar *s, const UChar *end)
5052 {
5053   fprintf(fp, "\nPATTERN: /");
5054 
5055   if (ONIGENC_MBC_MINLEN(enc) > 1) {
5056     const UChar *p;
5057     OnigCodePoint code;
5058 
5059     p = s;
5060     while (p < end) {
5061       code = ONIGENC_MBC_TO_CODE(enc, p, end);
5062       if (code >= 0x80) {
5063 	fprintf(fp, " 0x%04x ", (int )code);
5064       }
5065       else {
5066 	fputc((int )code, fp);
5067       }
5068 
5069       p += enclen(enc, p);
5070     }
5071   }
5072   else {
5073     while (s < end) {
5074       fputc((int )*s, fp);
5075       s++;
5076     }
5077   }
5078 
5079   fprintf(fp, "/\n");
5080 }
5081 
5082 static void
print_distance_range(FILE * f,OnigDistance a,OnigDistance b)5083 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5084 {
5085   if (a == ONIG_INFINITE_DISTANCE)
5086     fputs("inf", f);
5087   else
5088     fprintf(f, "(%u)", a);
5089 
5090   fputs("-", f);
5091 
5092   if (b == ONIG_INFINITE_DISTANCE)
5093     fputs("inf", f);
5094   else
5095     fprintf(f, "(%u)", b);
5096 }
5097 
5098 static void
print_anchor(FILE * f,int anchor)5099 print_anchor(FILE* f, int anchor)
5100 {
5101   int q = 0;
5102 
5103   fprintf(f, "[");
5104 
5105   if (anchor & ANCHOR_BEGIN_BUF) {
5106     fprintf(f, "begin-buf");
5107     q = 1;
5108   }
5109   if (anchor & ANCHOR_BEGIN_LINE) {
5110     if (q) fprintf(f, ", ");
5111     q = 1;
5112     fprintf(f, "begin-line");
5113   }
5114   if (anchor & ANCHOR_BEGIN_POSITION) {
5115     if (q) fprintf(f, ", ");
5116     q = 1;
5117     fprintf(f, "begin-pos");
5118   }
5119   if (anchor & ANCHOR_END_BUF) {
5120     if (q) fprintf(f, ", ");
5121     q = 1;
5122     fprintf(f, "end-buf");
5123   }
5124   if (anchor & ANCHOR_SEMI_END_BUF) {
5125     if (q) fprintf(f, ", ");
5126     q = 1;
5127     fprintf(f, "semi-end-buf");
5128   }
5129   if (anchor & ANCHOR_END_LINE) {
5130     if (q) fprintf(f, ", ");
5131     q = 1;
5132     fprintf(f, "end-line");
5133   }
5134   if (anchor & ANCHOR_ANYCHAR_STAR) {
5135     if (q) fprintf(f, ", ");
5136     q = 1;
5137     fprintf(f, "anychar-star");
5138   }
5139   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5140     if (q) fprintf(f, ", ");
5141     fprintf(f, "anychar-star-pl");
5142   }
5143 
5144   fprintf(f, "]");
5145 }
5146 
5147 static void
print_optimize_info(FILE * f,regex_t * reg)5148 print_optimize_info(FILE* f, regex_t* reg)
5149 {
5150   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5151                               "EXACT_IC", "MAP" };
5152 
5153   fprintf(f, "optimize: %s\n", on[reg->optimize]);
5154   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5155   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5156     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5157   fprintf(f, "\n");
5158 
5159   if (reg->optimize) {
5160     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5161     fprintf(f, "\n");
5162   }
5163   fprintf(f, "\n");
5164 
5165   if (reg->exact) {
5166     UChar *p;
5167     fprintf(f, "exact: [");
5168     for (p = reg->exact; p < reg->exact_end; p++) {
5169       fputc(*p, f);
5170     }
5171     fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
5172   }
5173   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5174     int c, i, n = 0;
5175 
5176     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5177       if (reg->map[i]) n++;
5178 
5179     fprintf(f, "map: n=%d\n", n);
5180     if (n > 0) {
5181       c = 0;
5182       fputc('[', f);
5183       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5184 	if (reg->map[i] != 0) {
5185           if (c > 0)  fputs(", ", f);
5186           c++;
5187           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5188               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5189             fputc(i, f);
5190           else
5191             fprintf(f, "%d", i);
5192         }
5193       }
5194       fprintf(f, "]\n");
5195     }
5196   }
5197 }
5198 #endif /* ONIG_DEBUG */
5199 
5200 
5201 extern void
onig_free_body(regex_t * reg)5202 onig_free_body(regex_t* reg)
5203 {
5204   if (IS_NOT_NULL(reg)) {
5205     if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5206     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5207     if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
5208     if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5209     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5210     if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
5211 
5212 #ifdef USE_NAMED_GROUP
5213     onig_names_free(reg);
5214 #endif
5215   }
5216 }
5217 
5218 extern void
onig_free(regex_t * reg)5219 onig_free(regex_t* reg)
5220 {
5221   if (IS_NOT_NULL(reg)) {
5222     onig_free_body(reg);
5223     xfree(reg);
5224   }
5225 }
5226 
5227 #define REGEX_TRANSFER(to,from) do {\
5228   (to)->state = ONIG_STATE_MODIFY;\
5229   onig_free_body(to);\
5230   xmemcpy(to, from, sizeof(regex_t));\
5231   xfree(from);\
5232 } while (0)
5233 
5234 extern void
onig_transfer(regex_t * to,regex_t * from)5235 onig_transfer(regex_t* to, regex_t* from)
5236 {
5237   THREAD_ATOMIC_START;
5238   REGEX_TRANSFER(to, from);
5239   THREAD_ATOMIC_END;
5240 }
5241 
5242 #define REGEX_CHAIN_HEAD(reg) do {\
5243   while (IS_NOT_NULL((reg)->chain)) {\
5244     (reg) = (reg)->chain;\
5245   }\
5246 } while (0)
5247 
5248 extern void
onig_chain_link_add(regex_t * to,regex_t * add)5249 onig_chain_link_add(regex_t* to, regex_t* add)
5250 {
5251   THREAD_ATOMIC_START;
5252   REGEX_CHAIN_HEAD(to);
5253   to->chain = add;
5254   THREAD_ATOMIC_END;
5255 }
5256 
5257 extern void
onig_chain_reduce(regex_t * reg)5258 onig_chain_reduce(regex_t* reg)
5259 {
5260   regex_t *head, *prev;
5261 
5262   prev = reg;
5263   head = prev->chain;
5264   if (IS_NOT_NULL(head)) {
5265     reg->state = ONIG_STATE_MODIFY;
5266     while (IS_NOT_NULL(head->chain)) {
5267       prev = head;
5268       head = head->chain;
5269     }
5270     prev->chain = (regex_t* )NULL;
5271     REGEX_TRANSFER(reg, head);
5272   }
5273 }
5274 
5275 #ifdef ONIG_DEBUG
5276 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5277 #endif
5278 #ifdef ONIG_DEBUG_PARSE_TREE
5279 static void print_tree P_((FILE* f, Node* node));
5280 #endif
5281 
5282 extern int
onig_compile(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigErrorInfo * einfo)5283 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5284 	      OnigErrorInfo* einfo)
5285 {
5286 #define COMPILE_INIT_SIZE  20
5287 
5288   int r, init_size;
5289   Node*  root;
5290   ScanEnv  scan_env;
5291 #ifdef USE_SUBEXP_CALL
5292   UnsetAddrList  uslist;
5293 #endif
5294 
5295   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5296 
5297   reg->state = ONIG_STATE_COMPILING;
5298 
5299 #ifdef ONIG_DEBUG
5300   print_enc_string(stderr, reg->enc, pattern, pattern_end);
5301 #endif
5302 
5303   if (reg->alloc == 0) {
5304     init_size = ((int)(pattern_end - pattern)) * 2;
5305     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5306     r = BBUF_INIT(reg, init_size);
5307     if (r != 0) goto end;
5308   }
5309   else
5310     reg->used = 0;
5311 
5312   reg->num_mem            = 0;
5313   reg->num_repeat         = 0;
5314   reg->num_null_check     = 0;
5315   reg->repeat_range_alloc = 0;
5316   reg->repeat_range       = (OnigRepeatRange* )NULL;
5317 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5318   reg->num_comb_exp_check = 0;
5319 #endif
5320 
5321   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5322   if (r != 0 || root == NULL) goto err;
5323 
5324 #ifdef USE_NAMED_GROUP
5325   /* mixed use named group and no-named group */
5326   if (scan_env.num_named > 0 &&
5327       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5328       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5329     if (scan_env.num_named != scan_env.num_mem)
5330       r = disable_noname_group_capture(&root, reg, &scan_env);
5331     else
5332       r = numbered_ref_check(root);
5333 
5334     if (r != 0) goto err;
5335   }
5336 #endif
5337 
5338 #ifdef USE_SUBEXP_CALL
5339   if (scan_env.num_call > 0) {
5340     r = unset_addr_list_init(&uslist, scan_env.num_call);
5341     if (r != 0) goto err;
5342     scan_env.unset_addr_list = &uslist;
5343     r = setup_subexp_call(root, &scan_env);
5344     if (r != 0) goto err_unset;
5345     r = subexp_recursive_check_trav(root, &scan_env);
5346     if (r  < 0) goto err_unset;
5347     r = subexp_inf_recursive_check_trav(root, &scan_env);
5348     if (r != 0) goto err_unset;
5349 
5350     reg->num_call = scan_env.num_call;
5351   }
5352   else
5353     reg->num_call = 0;
5354 #endif
5355 
5356   r = setup_tree(root, reg, 0, &scan_env);
5357   if (r != 0) goto err_unset;
5358 
5359 #ifdef ONIG_DEBUG_PARSE_TREE
5360   print_tree(stderr, root);
5361 #endif
5362 
5363   reg->capture_history  = scan_env.capture_history;
5364   reg->bt_mem_start     = scan_env.bt_mem_start;
5365   reg->bt_mem_start    |= reg->capture_history;
5366   if (IS_FIND_CONDITION(reg->options))
5367     BIT_STATUS_ON_ALL(reg->bt_mem_end);
5368   else {
5369     reg->bt_mem_end  = scan_env.bt_mem_end;
5370     reg->bt_mem_end |= reg->capture_history;
5371   }
5372 
5373 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5374   if (scan_env.backrefed_mem == 0
5375 #ifdef USE_SUBEXP_CALL
5376       || scan_env.num_call == 0
5377 #endif
5378       ) {
5379     setup_comb_exp_check(root, 0, &scan_env);
5380 #ifdef USE_SUBEXP_CALL
5381     if (scan_env.has_recursion != 0) {
5382       scan_env.num_comb_exp_check = 0;
5383     }
5384     else
5385 #endif
5386     if (scan_env.comb_exp_max_regnum > 0) {
5387       int i;
5388       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5389 	if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5390 	  scan_env.num_comb_exp_check = 0;
5391 	  break;
5392 	}
5393       }
5394     }
5395   }
5396 
5397   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5398 #endif
5399 
5400   clear_optimize_info(reg);
5401 #ifndef ONIG_DONT_OPTIMIZE
5402   r = set_optimize_info_from_tree(root, reg, &scan_env);
5403   if (r != 0) goto err_unset;
5404 #endif
5405 
5406   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5407     xfree(scan_env.mem_nodes_dynamic);
5408     scan_env.mem_nodes_dynamic = (Node** )NULL;
5409   }
5410 
5411   r = compile_tree(root, reg);
5412   if (r == 0) {
5413     r = add_opcode(reg, OP_END);
5414 #ifdef USE_SUBEXP_CALL
5415     if (scan_env.num_call > 0) {
5416       r = unset_addr_list_fix(&uslist, reg);
5417       unset_addr_list_end(&uslist);
5418       if (r) goto err;
5419     }
5420 #endif
5421 
5422     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5423       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5424     else {
5425       if (reg->bt_mem_start != 0)
5426 	reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5427       else
5428 	reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5429     }
5430   }
5431 #ifdef USE_SUBEXP_CALL
5432   else if (scan_env.num_call > 0) {
5433     unset_addr_list_end(&uslist);
5434   }
5435 #endif
5436   onig_node_free(root);
5437 
5438 #ifdef ONIG_DEBUG_COMPILE
5439 #ifdef USE_NAMED_GROUP
5440   onig_print_names(stderr, reg);
5441 #endif
5442   print_compiled_byte_code_list(stderr, reg);
5443 #endif
5444 
5445  end:
5446   reg->state = ONIG_STATE_NORMAL;
5447   return r;
5448 
5449  err_unset:
5450 #ifdef USE_SUBEXP_CALL
5451   if (scan_env.num_call > 0) {
5452     unset_addr_list_end(&uslist);
5453   }
5454 #endif
5455  err:
5456   if (IS_NOT_NULL(scan_env.error)) {
5457     if (IS_NOT_NULL(einfo)) {
5458       einfo->enc     = scan_env.enc;
5459       einfo->par     = scan_env.error;
5460       einfo->par_end = scan_env.error_end;
5461     }
5462   }
5463 
5464   onig_node_free(root);
5465   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5466       xfree(scan_env.mem_nodes_dynamic);
5467   return r;
5468 }
5469 
5470 #ifdef USE_RECOMPILE_API
5471 extern int
onig_recompile(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)5472 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5473 	    OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5474 	    OnigErrorInfo* einfo)
5475 {
5476   int r;
5477   regex_t *new_reg;
5478 
5479   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5480   if (r) return r;
5481   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5482     onig_transfer(reg, new_reg);
5483   }
5484   else {
5485     onig_chain_link_add(reg, new_reg);
5486   }
5487   return 0;
5488 }
5489 #endif
5490 
5491 static int onig_inited = 0;
5492 
5493 extern int
onig_reg_init(regex_t * reg,OnigOptionType option,OnigCaseFoldType case_fold_flag,OnigEncoding enc,OnigSyntaxType * syntax)5494 onig_reg_init(regex_t* reg, OnigOptionType option,
5495 	      OnigCaseFoldType case_fold_flag,
5496 	      OnigEncoding enc, OnigSyntaxType* syntax)
5497 {
5498   if (! onig_inited)
5499     onig_init();
5500 
5501   if (IS_NULL(reg))
5502     return ONIGERR_INVALID_ARGUMENT;
5503 
5504   if (ONIGENC_IS_UNDEF(enc))
5505     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
5506 
5507   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5508       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5509     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5510   }
5511 
5512   (reg)->state = ONIG_STATE_MODIFY;
5513 
5514   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5515     option |= syntax->options;
5516     option &= ~ONIG_OPTION_SINGLELINE;
5517   }
5518   else
5519     option |= syntax->options;
5520 
5521   (reg)->enc              = enc;
5522   (reg)->options          = option;
5523   (reg)->syntax           = syntax;
5524   (reg)->optimize         = 0;
5525   (reg)->exact            = (UChar* )NULL;
5526   (reg)->int_map          = (int* )NULL;
5527   (reg)->int_map_backward = (int* )NULL;
5528   (reg)->chain            = (regex_t* )NULL;
5529 
5530   (reg)->p                = (UChar* )NULL;
5531   (reg)->alloc            = 0;
5532   (reg)->used             = 0;
5533   (reg)->name_table       = (void* )NULL;
5534 
5535   (reg)->case_fold_flag   = case_fold_flag;
5536   return 0;
5537 }
5538 
5539 extern int
onig_new_without_alloc(regex_t * reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)5540 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5541           const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5542           OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5543 {
5544   int r;
5545 
5546   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5547   if (r) return r;
5548 
5549   r = onig_compile(reg, pattern, pattern_end, einfo);
5550   return r;
5551 }
5552 
5553 extern int
onig_new(regex_t ** reg,const UChar * pattern,const UChar * pattern_end,OnigOptionType option,OnigEncoding enc,OnigSyntaxType * syntax,OnigErrorInfo * einfo)5554 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5555 	  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5556 	  OnigErrorInfo* einfo)
5557 {
5558   int r;
5559 
5560   *reg = (regex_t* )xmalloc(sizeof(regex_t));
5561   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5562 
5563   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5564   if (r) goto err;
5565 
5566   r = onig_compile(*reg, pattern, pattern_end, einfo);
5567   if (r) {
5568   err:
5569     onig_free(*reg);
5570     *reg = NULL;
5571   }
5572   return r;
5573 }
5574 
5575 
5576 extern int
onig_init(void)5577 onig_init(void)
5578 {
5579   if (onig_inited != 0)
5580     return 0;
5581 
5582   THREAD_SYSTEM_INIT;
5583   THREAD_ATOMIC_START;
5584 
5585   onig_inited = 1;
5586 
5587   onigenc_init();
5588   /* onigenc_set_default_caseconv_table((UChar* )0); */
5589 
5590 #ifdef ONIG_DEBUG_STATISTICS
5591   onig_statistics_init();
5592 #endif
5593 
5594   THREAD_ATOMIC_END;
5595   return 0;
5596 }
5597 
5598 
5599 static OnigEndCallListItemType* EndCallTop;
5600 
onig_add_end_call(void (* func)(void))5601 extern void onig_add_end_call(void (*func)(void))
5602 {
5603   OnigEndCallListItemType* item;
5604 
5605   item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5606   if (item == 0) return ;
5607 
5608   item->next = EndCallTop;
5609   item->func = func;
5610 
5611   EndCallTop = item;
5612 }
5613 
5614 static void
exec_end_call_list(void)5615 exec_end_call_list(void)
5616 {
5617   OnigEndCallListItemType* prev;
5618   void (*func)(void);
5619 
5620   while (EndCallTop != 0) {
5621     func = EndCallTop->func;
5622     (*func)();
5623 
5624     prev = EndCallTop;
5625     EndCallTop = EndCallTop->next;
5626     xfree(prev);
5627   }
5628 }
5629 
5630 extern int
onig_end(void)5631 onig_end(void)
5632 {
5633   THREAD_ATOMIC_START;
5634 
5635   exec_end_call_list();
5636 
5637 #ifdef ONIG_DEBUG_STATISTICS
5638   onig_print_statistics(stderr);
5639 #endif
5640 
5641 #ifdef USE_SHARED_CCLASS_TABLE
5642   onig_free_shared_cclass_table();
5643 #endif
5644 
5645 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5646   onig_free_node_list();
5647 #endif
5648 
5649   onig_inited = 0;
5650 
5651   THREAD_ATOMIC_END;
5652   THREAD_SYSTEM_END;
5653   return 0;
5654 }
5655 
5656 extern int
onig_is_in_code_range(const UChar * p,OnigCodePoint code)5657 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5658 {
5659   OnigCodePoint n, *data;
5660   OnigCodePoint low, high, x;
5661 
5662   GET_CODE_POINT(n, p);
5663   data = (OnigCodePoint* )p;
5664   data++;
5665 
5666   for (low = 0, high = n; low < high; ) {
5667     x = (low + high) >> 1;
5668     if (code > data[x * 2 + 1])
5669       low = x + 1;
5670     else
5671       high = x;
5672   }
5673 
5674   return ((low < n && code >= data[low * 2]) ? 1 : 0);
5675 }
5676 
5677 extern int
onig_is_code_in_cc_len(int elen,OnigCodePoint code,CClassNode * cc)5678 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
5679 {
5680   int found;
5681 
5682   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
5683     if (IS_NULL(cc->mbuf)) {
5684       found = 0;
5685     }
5686     else {
5687       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
5688     }
5689   }
5690   else {
5691     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
5692   }
5693 
5694   if (IS_NCCLASS_NOT(cc))
5695     return !found;
5696   else
5697     return found;
5698 }
5699 
5700 extern int
onig_is_code_in_cc(OnigEncoding enc,OnigCodePoint code,CClassNode * cc)5701 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
5702 {
5703   int len;
5704 
5705   if (ONIGENC_MBC_MINLEN(enc) > 1) {
5706     len = 2;
5707   }
5708   else {
5709     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
5710   }
5711   return onig_is_code_in_cc_len(len, code, cc);
5712 }
5713 
5714 
5715 #ifdef ONIG_DEBUG
5716 
5717 /* arguments type */
5718 #define ARG_SPECIAL     -1
5719 #define ARG_NON          0
5720 #define ARG_RELADDR      1
5721 #define ARG_ABSADDR      2
5722 #define ARG_LENGTH       3
5723 #define ARG_MEMNUM       4
5724 #define ARG_OPTION       5
5725 #define ARG_STATE_CHECK  6
5726 
5727 OnigOpInfoType OnigOpInfo[] = {
5728   { OP_FINISH,            "finish",          ARG_NON },
5729   { OP_END,               "end",             ARG_NON },
5730   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
5731   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
5732   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
5733   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
5734   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
5735   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
5736   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
5737   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
5738   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
5739   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
5740   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
5741   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
5742   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
5743   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
5744   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
5745   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
5746   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
5747   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
5748   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
5749   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
5750   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
5751   { OP_ANYCHAR,           "anychar",         ARG_NON },
5752   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
5753   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
5754   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
5755   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
5756   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
5757   { OP_WORD,                "word",            ARG_NON },
5758   { OP_NOT_WORD,            "not-word",        ARG_NON },
5759   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
5760   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
5761   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
5762   { OP_WORD_END,            "word-end",        ARG_NON },
5763   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
5764   { OP_END_BUF,             "end-buf",         ARG_NON },
5765   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
5766   { OP_END_LINE,            "end-line",        ARG_NON },
5767   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
5768   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
5769   { OP_BACKREF1,            "backref1",             ARG_NON },
5770   { OP_BACKREF2,            "backref2",             ARG_NON },
5771   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
5772   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
5773   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
5774   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
5775   { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL },
5776   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
5777   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
5778   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
5779   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
5780   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
5781   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
5782   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
5783   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
5784   { OP_FAIL,                "fail",                 ARG_NON },
5785   { OP_JUMP,                "jump",                 ARG_RELADDR },
5786   { OP_PUSH,                "push",                 ARG_RELADDR },
5787   { OP_POP,                 "pop",                  ARG_NON },
5788   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
5789   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
5790   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
5791   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
5792   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
5793   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
5794   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
5795   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
5796   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
5797   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
5798   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
5799   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
5800   { OP_PUSH_POS,             "push-pos",             ARG_NON },
5801   { OP_POP_POS,              "pop-pos",              ARG_NON },
5802   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
5803   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
5804   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
5805   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
5806   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
5807   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
5808   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
5809   { OP_CALL,                 "call",                 ARG_ABSADDR },
5810   { OP_RETURN,               "return",               ARG_NON },
5811   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
5812   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
5813   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
5814   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
5815   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
5816     "state-check-anychar-ml*", ARG_STATE_CHECK },
5817   { -1, "", ARG_NON }
5818 };
5819 
5820 static char*
op2name(int opcode)5821 op2name(int opcode)
5822 {
5823   int i;
5824 
5825   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5826     if (opcode == OnigOpInfo[i].opcode)
5827       return OnigOpInfo[i].name;
5828   }
5829   return "";
5830 }
5831 
5832 static int
op2arg_type(int opcode)5833 op2arg_type(int opcode)
5834 {
5835   int i;
5836 
5837   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5838     if (opcode == OnigOpInfo[i].opcode)
5839       return OnigOpInfo[i].arg_type;
5840   }
5841   return ARG_SPECIAL;
5842 }
5843 
5844 static void
Indent(FILE * f,int indent)5845 Indent(FILE* f, int indent)
5846 {
5847   int i;
5848   for (i = 0; i < indent; i++) putc(' ', f);
5849 }
5850 
5851 static void
p_string(FILE * f,int len,UChar * s)5852 p_string(FILE* f, int len, UChar* s)
5853 {
5854   fputs(":", f);
5855   while (len-- > 0) { fputc(*s++, f); }
5856 }
5857 
5858 static void
p_len_string(FILE * f,LengthType len,int mb_len,UChar * s)5859 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
5860 {
5861   int x = len * mb_len;
5862 
5863   fprintf(f, ":%d:", len);
5864   while (x-- > 0) { fputc(*s++, f); }
5865 }
5866 
5867 extern void
onig_print_compiled_byte_code(FILE * f,UChar * bp,UChar ** nextp,OnigEncoding enc)5868 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
5869                               OnigEncoding enc)
5870 {
5871   int i, n, arg_type;
5872   RelAddrType addr;
5873   LengthType len;
5874   MemNumType mem;
5875   StateCheckNumType scn;
5876   OnigCodePoint code;
5877   UChar *q;
5878 
5879   fprintf(f, "[%s", op2name(*bp));
5880   arg_type = op2arg_type(*bp);
5881   if (arg_type != ARG_SPECIAL) {
5882     bp++;
5883     switch (arg_type) {
5884     case ARG_NON:
5885       break;
5886     case ARG_RELADDR:
5887       GET_RELADDR_INC(addr, bp);
5888       fprintf(f, ":(%d)", addr);
5889       break;
5890     case ARG_ABSADDR:
5891       GET_ABSADDR_INC(addr, bp);
5892       fprintf(f, ":(%d)", addr);
5893       break;
5894     case ARG_LENGTH:
5895       GET_LENGTH_INC(len, bp);
5896       fprintf(f, ":%d", len);
5897       break;
5898     case ARG_MEMNUM:
5899       mem = *((MemNumType* )bp);
5900       bp += SIZE_MEMNUM;
5901       fprintf(f, ":%d", mem);
5902       break;
5903     case ARG_OPTION:
5904       {
5905 	OnigOptionType option = *((OnigOptionType* )bp);
5906 	bp += SIZE_OPTION;
5907 	fprintf(f, ":%d", option);
5908       }
5909       break;
5910 
5911     case ARG_STATE_CHECK:
5912       scn = *((StateCheckNumType* )bp);
5913       bp += SIZE_STATE_CHECK_NUM;
5914       fprintf(f, ":%d", scn);
5915       break;
5916     }
5917   }
5918   else {
5919     switch (*bp++) {
5920     case OP_EXACT1:
5921     case OP_ANYCHAR_STAR_PEEK_NEXT:
5922     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
5923       p_string(f, 1, bp++); break;
5924     case OP_EXACT2:
5925       p_string(f, 2, bp); bp += 2; break;
5926     case OP_EXACT3:
5927       p_string(f, 3, bp); bp += 3; break;
5928     case OP_EXACT4:
5929       p_string(f, 4, bp); bp += 4; break;
5930     case OP_EXACT5:
5931       p_string(f, 5, bp); bp += 5; break;
5932     case OP_EXACTN:
5933       GET_LENGTH_INC(len, bp);
5934       p_len_string(f, len, 1, bp);
5935       bp += len;
5936       break;
5937 
5938     case OP_EXACTMB2N1:
5939       p_string(f, 2, bp); bp += 2; break;
5940     case OP_EXACTMB2N2:
5941       p_string(f, 4, bp); bp += 4; break;
5942     case OP_EXACTMB2N3:
5943       p_string(f, 6, bp); bp += 6; break;
5944     case OP_EXACTMB2N:
5945       GET_LENGTH_INC(len, bp);
5946       p_len_string(f, len, 2, bp);
5947       bp += len * 2;
5948       break;
5949     case OP_EXACTMB3N:
5950       GET_LENGTH_INC(len, bp);
5951       p_len_string(f, len, 3, bp);
5952       bp += len * 3;
5953       break;
5954     case OP_EXACTMBN:
5955       {
5956 	int mb_len;
5957 
5958 	GET_LENGTH_INC(mb_len, bp);
5959 	GET_LENGTH_INC(len, bp);
5960 	fprintf(f, ":%d:%d:", mb_len, len);
5961 	n = len * mb_len;
5962 	while (n-- > 0) { fputc(*bp++, f); }
5963       }
5964       break;
5965 
5966     case OP_EXACT1_IC:
5967       len = enclen(enc, bp);
5968       p_string(f, len, bp);
5969       bp += len;
5970       break;
5971     case OP_EXACTN_IC:
5972       GET_LENGTH_INC(len, bp);
5973       p_len_string(f, len, 1, bp);
5974       bp += len;
5975       break;
5976 
5977     case OP_CCLASS:
5978       n = bitset_on_num((BitSetRef )bp);
5979       bp += SIZE_BITSET;
5980       fprintf(f, ":%d", n);
5981       break;
5982 
5983     case OP_CCLASS_NOT:
5984       n = bitset_on_num((BitSetRef )bp);
5985       bp += SIZE_BITSET;
5986       fprintf(f, ":%d", n);
5987       break;
5988 
5989     case OP_CCLASS_MB:
5990     case OP_CCLASS_MB_NOT:
5991       GET_LENGTH_INC(len, bp);
5992       q = bp;
5993 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5994       ALIGNMENT_RIGHT(q);
5995 #endif
5996       GET_CODE_POINT(code, q);
5997       bp += len;
5998       fprintf(f, ":%d:%d", (int )code, len);
5999       break;
6000 
6001     case OP_CCLASS_MIX:
6002     case OP_CCLASS_MIX_NOT:
6003       n = bitset_on_num((BitSetRef )bp);
6004       bp += SIZE_BITSET;
6005       GET_LENGTH_INC(len, bp);
6006       q = bp;
6007 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6008       ALIGNMENT_RIGHT(q);
6009 #endif
6010       GET_CODE_POINT(code, q);
6011       bp += len;
6012       fprintf(f, ":%d:%d:%d", n, (int )code, len);
6013       break;
6014 
6015     case OP_CCLASS_NODE:
6016       {
6017         CClassNode *cc;
6018 
6019         GET_POINTER_INC(cc, bp);
6020         n = bitset_on_num(cc->bs);
6021         fprintf(f, ":%u:%d", (unsigned int )cc, n);
6022       }
6023       break;
6024 
6025     case OP_BACKREFN_IC:
6026       mem = *((MemNumType* )bp);
6027       bp += SIZE_MEMNUM;
6028       fprintf(f, ":%d", mem);
6029       break;
6030 
6031     case OP_BACKREF_MULTI_IC:
6032     case OP_BACKREF_MULTI:
6033       fputs(" ", f);
6034       GET_LENGTH_INC(len, bp);
6035       for (i = 0; i < len; i++) {
6036 	GET_MEMNUM_INC(mem, bp);
6037 	if (i > 0) fputs(", ", f);
6038 	fprintf(f, "%d", mem);
6039       }
6040       break;
6041 
6042     case OP_BACKREF_WITH_LEVEL:
6043       {
6044 	OnigOptionType option;
6045 	LengthType level;
6046 
6047 	GET_OPTION_INC(option, bp);
6048 	fprintf(f, ":%d", option);
6049 	GET_LENGTH_INC(level, bp);
6050 	fprintf(f, ":%d", level);
6051 
6052 	fputs(" ", f);
6053 	GET_LENGTH_INC(len, bp);
6054 	for (i = 0; i < len; i++) {
6055 	  GET_MEMNUM_INC(mem, bp);
6056 	  if (i > 0) fputs(", ", f);
6057 	  fprintf(f, "%d", mem);
6058 	}
6059       }
6060       break;
6061 
6062     case OP_REPEAT:
6063     case OP_REPEAT_NG:
6064       {
6065 	mem = *((MemNumType* )bp);
6066 	bp += SIZE_MEMNUM;
6067 	addr = *((RelAddrType* )bp);
6068 	bp += SIZE_RELADDR;
6069 	fprintf(f, ":%d:%d", mem, addr);
6070       }
6071       break;
6072 
6073     case OP_PUSH_OR_JUMP_EXACT1:
6074     case OP_PUSH_IF_PEEK_NEXT:
6075       addr = *((RelAddrType* )bp);
6076       bp += SIZE_RELADDR;
6077       fprintf(f, ":(%d)", addr);
6078       p_string(f, 1, bp);
6079       bp += 1;
6080       break;
6081 
6082     case OP_LOOK_BEHIND:
6083       GET_LENGTH_INC(len, bp);
6084       fprintf(f, ":%d", len);
6085       break;
6086 
6087     case OP_PUSH_LOOK_BEHIND_NOT:
6088       GET_RELADDR_INC(addr, bp);
6089       GET_LENGTH_INC(len, bp);
6090       fprintf(f, ":%d:(%d)", len, addr);
6091       break;
6092 
6093     case OP_STATE_CHECK_PUSH:
6094     case OP_STATE_CHECK_PUSH_OR_JUMP:
6095       scn = *((StateCheckNumType* )bp);
6096       bp += SIZE_STATE_CHECK_NUM;
6097       addr = *((RelAddrType* )bp);
6098       bp += SIZE_RELADDR;
6099       fprintf(f, ":%d:(%d)", scn, addr);
6100       break;
6101 
6102     default:
6103       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6104 	      *--bp);
6105     }
6106   }
6107   fputs("]", f);
6108   if (nextp) *nextp = bp;
6109 }
6110 
6111 static void
print_compiled_byte_code_list(FILE * f,regex_t * reg)6112 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6113 {
6114   int ncode;
6115   UChar* bp = reg->p;
6116   UChar* end = reg->p + reg->used;
6117 
6118   fprintf(f, "code length: %d\n", reg->used);
6119 
6120   ncode = 0;
6121   while (bp < end) {
6122     ncode++;
6123     if (bp > reg->p) {
6124       if (ncode % 5 == 0)
6125 	fprintf(f, "\n");
6126       else
6127 	fputs(" ", f);
6128     }
6129     onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
6130   }
6131 
6132   fprintf(f, "\n");
6133 }
6134 
6135 static void
print_indent_tree(FILE * f,Node * node,int indent)6136 print_indent_tree(FILE* f, Node* node, int indent)
6137 {
6138   int i, type;
6139   int add = 3;
6140   UChar* p;
6141 
6142   Indent(f, indent);
6143   if (IS_NULL(node)) {
6144     fprintf(f, "ERROR: null node!!!\n");
6145     exit (0);
6146   }
6147 
6148   type = NTYPE(node);
6149   switch (type) {
6150   case NT_LIST:
6151   case NT_ALT:
6152     if (NTYPE(node) == NT_LIST)
6153       fprintf(f, "<list:%x>\n", (int )node);
6154     else
6155       fprintf(f, "<alt:%x>\n", (int )node);
6156 
6157     print_indent_tree(f, NCAR(node), indent + add);
6158     while (IS_NOT_NULL(node = NCDR(node))) {
6159       if (NTYPE(node) != type) {
6160 	fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6161 	exit(0);
6162       }
6163       print_indent_tree(f, NCAR(node), indent + add);
6164     }
6165     break;
6166 
6167   case NT_STR:
6168     fprintf(f, "<string%s:%x>",
6169 	    (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
6170     for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6171       if (*p >= 0x20 && *p < 0x7f)
6172 	fputc(*p, f);
6173       else {
6174 	fprintf(f, " 0x%02x", *p);
6175       }
6176     }
6177     break;
6178 
6179   case NT_CCLASS:
6180     fprintf(f, "<cclass:%x>", (int )node);
6181     if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6182     if (NCCLASS(node)->mbuf) {
6183       BBuf* bbuf = NCCLASS(node)->mbuf;
6184       for (i = 0; i < bbuf->used; i++) {
6185 	if (i > 0) fprintf(f, ",");
6186 	fprintf(f, "%0x", bbuf->p[i]);
6187       }
6188     }
6189     break;
6190 
6191   case NT_CTYPE:
6192     fprintf(f, "<ctype:%x> ", (int )node);
6193     switch (NCTYPE(node)->ctype) {
6194     case ONIGENC_CTYPE_WORD:
6195       if (NCTYPE(node)->not != 0)
6196 	fputs("not word",       f);
6197       else
6198 	fputs("word",           f);
6199       break;
6200 
6201     default:
6202       fprintf(f, "ERROR: undefined ctype.\n");
6203       exit(0);
6204     }
6205     break;
6206 
6207   case NT_CANY:
6208     fprintf(f, "<anychar:%x>", (int )node);
6209     break;
6210 
6211   case NT_ANCHOR:
6212     fprintf(f, "<anchor:%x> ", (int )node);
6213     switch (NANCHOR(node)->type) {
6214     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
6215     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
6216     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
6217     case ANCHOR_END_LINE:       fputs("end line",       f); break;
6218     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
6219     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6220 
6221     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
6222     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
6223 #ifdef USE_WORD_BEGIN_END
6224     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
6225     case ANCHOR_WORD_END:        fputs("word end", f);       break;
6226 #endif
6227     case ANCHOR_PREC_READ:       fputs("prec read",      f); break;
6228     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); break;
6229     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); break;
6230     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
6231 
6232     default:
6233       fprintf(f, "ERROR: undefined anchor type.\n");
6234       break;
6235     }
6236     break;
6237 
6238   case NT_BREF:
6239     {
6240       int* p;
6241       BRefNode* br = NBREF(node);
6242       p = BACKREFS_P(br);
6243       fprintf(f, "<backref:%x>", (int )node);
6244       for (i = 0; i < br->back_num; i++) {
6245 	if (i > 0) fputs(", ", f);
6246 	fprintf(f, "%d", p[i]);
6247       }
6248     }
6249     break;
6250 
6251 #ifdef USE_SUBEXP_CALL
6252   case NT_CALL:
6253     {
6254       CallNode* cn = NCALL(node);
6255       fprintf(f, "<call:%x>", (int )node);
6256       p_string(f, cn->name_end - cn->name, cn->name);
6257     }
6258     break;
6259 #endif
6260 
6261   case NT_QTFR:
6262     fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
6263 	    NQTFR(node)->lower, NQTFR(node)->upper,
6264 	    (NQTFR(node)->greedy ? "" : "?"));
6265     print_indent_tree(f, NQTFR(node)->target, indent + add);
6266     break;
6267 
6268   case NT_ENCLOSE:
6269     fprintf(f, "<enclose:%x> ", (int )node);
6270     switch (NENCLOSE(node)->type) {
6271     case ENCLOSE_OPTION:
6272       fprintf(f, "option:%d", NENCLOSE(node)->option);
6273       break;
6274     case ENCLOSE_MEMORY:
6275       fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6276       break;
6277     case ENCLOSE_STOP_BACKTRACK:
6278       fprintf(f, "stop-bt");
6279       break;
6280 
6281     default:
6282       break;
6283     }
6284     fprintf(f, "\n");
6285     print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6286     break;
6287 
6288   default:
6289     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6290     break;
6291   }
6292 
6293   if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6294       type != NT_ENCLOSE)
6295     fprintf(f, "\n");
6296   fflush(f);
6297 }
6298 #endif /* ONIG_DEBUG */
6299 
6300 #ifdef ONIG_DEBUG_PARSE_TREE
6301 static void
print_tree(FILE * f,Node * node)6302 print_tree(FILE* f, Node* node)
6303 {
6304   print_indent_tree(f, node, 0);
6305 }
6306 #endif
6307