1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "regint.h"
33
34 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35
36 #ifdef USE_CRNL_AS_LINE_TERMINATOR
37 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
38 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
39 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
40 #endif
41
42 #ifdef USE_CAPTURE_HISTORY
43 static void history_tree_free(OnigCaptureTreeNode* node);
44
45 static void
history_tree_clear(OnigCaptureTreeNode * node)46 history_tree_clear(OnigCaptureTreeNode* node)
47 {
48 int i;
49
50 if (IS_NOT_NULL(node)) {
51 for (i = 0; i < node->num_childs; i++) {
52 if (IS_NOT_NULL(node->childs[i])) {
53 history_tree_free(node->childs[i]);
54 }
55 }
56 for (i = 0; i < node->allocated; i++) {
57 node->childs[i] = (OnigCaptureTreeNode* )0;
58 }
59 node->num_childs = 0;
60 node->beg = ONIG_REGION_NOTPOS;
61 node->end = ONIG_REGION_NOTPOS;
62 node->group = -1;
63 }
64 }
65
66 static void
history_tree_free(OnigCaptureTreeNode * node)67 history_tree_free(OnigCaptureTreeNode* node)
68 {
69 history_tree_clear(node);
70 xfree(node);
71 }
72
73 static void
history_root_free(OnigRegion * r)74 history_root_free(OnigRegion* r)
75 {
76 if (IS_NOT_NULL(r->history_root)) {
77 history_tree_free(r->history_root);
78 r->history_root = (OnigCaptureTreeNode* )0;
79 }
80 }
81
82 static OnigCaptureTreeNode*
history_node_new(void)83 history_node_new(void)
84 {
85 OnigCaptureTreeNode* node;
86
87 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
88 CHECK_NULL_RETURN(node);
89 node->childs = (OnigCaptureTreeNode** )0;
90 node->allocated = 0;
91 node->num_childs = 0;
92 node->group = -1;
93 node->beg = ONIG_REGION_NOTPOS;
94 node->end = ONIG_REGION_NOTPOS;
95
96 return node;
97 }
98
99 static int
history_tree_add_child(OnigCaptureTreeNode * parent,OnigCaptureTreeNode * child)100 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
101 {
102 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
103
104 if (parent->num_childs >= parent->allocated) {
105 int n, i;
106
107 if (IS_NULL(parent->childs)) {
108 n = HISTORY_TREE_INIT_ALLOC_SIZE;
109 parent->childs =
110 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
111 }
112 else {
113 n = parent->allocated * 2;
114 parent->childs =
115 (OnigCaptureTreeNode** )xrealloc(parent->childs,
116 sizeof(OnigCaptureTreeNode*) * n,
117 sizeof(OnigCaptureTreeNode*) * parent->allocated);
118 }
119 CHECK_NULL_RETURN_MEMERR(parent->childs);
120 for (i = parent->allocated; i < n; i++) {
121 parent->childs[i] = (OnigCaptureTreeNode* )0;
122 }
123 parent->allocated = n;
124 }
125
126 parent->childs[parent->num_childs] = child;
127 parent->num_childs++;
128 return 0;
129 }
130
131 static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode * node)132 history_tree_clone(OnigCaptureTreeNode* node)
133 {
134 int i;
135 OnigCaptureTreeNode *clone, *child;
136
137 clone = history_node_new();
138 CHECK_NULL_RETURN(clone);
139
140 clone->beg = node->beg;
141 clone->end = node->end;
142 for (i = 0; i < node->num_childs; i++) {
143 child = history_tree_clone(node->childs[i]);
144 if (IS_NULL(child)) {
145 history_tree_free(clone);
146 return (OnigCaptureTreeNode* )0;
147 }
148 history_tree_add_child(clone, child);
149 }
150
151 return clone;
152 }
153
154 extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion * region)155 onig_get_capture_tree(OnigRegion* region)
156 {
157 return region->history_root;
158 }
159 #endif /* USE_CAPTURE_HISTORY */
160
161 extern void
onig_region_clear(OnigRegion * region)162 onig_region_clear(OnigRegion* region)
163 {
164 int i;
165
166 for (i = 0; i < region->num_regs; i++) {
167 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
168 }
169 #ifdef USE_CAPTURE_HISTORY
170 history_root_free(region);
171 #endif
172 }
173
174 extern int
onig_region_resize(OnigRegion * region,int n)175 onig_region_resize(OnigRegion* region, int n)
176 {
177 region->num_regs = n;
178
179 if (n < ONIG_NREGION)
180 n = ONIG_NREGION;
181
182 if (region->allocated == 0) {
183 region->beg = (int* )xmalloc(n * sizeof(int));
184 region->end = (int* )xmalloc(n * sizeof(int));
185
186 if (region->beg == 0 || region->end == 0)
187 return ONIGERR_MEMORY;
188
189 region->allocated = n;
190 }
191 else if (region->allocated < n) {
192 region->beg = (int* )xrealloc(region->beg, n * sizeof(int), region->allocated * sizeof(int));
193 region->end = (int* )xrealloc(region->end, n * sizeof(int), region->allocated * sizeof(int));
194
195 if (region->beg == 0 || region->end == 0)
196 return ONIGERR_MEMORY;
197
198 region->allocated = n;
199 }
200
201 return 0;
202 }
203
204 static int
onig_region_resize_clear(OnigRegion * region,int n)205 onig_region_resize_clear(OnigRegion* region, int n)
206 {
207 int r;
208
209 r = onig_region_resize(region, n);
210 if (r != 0) return r;
211 onig_region_clear(region);
212 return 0;
213 }
214
215 extern int
onig_region_set(OnigRegion * region,int at,int beg,int end)216 onig_region_set(OnigRegion* region, int at, int beg, int end)
217 {
218 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
219
220 if (at >= region->allocated) {
221 int r = onig_region_resize(region, at + 1);
222 if (r < 0) return r;
223 }
224
225 region->beg[at] = beg;
226 region->end[at] = end;
227 return 0;
228 }
229
230 extern void
onig_region_init(OnigRegion * region)231 onig_region_init(OnigRegion* region)
232 {
233 region->num_regs = 0;
234 region->allocated = 0;
235 region->beg = (int* )0;
236 region->end = (int* )0;
237 region->history_root = (OnigCaptureTreeNode* )0;
238 }
239
240 extern OnigRegion*
onig_region_new(void)241 onig_region_new(void)
242 {
243 OnigRegion* r;
244
245 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
246 if (r != NULL) {
247 onig_region_init(r);
248 }
249 return r;
250 }
251
252 extern void
onig_region_free(OnigRegion * r,int free_self)253 onig_region_free(OnigRegion* r, int free_self)
254 {
255 if (r) {
256 if (r->allocated > 0) {
257 if (r->beg) xfree(r->beg);
258 if (r->end) xfree(r->end);
259 r->allocated = 0;
260 }
261 #ifdef USE_CAPTURE_HISTORY
262 history_root_free(r);
263 #endif
264 if (free_self) xfree(r);
265 }
266 }
267
268 extern void
onig_region_copy(OnigRegion * to,OnigRegion * from)269 onig_region_copy(OnigRegion* to, OnigRegion* from)
270 {
271 #define RREGC_SIZE (sizeof(int) * from->num_regs)
272 int i;
273
274 if (to == from) return;
275
276 if (to->allocated == 0) {
277 if (from->num_regs > 0) {
278 to->beg = (int* )xmalloc(RREGC_SIZE);
279 to->end = (int* )xmalloc(RREGC_SIZE);
280 to->allocated = from->num_regs;
281 }
282 }
283 else if (to->allocated < from->num_regs) {
284 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated);
285 to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated);
286 to->allocated = from->num_regs;
287 }
288
289 if (to->beg == NULL || to->end == NULL) {
290 return;
291 }
292
293 for (i = 0; i < from->num_regs; i++) {
294 to->beg[i] = from->beg[i];
295 to->end[i] = from->end[i];
296 }
297 to->num_regs = from->num_regs;
298
299 #ifdef USE_CAPTURE_HISTORY
300 history_root_free(to);
301
302 if (IS_NOT_NULL(from->history_root)) {
303 to->history_root = history_tree_clone(from->history_root);
304 }
305 #endif
306 }
307
308
309 /** stack **/
310 #define INVALID_STACK_INDEX -1
311
312 /* stack type */
313 /* used by normal-POP */
314 #define STK_ALT 0x0001
315 #define STK_LOOK_BEHIND_NOT 0x0002
316 #define STK_POS_NOT 0x0003
317 /* handled by normal-POP */
318 #define STK_MEM_START 0x0100
319 #define STK_MEM_END 0x8200
320 #define STK_REPEAT_INC 0x0300
321 #define STK_STATE_CHECK_MARK 0x1000
322 /* avoided by normal-POP */
323 #define STK_NULL_CHECK_START 0x3000
324 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
325 #define STK_MEM_END_MARK 0x8400
326 #define STK_POS 0x0500 /* used when POP-POS */
327 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
328 #define STK_REPEAT 0x0700
329 #define STK_CALL_FRAME 0x0800
330 #define STK_RETURN 0x0900
331 #define STK_VOID 0x0a00 /* for fill a blank */
332
333 /* stack type check mask */
334 #define STK_MASK_POP_USED 0x00ff
335 #define STK_MASK_TO_VOID_TARGET 0x10ff
336 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
337
338 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
339 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
340 (msa).stack_p = (void* )0;\
341 (msa).options = (arg_option);\
342 (msa).region = (arg_region);\
343 (msa).start = (arg_start);\
344 (msa).best_len = ONIG_MISMATCH;\
345 } while(0)
346 #else
347 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
348 (msa).stack_p = (void* )0;\
349 (msa).options = (arg_option);\
350 (msa).region = (arg_region);\
351 (msa).start = (arg_start);\
352 } while(0)
353 #endif
354
355 #ifdef USE_COMBINATION_EXPLOSION_CHECK
356
357 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
358
359 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
360 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
361 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
362 offset = ((offset) * (state_num)) >> 3;\
363 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
364 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
365 (msa).state_check_buff = (void* )xmalloc(size);\
366 else \
367 (msa).state_check_buff = (void* )xalloca(size);\
368 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
369 (size_t )(size - (offset))); \
370 (msa).state_check_buff_size = size;\
371 }\
372 else {\
373 (msa).state_check_buff = (void* )0;\
374 (msa).state_check_buff_size = 0;\
375 }\
376 }\
377 else {\
378 (msa).state_check_buff = (void* )0;\
379 (msa).state_check_buff_size = 0;\
380 }\
381 } while(0)
382
383 #define MATCH_ARG_FREE(msa) do {\
384 if ((msa).stack_p) xfree((msa).stack_p);\
385 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
386 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
387 }\
388 } while(0)
389 #else
390 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
391 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
392 #endif
393
394
395
396 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
397 if (msa->stack_p) {\
398 alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num));\
399 stk_alloc = (OnigStackType* )(msa->stack_p);\
400 stk_base = stk_alloc;\
401 stk = stk_base;\
402 stk_end = stk_base + msa->stack_n;\
403 }\
404 else {\
405 alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num)\
406 + sizeof(OnigStackType) * (stack_num));\
407 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
408 stk_base = stk_alloc;\
409 stk = stk_base;\
410 stk_end = stk_base + (stack_num);\
411 }\
412 } while(0)
413
414 #define STACK_SAVE do{\
415 if (stk_base != stk_alloc) {\
416 msa->stack_p = stk_base;\
417 msa->stack_n = (int)(stk_end - stk_base);\
418 };\
419 } while(0)
420
421 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
422
423 extern unsigned int
onig_get_match_stack_limit_size(void)424 onig_get_match_stack_limit_size(void)
425 {
426 return MatchStackLimitSize;
427 }
428
429 extern int
onig_set_match_stack_limit_size(unsigned int size)430 onig_set_match_stack_limit_size(unsigned int size)
431 {
432 MatchStackLimitSize = size;
433 return 0;
434 }
435
436 static int
stack_double(OnigStackType ** arg_stk_base,OnigStackType ** arg_stk_end,OnigStackType ** arg_stk,OnigStackType * stk_alloc,OnigMatchArg * msa)437 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
438 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
439 {
440 unsigned int n;
441 OnigStackType *x, *stk_base, *stk_end, *stk;
442
443 stk_base = *arg_stk_base;
444 stk_end = *arg_stk_end;
445 stk = *arg_stk;
446
447 n = (unsigned int)(stk_end - stk_base);
448 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
449 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
450 if (IS_NULL(x)) {
451 STACK_SAVE;
452 return ONIGERR_MEMORY;
453 }
454 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
455 n *= 2;
456 }
457 else {
458 n *= 2;
459 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
460 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
461 return ONIGERR_MATCH_STACK_LIMIT_OVER;
462 else
463 n = MatchStackLimitSize;
464 }
465 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n, sizeof(OnigStackType) * (stk_end - stk_base));
466 if (IS_NULL(x)) {
467 STACK_SAVE;
468 return ONIGERR_MEMORY;
469 }
470 }
471 *arg_stk = x + (stk - stk_base);
472 *arg_stk_base = x;
473 *arg_stk_end = x + n;
474 return 0;
475 }
476
477 #define STACK_ENSURE(n) do {\
478 if (stk_end - stk < (n)) {\
479 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
480 if (r != 0) { STACK_SAVE; return r; } \
481 }\
482 } while(0)
483
484 #define STACK_AT(index) (stk_base + (index))
485 #define GET_STACK_INDEX(stk) ((OnigStackIndex)((stk) - stk_base))
486
487 #define STACK_PUSH_TYPE(stack_type) do {\
488 STACK_ENSURE(1);\
489 stk->type = (stack_type);\
490 STACK_INC;\
491 } while(0)
492
493 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
494
495 #ifdef USE_COMBINATION_EXPLOSION_CHECK
496 #define STATE_CHECK_POS(s,snum) \
497 (((s) - str) * num_comb_exp_check + ((snum) - 1))
498 #define STATE_CHECK_VAL(v,snum) do {\
499 if (state_check_buff != NULL) {\
500 int x = STATE_CHECK_POS(s,snum);\
501 (v) = state_check_buff[x/8] & (1<<(x%8));\
502 }\
503 else (v) = 0;\
504 } while(0)
505
506
507 #define ELSE_IF_STATE_CHECK_MARK(stk) \
508 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
509 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
510 state_check_buff[x/8] |= (1<<(x%8)); \
511 }
512
513 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
514 STACK_ENSURE(1);\
515 stk->type = (stack_type);\
516 stk->u.state.pcode = (pat);\
517 stk->u.state.pstr = (s);\
518 stk->u.state.pstr_prev = (sprev);\
519 stk->u.state.state_check = 0;\
520 STACK_INC;\
521 } while(0)
522
523 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
524 stk->type = (stack_type);\
525 stk->u.state.pcode = (pat);\
526 stk->u.state.state_check = 0;\
527 STACK_INC;\
528 } while(0)
529
530 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
531 STACK_ENSURE(1);\
532 stk->type = STK_ALT;\
533 stk->u.state.pcode = (pat);\
534 stk->u.state.pstr = (s);\
535 stk->u.state.pstr_prev = (sprev);\
536 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
537 STACK_INC;\
538 } while(0)
539
540 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
541 if (state_check_buff != NULL) {\
542 STACK_ENSURE(1);\
543 stk->type = STK_STATE_CHECK_MARK;\
544 stk->u.state.pstr = (s);\
545 stk->u.state.state_check = (snum);\
546 STACK_INC;\
547 }\
548 } while(0)
549
550 #else /* USE_COMBINATION_EXPLOSION_CHECK */
551
552 #define ELSE_IF_STATE_CHECK_MARK(stk)
553
554 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
555 STACK_ENSURE(1);\
556 stk->type = (stack_type);\
557 stk->u.state.pcode = (pat);\
558 stk->u.state.pstr = (s);\
559 stk->u.state.pstr_prev = (sprev);\
560 STACK_INC;\
561 } while(0)
562
563 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
564 stk->type = (stack_type);\
565 stk->u.state.pcode = (pat);\
566 STACK_INC;\
567 } while(0)
568 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
569
570 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
571 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
572 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
573 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
574 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
575 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
576
577 #define STACK_PUSH_REPEAT(id, pat) do {\
578 STACK_ENSURE(1);\
579 stk->type = STK_REPEAT;\
580 stk->u.repeat.num = (id);\
581 stk->u.repeat.pcode = (pat);\
582 stk->u.repeat.count = 0;\
583 STACK_INC;\
584 } while(0)
585
586 #define STACK_PUSH_REPEAT_INC(sindex) do {\
587 STACK_ENSURE(1);\
588 stk->type = STK_REPEAT_INC;\
589 stk->u.repeat_inc.si = (sindex);\
590 STACK_INC;\
591 } while(0)
592
593 #define STACK_PUSH_MEM_START(mnum, s) do {\
594 STACK_ENSURE(1);\
595 stk->type = STK_MEM_START;\
596 stk->u.mem.num = (int)(mnum);\
597 stk->u.mem.pstr = (s);\
598 stk->u.mem.start = mem_start_stk[mnum];\
599 stk->u.mem.end = mem_end_stk[mnum];\
600 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
601 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
602 STACK_INC;\
603 } while(0)
604
605 #define STACK_PUSH_MEM_END(mnum, s) do {\
606 STACK_ENSURE(1);\
607 stk->type = STK_MEM_END;\
608 stk->u.mem.num = (mnum);\
609 stk->u.mem.pstr = (s);\
610 stk->u.mem.start = mem_start_stk[mnum];\
611 stk->u.mem.end = mem_end_stk[mnum];\
612 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
613 STACK_INC;\
614 } while(0)
615
616 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
617 STACK_ENSURE(1);\
618 stk->type = STK_MEM_END_MARK;\
619 stk->u.mem.num = (mnum);\
620 STACK_INC;\
621 } while(0)
622
623 #define STACK_GET_MEM_START(mnum, k) do {\
624 int level = 0;\
625 k = stk;\
626 while (k > stk_base) {\
627 k--;\
628 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
629 && k->u.mem.num == (mnum)) {\
630 level++;\
631 }\
632 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
633 if (level == 0) break;\
634 level--;\
635 }\
636 }\
637 } while(0)
638
639 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
640 int level = 0;\
641 while (k < stk) {\
642 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
643 if (level == 0) (start) = k->u.mem.pstr;\
644 level++;\
645 }\
646 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
647 level--;\
648 if (level == 0) {\
649 (end) = k->u.mem.pstr;\
650 break;\
651 }\
652 }\
653 k++;\
654 }\
655 } while(0)
656
657 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
658 STACK_ENSURE(1);\
659 stk->type = STK_NULL_CHECK_START;\
660 stk->u.null_check.num = (cnum);\
661 stk->u.null_check.pstr = (s);\
662 STACK_INC;\
663 } while(0)
664
665 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
666 STACK_ENSURE(1);\
667 stk->type = STK_NULL_CHECK_END;\
668 stk->u.null_check.num = (cnum);\
669 STACK_INC;\
670 } while(0)
671
672 #define STACK_PUSH_CALL_FRAME(pat) do {\
673 STACK_ENSURE(1);\
674 stk->type = STK_CALL_FRAME;\
675 stk->u.call_frame.ret_addr = (pat);\
676 STACK_INC;\
677 } while(0)
678
679 #define STACK_PUSH_RETURN do {\
680 STACK_ENSURE(1);\
681 stk->type = STK_RETURN;\
682 STACK_INC;\
683 } while(0)
684
685
686 #ifdef ONIG_DEBUG
687 #define STACK_BASE_CHECK(p, at) \
688 if ((p) < stk_base) {\
689 fprintf(stderr, "at %s\n", at);\
690 goto stack_error;\
691 }
692 #else
693 #define STACK_BASE_CHECK(p, at)
694 #endif
695
696 #define STACK_POP_ONE do {\
697 stk--;\
698 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
699 } while(0)
700
701 #define STACK_POP do {\
702 switch (pop_level) {\
703 case STACK_POP_LEVEL_FREE:\
704 while (1) {\
705 stk--;\
706 STACK_BASE_CHECK(stk, "STACK_POP"); \
707 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
708 ELSE_IF_STATE_CHECK_MARK(stk);\
709 }\
710 break;\
711 case STACK_POP_LEVEL_MEM_START:\
712 while (1) {\
713 stk--;\
714 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
715 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
716 else if (stk->type == STK_MEM_START) {\
717 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
718 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
719 }\
720 ELSE_IF_STATE_CHECK_MARK(stk);\
721 }\
722 break;\
723 default:\
724 while (1) {\
725 stk--;\
726 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
727 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
728 else if (stk->type == STK_MEM_START) {\
729 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
730 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
731 }\
732 else if (stk->type == STK_REPEAT_INC) {\
733 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
734 }\
735 else if (stk->type == STK_MEM_END) {\
736 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
737 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
738 }\
739 ELSE_IF_STATE_CHECK_MARK(stk);\
740 }\
741 break;\
742 }\
743 } while(0)
744
745 #define STACK_POP_TIL_POS_NOT do {\
746 while (1) {\
747 stk--;\
748 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
749 if (stk->type == STK_POS_NOT) break;\
750 else if (stk->type == STK_MEM_START) {\
751 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
752 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
753 }\
754 else if (stk->type == STK_REPEAT_INC) {\
755 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
756 }\
757 else if (stk->type == STK_MEM_END) {\
758 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
759 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
760 }\
761 ELSE_IF_STATE_CHECK_MARK(stk);\
762 }\
763 } while(0)
764
765 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
766 while (1) {\
767 stk--;\
768 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
769 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
770 else if (stk->type == STK_MEM_START) {\
771 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
772 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
773 }\
774 else if (stk->type == STK_REPEAT_INC) {\
775 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
776 }\
777 else if (stk->type == STK_MEM_END) {\
778 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
779 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
780 }\
781 ELSE_IF_STATE_CHECK_MARK(stk);\
782 }\
783 } while(0)
784
785 #define STACK_POS_END(k) do {\
786 k = stk;\
787 while (1) {\
788 k--;\
789 STACK_BASE_CHECK(k, "STACK_POS_END"); \
790 if (IS_TO_VOID_TARGET(k)) {\
791 k->type = STK_VOID;\
792 }\
793 else if (k->type == STK_POS) {\
794 k->type = STK_VOID;\
795 break;\
796 }\
797 }\
798 } while(0)
799
800 #define STACK_STOP_BT_END do {\
801 OnigStackType *k = stk;\
802 while (1) {\
803 k--;\
804 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
805 if (IS_TO_VOID_TARGET(k)) {\
806 k->type = STK_VOID;\
807 }\
808 else if (k->type == STK_STOP_BT) {\
809 k->type = STK_VOID;\
810 break;\
811 }\
812 }\
813 } while(0)
814
815 #define STACK_NULL_CHECK(isnull,id,s) do {\
816 OnigStackType* k = stk;\
817 while (1) {\
818 k--;\
819 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
820 if (k->type == STK_NULL_CHECK_START) {\
821 if (k->u.null_check.num == (id)) {\
822 (isnull) = (k->u.null_check.pstr == (s));\
823 break;\
824 }\
825 }\
826 }\
827 } while(0)
828
829 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
830 int level = 0;\
831 OnigStackType* k = stk;\
832 while (1) {\
833 k--;\
834 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
835 if (k->type == STK_NULL_CHECK_START) {\
836 if (k->u.null_check.num == (id)) {\
837 if (level == 0) {\
838 (isnull) = (k->u.null_check.pstr == (s));\
839 break;\
840 }\
841 else level--;\
842 }\
843 }\
844 else if (k->type == STK_NULL_CHECK_END) {\
845 level++;\
846 }\
847 }\
848 } while(0)
849
850 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
851 OnigStackType* k = stk;\
852 while (1) {\
853 k--;\
854 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
855 if (k->type == STK_NULL_CHECK_START) {\
856 if (k->u.null_check.num == (id)) {\
857 if (k->u.null_check.pstr != (s)) {\
858 (isnull) = 0;\
859 break;\
860 }\
861 else {\
862 UChar* endp;\
863 (isnull) = 1;\
864 while (k < stk) {\
865 if (k->type == STK_MEM_START) {\
866 if (k->u.mem.end == INVALID_STACK_INDEX) {\
867 (isnull) = 0; break;\
868 }\
869 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
870 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
871 else\
872 endp = (UChar* )(UINTN)k->u.mem.end;\
873 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
874 (isnull) = 0; break;\
875 }\
876 else if (endp != s) {\
877 (isnull) = -1; /* empty, but position changed */ \
878 }\
879 }\
880 k++;\
881 }\
882 break;\
883 }\
884 }\
885 }\
886 }\
887 } while(0)
888
889 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
890 int level = 0;\
891 OnigStackType* k = stk;\
892 while (1) {\
893 k--;\
894 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
895 if (k->type == STK_NULL_CHECK_START) {\
896 if (k->u.null_check.num == (id)) {\
897 if (level == 0) {\
898 if (k->u.null_check.pstr != (s)) {\
899 (isnull) = 0;\
900 break;\
901 }\
902 else {\
903 UChar* endp;\
904 (isnull) = 1;\
905 while (k < stk) {\
906 if (k->type == STK_MEM_START) {\
907 if (k->u.mem.end == INVALID_STACK_INDEX) {\
908 (isnull) = 0; break;\
909 }\
910 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
911 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
912 else\
913 endp = (UChar* )(UINTN)k->u.mem.end;\
914 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
915 (isnull) = 0; break;\
916 }\
917 else if (endp != s) {\
918 (isnull) = -1; /* empty, but position changed */ \
919 }\
920 }\
921 k++;\
922 }\
923 break;\
924 }\
925 }\
926 else {\
927 level--;\
928 }\
929 }\
930 }\
931 else if (k->type == STK_NULL_CHECK_END) {\
932 if (k->u.null_check.num == (id)) level++;\
933 }\
934 }\
935 } while(0)
936
937 #define STACK_GET_REPEAT(id, k) do {\
938 int level = 0;\
939 k = stk;\
940 while (1) {\
941 k--;\
942 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
943 if (k->type == STK_REPEAT) {\
944 if (level == 0) {\
945 if (k->u.repeat.num == (id)) {\
946 break;\
947 }\
948 }\
949 }\
950 else if (k->type == STK_CALL_FRAME) level--;\
951 else if (k->type == STK_RETURN) level++;\
952 }\
953 } while(0)
954
955 #define STACK_RETURN(addr) do {\
956 int level = 0;\
957 OnigStackType* k = stk;\
958 while (1) {\
959 k--;\
960 STACK_BASE_CHECK(k, "STACK_RETURN"); \
961 if (k->type == STK_CALL_FRAME) {\
962 if (level == 0) {\
963 (addr) = k->u.call_frame.ret_addr;\
964 break;\
965 }\
966 else level--;\
967 }\
968 else if (k->type == STK_RETURN)\
969 level++;\
970 }\
971 } while(0)
972
973
974 #define STRING_CMP(s1,s2,len) do {\
975 while (len-- > 0) {\
976 if (*s1++ != *s2++) goto fail;\
977 }\
978 } while(0)
979
980 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
981 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
982 goto fail; \
983 } while(0)
984
string_cmp_ic(OnigEncoding enc,int case_fold_flag,UChar * s1,UChar ** ps2,int mblen)985 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
986 UChar* s1, UChar** ps2, int mblen)
987 {
988 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
989 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
990 UChar *p1, *p2, *end1, *s2, *end2;
991 int len1, len2;
992
993 s2 = *ps2;
994 end1 = s1 + mblen;
995 end2 = s2 + mblen;
996 while (s1 < end1) {
997 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
998 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
999 if (len1 != len2) return 0;
1000 p1 = buf1;
1001 p2 = buf2;
1002 while (len1-- > 0) {
1003 if (*p1 != *p2) return 0;
1004 p1++;
1005 p2++;
1006 }
1007 }
1008
1009 *ps2 = s2;
1010 return 1;
1011 }
1012
1013 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1014 is_fail = 0;\
1015 while (len-- > 0) {\
1016 if (*s1++ != *s2++) {\
1017 is_fail = 1; break;\
1018 }\
1019 }\
1020 } while(0)
1021
1022 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1023 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1024 is_fail = 1; \
1025 else \
1026 is_fail = 0; \
1027 } while(0)
1028
1029
1030 #define IS_EMPTY_STR (str == end)
1031 #define ON_STR_BEGIN(s) ((s) == str)
1032 #define ON_STR_END(s) ((s) == end)
1033 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1034 #define DATA_ENSURE_CHECK1 (s < right_range)
1035 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1036 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1037 #else
1038 #define DATA_ENSURE_CHECK1 (s < end)
1039 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1040 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1041 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1042
1043
1044 #ifdef USE_CAPTURE_HISTORY
1045 static int
make_capture_history_tree(OnigCaptureTreeNode * node,OnigStackType ** kp,OnigStackType * stk_top,UChar * str,regex_t * reg)1046 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1047 OnigStackType* stk_top, UChar* str, regex_t* reg)
1048 {
1049 int n, r;
1050 OnigCaptureTreeNode* child;
1051 OnigStackType* k = *kp;
1052
1053 while (k < stk_top) {
1054 if (k->type == STK_MEM_START) {
1055 n = k->u.mem.num;
1056 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1057 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1058 child = history_node_new();
1059 CHECK_NULL_RETURN_MEMERR(child);
1060 child->group = n;
1061 child->beg = (int )(k->u.mem.pstr - str);
1062 r = history_tree_add_child(node, child);
1063 if (r != 0) return r;
1064 *kp = (k + 1);
1065 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1066 if (r != 0) return r;
1067
1068 k = *kp;
1069 child->end = (int )(k->u.mem.pstr - str);
1070 }
1071 }
1072 else if (k->type == STK_MEM_END) {
1073 if (k->u.mem.num == node->group) {
1074 node->end = (int )(k->u.mem.pstr - str);
1075 *kp = k;
1076 return 0;
1077 }
1078 }
1079 k++;
1080 }
1081
1082 return 1; /* 1: root node ending. */
1083 }
1084 #endif
1085
1086 #ifdef USE_BACKREF_WITH_LEVEL
mem_is_in_memp(int mem,int num,UChar * memp)1087 static int mem_is_in_memp(int mem, int num, UChar* memp)
1088 {
1089 int i;
1090 MemNumType m;
1091
1092 for (i = 0; i < num; i++) {
1093 GET_MEMNUM_INC(m, memp);
1094 if (mem == (int )m) return 1;
1095 }
1096 return 0;
1097 }
1098
backref_match_at_nested_level(regex_t * reg,OnigStackType * top,OnigStackType * stk_base,int ignore_case,int case_fold_flag,int nest,int mem_num,UChar * memp,UChar ** s,const UChar * send)1099 static int backref_match_at_nested_level(regex_t* reg
1100 , OnigStackType* top, OnigStackType* stk_base
1101 , int ignore_case, int case_fold_flag
1102 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1103 {
1104 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1105 int level;
1106 OnigStackType* k;
1107
1108 level = 0;
1109 k = top;
1110 k--;
1111 while (k >= stk_base) {
1112 if (k->type == STK_CALL_FRAME) {
1113 level--;
1114 }
1115 else if (k->type == STK_RETURN) {
1116 level++;
1117 }
1118 else if (level == nest) {
1119 if (k->type == STK_MEM_START) {
1120 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1121 pstart = k->u.mem.pstr;
1122 if (pend != NULL_UCHARP) {
1123 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1124 p = pstart;
1125 ss = *s;
1126
1127 if (ignore_case != 0) {
1128 if (string_cmp_ic(reg->enc, case_fold_flag,
1129 pstart, &ss, (int )(pend - pstart)) == 0)
1130 return 0; /* or goto next_mem; */
1131 }
1132 else {
1133 while (p < pend) {
1134 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1135 }
1136 }
1137
1138 *s = ss;
1139 return 1;
1140 }
1141 }
1142 }
1143 else if (k->type == STK_MEM_END) {
1144 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1145 pend = k->u.mem.pstr;
1146 }
1147 }
1148 }
1149 k--;
1150 }
1151
1152 return 0;
1153 }
1154 #endif /* USE_BACKREF_WITH_LEVEL */
1155
1156
1157 #ifdef ONIG_DEBUG_STATISTICS
1158
1159 #define USE_TIMEOFDAY
1160
1161 #ifdef USE_TIMEOFDAY
1162 #ifdef HAVE_SYS_TIME_H
1163 #include <sys/time.h>
1164 #endif
1165 #ifdef HAVE_UNISTD_H
1166 #include <unistd.h>
1167 #endif
1168 static struct timeval ts, te;
1169 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1170 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1171 (((te).tv_sec - (ts).tv_sec)*1000000))
1172 #else
1173 #ifdef HAVE_SYS_TIMES_H
1174 #include <sys/times.h>
1175 #endif
1176 static struct tms ts, te;
1177 #define GETTIME(t) times(&(t))
1178 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1179 #endif
1180
1181 static int OpCounter[256];
1182 static int OpPrevCounter[256];
1183 static unsigned long OpTime[256];
1184 static int OpCurr = OP_FINISH;
1185 static int OpPrevTarget = OP_FAIL;
1186 static int MaxStackDepth = 0;
1187
1188 #define MOP_IN(opcode) do {\
1189 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1190 OpCurr = opcode;\
1191 OpCounter[opcode]++;\
1192 GETTIME(ts);\
1193 } while(0)
1194
1195 #define MOP_OUT do {\
1196 GETTIME(te);\
1197 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1198 } while(0)
1199
1200 extern void
onig_statistics_init(void)1201 onig_statistics_init(void)
1202 {
1203 int i;
1204 for (i = 0; i < 256; i++) {
1205 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1206 }
1207 MaxStackDepth = 0;
1208 }
1209
1210 extern void
onig_print_statistics(FILE * f)1211 onig_print_statistics(FILE* f)
1212 {
1213 int i;
1214 fprintf(f, " count prev time\n");
1215 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1216 fprintf(f, "%8d: %8d: %10ld: %s\n",
1217 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1218 }
1219 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1220 }
1221
1222 #define STACK_INC do {\
1223 stk++;\
1224 if (stk - stk_base > MaxStackDepth) \
1225 MaxStackDepth = stk - stk_base;\
1226 } while(0)
1227
1228 #else
1229 #define STACK_INC stk++
1230
1231 #define MOP_IN(opcode)
1232 #define MOP_OUT
1233 #endif
1234
1235
1236 /* matching region of POSIX API */
1237 typedef int regoff_t;
1238
1239 typedef struct {
1240 regoff_t rm_so;
1241 regoff_t rm_eo;
1242 } posix_regmatch_t;
1243
1244 /* match data(str - end) from position (sstart). */
1245 /* if sstart == str then set sprev to NULL. */
1246 static int
match_at(regex_t * reg,const UChar * str,const UChar * end,const UChar * right_range,const UChar * sstart,UChar * sprev,OnigMatchArg * msa)1247 match_at(regex_t* reg, const UChar* str, const UChar* end,
1248 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1249 const UChar* right_range,
1250 #endif
1251 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1252 {
1253 static UChar FinishCode[] = { OP_FINISH };
1254
1255 int i, n, num_mem, best_len, pop_level;
1256 LengthType tlen, tlen2;
1257 MemNumType mem;
1258 RelAddrType addr;
1259 OnigOptionType option = reg->options;
1260 OnigEncoding encode = reg->enc;
1261 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1262 UChar *s, *q, *sbegin;
1263 UChar *p = reg->p;
1264 char *alloca_base;
1265 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1266 OnigStackType *stkp; /* used as any purpose. */
1267 OnigStackIndex si;
1268 OnigStackIndex *repeat_stk;
1269 OnigStackIndex *mem_start_stk, *mem_end_stk;
1270 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1271 int scv;
1272 unsigned char* state_check_buff = msa->state_check_buff;
1273 int num_comb_exp_check = reg->num_comb_exp_check;
1274 #endif
1275 n = reg->num_repeat + reg->num_mem * 2;
1276
1277 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1278 pop_level = reg->stack_pop_level;
1279 num_mem = reg->num_mem;
1280 repeat_stk = (OnigStackIndex* )alloca_base;
1281
1282 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1283 mem_end_stk = mem_start_stk + num_mem;
1284 mem_start_stk--; /* for index start from 1,
1285 mem_start_stk[1]..mem_start_stk[num_mem] */
1286 mem_end_stk--; /* for index start from 1,
1287 mem_end_stk[1]..mem_end_stk[num_mem] */
1288 for (i = 1; i <= num_mem; i++) {
1289 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1290 }
1291
1292 #ifdef ONIG_DEBUG_MATCH
1293 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1294 (int )str, (int )end, (int )sstart, (int )sprev);
1295 fprintf(stderr, "size: %d, start offset: %d\n",
1296 (int )(end - str), (int )(sstart - str));
1297 #endif
1298
1299 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1300 best_len = ONIG_MISMATCH;
1301 s = (UChar* )sstart;
1302 while (1) {
1303 #ifdef ONIG_DEBUG_MATCH
1304 {
1305 UChar *q, *bp, buf[50];
1306 int len;
1307 fprintf(stderr, "%4d> \"", (int )(s - str));
1308 bp = buf;
1309 for (i = 0, q = s; i < 7 && q < end; i++) {
1310 len = enclen(encode, q);
1311 while (len-- > 0) *bp++ = *q++;
1312 }
1313 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1314 else { xmemcpy(bp, "\"", 1); bp += 1; }
1315 *bp = 0;
1316 fputs((char* )buf, stderr);
1317 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1318 onig_print_compiled_byte_code(stderr, p, NULL, encode);
1319 fprintf(stderr, "\n");
1320 }
1321 #endif
1322
1323 sbegin = s;
1324 switch (*p++) {
1325 case OP_END: MOP_IN(OP_END);
1326 n = (int)(s - sstart);
1327 if (n > best_len) {
1328 OnigRegion* region;
1329 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1330 if (IS_FIND_LONGEST(option)) {
1331 if (n > msa->best_len) {
1332 msa->best_len = n;
1333 msa->best_s = (UChar* )sstart;
1334 }
1335 else
1336 goto end_best_len;
1337 }
1338 #endif
1339 best_len = n;
1340 region = msa->region;
1341 if (region) {
1342 #ifdef USE_POSIX_API_REGION_OPTION
1343 if (IS_POSIX_REGION(msa->options)) {
1344 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1345
1346 rmt[0].rm_so = (regoff_t)(sstart - str);
1347 rmt[0].rm_eo = (regoff_t)(s - str);
1348 for (i = 1; i <= num_mem; i++) {
1349 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1350 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1351 rmt[i].rm_so = (regoff_t)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
1352 else
1353 rmt[i].rm_so = (regoff_t)((UChar* )((void* )(UINTN)(mem_start_stk[i])) - str);
1354
1355 rmt[i].rm_eo = (regoff_t)((BIT_STATUS_AT(reg->bt_mem_end, i)
1356 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1357 : (UChar* )((void* )(UINTN)mem_end_stk[i])) - str);
1358 }
1359 else {
1360 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1361 }
1362 }
1363 }
1364 else {
1365 #endif /* USE_POSIX_API_REGION_OPTION */
1366 region->beg[0] = (int)(sstart - str);
1367 region->end[0] = (int)(s - str);
1368 for (i = 1; i <= num_mem; i++) {
1369 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1370 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1371 region->beg[i] = (int)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
1372 else
1373 region->beg[i] = (int)((UChar* )((void* )(UINTN)mem_start_stk[i]) - str);
1374
1375 region->end[i] = (int)((BIT_STATUS_AT(reg->bt_mem_end, i)
1376 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1377 : (UChar* )((void* )(UINTN)mem_end_stk[i])) - str);
1378 }
1379 else {
1380 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1381 }
1382 }
1383
1384 #ifdef USE_CAPTURE_HISTORY
1385 if (reg->capture_history != 0) {
1386 int r;
1387 OnigCaptureTreeNode* node;
1388
1389 if (IS_NULL(region->history_root)) {
1390 region->history_root = node = history_node_new();
1391 CHECK_NULL_RETURN_MEMERR(node);
1392 }
1393 else {
1394 node = region->history_root;
1395 history_tree_clear(node);
1396 }
1397
1398 node->group = 0;
1399 node->beg = (int)(sstart - str);
1400 node->end = (int)(s - str);
1401
1402 stkp = stk_base;
1403 r = make_capture_history_tree(region->history_root, &stkp,
1404 stk, (UChar* )str, reg);
1405 if (r < 0) {
1406 best_len = r; /* error code */
1407 goto finish;
1408 }
1409 }
1410 #endif /* USE_CAPTURE_HISTORY */
1411 #ifdef USE_POSIX_API_REGION_OPTION
1412 } /* else IS_POSIX_REGION() */
1413 #endif
1414 } /* if (region) */
1415 } /* n > best_len */
1416
1417 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1418 end_best_len:
1419 #endif
1420 MOP_OUT;
1421
1422 if (IS_FIND_CONDITION(option)) {
1423 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1424 best_len = ONIG_MISMATCH;
1425 goto fail; /* for retry */
1426 }
1427 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1428 goto fail; /* for retry */
1429 }
1430 }
1431
1432 /* default behavior: return first-matching result. */
1433 goto finish;
1434 break;
1435
1436 case OP_EXACT1: MOP_IN(OP_EXACT1);
1437 #if 0
1438 DATA_ENSURE(1);
1439 if (*p != *s) goto fail;
1440 p++; s++;
1441 #endif
1442 if (*p != *s++) goto fail;
1443 DATA_ENSURE(0);
1444 p++;
1445 MOP_OUT;
1446 break;
1447
1448 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
1449 {
1450 int len;
1451 UChar *q1, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1452
1453 DATA_ENSURE(1);
1454 len = ONIGENC_MBC_CASE_FOLD(encode,
1455 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1456 case_fold_flag,
1457 &s, end, lowbuf);
1458 DATA_ENSURE(0);
1459 q1 = lowbuf;
1460 while (len-- > 0) {
1461 if (*p != *q1) {
1462 goto fail;
1463 }
1464 p++; q1++;
1465 }
1466 }
1467 MOP_OUT;
1468 break;
1469
1470 case OP_EXACT2: MOP_IN(OP_EXACT2);
1471 DATA_ENSURE(2);
1472 if (*p != *s) goto fail;
1473 p++; s++;
1474 if (*p != *s) goto fail;
1475 sprev = s;
1476 p++; s++;
1477 MOP_OUT;
1478 continue;
1479 break;
1480
1481 case OP_EXACT3: MOP_IN(OP_EXACT3);
1482 DATA_ENSURE(3);
1483 if (*p != *s) goto fail;
1484 p++; s++;
1485 if (*p != *s) goto fail;
1486 p++; s++;
1487 if (*p != *s) goto fail;
1488 sprev = s;
1489 p++; s++;
1490 MOP_OUT;
1491 continue;
1492 break;
1493
1494 case OP_EXACT4: MOP_IN(OP_EXACT4);
1495 DATA_ENSURE(4);
1496 if (*p != *s) goto fail;
1497 p++; s++;
1498 if (*p != *s) goto fail;
1499 p++; s++;
1500 if (*p != *s) goto fail;
1501 p++; s++;
1502 if (*p != *s) goto fail;
1503 sprev = s;
1504 p++; s++;
1505 MOP_OUT;
1506 continue;
1507 break;
1508
1509 case OP_EXACT5: MOP_IN(OP_EXACT5);
1510 DATA_ENSURE(5);
1511 if (*p != *s) goto fail;
1512 p++; s++;
1513 if (*p != *s) goto fail;
1514 p++; s++;
1515 if (*p != *s) goto fail;
1516 p++; s++;
1517 if (*p != *s) goto fail;
1518 p++; s++;
1519 if (*p != *s) goto fail;
1520 sprev = s;
1521 p++; s++;
1522 MOP_OUT;
1523 continue;
1524 break;
1525
1526 case OP_EXACTN: MOP_IN(OP_EXACTN);
1527 GET_LENGTH_INC(tlen, p);
1528 DATA_ENSURE(tlen);
1529 while (tlen-- > 0) {
1530 if (*p++ != *s++) goto fail;
1531 }
1532 sprev = s - 1;
1533 MOP_OUT;
1534 continue;
1535 break;
1536
1537 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
1538 {
1539 int len;
1540 UChar *qn, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1541
1542 GET_LENGTH_INC(tlen, p);
1543 endp = p + tlen;
1544
1545 while (p < endp) {
1546 sprev = s;
1547 DATA_ENSURE(1);
1548 len = ONIGENC_MBC_CASE_FOLD(encode,
1549 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1550 case_fold_flag,
1551 &s, end, lowbuf);
1552 DATA_ENSURE(0);
1553 qn = lowbuf;
1554 while (len-- > 0) {
1555 if (*p != *qn) goto fail;
1556 p++; qn++;
1557 }
1558 }
1559 }
1560
1561 MOP_OUT;
1562 continue;
1563 break;
1564
1565 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
1566 DATA_ENSURE(2);
1567 if (*p != *s) goto fail;
1568 p++; s++;
1569 if (*p != *s) goto fail;
1570 p++; s++;
1571 MOP_OUT;
1572 break;
1573
1574 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
1575 DATA_ENSURE(4);
1576 if (*p != *s) goto fail;
1577 p++; s++;
1578 if (*p != *s) goto fail;
1579 p++; s++;
1580 sprev = s;
1581 if (*p != *s) goto fail;
1582 p++; s++;
1583 if (*p != *s) goto fail;
1584 p++; s++;
1585 MOP_OUT;
1586 continue;
1587 break;
1588
1589 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
1590 DATA_ENSURE(6);
1591 if (*p != *s) goto fail;
1592 p++; s++;
1593 if (*p != *s) goto fail;
1594 p++; s++;
1595 if (*p != *s) goto fail;
1596 p++; s++;
1597 if (*p != *s) goto fail;
1598 p++; s++;
1599 sprev = s;
1600 if (*p != *s) goto fail;
1601 p++; s++;
1602 if (*p != *s) goto fail;
1603 p++; s++;
1604 MOP_OUT;
1605 continue;
1606 break;
1607
1608 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
1609 GET_LENGTH_INC(tlen, p);
1610 DATA_ENSURE(tlen * 2);
1611 while (tlen-- > 0) {
1612 if (*p != *s) goto fail;
1613 p++; s++;
1614 if (*p != *s) goto fail;
1615 p++; s++;
1616 }
1617 sprev = s - 2;
1618 MOP_OUT;
1619 continue;
1620 break;
1621
1622 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
1623 GET_LENGTH_INC(tlen, p);
1624 DATA_ENSURE(tlen * 3);
1625 while (tlen-- > 0) {
1626 if (*p != *s) goto fail;
1627 p++; s++;
1628 if (*p != *s) goto fail;
1629 p++; s++;
1630 if (*p != *s) goto fail;
1631 p++; s++;
1632 }
1633 sprev = s - 3;
1634 MOP_OUT;
1635 continue;
1636 break;
1637
1638 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
1639 GET_LENGTH_INC(tlen, p); /* mb-len */
1640 GET_LENGTH_INC(tlen2, p); /* string len */
1641 tlen2 *= tlen;
1642 DATA_ENSURE(tlen2);
1643 while (tlen2-- > 0) {
1644 if (*p != *s) goto fail;
1645 p++; s++;
1646 }
1647 sprev = s - tlen;
1648 MOP_OUT;
1649 continue;
1650 break;
1651
1652 case OP_CCLASS: MOP_IN(OP_CCLASS);
1653 DATA_ENSURE(1);
1654 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1655 p += SIZE_BITSET;
1656 s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
1657 MOP_OUT;
1658 break;
1659
1660 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
1661 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1662
1663 cclass_mb:
1664 GET_LENGTH_INC(tlen, p);
1665 {
1666 OnigCodePoint code;
1667 UChar *ss;
1668 int mb_len;
1669
1670 DATA_ENSURE(1);
1671 mb_len = enclen(encode, s);
1672 DATA_ENSURE(mb_len);
1673 ss = s;
1674 s += mb_len;
1675 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1676
1677 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1678 if (! onig_is_in_code_range(p, code)) goto fail;
1679 #else
1680 q = p;
1681 ALIGNMENT_RIGHT(q);
1682 if (! onig_is_in_code_range(q, code)) goto fail;
1683 #endif
1684 }
1685 p += tlen;
1686 MOP_OUT;
1687 break;
1688
1689 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
1690 DATA_ENSURE(1);
1691 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1692 p += SIZE_BITSET;
1693 goto cclass_mb;
1694 }
1695 else {
1696 if (BITSET_AT(((BitSetRef )p), *s) == 0)
1697 goto fail;
1698
1699 p += SIZE_BITSET;
1700 GET_LENGTH_INC(tlen, p);
1701 p += tlen;
1702 s++;
1703 }
1704 MOP_OUT;
1705 break;
1706
1707 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
1708 DATA_ENSURE(1);
1709 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1710 p += SIZE_BITSET;
1711 s += enclen(encode, s);
1712 MOP_OUT;
1713 break;
1714
1715 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
1716 DATA_ENSURE(1);
1717 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1718 s++;
1719 GET_LENGTH_INC(tlen, p);
1720 p += tlen;
1721 goto cc_mb_not_success;
1722 }
1723
1724 cclass_mb_not:
1725 GET_LENGTH_INC(tlen, p);
1726 {
1727 OnigCodePoint code;
1728 UChar *ss;
1729 int mb_len = enclen(encode, s);
1730
1731 if (! DATA_ENSURE_CHECK(mb_len)) {
1732 DATA_ENSURE(1);
1733 s = (UChar* )end;
1734 p += tlen;
1735 goto cc_mb_not_success;
1736 }
1737
1738 ss = s;
1739 s += mb_len;
1740 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1741
1742 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1743 if (onig_is_in_code_range(p, code)) goto fail;
1744 #else
1745 q = p;
1746 ALIGNMENT_RIGHT(q);
1747 if (onig_is_in_code_range(q, code)) goto fail;
1748 #endif
1749 }
1750 p += tlen;
1751
1752 cc_mb_not_success:
1753 MOP_OUT;
1754 break;
1755
1756 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
1757 DATA_ENSURE(1);
1758 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1759 p += SIZE_BITSET;
1760 goto cclass_mb_not;
1761 }
1762 else {
1763 if (BITSET_AT(((BitSetRef )p), *s) != 0)
1764 goto fail;
1765
1766 p += SIZE_BITSET;
1767 GET_LENGTH_INC(tlen, p);
1768 p += tlen;
1769 s++;
1770 }
1771 MOP_OUT;
1772 break;
1773
1774 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
1775 {
1776 OnigCodePoint code;
1777 void *node;
1778 int mb_len;
1779 UChar *ss;
1780
1781 DATA_ENSURE(1);
1782 GET_POINTER_INC(node, p);
1783 mb_len = enclen(encode, s);
1784 ss = s;
1785 s += mb_len;
1786 DATA_ENSURE(0);
1787 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1788 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
1789 }
1790 MOP_OUT;
1791 break;
1792
1793 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
1794 DATA_ENSURE(1);
1795 n = enclen(encode, s);
1796 DATA_ENSURE(n);
1797 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1798 s += n;
1799 MOP_OUT;
1800 break;
1801
1802 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
1803 DATA_ENSURE(1);
1804 n = enclen(encode, s);
1805 DATA_ENSURE(n);
1806 s += n;
1807 MOP_OUT;
1808 break;
1809
1810 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
1811 while (DATA_ENSURE_CHECK1) {
1812 STACK_PUSH_ALT(p, s, sprev);
1813 n = enclen(encode, s);
1814 DATA_ENSURE(n);
1815 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1816 sprev = s;
1817 s += n;
1818 }
1819 MOP_OUT;
1820 break;
1821
1822 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
1823 while (DATA_ENSURE_CHECK1) {
1824 STACK_PUSH_ALT(p, s, sprev);
1825 n = enclen(encode, s);
1826 if (n > 1) {
1827 DATA_ENSURE(n);
1828 sprev = s;
1829 s += n;
1830 }
1831 else {
1832 sprev = s;
1833 s++;
1834 }
1835 }
1836 MOP_OUT;
1837 break;
1838
1839 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
1840 while (DATA_ENSURE_CHECK1) {
1841 if (*p == *s) {
1842 STACK_PUSH_ALT(p + 1, s, sprev);
1843 }
1844 n = enclen(encode, s);
1845 DATA_ENSURE(n);
1846 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1847 sprev = s;
1848 s += n;
1849 }
1850 p++;
1851 MOP_OUT;
1852 break;
1853
1854 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1855 while (DATA_ENSURE_CHECK1) {
1856 if (*p == *s) {
1857 STACK_PUSH_ALT(p + 1, s, sprev);
1858 }
1859 n = enclen(encode, s);
1860 if (n > 1) {
1861 DATA_ENSURE(n);
1862 sprev = s;
1863 s += n;
1864 }
1865 else {
1866 sprev = s;
1867 s++;
1868 }
1869 }
1870 p++;
1871 MOP_OUT;
1872 break;
1873
1874 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1875 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
1876 GET_STATE_CHECK_NUM_INC(mem, p);
1877 while (DATA_ENSURE_CHECK1) {
1878 STATE_CHECK_VAL(scv, mem);
1879 if (scv) goto fail;
1880
1881 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1882 n = enclen(encode, s);
1883 DATA_ENSURE(n);
1884 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1885 sprev = s;
1886 s += n;
1887 }
1888 MOP_OUT;
1889 break;
1890
1891 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
1892 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
1893
1894 GET_STATE_CHECK_NUM_INC(mem, p);
1895 while (DATA_ENSURE_CHECK1) {
1896 STATE_CHECK_VAL(scv, mem);
1897 if (scv) goto fail;
1898
1899 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1900 n = enclen(encode, s);
1901 if (n > 1) {
1902 DATA_ENSURE(n);
1903 sprev = s;
1904 s += n;
1905 }
1906 else {
1907 sprev = s;
1908 s++;
1909 }
1910 }
1911 MOP_OUT;
1912 break;
1913 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1914
1915 case OP_WORD: MOP_IN(OP_WORD);
1916 DATA_ENSURE(1);
1917 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1918 goto fail;
1919
1920 s += enclen(encode, s);
1921 MOP_OUT;
1922 break;
1923
1924 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
1925 DATA_ENSURE(1);
1926 if (ONIGENC_IS_MBC_WORD(encode, s, end))
1927 goto fail;
1928
1929 s += enclen(encode, s);
1930 MOP_OUT;
1931 break;
1932
1933 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
1934 if (ON_STR_BEGIN(s)) {
1935 DATA_ENSURE(1);
1936 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1937 goto fail;
1938 }
1939 else if (ON_STR_END(s)) {
1940 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
1941 goto fail;
1942 }
1943 else {
1944 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1945 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
1946 goto fail;
1947 }
1948 MOP_OUT;
1949 continue;
1950 break;
1951
1952 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
1953 if (ON_STR_BEGIN(s)) {
1954 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
1955 goto fail;
1956 }
1957 else if (ON_STR_END(s)) {
1958 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
1959 goto fail;
1960 }
1961 else {
1962 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1963 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
1964 goto fail;
1965 }
1966 MOP_OUT;
1967 continue;
1968 break;
1969
1970 #ifdef USE_WORD_BEGIN_END
1971 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
1972 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
1973 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1974 MOP_OUT;
1975 continue;
1976 }
1977 }
1978 goto fail;
1979 break;
1980
1981 case OP_WORD_END: MOP_IN(OP_WORD_END);
1982 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1983 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
1984 MOP_OUT;
1985 continue;
1986 }
1987 }
1988 goto fail;
1989 break;
1990 #endif
1991
1992 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
1993 if (! ON_STR_BEGIN(s)) goto fail;
1994
1995 MOP_OUT;
1996 continue;
1997 break;
1998
1999 case OP_END_BUF: MOP_IN(OP_END_BUF);
2000 if (! ON_STR_END(s)) goto fail;
2001
2002 MOP_OUT;
2003 continue;
2004 break;
2005
2006 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
2007 if (ON_STR_BEGIN(s)) {
2008 if (IS_NOTBOL(msa->options)) goto fail;
2009 MOP_OUT;
2010 continue;
2011 }
2012 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2013 MOP_OUT;
2014 continue;
2015 }
2016 goto fail;
2017 break;
2018
2019 case OP_END_LINE: MOP_IN(OP_END_LINE);
2020 if (ON_STR_END(s)) {
2021 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2022 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2023 #endif
2024 if (IS_NOTEOL(msa->options)) goto fail;
2025 MOP_OUT;
2026 continue;
2027 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2028 }
2029 #endif
2030 }
2031 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2032 MOP_OUT;
2033 continue;
2034 }
2035 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2036 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2037 MOP_OUT;
2038 continue;
2039 }
2040 #endif
2041 goto fail;
2042 break;
2043
2044 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
2045 if (ON_STR_END(s)) {
2046 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2047 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2048 #endif
2049 if (IS_NOTEOL(msa->options)) goto fail;
2050 MOP_OUT;
2051 continue;
2052 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2053 }
2054 #endif
2055 }
2056 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2057 ON_STR_END(s + enclen(encode, s))) {
2058 MOP_OUT;
2059 continue;
2060 }
2061 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2062 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2063 UChar* ss = s + enclen(encode, s);
2064 ss += enclen(encode, ss);
2065 if (ON_STR_END(ss)) {
2066 MOP_OUT;
2067 continue;
2068 }
2069 }
2070 #endif
2071 goto fail;
2072 break;
2073
2074 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
2075 if (s != msa->start)
2076 goto fail;
2077
2078 MOP_OUT;
2079 continue;
2080 break;
2081
2082 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
2083 GET_MEMNUM_INC(mem, p);
2084 STACK_PUSH_MEM_START(mem, s);
2085 MOP_OUT;
2086 continue;
2087 break;
2088
2089 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
2090 GET_MEMNUM_INC(mem, p);
2091 mem_start_stk[mem] = (OnigStackIndex )(UINTN)((void* )s);
2092 MOP_OUT;
2093 continue;
2094 break;
2095
2096 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
2097 GET_MEMNUM_INC(mem, p);
2098 STACK_PUSH_MEM_END(mem, s);
2099 MOP_OUT;
2100 continue;
2101 break;
2102
2103 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
2104 GET_MEMNUM_INC(mem, p);
2105 mem_end_stk[mem] = (OnigStackIndex )(UINTN)((void* )s);
2106 MOP_OUT;
2107 continue;
2108 break;
2109
2110 #ifdef USE_SUBEXP_CALL
2111 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
2112 GET_MEMNUM_INC(mem, p);
2113 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2114 STACK_PUSH_MEM_END(mem, s);
2115 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2116 MOP_OUT;
2117 continue;
2118 break;
2119
2120 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
2121 GET_MEMNUM_INC(mem, p);
2122 mem_end_stk[mem] = (OnigStackIndex )(UINTN)((void* )s);
2123 STACK_GET_MEM_START(mem, stkp);
2124
2125 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2126 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2127 else
2128 mem_start_stk[mem] = (OnigStackIndex )(UINTN)((void* )stkp->u.mem.pstr);
2129
2130 STACK_PUSH_MEM_END_MARK(mem);
2131 MOP_OUT;
2132 continue;
2133 break;
2134 #endif
2135
2136 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
2137 mem = 1;
2138 goto backref;
2139 break;
2140
2141 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
2142 mem = 2;
2143 goto backref;
2144 break;
2145
2146 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
2147 GET_MEMNUM_INC(mem, p);
2148 backref:
2149 {
2150 int len;
2151 UChar *pstart, *pend;
2152
2153 /* if you want to remove following line,
2154 you should check in parse and compile time. */
2155 if (mem > num_mem) goto fail;
2156 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2157 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2158
2159 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2160 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2161 else
2162 pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]);
2163
2164 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2165 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2166 : (UChar* )((void* )(UINTN)mem_end_stk[mem]));
2167 n = (int)(pend - pstart);
2168 DATA_ENSURE(n);
2169 sprev = s;
2170 STRING_CMP(pstart, s, n);
2171 while (sprev + (len = enclen(encode, sprev)) < s)
2172 sprev += len;
2173
2174 MOP_OUT;
2175 continue;
2176 }
2177 break;
2178
2179 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
2180 GET_MEMNUM_INC(mem, p);
2181 {
2182 int len;
2183 UChar *pstart, *pend;
2184
2185 /* if you want to remove following line,
2186 you should check in parse and compile time. */
2187 if (mem > num_mem) goto fail;
2188 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2189 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2190
2191 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2192 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2193 else
2194 pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]);
2195
2196 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2197 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2198 : (UChar* )((void* )(UINTN)mem_end_stk[mem]));
2199 n = (int)(pend - pstart);
2200 DATA_ENSURE(n);
2201 sprev = s;
2202 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
2203 while (sprev + (len = enclen(encode, sprev)) < s)
2204 sprev += len;
2205
2206 MOP_OUT;
2207 continue;
2208 }
2209 break;
2210
2211 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
2212 {
2213 int len, is_fail;
2214 UChar *pstart, *pend, *swork;
2215
2216 GET_LENGTH_INC(tlen, p);
2217 for (i = 0; i < tlen; i++) {
2218 GET_MEMNUM_INC(mem, p);
2219
2220 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2221 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2222
2223 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2224 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2225 else
2226 pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]);
2227
2228 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2229 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2230 : (UChar* )((void* )(UINTN)mem_end_stk[mem]));
2231 n = (int)(pend - pstart);
2232 DATA_ENSURE(n);
2233 sprev = s;
2234 swork = s;
2235 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2236 if (is_fail) continue;
2237 s = swork;
2238 while (sprev + (len = enclen(encode, sprev)) < s)
2239 sprev += len;
2240
2241 p += (SIZE_MEMNUM * (tlen - i - 1));
2242 break; /* success */
2243 }
2244 if (i == tlen) goto fail;
2245 MOP_OUT;
2246 continue;
2247 }
2248 break;
2249
2250 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
2251 {
2252 int len, is_fail;
2253 UChar *pstart, *pend, *swork;
2254
2255 GET_LENGTH_INC(tlen, p);
2256 for (i = 0; i < tlen; i++) {
2257 GET_MEMNUM_INC(mem, p);
2258
2259 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2260 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2261
2262 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2263 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2264 else
2265 pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]);
2266
2267 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2268 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2269 : (UChar* )((void* )(UINTN)mem_end_stk[mem]));
2270 n = (int)(pend - pstart);
2271 DATA_ENSURE(n);
2272 sprev = s;
2273 swork = s;
2274 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
2275 if (is_fail) continue;
2276 s = swork;
2277 while (sprev + (len = enclen(encode, sprev)) < s)
2278 sprev += len;
2279
2280 p += (SIZE_MEMNUM * (tlen - i - 1));
2281 break; /* success */
2282 }
2283 if (i == tlen) goto fail;
2284 MOP_OUT;
2285 continue;
2286 }
2287 break;
2288
2289 #ifdef USE_BACKREF_WITH_LEVEL
2290 case OP_BACKREF_WITH_LEVEL:
2291 {
2292 int len;
2293 OnigOptionType ic;
2294 LengthType level;
2295
2296 GET_OPTION_INC(ic, p);
2297 GET_LENGTH_INC(level, p);
2298 GET_LENGTH_INC(tlen, p);
2299
2300 sprev = s;
2301 if (backref_match_at_nested_level(reg, stk, stk_base, ic
2302 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2303 while (sprev + (len = enclen(encode, sprev)) < s)
2304 sprev += len;
2305
2306 p += (SIZE_MEMNUM * tlen);
2307 }
2308 else
2309 goto fail;
2310
2311 MOP_OUT;
2312 continue;
2313 }
2314
2315 break;
2316 #endif
2317
2318 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2319 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
2320 GET_OPTION_INC(option, p);
2321 STACK_PUSH_ALT(p, s, sprev);
2322 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2323 MOP_OUT;
2324 continue;
2325 break;
2326
2327 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
2328 GET_OPTION_INC(option, p);
2329 MOP_OUT;
2330 continue;
2331 break;
2332 #endif
2333
2334 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
2335 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2336 STACK_PUSH_NULL_CHECK_START(mem, s);
2337 MOP_OUT;
2338 continue;
2339 break;
2340
2341 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
2342 {
2343 int isnull;
2344
2345 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2346 STACK_NULL_CHECK(isnull, mem, s);
2347 if (isnull) {
2348 #ifdef ONIG_DEBUG_MATCH
2349 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2350 (int )mem, (int )s);
2351 #endif
2352 null_check_found:
2353 /* empty loop founded, skip next instruction */
2354 switch (*p++) {
2355 case OP_JUMP:
2356 case OP_PUSH:
2357 p += SIZE_RELADDR;
2358 break;
2359 case OP_REPEAT_INC:
2360 case OP_REPEAT_INC_NG:
2361 case OP_REPEAT_INC_SG:
2362 case OP_REPEAT_INC_NG_SG:
2363 p += SIZE_MEMNUM;
2364 break;
2365 default:
2366 goto unexpected_bytecode_error;
2367 break;
2368 }
2369 }
2370 }
2371 MOP_OUT;
2372 continue;
2373 break;
2374
2375 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2376 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
2377 {
2378 int isnull;
2379
2380 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2381 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2382 if (isnull) {
2383 #ifdef ONIG_DEBUG_MATCH
2384 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2385 (int )mem, (int )s);
2386 #endif
2387 if (isnull == -1) goto fail;
2388 goto null_check_found;
2389 }
2390 }
2391 MOP_OUT;
2392 continue;
2393 break;
2394 #endif
2395
2396 #ifdef USE_SUBEXP_CALL
2397 case OP_NULL_CHECK_END_MEMST_PUSH:
2398 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2399 {
2400 int isnull;
2401
2402 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2403 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2404 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2405 #else
2406 STACK_NULL_CHECK_REC(isnull, mem, s);
2407 #endif
2408 if (isnull) {
2409 #ifdef ONIG_DEBUG_MATCH
2410 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2411 (int )mem, (int )s);
2412 #endif
2413 if (isnull == -1) goto fail;
2414 goto null_check_found;
2415 }
2416 else {
2417 STACK_PUSH_NULL_CHECK_END(mem);
2418 }
2419 }
2420 MOP_OUT;
2421 continue;
2422 break;
2423 #endif
2424
2425 case OP_JUMP: MOP_IN(OP_JUMP);
2426 GET_RELADDR_INC(addr, p);
2427 p += addr;
2428 MOP_OUT;
2429 CHECK_INTERRUPT_IN_MATCH_AT;
2430 continue;
2431 break;
2432
2433 case OP_PUSH: MOP_IN(OP_PUSH);
2434 GET_RELADDR_INC(addr, p);
2435 STACK_PUSH_ALT(p + addr, s, sprev);
2436 MOP_OUT;
2437 continue;
2438 break;
2439
2440 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2441 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
2442 GET_STATE_CHECK_NUM_INC(mem, p);
2443 STATE_CHECK_VAL(scv, mem);
2444 if (scv) goto fail;
2445
2446 GET_RELADDR_INC(addr, p);
2447 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2448 MOP_OUT;
2449 continue;
2450 break;
2451
2452 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2453 GET_STATE_CHECK_NUM_INC(mem, p);
2454 GET_RELADDR_INC(addr, p);
2455 STATE_CHECK_VAL(scv, mem);
2456 if (scv) {
2457 p += addr;
2458 }
2459 else {
2460 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2461 }
2462 MOP_OUT;
2463 continue;
2464 break;
2465
2466 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
2467 GET_STATE_CHECK_NUM_INC(mem, p);
2468 STATE_CHECK_VAL(scv, mem);
2469 if (scv) goto fail;
2470
2471 STACK_PUSH_STATE_CHECK(s, mem);
2472 MOP_OUT;
2473 continue;
2474 break;
2475 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2476
2477 case OP_POP: MOP_IN(OP_POP);
2478 STACK_POP_ONE;
2479 MOP_OUT;
2480 continue;
2481 break;
2482
2483 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2484 GET_RELADDR_INC(addr, p);
2485 if (*p == *s && DATA_ENSURE_CHECK1) {
2486 p++;
2487 STACK_PUSH_ALT(p + addr, s, sprev);
2488 MOP_OUT;
2489 continue;
2490 }
2491 p += (addr + 1);
2492 MOP_OUT;
2493 continue;
2494 break;
2495
2496 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2497 GET_RELADDR_INC(addr, p);
2498 if (*p == *s) {
2499 p++;
2500 STACK_PUSH_ALT(p + addr, s, sprev);
2501 MOP_OUT;
2502 continue;
2503 }
2504 p++;
2505 MOP_OUT;
2506 continue;
2507 break;
2508
2509 case OP_REPEAT: MOP_IN(OP_REPEAT);
2510 {
2511 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2512 GET_RELADDR_INC(addr, p);
2513
2514 STACK_ENSURE(1);
2515 repeat_stk[mem] = GET_STACK_INDEX(stk);
2516 STACK_PUSH_REPEAT(mem, p);
2517
2518 if (reg->repeat_range[mem].lower == 0) {
2519 STACK_PUSH_ALT(p + addr, s, sprev);
2520 }
2521 }
2522 MOP_OUT;
2523 continue;
2524 break;
2525
2526 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
2527 {
2528 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2529 GET_RELADDR_INC(addr, p);
2530
2531 STACK_ENSURE(1);
2532 repeat_stk[mem] = GET_STACK_INDEX(stk);
2533 STACK_PUSH_REPEAT(mem, p);
2534
2535 if (reg->repeat_range[mem].lower == 0) {
2536 STACK_PUSH_ALT(p, s, sprev);
2537 p += addr;
2538 }
2539 }
2540 MOP_OUT;
2541 continue;
2542 break;
2543
2544 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
2545 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2546 si = repeat_stk[mem];
2547 stkp = STACK_AT(si);
2548
2549 repeat_inc:
2550 stkp->u.repeat.count++;
2551 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2552 /* end of repeat. Nothing to do. */
2553 }
2554 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2555 STACK_PUSH_ALT(p, s, sprev);
2556 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2557 }
2558 else {
2559 p = stkp->u.repeat.pcode;
2560 }
2561 STACK_PUSH_REPEAT_INC(si);
2562 MOP_OUT;
2563 CHECK_INTERRUPT_IN_MATCH_AT;
2564 continue;
2565 break;
2566
2567 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
2568 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2569 STACK_GET_REPEAT(mem, stkp);
2570 si = GET_STACK_INDEX(stkp);
2571 goto repeat_inc;
2572 break;
2573
2574 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
2575 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2576 si = repeat_stk[mem];
2577 stkp = STACK_AT(si);
2578
2579 repeat_inc_ng:
2580 stkp->u.repeat.count++;
2581 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2582 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2583 UChar* pcode = stkp->u.repeat.pcode;
2584
2585 STACK_PUSH_REPEAT_INC(si);
2586 STACK_PUSH_ALT(pcode, s, sprev);
2587 }
2588 else {
2589 p = stkp->u.repeat.pcode;
2590 STACK_PUSH_REPEAT_INC(si);
2591 }
2592 }
2593 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2594 STACK_PUSH_REPEAT_INC(si);
2595 }
2596 MOP_OUT;
2597 CHECK_INTERRUPT_IN_MATCH_AT;
2598 continue;
2599 break;
2600
2601 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
2602 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2603 STACK_GET_REPEAT(mem, stkp);
2604 si = GET_STACK_INDEX(stkp);
2605 goto repeat_inc_ng;
2606 break;
2607
2608 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
2609 STACK_PUSH_POS(s, sprev);
2610 MOP_OUT;
2611 continue;
2612 break;
2613
2614 case OP_POP_POS: MOP_IN(OP_POP_POS);
2615 {
2616 STACK_POS_END(stkp);
2617 s = stkp->u.state.pstr;
2618 sprev = stkp->u.state.pstr_prev;
2619 }
2620 MOP_OUT;
2621 continue;
2622 break;
2623
2624 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
2625 GET_RELADDR_INC(addr, p);
2626 STACK_PUSH_POS_NOT(p + addr, s, sprev);
2627 MOP_OUT;
2628 continue;
2629 break;
2630
2631 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
2632 STACK_POP_TIL_POS_NOT;
2633 goto fail;
2634 break;
2635
2636 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
2637 STACK_PUSH_STOP_BT;
2638 MOP_OUT;
2639 continue;
2640 break;
2641
2642 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
2643 STACK_STOP_BT_END;
2644 MOP_OUT;
2645 continue;
2646 break;
2647
2648 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
2649 GET_LENGTH_INC(tlen, p);
2650 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2651 if (IS_NULL(s)) goto fail;
2652 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2653 MOP_OUT;
2654 continue;
2655 break;
2656
2657 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2658 GET_RELADDR_INC(addr, p);
2659 GET_LENGTH_INC(tlen, p);
2660 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2661 if (IS_NULL(q)) {
2662 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2663 If you want to change to fail, replace following line. */
2664 p += addr;
2665 /* goto fail; */
2666 }
2667 else {
2668 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2669 s = q;
2670 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2671 }
2672 MOP_OUT;
2673 continue;
2674 break;
2675
2676 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2677 STACK_POP_TIL_LOOK_BEHIND_NOT;
2678 goto fail;
2679 break;
2680
2681 #ifdef USE_SUBEXP_CALL
2682 case OP_CALL: MOP_IN(OP_CALL);
2683 GET_ABSADDR_INC(addr, p);
2684 STACK_PUSH_CALL_FRAME(p);
2685 p = reg->p + addr;
2686 MOP_OUT;
2687 continue;
2688 break;
2689
2690 case OP_RETURN: MOP_IN(OP_RETURN);
2691 STACK_RETURN(p);
2692 STACK_PUSH_RETURN;
2693 MOP_OUT;
2694 continue;
2695 break;
2696 #endif
2697
2698 case OP_FINISH:
2699 goto finish;
2700 break;
2701
2702 fail:
2703 MOP_OUT;
2704 /* fall */
2705 case OP_FAIL: MOP_IN(OP_FAIL);
2706 STACK_POP;
2707 p = stk->u.state.pcode;
2708 s = stk->u.state.pstr;
2709 sprev = stk->u.state.pstr_prev;
2710
2711 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2712 if (stk->u.state.state_check != 0) {
2713 stk->type = STK_STATE_CHECK_MARK;
2714 stk++;
2715 }
2716 #endif
2717
2718 MOP_OUT;
2719 continue;
2720 break;
2721
2722 default:
2723 goto bytecode_error;
2724
2725 } /* end of switch */
2726 sprev = sbegin;
2727 } /* end of while(1) */
2728
2729 finish:
2730 STACK_SAVE;
2731 xfree(alloca_base);
2732 return best_len;
2733
2734 #ifdef ONIG_DEBUG
2735 stack_error:
2736 STACK_SAVE;
2737 xfree(alloca_base);
2738 return ONIGERR_STACK_BUG;
2739 #endif
2740
2741 bytecode_error:
2742 STACK_SAVE;
2743 xfree(alloca_base);
2744 return ONIGERR_UNDEFINED_BYTECODE;
2745
2746 unexpected_bytecode_error:
2747 STACK_SAVE;
2748 xfree(alloca_base);
2749 return ONIGERR_UNEXPECTED_BYTECODE;
2750 }
2751
2752
2753 static UChar*
slow_search(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2754 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2755 const UChar* text, const UChar* text_end, UChar* text_range)
2756 {
2757 UChar *t, *p, *s, *end;
2758
2759 end = (UChar* )text_end;
2760 end -= target_end - target - 1;
2761 if (end > text_range)
2762 end = text_range;
2763
2764 s = (UChar* )text;
2765
2766 while (s < end) {
2767 if (*s == *target) {
2768 p = s + 1;
2769 t = target + 1;
2770 while (t < target_end) {
2771 if (*t != *p++)
2772 break;
2773 t++;
2774 }
2775 if (t == target_end)
2776 return s;
2777 }
2778 s += enclen(enc, s);
2779 }
2780
2781 return (UChar* )NULL;
2782 }
2783
2784 static int
str_lower_case_match(OnigEncoding enc,int case_fold_flag,const UChar * t,const UChar * tend,const UChar * p,const UChar * end)2785 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
2786 const UChar* t, const UChar* tend,
2787 const UChar* p, const UChar* end)
2788 {
2789 int lowlen;
2790 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2791
2792 while (t < tend) {
2793 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
2794 q = lowbuf;
2795 while (lowlen > 0) {
2796 if (*t++ != *q++) return 0;
2797 lowlen--;
2798 }
2799 }
2800
2801 return 1;
2802 }
2803
2804 static UChar*
slow_search_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * text_end,UChar * text_range)2805 slow_search_ic(OnigEncoding enc, int case_fold_flag,
2806 UChar* target, UChar* target_end,
2807 const UChar* text, const UChar* text_end, UChar* text_range)
2808 {
2809 UChar *s, *end;
2810
2811 end = (UChar* )text_end;
2812 end -= target_end - target - 1;
2813 if (end > text_range)
2814 end = text_range;
2815
2816 s = (UChar* )text;
2817
2818 while (s < end) {
2819 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
2820 s, text_end))
2821 return s;
2822
2823 s += enclen(enc, s);
2824 }
2825
2826 return (UChar* )NULL;
2827 }
2828
2829 static UChar*
slow_search_backward(OnigEncoding enc,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2830 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
2831 const UChar* text, const UChar* adjust_text,
2832 const UChar* text_end, const UChar* text_start)
2833 {
2834 UChar *t, *p, *s;
2835
2836 s = (UChar* )text_end;
2837 s -= (target_end - target);
2838 if (s > text_start)
2839 s = (UChar* )text_start;
2840 else
2841 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2842
2843 while (s >= text) {
2844 if (*s == *target) {
2845 p = s + 1;
2846 t = target + 1;
2847 while (t < target_end) {
2848 if (*t != *p++)
2849 break;
2850 t++;
2851 }
2852 if (t == target_end)
2853 return s;
2854 }
2855 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2856 }
2857
2858 return (UChar* )NULL;
2859 }
2860
2861 static UChar*
slow_search_backward_ic(OnigEncoding enc,int case_fold_flag,UChar * target,UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)2862 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
2863 UChar* target, UChar* target_end,
2864 const UChar* text, const UChar* adjust_text,
2865 const UChar* text_end, const UChar* text_start)
2866 {
2867 UChar *s;
2868
2869 s = (UChar* )text_end;
2870 s -= (target_end - target);
2871 if (s > text_start)
2872 s = (UChar* )text_start;
2873 else
2874 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2875
2876 while (s >= text) {
2877 if (str_lower_case_match(enc, case_fold_flag,
2878 target, target_end, s, text_end))
2879 return s;
2880
2881 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2882 }
2883
2884 return (UChar* )NULL;
2885 }
2886
2887 static UChar*
bm_search_notrev(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2888 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
2889 const UChar* text, const UChar* text_end,
2890 const UChar* text_range)
2891 {
2892 const UChar *s, *se, *t, *p, *end;
2893 const UChar *tail;
2894 int skip, tlen1;
2895
2896 #ifdef ONIG_DEBUG_SEARCH
2897 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2898 (int )text, (int )text_end, (int )text_range);
2899 #endif
2900
2901 tail = target_end - 1;
2902 tlen1 = (int)(tail - target);
2903 end = text_range;
2904 if (end + tlen1 > text_end)
2905 end = text_end - tlen1;
2906
2907 s = text;
2908
2909 if (IS_NULL(reg->int_map)) {
2910 while (s < end) {
2911 p = se = s + tlen1;
2912 t = tail;
2913 while (*p == *t) {
2914 if (t == target) return (UChar* )s;
2915 p--; t--;
2916 }
2917 skip = reg->map[*se];
2918 t = s;
2919 do {
2920 s += enclen(reg->enc, s);
2921 } while ((s - t) < skip && s < end);
2922 }
2923 }
2924 else {
2925 while (s < end) {
2926 p = se = s + tlen1;
2927 t = tail;
2928 while (*p == *t) {
2929 if (t == target) return (UChar* )s;
2930 p--; t--;
2931 }
2932 skip = reg->int_map[*se];
2933 t = s;
2934 do {
2935 s += enclen(reg->enc, s);
2936 } while ((s - t) < skip && s < end);
2937 }
2938 }
2939
2940 return (UChar* )NULL;
2941 }
2942
2943 static UChar*
bm_search(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * text_end,const UChar * text_range)2944 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
2945 const UChar* text, const UChar* text_end, const UChar* text_range)
2946 {
2947 const UChar *s, *t, *p, *end;
2948 const UChar *tail;
2949
2950 end = text_range + (target_end - target) - 1;
2951 if (end > text_end)
2952 end = text_end;
2953
2954 tail = target_end - 1;
2955 s = text + (target_end - target) - 1;
2956 if (IS_NULL(reg->int_map)) {
2957 while (s < end) {
2958 p = s;
2959 t = tail;
2960 while (*p == *t) {
2961 if (t == target) return (UChar* )p;
2962 p--; t--;
2963 }
2964 s += reg->map[*s];
2965 }
2966 }
2967 else { /* see int_map[] */
2968 while (s < end) {
2969 p = s;
2970 t = tail;
2971 while (*p == *t) {
2972 if (t == target) return (UChar* )p;
2973 p--; t--;
2974 }
2975 s += reg->int_map[*s];
2976 }
2977 }
2978 return (UChar* )NULL;
2979 }
2980
2981 static int
set_bm_backward_skip(UChar * s,UChar * end,OnigEncoding enc ARG_UNUSED,int ** skip)2982 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
2983 int** skip)
2984
2985 {
2986 int i, len;
2987
2988 if (IS_NULL(*skip)) {
2989 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
2990 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
2991 }
2992
2993 len = (int)(end - s);
2994 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
2995 (*skip)[i] = len;
2996
2997 for (i = len - 1; i > 0; i--)
2998 (*skip)[s[i]] = i;
2999
3000 return 0;
3001 }
3002
3003 static UChar*
bm_search_backward(regex_t * reg,const UChar * target,const UChar * target_end,const UChar * text,const UChar * adjust_text,const UChar * text_end,const UChar * text_start)3004 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3005 const UChar* text, const UChar* adjust_text,
3006 const UChar* text_end, const UChar* text_start)
3007 {
3008 const UChar *s, *t, *p;
3009
3010 s = text_end - (target_end - target);
3011 if (text_start < s)
3012 s = text_start;
3013 else
3014 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3015
3016 while (s >= text) {
3017 p = s;
3018 t = target;
3019 while (t < target_end && *p == *t) {
3020 p++; t++;
3021 }
3022 if (t == target_end)
3023 return (UChar* )s;
3024
3025 s -= reg->int_map_backward[*s];
3026 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3027 }
3028
3029 return (UChar* )NULL;
3030 }
3031
3032 static UChar*
map_search(OnigEncoding enc,UChar map[],const UChar * text,const UChar * text_range)3033 map_search(OnigEncoding enc, UChar map[],
3034 const UChar* text, const UChar* text_range)
3035 {
3036 const UChar *s = text;
3037
3038 while (s < text_range) {
3039 if (map[*s]) return (UChar* )s;
3040
3041 s += enclen(enc, s);
3042 }
3043 return (UChar* )NULL;
3044 }
3045
3046 static UChar*
map_search_backward(OnigEncoding enc,UChar map[],const UChar * text,const UChar * adjust_text,const UChar * text_start)3047 map_search_backward(OnigEncoding enc, UChar map[],
3048 const UChar* text, const UChar* adjust_text,
3049 const UChar* text_start)
3050 {
3051 const UChar *s = text_start;
3052
3053 while (s >= text) {
3054 if (map[*s]) return (UChar* )s;
3055
3056 s = onigenc_get_prev_char_head(enc, adjust_text, s);
3057 }
3058 return (UChar* )NULL;
3059 }
3060
3061 extern int
onig_match(regex_t * reg,const UChar * str,const UChar * end,const UChar * at,OnigRegion * region,OnigOptionType option)3062 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3063 OnigOptionType option)
3064 {
3065 int r;
3066 UChar *prev;
3067 OnigMatchArg msa;
3068
3069 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3070 start:
3071 THREAD_ATOMIC_START;
3072 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3073 ONIG_STATE_INC(reg);
3074 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3075 onig_chain_reduce(reg);
3076 ONIG_STATE_INC(reg);
3077 }
3078 }
3079 else {
3080 int n;
3081
3082 THREAD_ATOMIC_END;
3083 n = 0;
3084 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3085 if (++n > THREAD_PASS_LIMIT_COUNT)
3086 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3087 THREAD_PASS;
3088 }
3089 goto start;
3090 }
3091 THREAD_ATOMIC_END;
3092 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3093
3094 MATCH_ARG_INIT(msa, option, region, at);
3095 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3096 {
3097 int offset = at - str;
3098 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3099 }
3100 #endif
3101
3102 if (region
3103 #ifdef USE_POSIX_API_REGION_OPTION
3104 && !IS_POSIX_REGION(option)
3105 #endif
3106 ) {
3107 r = onig_region_resize_clear(region, reg->num_mem + 1);
3108 }
3109 else
3110 r = 0;
3111
3112 if (r == 0) {
3113 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3114 r = match_at(reg, str, end,
3115 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3116 end,
3117 #endif
3118 at, prev, &msa);
3119 }
3120
3121 MATCH_ARG_FREE(msa);
3122 ONIG_STATE_DEC_THREAD(reg);
3123 return r;
3124 }
3125
3126 static int
forward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,UChar * range,UChar ** low,UChar ** high,UChar ** low_prev)3127 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3128 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3129 {
3130 UChar *p, *pprev = (UChar* )NULL;
3131
3132 #ifdef ONIG_DEBUG_SEARCH
3133 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3134 (int )str, (int )end, (int )s, (int )range);
3135 #endif
3136
3137 p = s;
3138 if (reg->dmin > 0) {
3139 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3140 p += reg->dmin;
3141 }
3142 else {
3143 UChar *q = p + reg->dmin;
3144 while (p < q) p += enclen(reg->enc, p);
3145 }
3146 }
3147
3148 retry:
3149 switch (reg->optimize) {
3150 case ONIG_OPTIMIZE_EXACT:
3151 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3152 break;
3153 case ONIG_OPTIMIZE_EXACT_IC:
3154 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3155 reg->exact, reg->exact_end, p, end, range);
3156 break;
3157
3158 case ONIG_OPTIMIZE_EXACT_BM:
3159 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3160 break;
3161
3162 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3163 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3164 break;
3165
3166 case ONIG_OPTIMIZE_MAP:
3167 p = map_search(reg->enc, reg->map, p, range);
3168 break;
3169 }
3170
3171 if (p && p < range) {
3172 if (p - reg->dmin < s) {
3173 retry_gate:
3174 pprev = p;
3175 p += enclen(reg->enc, p);
3176 goto retry;
3177 }
3178
3179 if (reg->sub_anchor) {
3180 UChar* prev;
3181
3182 switch (reg->sub_anchor) {
3183 case ANCHOR_BEGIN_LINE:
3184 if (!ON_STR_BEGIN(p)) {
3185 prev = onigenc_get_prev_char_head(reg->enc,
3186 (pprev ? pprev : str), p);
3187 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3188 goto retry_gate;
3189 }
3190 break;
3191
3192 case ANCHOR_END_LINE:
3193 if (ON_STR_END(p)) {
3194 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3195 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3196 (pprev ? pprev : str), p);
3197 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3198 goto retry_gate;
3199 #endif
3200 }
3201 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3202 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3203 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3204 #endif
3205 )
3206 goto retry_gate;
3207 break;
3208 }
3209 }
3210
3211 if (reg->dmax == 0) {
3212 *low = p;
3213 if (low_prev) {
3214 if (*low > s)
3215 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3216 else
3217 *low_prev = onigenc_get_prev_char_head(reg->enc,
3218 (pprev ? pprev : str), p);
3219 }
3220 }
3221 else {
3222 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3223 *low = p - reg->dmax;
3224 if (*low > s) {
3225 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3226 *low, (const UChar** )low_prev);
3227 if (low_prev && IS_NULL(*low_prev))
3228 *low_prev = onigenc_get_prev_char_head(reg->enc,
3229 (pprev ? pprev : s), *low);
3230 }
3231 else {
3232 if (low_prev)
3233 *low_prev = onigenc_get_prev_char_head(reg->enc,
3234 (pprev ? pprev : str), *low);
3235 }
3236 }
3237 }
3238 /* no needs to adjust *high, *high is used as range check only */
3239 *high = p - reg->dmin;
3240
3241 #ifdef ONIG_DEBUG_SEARCH
3242 fprintf(stderr,
3243 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3244 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3245 #endif
3246 return 1; /* success */
3247 }
3248
3249 return 0; /* fail */
3250 }
3251
3252 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3253 int** skip));
3254
3255 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3256
3257 static int
backward_search_range(regex_t * reg,const UChar * str,const UChar * end,UChar * s,const UChar * range,UChar * adjrange,UChar ** low,UChar ** high)3258 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3259 UChar* s, const UChar* range, UChar* adjrange,
3260 UChar** low, UChar** high)
3261 {
3262 int r;
3263 UChar *p;
3264
3265 range += reg->dmin;
3266 p = s;
3267
3268 retry:
3269 switch (reg->optimize) {
3270 case ONIG_OPTIMIZE_EXACT:
3271 exact_method:
3272 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3273 range, adjrange, end, p);
3274 break;
3275
3276 case ONIG_OPTIMIZE_EXACT_IC:
3277 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3278 reg->exact, reg->exact_end,
3279 range, adjrange, end, p);
3280 break;
3281
3282 case ONIG_OPTIMIZE_EXACT_BM:
3283 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3284 if (IS_NULL(reg->int_map_backward)) {
3285 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3286 goto exact_method;
3287
3288 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3289 &(reg->int_map_backward));
3290 if (r) return r;
3291 }
3292 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3293 end, p);
3294 break;
3295
3296 case ONIG_OPTIMIZE_MAP:
3297 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3298 break;
3299 }
3300
3301 if (p) {
3302 if (reg->sub_anchor) {
3303 UChar* prev;
3304
3305 switch (reg->sub_anchor) {
3306 case ANCHOR_BEGIN_LINE:
3307 if (!ON_STR_BEGIN(p)) {
3308 prev = onigenc_get_prev_char_head(reg->enc, str, p);
3309 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3310 p = prev;
3311 goto retry;
3312 }
3313 }
3314 break;
3315
3316 case ANCHOR_END_LINE:
3317 if (ON_STR_END(p)) {
3318 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3319 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3320 if (IS_NULL(prev)) goto fail;
3321 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3322 p = prev;
3323 goto retry;
3324 }
3325 #endif
3326 }
3327 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3328 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3329 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3330 #endif
3331 ) {
3332 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3333 if (IS_NULL(p)) goto fail;
3334 goto retry;
3335 }
3336 break;
3337 }
3338 }
3339
3340 /* no needs to adjust *high, *high is used as range check only */
3341 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3342 *low = p - reg->dmax;
3343 *high = p - reg->dmin;
3344 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3345 }
3346
3347 #ifdef ONIG_DEBUG_SEARCH
3348 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3349 (int )(*low - str), (int )(*high - str));
3350 #endif
3351 return 1; /* success */
3352 }
3353
3354 fail:
3355 #ifdef ONIG_DEBUG_SEARCH
3356 fprintf(stderr, "backward_search_range: fail.\n");
3357 #endif
3358 return 0; /* fail */
3359 }
3360
3361
3362 extern int
onig_search(regex_t * reg,const UChar * str,const UChar * end,const UChar * start,const UChar * range,OnigRegion * region,OnigOptionType option)3363 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3364 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3365 {
3366 int r;
3367 UChar *s, *prev;
3368 OnigMatchArg msa;
3369 const UChar *orig_start = start;
3370 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3371 const UChar *orig_range = range;
3372 #endif
3373
3374 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3375 start:
3376 THREAD_ATOMIC_START;
3377 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3378 ONIG_STATE_INC(reg);
3379 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3380 onig_chain_reduce(reg);
3381 ONIG_STATE_INC(reg);
3382 }
3383 }
3384 else {
3385 int n;
3386
3387 THREAD_ATOMIC_END;
3388 n = 0;
3389 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3390 if (++n > THREAD_PASS_LIMIT_COUNT)
3391 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3392 THREAD_PASS;
3393 }
3394 goto start;
3395 }
3396 THREAD_ATOMIC_END;
3397 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3398
3399 #ifdef ONIG_DEBUG_SEARCH
3400 fprintf(stderr,
3401 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3402 (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3403 #endif
3404
3405 if (region
3406 #ifdef USE_POSIX_API_REGION_OPTION
3407 && !IS_POSIX_REGION(option)
3408 #endif
3409 ) {
3410 r = onig_region_resize_clear(region, reg->num_mem + 1);
3411 if (r) goto finish_no_msa;
3412 }
3413
3414 if (start > end || start < str) goto mismatch_no_msa;
3415
3416
3417 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3418 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3419 #define MATCH_AND_RETURN_CHECK(upper_range) \
3420 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3421 if (r != ONIG_MISMATCH) {\
3422 if (r >= 0) {\
3423 if (! IS_FIND_LONGEST(reg->options)) {\
3424 goto match;\
3425 }\
3426 }\
3427 else goto finish; /* error */ \
3428 }
3429 #else
3430 #define MATCH_AND_RETURN_CHECK(upper_range) \
3431 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3432 if (r != ONIG_MISMATCH) {\
3433 if (r >= 0) {\
3434 goto match;\
3435 }\
3436 else goto finish; /* error */ \
3437 }
3438 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3439 #else
3440 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3441 #define MATCH_AND_RETURN_CHECK(none) \
3442 r = match_at(reg, str, end, s, prev, &msa);\
3443 if (r != ONIG_MISMATCH) {\
3444 if (r >= 0) {\
3445 if (! IS_FIND_LONGEST(reg->options)) {\
3446 goto match;\
3447 }\
3448 }\
3449 else goto finish; /* error */ \
3450 }
3451 #else
3452 #define MATCH_AND_RETURN_CHECK(none) \
3453 r = match_at(reg, str, end, s, prev, &msa);\
3454 if (r != ONIG_MISMATCH) {\
3455 if (r >= 0) {\
3456 goto match;\
3457 }\
3458 else goto finish; /* error */ \
3459 }
3460 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3461 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3462
3463
3464 /* anchor optimize: resume search range */
3465 if (reg->anchor != 0 && str < end) {
3466 UChar *min_semi_end, *max_semi_end;
3467
3468 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3469 /* search start-position only */
3470 begin_position:
3471 if (range > start)
3472 range = start + 1;
3473 else
3474 range = start;
3475 }
3476 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3477 /* search str-position only */
3478 if (range > start) {
3479 if (start != str) goto mismatch_no_msa;
3480 range = str + 1;
3481 }
3482 else {
3483 if (range <= str) {
3484 start = str;
3485 range = str;
3486 }
3487 else
3488 goto mismatch_no_msa;
3489 }
3490 }
3491 else if (reg->anchor & ANCHOR_END_BUF) {
3492 min_semi_end = max_semi_end = (UChar* )end;
3493
3494 end_buf:
3495 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3496 goto mismatch_no_msa;
3497
3498 if (range > start) {
3499 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3500 start = min_semi_end - reg->anchor_dmax;
3501 if (start < end)
3502 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3503 else { /* match with empty at end */
3504 start = onigenc_get_prev_char_head(reg->enc, str, end);
3505 }
3506 }
3507 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3508 range = max_semi_end - reg->anchor_dmin + 1;
3509 }
3510
3511 if (start >= range) goto mismatch_no_msa;
3512 }
3513 else {
3514 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3515 range = min_semi_end - reg->anchor_dmax;
3516 }
3517 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3518 start = max_semi_end - reg->anchor_dmin;
3519 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3520 }
3521 if (range > start) goto mismatch_no_msa;
3522 }
3523 }
3524 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3525 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3526
3527 max_semi_end = (UChar* )end;
3528 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3529 min_semi_end = pre_end;
3530
3531 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3532 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3533 if (IS_NOT_NULL(pre_end) &&
3534 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3535 min_semi_end = pre_end;
3536 }
3537 #endif
3538 if (min_semi_end > str && start <= min_semi_end) {
3539 goto end_buf;
3540 }
3541 }
3542 else {
3543 min_semi_end = (UChar* )end;
3544 goto end_buf;
3545 }
3546 }
3547 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3548 goto begin_position;
3549 }
3550 }
3551 else if (str == end) { /* empty string */
3552 static const UChar* address_for_empty_string = (UChar* )"";
3553
3554 #ifdef ONIG_DEBUG_SEARCH
3555 fprintf(stderr, "onig_search: empty string.\n");
3556 #endif
3557
3558 if (reg->threshold_len == 0) {
3559 start = end = str = address_for_empty_string;
3560 s = (UChar* )start;
3561 prev = (UChar* )NULL;
3562
3563 MATCH_ARG_INIT(msa, option, region, start);
3564 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3565 msa.state_check_buff = (void* )0;
3566 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
3567 #endif
3568 MATCH_AND_RETURN_CHECK(end);
3569 goto mismatch;
3570 }
3571 goto mismatch_no_msa;
3572 }
3573
3574 #ifdef ONIG_DEBUG_SEARCH
3575 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3576 (int )(end - str), (int )(start - str), (int )(range - str));
3577 #endif
3578
3579 MATCH_ARG_INIT(msa, option, region, orig_start);
3580 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3581 {
3582 int offset = (MIN(start, range) - str);
3583 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3584 }
3585 #endif
3586
3587 s = (UChar* )start;
3588 if (range > start) { /* forward search */
3589 if (s > str)
3590 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3591 else
3592 prev = (UChar* )NULL;
3593
3594 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3595 UChar *sch_range, *low, *high, *low_prev;
3596
3597 sch_range = (UChar* )range;
3598 if (reg->dmax != 0) {
3599 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3600 sch_range = (UChar* )end;
3601 else {
3602 sch_range += reg->dmax;
3603 if (sch_range > end) sch_range = (UChar* )end;
3604 }
3605 }
3606
3607 if ((end - start) < reg->threshold_len)
3608 goto mismatch;
3609
3610 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3611 do {
3612 if (! forward_search_range(reg, str, end, s, sch_range,
3613 &low, &high, &low_prev)) goto mismatch;
3614 if (s < low) {
3615 s = low;
3616 prev = low_prev;
3617 }
3618 while (s <= high) {
3619 MATCH_AND_RETURN_CHECK(orig_range);
3620 prev = s;
3621 s += enclen(reg->enc, s);
3622 }
3623 } while (s < range);
3624 goto mismatch;
3625 }
3626 else { /* check only. */
3627 if (! forward_search_range(reg, str, end, s, sch_range,
3628 &low, &high, (UChar** )NULL)) goto mismatch;
3629
3630 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3631 do {
3632 MATCH_AND_RETURN_CHECK(orig_range);
3633 prev = s;
3634 s += enclen(reg->enc, s);
3635
3636 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3637 prev = s;
3638 s += enclen(reg->enc, s);
3639 }
3640 } while (s < range);
3641 goto mismatch;
3642 }
3643 }
3644 }
3645
3646 do {
3647 MATCH_AND_RETURN_CHECK(orig_range);
3648 prev = s;
3649 s += enclen(reg->enc, s);
3650 } while (s < range);
3651
3652 if (s == range) { /* because empty match with /$/. */
3653 MATCH_AND_RETURN_CHECK(orig_range);
3654 }
3655 }
3656 else { /* backward search */
3657 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3658 if (orig_start < end)
3659 orig_start += enclen(reg->enc, orig_start); /* is upper range */
3660 #endif
3661
3662 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3663 UChar *low, *high, *adjrange, *sch_start;
3664
3665 if (range < end)
3666 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3667 else
3668 adjrange = (UChar* )end;
3669
3670 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3671 (end - range) >= reg->threshold_len) {
3672 do {
3673 sch_start = s + reg->dmax;
3674 if (sch_start > end) sch_start = (UChar* )end;
3675 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3676 &low, &high) <= 0)
3677 goto mismatch;
3678
3679 if (s > high)
3680 s = high;
3681
3682 while (s >= low) {
3683 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3684 MATCH_AND_RETURN_CHECK(orig_start);
3685 s = prev;
3686 }
3687 } while (s >= range);
3688 goto mismatch;
3689 }
3690 else { /* check only. */
3691 if ((end - range) < reg->threshold_len) goto mismatch;
3692
3693 sch_start = s;
3694 if (reg->dmax != 0) {
3695 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3696 sch_start = (UChar* )end;
3697 else {
3698 sch_start += reg->dmax;
3699 if (sch_start > end) sch_start = (UChar* )end;
3700 else
3701 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3702 start, sch_start);
3703 }
3704 }
3705 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3706 &low, &high) <= 0) goto mismatch;
3707 }
3708 }
3709
3710 do {
3711 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3712 MATCH_AND_RETURN_CHECK(orig_start);
3713 s = prev;
3714 } while (s >= range);
3715 }
3716
3717 mismatch:
3718 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3719 if (IS_FIND_LONGEST(reg->options)) {
3720 if (msa.best_len >= 0) {
3721 s = msa.best_s;
3722 goto match;
3723 }
3724 }
3725 #endif
3726 r = ONIG_MISMATCH;
3727
3728 finish:
3729 MATCH_ARG_FREE(msa);
3730 ONIG_STATE_DEC_THREAD(reg);
3731
3732 /* If result is mismatch and no FIND_NOT_EMPTY option,
3733 then the region is not setted in match_at(). */
3734 if (IS_FIND_NOT_EMPTY(reg->options) && region
3735 #ifdef USE_POSIX_API_REGION_OPTION
3736 && !IS_POSIX_REGION(option)
3737 #endif
3738 ) {
3739 onig_region_clear(region);
3740 }
3741
3742 #ifdef ONIG_DEBUG
3743 if (r != ONIG_MISMATCH)
3744 fprintf(stderr, "onig_search: error %d\n", r);
3745 #endif
3746 return r;
3747
3748 mismatch_no_msa:
3749 r = ONIG_MISMATCH;
3750 finish_no_msa:
3751 ONIG_STATE_DEC_THREAD(reg);
3752 #ifdef ONIG_DEBUG
3753 if (r != ONIG_MISMATCH)
3754 fprintf(stderr, "onig_search: error %d\n", r);
3755 #endif
3756 return r;
3757
3758 match:
3759 ONIG_STATE_DEC_THREAD(reg);
3760 MATCH_ARG_FREE(msa);
3761 return (int)(s - str);
3762 }
3763
3764 extern OnigEncoding
onig_get_encoding(regex_t * reg)3765 onig_get_encoding(regex_t* reg)
3766 {
3767 return reg->enc;
3768 }
3769
3770 extern OnigOptionType
onig_get_options(regex_t * reg)3771 onig_get_options(regex_t* reg)
3772 {
3773 return reg->options;
3774 }
3775
3776 extern OnigCaseFoldType
onig_get_case_fold_flag(regex_t * reg)3777 onig_get_case_fold_flag(regex_t* reg)
3778 {
3779 return reg->case_fold_flag;
3780 }
3781
3782 extern OnigSyntaxType*
onig_get_syntax(regex_t * reg)3783 onig_get_syntax(regex_t* reg)
3784 {
3785 return reg->syntax;
3786 }
3787
3788 extern int
onig_number_of_captures(regex_t * reg)3789 onig_number_of_captures(regex_t* reg)
3790 {
3791 return reg->num_mem;
3792 }
3793
3794 extern int
onig_number_of_capture_histories(regex_t * reg)3795 onig_number_of_capture_histories(regex_t* reg)
3796 {
3797 #ifdef USE_CAPTURE_HISTORY
3798 int i, n;
3799
3800 n = 0;
3801 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3802 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3803 n++;
3804 }
3805 return n;
3806 #else
3807 return 0;
3808 #endif
3809 }
3810
3811 extern void
onig_copy_encoding(OnigEncoding to,OnigEncoding from)3812 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3813 {
3814 *to = *from;
3815 }
3816
3817