Ruby 3.5.0dev (2025-11-03 revision 4a3d8346a6d0e068508631541f6bc43e8b154ea1)
regexec.c (4a3d8346a6d0e068508631541f6bc43e8b154ea1)
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
33#ifdef RUBY
34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35#else
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37#endif
38
39#ifndef USE_TOKEN_THREADED_VM
40# ifdef __GNUC__
41# define USE_TOKEN_THREADED_VM 1
42# else
43# define USE_TOKEN_THREADED_VM 0
44# endif
45#endif
46
47#ifdef RUBY
48# define ENC_DUMMY_FLAG (1<<24)
49static inline int
50rb_enc_asciicompat(OnigEncoding enc)
51{
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
53}
54# undef ONIGENC_IS_MBC_ASCII_WORD
55# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59#endif /* RUBY */
60
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
62# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
65# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67static int
68is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
70{
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
79 }
80 else
81 return 1;
82 }
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
92 }
93 }
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
96 }
97}
98#else /* USE_CRNL_AS_LINE_TERMINATOR */
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
129static void
130history_tree_free(OnigCaptureTreeNode* node)
131{
132 history_tree_clear(node);
133 xfree(node);
134}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
227extern OnigCaptureTreeNode*
228onig_get_capture_tree(OnigRegion* region)
229{
230 return region->history_root;
231}
232#endif /* USE_CAPTURE_HISTORY */
233
234#ifdef USE_MATCH_CACHE
235
236/*
237Glossary for "match cache"
238
239"match cache" or "match cache optimization"
240The `Regexp#match` optimization by using a cache.
241
242"cache opcode"
243A cacheable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc).
244It is corresponding to some cache points.
245
246"cache point"
247A cacheable point on matching.
248Usually, one-to-one corresponding between a cache opcode and a cache point exists,
249but cache opcodes between `OP_REPEAT` and `OP_REPEAT_INC` have some corresponding
250cache points depending on repetition counts.
251
252"match cache point"
253A pair of a cache point and a position on an input string.
254We encode a match cache point to an integer value by the following equation:
255"match cache point" = "position on input string" * "total number of cache points" + "cache point"
256
257"match cache buffer"
258A bit-array for memoizing (recording) match cache points once backtracked.
259*/
260
261static OnigPosition count_num_cache_opcodes_inner(
262 const regex_t* reg,
263 MemNumType current_repeat_mem, int lookaround_nesting,
264 UChar** pp, long* num_cache_opcodes_ptr
265)
266{
267 UChar* p = *pp;
268 UChar* pend = reg->p + reg->used;
269 LengthType len;
270 MemNumType repeat_mem;
271 OnigEncoding enc = reg->enc;
272 long num_cache_opcodes = *num_cache_opcodes_ptr;
273 OnigPosition result;
274
275 while (p < pend) {
276 switch (*p++) {
277 case OP_FINISH:
278 case OP_END:
279 break;
280
281 case OP_EXACT1: p++; break;
282 case OP_EXACT2: p += 2; break;
283 case OP_EXACT3: p += 3; break;
284 case OP_EXACT4: p += 4; break;
285 case OP_EXACT5: p += 5; break;
286 case OP_EXACTN:
287 GET_LENGTH_INC(len, p); p += len; break;
288 case OP_EXACTMB2N1: p += 2; break;
289 case OP_EXACTMB2N2: p += 4; break;
290 case OP_EXACTMB2N3: p += 6; break;
291 case OP_EXACTMB2N:
292 GET_LENGTH_INC(len, p); p += len * 2; break;
293 case OP_EXACTMB3N:
294 GET_LENGTH_INC(len, p); p += len * 3; break;
295 case OP_EXACTMBN:
296 {
297 int mb_len;
298 GET_LENGTH_INC(mb_len, p);
299 GET_LENGTH_INC(len, p);
300 p += mb_len * len;
301 }
302 break;
303
304 case OP_EXACT1_IC:
305 len = enclen(enc, p, pend); p += len; break;
306 case OP_EXACTN_IC:
307 GET_LENGTH_INC(len, p); p += len; break;
308
309 case OP_CCLASS:
310 case OP_CCLASS_NOT:
311 p += SIZE_BITSET; break;
312 case OP_CCLASS_MB:
313 case OP_CCLASS_MB_NOT:
314 GET_LENGTH_INC(len, p); p += len; break;
315 case OP_CCLASS_MIX:
316 case OP_CCLASS_MIX_NOT:
317 p += SIZE_BITSET;
318 GET_LENGTH_INC(len, p);
319 p += len;
320 break;
321
322 case OP_ANYCHAR:
323 case OP_ANYCHAR_ML:
324 break;
325 case OP_ANYCHAR_STAR:
326 case OP_ANYCHAR_ML_STAR:
327 num_cache_opcodes++; break;
328 case OP_ANYCHAR_STAR_PEEK_NEXT:
329 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
330 p++; num_cache_opcodes++; break;
331
332 case OP_WORD:
333 case OP_NOT_WORD:
334 case OP_WORD_BOUND:
335 case OP_NOT_WORD_BOUND:
336 case OP_WORD_BEGIN:
337 case OP_WORD_END:
338 break;
339
340 case OP_ASCII_WORD:
341 case OP_NOT_ASCII_WORD:
342 case OP_ASCII_WORD_BOUND:
343 case OP_NOT_ASCII_WORD_BOUND:
344 case OP_ASCII_WORD_BEGIN:
345 case OP_ASCII_WORD_END:
346 break;
347
348 case OP_BEGIN_BUF:
349 case OP_END_BUF:
350 case OP_BEGIN_LINE:
351 case OP_END_LINE:
352 case OP_SEMI_END_BUF:
353 case OP_BEGIN_POSITION:
354 break;
355
356 case OP_BACKREF1:
357 case OP_BACKREF2:
358 case OP_BACKREFN:
359 case OP_BACKREFN_IC:
360 case OP_BACKREF_MULTI:
361 case OP_BACKREF_MULTI_IC:
362 case OP_BACKREF_WITH_LEVEL:
363 goto impossible;
364
365 case OP_MEMORY_START:
366 case OP_MEMORY_START_PUSH:
367 case OP_MEMORY_END_PUSH:
368 case OP_MEMORY_END_PUSH_REC:
369 case OP_MEMORY_END:
370 case OP_MEMORY_END_REC:
371 p += SIZE_MEMNUM;
372 // A memory (capture) in look-around is found.
373 if (lookaround_nesting != 0) {
374 goto impossible;
375 }
376 break;
377
378 case OP_KEEP:
379 break;
380
381 case OP_FAIL:
382 break;
383 case OP_JUMP:
384 p += SIZE_RELADDR;
385 break;
386 case OP_PUSH:
387 p += SIZE_RELADDR;
388 num_cache_opcodes++;
389 break;
390 case OP_POP:
391 break;
392 case OP_PUSH_OR_JUMP_EXACT1:
393 case OP_PUSH_IF_PEEK_NEXT:
394 p += SIZE_RELADDR + 1; num_cache_opcodes++; break;
395 case OP_REPEAT:
396 case OP_REPEAT_NG:
397 if (current_repeat_mem != -1) {
398 // A nested OP_REPEAT is not yet supported.
399 goto impossible;
400 }
401 GET_MEMNUM_INC(repeat_mem, p);
402 p += SIZE_RELADDR;
403 if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
404 long dummy_num_cache_opcodes = 0;
405 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes);
406 if (result < 0 || dummy_num_cache_opcodes < 0) {
407 goto fail;
408 }
409 } else {
410 if (reg->repeat_range[repeat_mem].lower == 0) {
411 num_cache_opcodes++;
412 }
413 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes);
414 if (result < 0 || num_cache_opcodes < 0) {
415 goto fail;
416 }
417 OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
418 if (repeat_range->lower < repeat_range->upper) {
419 num_cache_opcodes++;
420 }
421 }
422 break;
423 case OP_REPEAT_INC:
424 case OP_REPEAT_INC_NG:
425 GET_MEMNUM_INC(repeat_mem, p);
426 if (repeat_mem != current_repeat_mem) {
427 // A lone or invalid OP_REPEAT_INC is found.
428 goto impossible;
429 }
430 goto exit;
431 case OP_REPEAT_INC_SG:
432 case OP_REPEAT_INC_NG_SG:
433 goto impossible;
434 case OP_NULL_CHECK_START:
435 p += SIZE_MEMNUM;
436 break;
437 case OP_NULL_CHECK_END:
438 case OP_NULL_CHECK_END_MEMST_PUSH:
439 p += SIZE_MEMNUM;
440 break;
441 case OP_NULL_CHECK_END_MEMST:
442 p += SIZE_MEMNUM;
443 break;
444
445 case OP_PUSH_POS:
446 if (lookaround_nesting < 0) {
447 // A look-around nested in a atomic grouping is found.
448 goto impossible;
449 }
450 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
451 if (result < 0 || num_cache_opcodes < 0) {
452 goto fail;
453 }
454 break;
455 case OP_PUSH_POS_NOT:
456 if (lookaround_nesting < 0) {
457 // A look-around nested in a atomic grouping is found.
458 goto impossible;
459 }
460 p += SIZE_RELADDR;
461 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
462 if (result < 0 || num_cache_opcodes < 0) {
463 goto fail;
464 }
465 break;
466 case OP_PUSH_LOOK_BEHIND_NOT:
467 if (lookaround_nesting < 0) {
468 // A look-around nested in a atomic grouping is found.
469 goto impossible;
470 }
471 p += SIZE_RELADDR;
472 p += SIZE_LENGTH;
473 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
474 if (result < 0 || num_cache_opcodes < 0) {
475 goto fail;
476 }
477 break;
478 case OP_PUSH_STOP_BT:
479 if (lookaround_nesting != 0) {
480 // A nested atomic grouping is found.
481 goto impossible;
482 }
483 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes);
484 if (result < 0 || num_cache_opcodes < 0) {
485 goto fail;
486 }
487 break;
488 case OP_POP_POS:
489 case OP_FAIL_POS:
490 case OP_FAIL_LOOK_BEHIND_NOT:
491 case OP_POP_STOP_BT:
492 goto exit;
493 case OP_LOOK_BEHIND:
494 p += SIZE_LENGTH;
495 break;
496
497 case OP_PUSH_ABSENT_POS:
498 case OP_ABSENT_END:
499 case OP_ABSENT:
500 goto impossible;
501
502 case OP_CALL:
503 case OP_RETURN:
504 goto impossible;
505
506 case OP_CONDITION:
507 goto impossible;
508
509 case OP_STATE_CHECK_PUSH:
510 case OP_STATE_CHECK_PUSH_OR_JUMP:
511 case OP_STATE_CHECK:
512 case OP_STATE_CHECK_ANYCHAR_STAR:
513 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
514 goto impossible;
515
516 case OP_SET_OPTION_PUSH:
517 case OP_SET_OPTION:
518 p += SIZE_OPTION;
519 break;
520
521 default:
522 goto bytecode_error;
523 }
524 }
525
526exit:
527 *pp = p;
528 *num_cache_opcodes_ptr = num_cache_opcodes;
529 return 0;
530
531fail:
532 *num_cache_opcodes_ptr = num_cache_opcodes;
533 return result;
534
535impossible:
536 *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE;
537 return 0;
538
539bytecode_error:
540 return ONIGERR_UNDEFINED_BYTECODE;
541}
542
543/* count the total number of cache opcodes for allocating a match cache buffer. */
544static OnigPosition
545count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
546{
547 UChar* p = reg->p;
548 *num_cache_opcodes_ptr = 0;
549 OnigPosition result = count_num_cache_opcodes_inner(reg, -1, 0, &p, num_cache_opcodes_ptr);
550 if (result == 0 && *num_cache_opcodes_ptr >= 0 && p != reg->p + reg->used) {
551 return ONIGERR_UNDEFINED_BYTECODE;
552 }
553
554 return result;
555}
556
557static OnigPosition
558init_cache_opcodes_inner(
559 const regex_t* reg,
560 MemNumType current_repeat_mem, int lookaround_nesting,
561 OnigCacheOpcode** cache_opcodes_ptr, UChar** pp, long* num_cache_points_ptr
562)
563{
564 UChar* p = *pp;
565 UChar* pend = reg->p + reg->used;
566 UChar* pbegin;
567 LengthType len;
568 MemNumType repeat_mem;
569 OnigEncoding enc = reg->enc;
570 long cache_point = *num_cache_points_ptr;
571 OnigCacheOpcode *cache_opcodes = *cache_opcodes_ptr;
572 OnigPosition result;
573
574# define INC_CACHE_OPCODES if (cache_opcodes != NULL) {\
575 cache_opcodes->addr = pbegin;\
576 cache_opcodes->cache_point = cache_point;\
577 cache_opcodes->outer_repeat_mem = current_repeat_mem;\
578 cache_opcodes->num_cache_points_at_outer_repeat = 0;\
579 cache_opcodes->num_cache_points_in_outer_repeat = 0;\
580 cache_opcodes->lookaround_nesting = lookaround_nesting;\
581 cache_opcodes->match_addr = NULL;\
582 cache_point += lookaround_nesting != 0 ? 2 : 1;\
583 cache_opcodes++;\
584 }
585
586 while (p < pend) {
587 pbegin = p;
588 switch (*p++) {
589 case OP_FINISH:
590 case OP_END:
591 break;
592
593 case OP_EXACT1: p++; break;
594 case OP_EXACT2: p += 2; break;
595 case OP_EXACT3: p += 3; break;
596 case OP_EXACT4: p += 4; break;
597 case OP_EXACT5: p += 5; break;
598 case OP_EXACTN:
599 GET_LENGTH_INC(len, p); p += len; break;
600 case OP_EXACTMB2N1: p += 2; break;
601 case OP_EXACTMB2N2: p += 4; break;
602 case OP_EXACTMB2N3: p += 6; break;
603 case OP_EXACTMB2N:
604 GET_LENGTH_INC(len, p); p += len * 2; break;
605 case OP_EXACTMB3N:
606 GET_LENGTH_INC(len, p); p += len * 3; break;
607 case OP_EXACTMBN:
608 {
609 int mb_len;
610 GET_LENGTH_INC(mb_len, p);
611 GET_LENGTH_INC(len, p);
612 p += mb_len * len;
613 }
614 break;
615
616 case OP_EXACT1_IC:
617 len = enclen(enc, p, pend); p += len; break;
618 case OP_EXACTN_IC:
619 GET_LENGTH_INC(len, p); p += len; break;
620
621 case OP_CCLASS:
622 case OP_CCLASS_NOT:
623 p += SIZE_BITSET; break;
624 case OP_CCLASS_MB:
625 case OP_CCLASS_MB_NOT:
626 GET_LENGTH_INC(len, p); p += len; break;
627 case OP_CCLASS_MIX:
628 case OP_CCLASS_MIX_NOT:
629 p += SIZE_BITSET;
630 GET_LENGTH_INC(len, p);
631 p += len;
632 break;
633
634 case OP_ANYCHAR:
635 case OP_ANYCHAR_ML:
636 break;
637 case OP_ANYCHAR_STAR:
638 case OP_ANYCHAR_ML_STAR:
639 INC_CACHE_OPCODES;
640 break;
641 case OP_ANYCHAR_STAR_PEEK_NEXT:
642 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
643 p++;
644 INC_CACHE_OPCODES;
645 break;
646
647 case OP_WORD:
648 case OP_NOT_WORD:
649 case OP_WORD_BOUND:
650 case OP_NOT_WORD_BOUND:
651 case OP_WORD_BEGIN:
652 case OP_WORD_END:
653 break;
654
655 case OP_ASCII_WORD:
656 case OP_NOT_ASCII_WORD:
657 case OP_ASCII_WORD_BOUND:
658 case OP_NOT_ASCII_WORD_BOUND:
659 case OP_ASCII_WORD_BEGIN:
660 case OP_ASCII_WORD_END:
661 break;
662
663 case OP_BEGIN_BUF:
664 case OP_END_BUF:
665 case OP_BEGIN_LINE:
666 case OP_END_LINE:
667 case OP_SEMI_END_BUF:
668 case OP_BEGIN_POSITION:
669 break;
670
671 case OP_BACKREF1:
672 case OP_BACKREF2:
673 case OP_BACKREFN:
674 case OP_BACKREFN_IC:
675 case OP_BACKREF_MULTI:
676 case OP_BACKREF_MULTI_IC:
677 case OP_BACKREF_WITH_LEVEL:
678 goto unexpected_bytecode_error;
679
680 case OP_MEMORY_START:
681 case OP_MEMORY_START_PUSH:
682 case OP_MEMORY_END_PUSH:
683 case OP_MEMORY_END_PUSH_REC:
684 case OP_MEMORY_END:
685 case OP_MEMORY_END_REC:
686 p += SIZE_MEMNUM;
687 if (lookaround_nesting != 0) {
688 goto unexpected_bytecode_error;
689 }
690 break;
691
692 case OP_KEEP:
693 break;
694
695 case OP_FAIL:
696 break;
697 case OP_JUMP:
698 p += SIZE_RELADDR;
699 break;
700 case OP_PUSH:
701 p += SIZE_RELADDR;
702 INC_CACHE_OPCODES;
703 break;
704 case OP_POP:
705 break;
706 case OP_PUSH_OR_JUMP_EXACT1:
707 case OP_PUSH_IF_PEEK_NEXT:
708 p += SIZE_RELADDR + 1;
709 INC_CACHE_OPCODES;
710 break;
711 case OP_REPEAT:
712 case OP_REPEAT_NG:
713 GET_MEMNUM_INC(repeat_mem, p);
714 p += SIZE_RELADDR;
715 if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
716 long dummy_num_cache_points = 0;
717 OnigCacheOpcode* dummy_cache_opcodes = NULL;
718 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points);
719 if (result != 0) {
720 goto fail;
721 }
722 } else {
723 if (reg->repeat_range[repeat_mem].lower == 0) {
724 INC_CACHE_OPCODES;
725 }
726 {
727 long num_cache_points_in_repeat = 0;
728 long num_cache_points_at_repeat = cache_point;
729 OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes;
730 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat);
731 if (result != 0) {
732 goto fail;
733 }
734 OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
735 if (repeat_range->lower < repeat_range->upper) {
736 INC_CACHE_OPCODES;
737 cache_point -= lookaround_nesting != 0 ? 2 : 1;
738 }
739 int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower;
740 cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds;
741 for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) {
742 cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;
743 cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat;
744 }
745 }
746 }
747 break;
748 case OP_REPEAT_INC:
749 case OP_REPEAT_INC_NG:
750 p += SIZE_MEMNUM;
751 goto exit;
752 case OP_REPEAT_INC_SG:
753 case OP_REPEAT_INC_NG_SG:
754 goto unexpected_bytecode_error;
755 case OP_NULL_CHECK_START:
756 p += SIZE_MEMNUM;
757 break;
758 case OP_NULL_CHECK_END:
759 case OP_NULL_CHECK_END_MEMST_PUSH:
760 p += SIZE_MEMNUM;
761 break;
762 case OP_NULL_CHECK_END_MEMST:
763 p += SIZE_MEMNUM;
764 break;
765
766 case OP_PUSH_POS:
767 lookaround:
768 {
769 OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes;
770 result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point);
771 if (result != 0) {
772 goto fail;
773 }
774 UChar* match_addr = p - 1;
775 for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) {
776 if (cache_opcodes_in_lookaround->match_addr == NULL) {
777 cache_opcodes_in_lookaround->match_addr = match_addr;
778 }
779 }
780 }
781 break;
782 case OP_PUSH_POS_NOT:
783 p += SIZE_RELADDR;
784 goto lookaround;
785 case OP_PUSH_LOOK_BEHIND_NOT:
786 p += SIZE_RELADDR;
787 p += SIZE_LENGTH;
788 goto lookaround;
789 case OP_PUSH_STOP_BT:
790 {
791 OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes;
792 result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point);
793 if (result != 0) {
794 goto fail;
795 }
796 UChar* match_addr = p - 1;
797 for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) {
798 if (cache_opcodes_in_atomic->match_addr == NULL) {
799 cache_opcodes_in_atomic->match_addr = match_addr;
800 }
801 }
802 }
803 break;
804 case OP_POP_POS:
805 case OP_FAIL_POS:
806 case OP_FAIL_LOOK_BEHIND_NOT:
807 case OP_POP_STOP_BT:
808 goto exit;
809 case OP_LOOK_BEHIND:
810 p += SIZE_LENGTH;
811 break;
812
813 case OP_ABSENT_END:
814 case OP_ABSENT:
815 goto unexpected_bytecode_error;
816
817 case OP_CALL:
818 case OP_RETURN:
819 goto unexpected_bytecode_error;
820
821 case OP_CONDITION:
822 goto unexpected_bytecode_error;
823
824 case OP_STATE_CHECK_PUSH:
825 case OP_STATE_CHECK_PUSH_OR_JUMP:
826 case OP_STATE_CHECK:
827 case OP_STATE_CHECK_ANYCHAR_STAR:
828 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
829 goto unexpected_bytecode_error;
830
831 case OP_SET_OPTION_PUSH:
832 case OP_SET_OPTION:
833 p += SIZE_OPTION;
834 break;
835
836 default:
837 goto bytecode_error;
838 }
839 }
840
841exit:
842 *cache_opcodes_ptr = cache_opcodes;
843 *pp = p;
844 *num_cache_points_ptr = cache_point;
845 return 0;
846
847fail:
848 return result;
849
850unexpected_bytecode_error:
851 return ONIGERR_UNEXPECTED_BYTECODE;
852
853bytecode_error:
854 return ONIGERR_UNDEFINED_BYTECODE;
855}
856
857/* collect cache opcodes from the given regex program, and compute the total number of cache points. */
858static OnigPosition
859init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes_ptr, long* num_cache_points_ptr)
860{
861 UChar* p = reg->p;
862 *num_cache_points_ptr = 0;
863 OnigPosition result = init_cache_opcodes_inner(reg, -1, 0, &cache_opcodes_ptr, &p, num_cache_points_ptr);
864 if (result == 0 && p != reg->p + reg->used) {
865 return ONIGERR_UNDEFINED_BYTECODE;
866 }
867
868 return result;
869}
870#else
871static OnigPosition
872count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes)
873{
874 *num_cache_opcodes = NUM_CACHE_OPCODES_IMPOSSIBLE;
875 return 0;
876}
877#endif /* USE_MATCH_CACHE */
878
879extern int
880onig_check_linear_time(OnigRegexType* reg)
881{
882 long num_cache_opcodes = 0;
883 count_num_cache_opcodes(reg, &num_cache_opcodes);
884 return num_cache_opcodes != NUM_CACHE_OPCODES_IMPOSSIBLE;
885}
886
887extern void
888onig_region_clear(OnigRegion* region)
889{
890 int i;
891
892 for (i = 0; i < region->num_regs; i++) {
893 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
894 }
895#ifdef USE_CAPTURE_HISTORY
896 history_root_free(region);
897#endif
898}
899
900extern int
901onig_region_resize(OnigRegion* region, int n)
902{
903 region->num_regs = n;
904
905 if (n < ONIG_NREGION)
906 n = ONIG_NREGION;
907
908 if (region->allocated == 0) {
909 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
910 if (region->beg == 0)
911 return ONIGERR_MEMORY;
912
913 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
914 if (region->end == 0) {
915 xfree(region->beg);
916 return ONIGERR_MEMORY;
917 }
918
919 region->allocated = n;
920 }
921 else if (region->allocated < n) {
922 OnigPosition *tmp;
923
924 region->allocated = 0;
925 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
926 if (tmp == 0) {
927 xfree(region->beg);
928 xfree(region->end);
929 return ONIGERR_MEMORY;
930 }
931 region->beg = tmp;
932 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
933 if (tmp == 0) {
934 xfree(region->beg);
935 xfree(region->end);
936 return ONIGERR_MEMORY;
937 }
938 region->end = tmp;
939
940 region->allocated = n;
941 }
942
943 return 0;
944}
945
946static int
947onig_region_resize_clear(OnigRegion* region, int n)
948{
949 int r;
950
951 r = onig_region_resize(region, n);
952 if (r != 0) return r;
953 onig_region_clear(region);
954 return 0;
955}
956
957extern int
958onig_region_set(OnigRegion* region, int at, int beg, int end)
959{
960 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
961
962 if (at >= region->allocated) {
963 int r = onig_region_resize(region, at + 1);
964 if (r < 0) return r;
965 }
966
967 region->beg[at] = beg;
968 region->end[at] = end;
969 return 0;
970}
971
972extern void
973onig_region_init(OnigRegion* region)
974{
975 region->num_regs = 0;
976 region->allocated = 0;
977 region->beg = (OnigPosition* )0;
978 region->end = (OnigPosition* )0;
979#ifdef USE_CAPTURE_HISTORY
980 region->history_root = (OnigCaptureTreeNode* )0;
981#endif
982}
983
984extern OnigRegion*
985onig_region_new(void)
986{
987 OnigRegion* r;
988
989 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
990 if (r)
991 onig_region_init(r);
992 return r;
993}
994
995extern void
996onig_region_free(OnigRegion* r, int free_self)
997{
998 if (r) {
999 if (r->allocated > 0) {
1000 xfree(r->beg);
1001 xfree(r->end);
1002 }
1003#ifdef USE_CAPTURE_HISTORY
1004 history_root_free(r);
1005#endif
1006 if (free_self) {
1007 xfree(r);
1008 }
1009 else {
1010 memset(r, 0, sizeof(OnigRegion));
1011 }
1012 }
1013}
1014
1015extern void
1016onig_region_copy(OnigRegion* to, const OnigRegion* from)
1017{
1018#define RREGC_SIZE (sizeof(int) * from->num_regs)
1019 int i, r;
1020
1021 if (to == from) return;
1022
1023 r = onig_region_resize(to, from->num_regs);
1024 if (r) return;
1025
1026 for (i = 0; i < from->num_regs; i++) {
1027 to->beg[i] = from->beg[i];
1028 to->end[i] = from->end[i];
1029 }
1030 to->num_regs = from->num_regs;
1031
1032#ifdef USE_CAPTURE_HISTORY
1033 history_root_free(to);
1034
1035 if (IS_NOT_NULL(from->history_root)) {
1036 to->history_root = history_tree_clone(from->history_root);
1037 }
1038#endif
1039}
1040
1041
1043#define INVALID_STACK_INDEX -1
1044
1045/* stack type */
1046/* used by normal-POP */
1047#define STK_ALT 0x0001
1048#define STK_LOOK_BEHIND_NOT 0x0002
1049#define STK_POS_NOT 0x0003
1050/* handled by normal-POP */
1051#define STK_MEM_START 0x0100
1052#define STK_MEM_END 0x8200
1053#define STK_REPEAT_INC 0x0300
1054#define STK_STATE_CHECK_MARK 0x1000
1055/* avoided by normal-POP */
1056#define STK_NULL_CHECK_START 0x3000
1057#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
1058#define STK_MEM_END_MARK 0x8400
1059#define STK_POS 0x0500 /* used when POP-POS */
1060#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
1061#define STK_REPEAT 0x0700
1062#define STK_CALL_FRAME 0x0800
1063#define STK_RETURN 0x0900
1064#define STK_VOID 0x0a00 /* for fill a blank */
1065#define STK_ABSENT_POS 0x0b00 /* for absent */
1066#define STK_ABSENT 0x0c00 /* absent inner loop marker */
1067#define STK_MATCH_CACHE_POINT 0x0d00 /* for the match cache optimization */
1068#define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00
1069
1070/* stack type check mask */
1071#define STK_MASK_POP_USED 0x00ff
1072#define STK_MASK_TO_VOID_TARGET 0x10ff
1073#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
1074
1075#ifdef USE_MATCH_CACHE
1076#define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\
1077 (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;\
1078 (msa).num_fails = 0;\
1079 (msa).num_cache_opcodes = NUM_CACHE_OPCODES_UNINIT;\
1080 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1081 (msa).num_cache_points = 0;\
1082 (msa).match_cache_buf = (uint8_t*)NULL;\
1083} while(0)
1084#define MATCH_ARG_FREE_MATCH_CACHE(msa) do {\
1085 xfree((msa).cache_opcodes);\
1086 xfree((msa).match_cache_buf);\
1087 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1088 (msa).match_cache_buf = (uint8_t*)NULL;\
1089} while(0)
1090#else
1091#define MATCH_ARG_INIT_MATCH_CACHE(msa)
1092#define MATCH_ARG_FREE_MATCH_CACHE(msa)
1093#endif
1094
1095#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1096# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1097 (msa).stack_p = (void* )0;\
1098 (msa).options = (arg_option);\
1099 (msa).region = (arg_region);\
1100 (msa).start = (arg_start);\
1101 (msa).gpos = (arg_gpos);\
1102 (msa).best_len = ONIG_MISMATCH;\
1103 (msa).counter = 0;\
1104 (msa).end_time = 0;\
1105 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1106} while(0)
1107#else
1108# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1109 (msa).stack_p = (void* )0;\
1110 (msa).options = (arg_option);\
1111 (msa).region = (arg_region);\
1112 (msa).start = (arg_start);\
1113 (msa).gpos = (arg_gpos);\
1114 (msa).counter = 0;\
1115 (msa).end_time = 0;\
1116 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1117} while(0)
1118#endif
1119
1120#ifdef USE_COMBINATION_EXPLOSION_CHECK
1121
1122# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
1123
1124# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
1125 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
1126 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
1127 offset = ((offset) * (state_num)) >> 3;\
1128 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
1129 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
1130 (msa).state_check_buff = (void* )xmalloc(size);\
1131 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
1132 }\
1133 else \
1134 (msa).state_check_buff = (void* )xalloca(size);\
1135 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
1136 (size_t )(size - (offset))); \
1137 (msa).state_check_buff_size = size;\
1138 }\
1139 else {\
1140 (msa).state_check_buff = (void* )0;\
1141 (msa).state_check_buff_size = 0;\
1142 }\
1143 }\
1144 else {\
1145 (msa).state_check_buff = (void* )0;\
1146 (msa).state_check_buff_size = 0;\
1147 }\
1148 } while(0)
1149
1150# define MATCH_ARG_FREE(msa) do {\
1151 xfree((msa).stack_p);\
1152 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
1153 xfree((msa).state_check_buff);\
1154 }\
1155 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1156} while(0)
1157#else /* USE_COMBINATION_EXPLOSION_CHECK */
1158# define MATCH_ARG_FREE(msa) do {\
1159 xfree((msa).stack_p);\
1160 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1161} while (0)
1162#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1163
1164
1165
1166#define MAX_PTR_NUM 100
1167
1168#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
1169 if (ptr_num > MAX_PTR_NUM) {\
1170 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
1171 heap_addr = alloc_addr;\
1172 if (msa->stack_p) {\
1173 stk_alloc = (OnigStackType* )(msa->stack_p);\
1174 stk_base = stk_alloc;\
1175 stk = stk_base;\
1176 stk_end = stk_base + msa->stack_n;\
1177 }\
1178 else {\
1179 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
1180 stk_base = stk_alloc;\
1181 stk = stk_base;\
1182 stk_end = stk_base + (stack_num);\
1183 }\
1184 }\
1185 else if (msa->stack_p) {\
1186 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
1187 heap_addr = NULL;\
1188 stk_alloc = (OnigStackType* )(msa->stack_p);\
1189 stk_base = stk_alloc;\
1190 stk = stk_base;\
1191 stk_end = stk_base + msa->stack_n;\
1192 }\
1193 else {\
1194 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
1195 + sizeof(OnigStackType) * (stack_num));\
1196 heap_addr = NULL;\
1197 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
1198 stk_base = stk_alloc;\
1199 stk = stk_base;\
1200 stk_end = stk_base + (stack_num);\
1201 }\
1202} while(0)
1203
1204#define STACK_SAVE do{\
1205 if (stk_base != stk_alloc) {\
1206 msa->stack_p = stk_base;\
1207 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
1208 };\
1209} while(0)
1210
1211static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1212
1213extern unsigned int
1214onig_get_match_stack_limit_size(void)
1215{
1216 return MatchStackLimitSize;
1217}
1218
1219extern int
1220onig_set_match_stack_limit_size(unsigned int size)
1221{
1222 MatchStackLimitSize = size;
1223 return 0;
1224}
1225
1226static int
1227stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
1228 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
1229{
1230 size_t n;
1231 OnigStackType *x, *stk_base, *stk_end, *stk;
1232
1233 stk_base = *arg_stk_base;
1234 stk_end = *arg_stk_end;
1235 stk = *arg_stk;
1236
1237 n = stk_end - stk_base;
1238 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1239 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
1240 if (IS_NULL(x)) {
1241 STACK_SAVE;
1242 return ONIGERR_MEMORY;
1243 }
1244 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
1245 n *= 2;
1246 }
1247 else {
1248 unsigned int limit_size = MatchStackLimitSize;
1249 n *= 2;
1250 if (limit_size != 0 && n > limit_size) {
1251 if ((unsigned int )(stk_end - stk_base) == limit_size)
1252 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1253 else
1254 n = limit_size;
1255 }
1256 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
1257 if (IS_NULL(x)) {
1258 STACK_SAVE;
1259 return ONIGERR_MEMORY;
1260 }
1261 }
1262 *arg_stk = x + (stk - stk_base);
1263 *arg_stk_base = x;
1264 *arg_stk_end = x + n;
1265 return 0;
1266}
1267
1268#define STACK_ENSURE(n) do {\
1269 if (stk_end - stk < (n)) {\
1270 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
1271 if (r != 0) {\
1272 STACK_SAVE;\
1273 xfree(xmalloc_base);\
1274 return r;\
1275 }\
1276 }\
1277} while(0)
1278
1279#define STACK_AT(index) (stk_base + (index))
1280#define GET_STACK_INDEX(stk) ((stk) - stk_base)
1281
1282#define STACK_PUSH_TYPE(stack_type) do {\
1283 STACK_ENSURE(1);\
1284 stk->type = (stack_type);\
1285 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1286 STACK_INC;\
1287} while(0)
1288
1289#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1290
1291#ifdef USE_COMBINATION_EXPLOSION_CHECK
1292# define STATE_CHECK_POS(s,snum) \
1293 (((s) - str) * num_comb_exp_check + ((snum) - 1))
1294# define STATE_CHECK_VAL(v,snum) do {\
1295 if (state_check_buff != NULL) {\
1296 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
1297 (v) = state_check_buff[x/8] & (1<<(x%8));\
1298 }\
1299 else (v) = 0;\
1300} while(0)
1301
1302
1303# define ELSE_IF_STATE_CHECK_MARK(stk) \
1304 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
1305 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
1306 state_check_buff[x/8] |= (1<<(x%8)); \
1307 }
1308
1309# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1310 STACK_ENSURE(1);\
1311 stk->type = (stack_type);\
1312 stk->u.state.pcode = (pat);\
1313 stk->u.state.pstr = (s);\
1314 stk->u.state.pstr_prev = (sprev);\
1315 stk->u.state.state_check = 0;\
1316 stk->u.state.pkeep = (keep);\
1317 STACK_INC;\
1318} while(0)
1319
1320# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1321 stk->type = (stack_type);\
1322 stk->u.state.pcode = (pat);\
1323 stk->u.state.state_check = 0;\
1324 STACK_INC;\
1325} while(0)
1326
1327# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
1328 STACK_ENSURE(1);\
1329 stk->type = STK_ALT;\
1330 stk->u.state.pcode = (pat);\
1331 stk->u.state.pstr = (s);\
1332 stk->u.state.pstr_prev = (sprev);\
1333 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
1334 stk->u.state.pkeep = (keep);\
1335 STACK_INC;\
1336} while(0)
1337
1338# define STACK_PUSH_STATE_CHECK(s,snum) do {\
1339 if (state_check_buff != NULL) {\
1340 STACK_ENSURE(1);\
1341 stk->type = STK_STATE_CHECK_MARK;\
1342 stk->u.state.pstr = (s);\
1343 stk->u.state.state_check = (snum);\
1344 STACK_INC;\
1345 }\
1346} while(0)
1347
1348#else /* USE_COMBINATION_EXPLOSION_CHECK */
1349
1350# define ELSE_IF_STATE_CHECK_MARK(stk)
1351
1352# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1353 STACK_ENSURE(1);\
1354 stk->type = (stack_type);\
1355 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1356 stk->u.state.pcode = (pat);\
1357 stk->u.state.pstr = (s);\
1358 stk->u.state.pstr_prev = (sprev);\
1359 stk->u.state.pkeep = (keep);\
1360 STACK_INC;\
1361} while(0)
1362
1363# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1364 stk->type = (stack_type);\
1365 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1366 stk->u.state.pcode = (pat);\
1367 STACK_INC;\
1368} while(0)
1369#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1370
1371#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
1372#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
1373#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
1374#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
1375#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
1376#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
1377 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1378
1379#define STACK_PUSH_REPEAT(id, pat) do {\
1380 STACK_ENSURE(1);\
1381 stk->type = STK_REPEAT;\
1382 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1383 stk->u.repeat.num = (id);\
1384 stk->u.repeat.pcode = (pat);\
1385 stk->u.repeat.count = 0;\
1386 STACK_INC;\
1387} while(0)
1388
1389#define STACK_PUSH_REPEAT_INC(sindex) do {\
1390 STACK_ENSURE(1);\
1391 stk->type = STK_REPEAT_INC;\
1392 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1393 stk->u.repeat_inc.si = (sindex);\
1394 STACK_INC;\
1395} while(0)
1396
1397#define STACK_PUSH_MEM_START(mnum, s) do {\
1398 STACK_ENSURE(1);\
1399 stk->type = STK_MEM_START;\
1400 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1401 stk->u.mem.num = (mnum);\
1402 stk->u.mem.pstr = (s);\
1403 stk->u.mem.start = mem_start_stk[mnum];\
1404 stk->u.mem.end = mem_end_stk[mnum];\
1405 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1406 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1407 STACK_INC;\
1408} while(0)
1409
1410#define STACK_PUSH_MEM_END(mnum, s) do {\
1411 STACK_ENSURE(1);\
1412 stk->type = STK_MEM_END;\
1413 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1414 stk->u.mem.num = (mnum);\
1415 stk->u.mem.pstr = (s);\
1416 stk->u.mem.start = mem_start_stk[mnum];\
1417 stk->u.mem.end = mem_end_stk[mnum];\
1418 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1419 STACK_INC;\
1420} while(0)
1421
1422#define STACK_PUSH_MEM_END_MARK(mnum) do {\
1423 STACK_ENSURE(1);\
1424 stk->type = STK_MEM_END_MARK;\
1425 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1426 stk->u.mem.num = (mnum);\
1427 STACK_INC;\
1428} while(0)
1429
1430#define STACK_GET_MEM_START(mnum, k) do {\
1431 int level = 0;\
1432 k = stk;\
1433 while (k > stk_base) {\
1434 k--;\
1435 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1436 && k->u.mem.num == (mnum)) {\
1437 level++;\
1438 }\
1439 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1440 if (level == 0) break;\
1441 level--;\
1442 }\
1443 }\
1444} while(0)
1445
1446#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1447 int level = 0;\
1448 while (k < stk) {\
1449 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1450 if (level == 0) (start) = k->u.mem.pstr;\
1451 level++;\
1452 }\
1453 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1454 level--;\
1455 if (level == 0) {\
1456 (end) = k->u.mem.pstr;\
1457 break;\
1458 }\
1459 }\
1460 k++;\
1461 }\
1462} while(0)
1463
1464#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
1465 STACK_ENSURE(1);\
1466 stk->type = STK_NULL_CHECK_START;\
1467 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1468 stk->u.null_check.num = (cnum);\
1469 stk->u.null_check.pstr = (s);\
1470 STACK_INC;\
1471} while(0)
1472
1473#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
1474 STACK_ENSURE(1);\
1475 stk->type = STK_NULL_CHECK_END;\
1476 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1477 stk->u.null_check.num = (cnum);\
1478 STACK_INC;\
1479} while(0)
1480
1481#define STACK_PUSH_CALL_FRAME(pat) do {\
1482 STACK_ENSURE(1);\
1483 stk->type = STK_CALL_FRAME;\
1484 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1485 stk->u.call_frame.ret_addr = (pat);\
1486 STACK_INC;\
1487} while(0)
1488
1489#define STACK_PUSH_RETURN do {\
1490 STACK_ENSURE(1);\
1491 stk->type = STK_RETURN;\
1492 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1493 STACK_INC;\
1494} while(0)
1495
1496#define STACK_PUSH_ABSENT_POS(start, end) do {\
1497 STACK_ENSURE(1);\
1498 stk->type = STK_ABSENT_POS;\
1499 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1500 stk->u.absent_pos.abs_pstr = (start);\
1501 stk->u.absent_pos.end_pstr = (end);\
1502 STACK_INC;\
1503} while(0)
1504
1505#define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\
1506 STACK_ENSURE(1);\
1507 stk->type = STK_MATCH_CACHE_POINT;\
1508 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1509 stk->u.match_cache_point.index = (match_cache_point_index);\
1510 stk->u.match_cache_point.mask = (match_cache_point_mask);\
1511 STACK_INC;\
1512} while(0)
1513
1514
1515#ifdef ONIG_DEBUG
1516# define STACK_BASE_CHECK(p, at) \
1517 if ((p) < stk_base) {\
1518 fprintf(stderr, "at %s\n", at);\
1519 goto stack_error;\
1520 }
1521#else
1522# define STACK_BASE_CHECK(p, at)
1523#endif
1524
1525#ifdef ONIG_DEBUG_MATCH_CACHE
1526# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);
1527#else
1528# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
1529#endif
1530
1531#ifdef USE_MATCH_CACHE
1532# define INC_NUM_FAILS msa->num_fails++
1533# define MEMOIZE_MATCH_CACHE_POINT do {\
1534 if (stk->type == STK_MATCH_CACHE_POINT) {\
1535 msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
1536 MATCH_CACHE_DEBUG_MEMOIZE(stk);\
1537 }\
1538 else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
1539 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1540 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1541 }\
1542 } while(0)
1543# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
1544 if (stkp->type == STK_MATCH_CACHE_POINT) {\
1545 stkp->type = STK_VOID;\
1546 memoize_extended_match_cache_point(msa->match_cache_buf, stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);\
1547 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1548 }\
1549 } while(0)
1550# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
1551 if (stk->type == STK_MATCH_CACHE_POINT) {\
1552 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1553 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1554 }\
1555 } while(0)
1556#else
1557# define INC_NUM_FAILS ((void) 0)
1558# define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
1559# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
1560#endif
1561
1562#define STACK_POP_ONE do {\
1563 stk--;\
1564 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1565} while(0)
1566
1567#define STACK_POP do {\
1568 switch (pop_level) {\
1569 case STACK_POP_LEVEL_FREE:\
1570 while (1) {\
1571 stk--;\
1572 STACK_BASE_CHECK(stk, "STACK_POP"); \
1573 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1574 ELSE_IF_STATE_CHECK_MARK(stk);\
1575 MEMOIZE_MATCH_CACHE_POINT;\
1576 }\
1577 break;\
1578 case STACK_POP_LEVEL_MEM_START:\
1579 while (1) {\
1580 stk--;\
1581 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1582 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1583 else if (stk->type == STK_MEM_START) {\
1584 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1585 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1586 }\
1587 ELSE_IF_STATE_CHECK_MARK(stk);\
1588 MEMOIZE_MATCH_CACHE_POINT;\
1589 }\
1590 break;\
1591 default:\
1592 while (1) {\
1593 stk--;\
1594 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1595 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1596 else if (stk->type == STK_MEM_START) {\
1597 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1598 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1599 }\
1600 else if (stk->type == STK_REPEAT_INC) {\
1601 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1602 }\
1603 else if (stk->type == STK_MEM_END) {\
1604 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1605 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1606 }\
1607 ELSE_IF_STATE_CHECK_MARK(stk);\
1608 MEMOIZE_MATCH_CACHE_POINT;\
1609 }\
1610 break;\
1611 }\
1612} while(0)
1613
1614#define STACK_POP_TIL_POS_NOT do {\
1615 while (1) {\
1616 stk--;\
1617 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
1618 if (stk->type == STK_POS_NOT) break;\
1619 else if (stk->type == STK_MEM_START) {\
1620 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1621 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1622 }\
1623 else if (stk->type == STK_REPEAT_INC) {\
1624 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1625 }\
1626 else if (stk->type == STK_MEM_END) {\
1627 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1628 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1629 }\
1630 else if (IS_TO_VOID_TARGET(stk)) {\
1631 INC_NUM_FAILS;\
1632 }\
1633 ELSE_IF_STATE_CHECK_MARK(stk);\
1634 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\
1635 }\
1636} while(0)
1637
1638#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
1639 while (1) {\
1640 stk--;\
1641 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
1642 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
1643 else if (stk->type == STK_MEM_START) {\
1644 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1645 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1646 }\
1647 else if (stk->type == STK_REPEAT_INC) {\
1648 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1649 }\
1650 else if (stk->type == STK_MEM_END) {\
1651 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1652 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1653 }\
1654 ELSE_IF_STATE_CHECK_MARK(stk);\
1655 }\
1656} while(0)
1657
1658#define STACK_POP_TIL_ABSENT do {\
1659 while (1) {\
1660 stk--;\
1661 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
1662 if (stk->type == STK_ABSENT) break;\
1663 else if (stk->type == STK_MEM_START) {\
1664 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1665 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1666 }\
1667 else if (stk->type == STK_REPEAT_INC) {\
1668 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1669 }\
1670 else if (stk->type == STK_MEM_END) {\
1671 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1672 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1673 }\
1674 ELSE_IF_STATE_CHECK_MARK(stk);\
1675 }\
1676} while(0)
1677
1678#define STACK_POP_ABSENT_POS(start, end) do {\
1679 stk--;\
1680 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
1681 (start) = stk->u.absent_pos.abs_pstr;\
1682 (end) = stk->u.absent_pos.end_pstr;\
1683} while(0)
1684
1685#define STACK_POS_END(k) do {\
1686 k = stk;\
1687 while (1) {\
1688 k--;\
1689 STACK_BASE_CHECK(k, "STACK_POS_END"); \
1690 if (IS_TO_VOID_TARGET(k)) {\
1691 INC_NUM_FAILS;\
1692 k->type = STK_VOID;\
1693 }\
1694 else if (k->type == STK_POS) {\
1695 k->type = STK_VOID;\
1696 break;\
1697 }\
1698 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
1699 }\
1700} while(0)
1701
1702#define STACK_STOP_BT_END do {\
1703 OnigStackType *k = stk;\
1704 while (1) {\
1705 k--;\
1706 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
1707 if (IS_TO_VOID_TARGET(k)) {\
1708 INC_NUM_FAILS;\
1709 k->type = STK_VOID;\
1710 }\
1711 else if (k->type == STK_STOP_BT) {\
1712 k->type = STK_VOID;\
1713 break;\
1714 }\
1715 else if (k->type == STK_MATCH_CACHE_POINT) {\
1716 k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
1717 }\
1718 }\
1719} while(0)
1720
1721#define STACK_STOP_BT_FAIL do {\
1722 while (1) {\
1723 stk--;\
1724 STACK_BASE_CHECK(stk, "STACK_STOP_BT_END"); \
1725 if (stk->type == STK_STOP_BT) {\
1726 stk->type = STK_VOID;\
1727 break;\
1728 }\
1729 MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
1730 }\
1731} while(0)
1732
1733#define STACK_NULL_CHECK(isnull,id,s) do {\
1734 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1735 while (1) {\
1736 k--;\
1737 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
1738 if (k->type == STK_NULL_CHECK_START) {\
1739 if (k->u.null_check.num == (id)) {\
1740 (isnull) = (k->u.null_check.pstr == (s));\
1741 break;\
1742 }\
1743 }\
1744 }\
1745} while(0)
1746
1747#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
1748 int level = 0;\
1749 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1750 while (1) {\
1751 k--;\
1752 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
1753 if (k->type == STK_NULL_CHECK_START) {\
1754 if (k->u.null_check.num == (id)) {\
1755 if (level == 0) {\
1756 (isnull) = (k->u.null_check.pstr == (s));\
1757 break;\
1758 }\
1759 else level--;\
1760 }\
1761 }\
1762 else if (k->type == STK_NULL_CHECK_END) {\
1763 level++;\
1764 }\
1765 }\
1766} while(0)
1767
1768#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
1769 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1770 while (1) {\
1771 k--;\
1772 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
1773 if (k->type == STK_NULL_CHECK_START) {\
1774 if (k->u.null_check.num == (id)) {\
1775 if (k->u.null_check.pstr != (s)) {\
1776 (isnull) = 0;\
1777 break;\
1778 }\
1779 else {\
1780 UChar* endp;\
1781 (isnull) = 1;\
1782 while (k < stk) {\
1783 if (k->type == STK_MEM_START) {\
1784 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1785 (isnull) = 0; break;\
1786 }\
1787 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1788 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1789 else\
1790 endp = (UChar* )k->u.mem.end;\
1791 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1792 (isnull) = 0; break;\
1793 }\
1794 else if (endp != s) {\
1795 (isnull) = -1; /* empty, but position changed */ \
1796 }\
1797 }\
1798 k++;\
1799 }\
1800 break;\
1801 }\
1802 }\
1803 }\
1804 }\
1805} while(0)
1806
1807#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1808 int level = 0;\
1809 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1810 while (1) {\
1811 k--;\
1812 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1813 if (k->type == STK_NULL_CHECK_START) {\
1814 if (k->u.null_check.num == (id)) {\
1815 if (level == 0) {\
1816 if (k->u.null_check.pstr != (s)) {\
1817 (isnull) = 0;\
1818 break;\
1819 }\
1820 else {\
1821 UChar* endp;\
1822 (isnull) = 1;\
1823 while (k < stk) {\
1824 if (k->type == STK_MEM_START) {\
1825 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1826 (isnull) = 0; break;\
1827 }\
1828 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1829 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1830 else\
1831 endp = (UChar* )k->u.mem.end;\
1832 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1833 (isnull) = 0; break;\
1834 }\
1835 else if (endp != s) {\
1836 (isnull) = -1; /* empty, but position changed */ \
1837 }\
1838 }\
1839 k++;\
1840 }\
1841 break;\
1842 }\
1843 }\
1844 else {\
1845 level--;\
1846 }\
1847 }\
1848 }\
1849 else if (k->type == STK_NULL_CHECK_END) {\
1850 if (k->u.null_check.num == (id)) level++;\
1851 }\
1852 }\
1853} while(0)
1854
1855#define STACK_GET_REPEAT(id, k) do {\
1856 int level = 0;\
1857 k = stk;\
1858 while (1) {\
1859 k--;\
1860 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1861 if (k->type == STK_REPEAT) {\
1862 if (level == 0) {\
1863 if (k->u.repeat.num == (id)) {\
1864 break;\
1865 }\
1866 }\
1867 }\
1868 else if (k->type == STK_CALL_FRAME) level--;\
1869 else if (k->type == STK_RETURN) level++;\
1870 }\
1871} while(0)
1872
1873#define STACK_RETURN(addr) do {\
1874 int level = 0;\
1875 OnigStackType* k = stk;\
1876 while (1) {\
1877 k--;\
1878 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1879 if (k->type == STK_CALL_FRAME) {\
1880 if (level == 0) {\
1881 (addr) = k->u.call_frame.ret_addr;\
1882 break;\
1883 }\
1884 else level--;\
1885 }\
1886 else if (k->type == STK_RETURN)\
1887 level++;\
1888 }\
1889} while(0)
1890
1891
1892#define STRING_CMP(s1,s2,len) do {\
1893 while (len-- > 0) {\
1894 if (*s1++ != *s2++) goto fail;\
1895 }\
1896} while(0)
1897
1898#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1899 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1900 goto fail; \
1901} while(0)
1902
1903static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1904 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1905{
1906 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1907 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1908 UChar *p1, *p2, *end1, *s2;
1909 int len1, len2;
1910
1911 s2 = *ps2;
1912 end1 = s1 + mblen;
1913 while (s1 < end1) {
1914 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1915 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1916 if (len1 != len2) return 0;
1917 p1 = buf1;
1918 p2 = buf2;
1919 while (len1-- > 0) {
1920 if (*p1 != *p2) return 0;
1921 p1++;
1922 p2++;
1923 }
1924 }
1925
1926 *ps2 = s2;
1927 return 1;
1928}
1929
1930#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1931 is_fail = 0;\
1932 while (len-- > 0) {\
1933 if (*s1++ != *s2++) {\
1934 is_fail = 1; break;\
1935 }\
1936 }\
1937} while(0)
1938
1939#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1940 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1941 is_fail = 1; \
1942 else \
1943 is_fail = 0; \
1944} while(0)
1945
1946
1947#define IS_EMPTY_STR (str == end)
1948#define ON_STR_BEGIN(s) ((s) == str)
1949#define ON_STR_END(s) ((s) == end)
1950#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1951# define DATA_ENSURE_CHECK1 (s < right_range)
1952# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1953# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1954# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1955# define ABSENT_END_POS right_range
1956#else
1957# define DATA_ENSURE_CHECK1 (s < end)
1958# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1959# define DATA_ENSURE(n) if (s + (n) > end) goto fail
1960# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1961# define ABSENT_END_POS end
1962#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1963
1964int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
1965
1966static inline int
1967enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
1968{
1969 if (enc->max_enc_len == enc->min_enc_len) {
1970 return (p < e ? enc->min_enc_len : 0);
1971 }
1972 else {
1973 return onigenc_mbclen_approximate(p, e, enc);
1974 }
1975}
1976
1977
1978#ifdef USE_CAPTURE_HISTORY
1979static int
1980make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1981 OnigStackType* stk_top, UChar* str, regex_t* reg)
1982{
1983 int n, r;
1984 OnigCaptureTreeNode* child;
1985 OnigStackType* k = *kp;
1986
1987 while (k < stk_top) {
1988 if (k->type == STK_MEM_START) {
1989 n = k->u.mem.num;
1990 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1991 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1992 child = history_node_new();
1993 CHECK_NULL_RETURN_MEMERR(child);
1994 child->group = n;
1995 child->beg = k->u.mem.pstr - str;
1996 r = history_tree_add_child(node, child);
1997 if (r != 0) {
1998 history_tree_free(child);
1999 return r;
2000 }
2001 *kp = (k + 1);
2002 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2003 if (r != 0) return r;
2004
2005 k = *kp;
2006 child->end = k->u.mem.pstr - str;
2007 }
2008 }
2009 else if (k->type == STK_MEM_END) {
2010 if (k->u.mem.num == node->group) {
2011 node->end = k->u.mem.pstr - str;
2012 *kp = k;
2013 return 0;
2014 }
2015 }
2016 k++;
2017 }
2018
2019 return 1; /* 1: root node ending. */
2020}
2021#endif /* USE_CAPTURE_HISTORY */
2022
2023#ifdef USE_BACKREF_WITH_LEVEL
2024static int
2025mem_is_in_memp(int mem, int num, UChar* memp)
2026{
2027 int i;
2028 MemNumType m;
2029
2030 for (i = 0; i < num; i++) {
2031 GET_MEMNUM_INC(m, memp);
2032 if (mem == (int )m) return 1;
2033 }
2034 return 0;
2035}
2036
2037static int backref_match_at_nested_level(regex_t* reg,
2038 OnigStackType* top, OnigStackType* stk_base,
2039 int ignore_case, int case_fold_flag,
2040 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
2041{
2042 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2043 int level;
2044 OnigStackType* k;
2045
2046 level = 0;
2047 k = top;
2048 k--;
2049 while (k >= stk_base) {
2050 if (k->type == STK_CALL_FRAME) {
2051 level--;
2052 }
2053 else if (k->type == STK_RETURN) {
2054 level++;
2055 }
2056 else if (level == nest) {
2057 if (k->type == STK_MEM_START) {
2058 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2059 pstart = k->u.mem.pstr;
2060 if (pend != NULL_UCHARP) {
2061 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2062 p = pstart;
2063 ss = *s;
2064
2065 if (ignore_case != 0) {
2066 if (string_cmp_ic(reg->enc, case_fold_flag,
2067 pstart, &ss, pend - pstart, send) == 0)
2068 return 0; /* or goto next_mem; */
2069 }
2070 else {
2071 while (p < pend) {
2072 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2073 }
2074 }
2075
2076 *s = ss;
2077 return 1;
2078 }
2079 }
2080 }
2081 else if (k->type == STK_MEM_END) {
2082 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2083 pend = k->u.mem.pstr;
2084 }
2085 }
2086 }
2087 k--;
2088 }
2089
2090 return 0;
2091}
2092#endif /* USE_BACKREF_WITH_LEVEL */
2093
2094
2095#ifdef ONIG_DEBUG_STATISTICS
2096
2097# ifdef _WIN32
2098# include <windows.h>
2099static LARGE_INTEGER ts, te, freq;
2100# define GETTIME(t) QueryPerformanceCounter(&(t))
2101# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
2102 * 1000000 / freq.QuadPart)
2103# else /* _WIN32 */
2104
2105# define USE_TIMEOFDAY
2106
2107# ifdef USE_TIMEOFDAY
2108# ifdef HAVE_SYS_TIME_H
2109# include <sys/time.h>
2110# endif
2111# ifdef HAVE_UNISTD_H
2112# include <unistd.h>
2113# endif
2114static struct timeval ts, te;
2115# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2116# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2117 (((te).tv_sec - (ts).tv_sec)*1000000))
2118# else /* USE_TIMEOFDAY */
2119# ifdef HAVE_SYS_TIMES_H
2120# include <sys/times.h>
2121# endif
2122static struct tms ts, te;
2123# define GETTIME(t) times(&(t))
2124# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
2125# endif /* USE_TIMEOFDAY */
2126
2127# endif /* _WIN32 */
2128
2129static int OpCounter[256];
2130static int OpPrevCounter[256];
2131static unsigned long OpTime[256];
2132static int OpCurr = OP_FINISH;
2133static int OpPrevTarget = OP_FAIL;
2134static int MaxStackDepth = 0;
2135
2136# define MOP_IN(opcode) do {\
2137 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2138 OpCurr = opcode;\
2139 OpCounter[opcode]++;\
2140 GETTIME(ts);\
2141} while(0)
2142
2143# define MOP_OUT do {\
2144 GETTIME(te);\
2145 OpTime[OpCurr] += TIMEDIFF(te, ts);\
2146} while(0)
2147
2148extern void
2149onig_statistics_init(void)
2150{
2151 int i;
2152 for (i = 0; i < 256; i++) {
2153 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2154 }
2155 MaxStackDepth = 0;
2156# ifdef _WIN32
2157 QueryPerformanceFrequency(&freq);
2158# endif
2159}
2160
2161extern void
2162onig_print_statistics(FILE* f)
2163{
2164 int i;
2165 fprintf(f, " count prev time\n");
2166 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2167 fprintf(f, "%8d: %8d: %10lu: %s\n",
2168 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2169 }
2170 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2171}
2172
2173# define STACK_INC do {\
2174 stk++;\
2175 if (stk - stk_base > MaxStackDepth) \
2176 MaxStackDepth = stk - stk_base;\
2177} while(0)
2178
2179#else /* ONIG_DEBUG_STATISTICS */
2180# define STACK_INC stk++
2181
2182# define MOP_IN(opcode)
2183# define MOP_OUT
2184#endif /* ONIG_DEBUG_STATISTICS */
2185
2186
2187#ifdef ONIG_DEBUG_MATCH
2188static const char *
2189stack_type_str(int stack_type)
2190{
2191 switch (stack_type) {
2192 case STK_ALT: return "Alt ";
2193 case STK_LOOK_BEHIND_NOT: return "LBNot ";
2194 case STK_POS_NOT: return "PosNot";
2195 case STK_MEM_START: return "MemS ";
2196 case STK_MEM_END: return "MemE ";
2197 case STK_REPEAT_INC: return "RepInc";
2198 case STK_STATE_CHECK_MARK: return "StChMk";
2199 case STK_NULL_CHECK_START: return "NulChS";
2200 case STK_NULL_CHECK_END: return "NulChE";
2201 case STK_MEM_END_MARK: return "MemEMk";
2202 case STK_POS: return "Pos ";
2203 case STK_STOP_BT: return "StopBt";
2204 case STK_REPEAT: return "Rep ";
2205 case STK_CALL_FRAME: return "Call ";
2206 case STK_RETURN: return "Ret ";
2207 case STK_VOID: return "Void ";
2208 case STK_ABSENT_POS: return "AbsPos";
2209 case STK_ABSENT: return "Absent";
2210 case STK_MATCH_CACHE_POINT: return "MCache";
2211 default: return " ";
2212 }
2213}
2214#endif
2215#ifdef USE_MATCH_CACHE
2216
2217static long
2218bsearch_cache_opcodes(const OnigCacheOpcode *cache_opcodes, long num_cache_opcodes, const UChar* p)
2219{
2220 long l = 0, r = num_cache_opcodes - 1, m = 0;
2221
2222 while (l <= r) {
2223 m = (l + r) / 2;
2224 if (cache_opcodes[m].addr == p) break;
2225 if (cache_opcodes[m].addr < p) l = m + 1;
2226 else r = m - 1;
2227 }
2228 return m;
2229}
2230
2231static long
2232find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk, const OnigCacheOpcode **cache_opcode_ptr)
2233{
2234 long m;
2235 const OnigCacheOpcode* cache_opcode;
2236 const OnigRepeatRange* range;
2237 const OnigStackType *stkp;
2238 int count = 0;
2239 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
2240 long cache_point;
2241 long num_cache_points_at_outer_repeat;
2242 long num_cache_points_in_outer_repeat;
2243
2244 m = bsearch_cache_opcodes(cache_opcodes, num_cache_opcodes, p);
2245
2246 if (!(0 <= m && m < num_cache_opcodes && cache_opcodes[m].addr == p)) {
2247 return -1;
2248 }
2249
2250 cache_opcode = &cache_opcodes[m];
2251 *cache_opcode_ptr = &cache_opcodes[m];
2252 cache_point = cache_opcode->cache_point;
2253 if (cache_opcode->outer_repeat_mem == -1) {
2254 return cache_point;
2255 }
2256
2257 num_cache_points_at_outer_repeat = cache_opcode->num_cache_points_at_outer_repeat;
2258 num_cache_points_in_outer_repeat = cache_opcode->num_cache_points_in_outer_repeat;
2259
2260 range = &reg->repeat_range[cache_opcode->outer_repeat_mem];
2261
2262 stkp = &stk[repeat_stk[cache_opcode->outer_repeat_mem]];
2263 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
2264
2265 if (count < range->lower) {
2266 return num_cache_points_at_outer_repeat +
2267 num_cache_points_in_outer_repeat * count +
2268 cache_point;
2269 }
2270
2271 if (range->upper == 0x7fffffff) {
2272 return num_cache_points_at_outer_repeat +
2273 num_cache_points_in_outer_repeat * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) +
2274 cache_point;
2275 }
2276
2277 return num_cache_points_at_outer_repeat +
2278 num_cache_points_in_outer_repeat * (range->lower - 1) +
2279 (num_cache_points_in_outer_repeat + 1) * (count - range->lower + 1) +
2280 cache_point;
2281}
2282
2283static int
2284check_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
2285{
2286 if (match_cache_point_mask & 0x80) {
2287 return (match_cache_buf[match_cache_point_index + 1] & 0x01) > 0;
2288 }
2289 else {
2290 return (match_cache_buf[match_cache_point_index] & (match_cache_point_mask << 1)) > 0;
2291 }
2292}
2293
2294static void
2295memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
2296{
2297 match_cache_buf[match_cache_point_index] |= match_cache_point_mask;
2298 if (match_cache_point_mask & 0x80) {
2299 match_cache_buf[match_cache_point_index + 1] |= 0x01;
2300 }
2301 else {
2302 match_cache_buf[match_cache_point_index] |= match_cache_point_mask << 1;
2303 }
2304}
2305
2306#endif /* USE_MATCH_CACHE */
2307
2308/* match data(str - end) from position (sstart). */
2309/* if sstart == str then set sprev to NULL. */
2310static OnigPosition
2311match_at(regex_t* reg, const UChar* str, const UChar* end,
2312#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2313 const UChar* right_range,
2314#endif
2315 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
2316{
2317 static const UChar FinishCode[] = { OP_FINISH };
2318
2319 int i, num_mem, pop_level;
2320 ptrdiff_t n, best_len;
2321 LengthType tlen, tlen2;
2322 MemNumType mem;
2323 RelAddrType addr;
2324 OnigOptionType option = reg->options;
2325 OnigEncoding encode = reg->enc;
2326 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2327 UChar *s, *q, *sbegin;
2328 UChar *p = reg->p;
2329 UChar *pbegin = p;
2330 UChar *pkeep;
2331 char *alloca_base;
2332 char *xmalloc_base = NULL;
2333 OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end;
2334 OnigStackType *stkp; /* used as any purpose. */
2335 OnigStackIndex si;
2336 OnigStackIndex *repeat_stk;
2337 OnigStackIndex *mem_start_stk, *mem_end_stk;
2338#ifdef USE_COMBINATION_EXPLOSION_CHECK
2339 int scv;
2340 unsigned char* state_check_buff = msa->state_check_buff;
2341 int num_comb_exp_check = reg->num_comb_exp_check;
2342#endif
2343
2344#if USE_TOKEN_THREADED_VM
2345# define OP_OFFSET 1
2346# define VM_LOOP JUMP;
2347# define VM_LOOP_END
2348# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2349# define DEFAULT L_DEFAULT:
2350# define NEXT sprev = sbegin; JUMP
2351# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2352
2353 RB_GNUC_EXTENSION static const void *oplabels[] = {
2354 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
2355 &&L_OP_END, /* pattern code terminator (success end) */
2356
2357 &&L_OP_EXACT1, /* single byte, N = 1 */
2358 &&L_OP_EXACT2, /* single byte, N = 2 */
2359 &&L_OP_EXACT3, /* single byte, N = 3 */
2360 &&L_OP_EXACT4, /* single byte, N = 4 */
2361 &&L_OP_EXACT5, /* single byte, N = 5 */
2362 &&L_OP_EXACTN, /* single byte */
2363 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
2364 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
2365 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
2366 &&L_OP_EXACTMB2N, /* mb-length = 2 */
2367 &&L_OP_EXACTMB3N, /* mb-length = 3 */
2368 &&L_OP_EXACTMBN, /* other length */
2369
2370 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
2371 &&L_OP_EXACTN_IC, /* single byte, ignore case */
2372
2373 &&L_OP_CCLASS,
2374 &&L_OP_CCLASS_MB,
2375 &&L_OP_CCLASS_MIX,
2376 &&L_OP_CCLASS_NOT,
2377 &&L_OP_CCLASS_MB_NOT,
2378 &&L_OP_CCLASS_MIX_NOT,
2379
2380 &&L_OP_ANYCHAR, /* "." */
2381 &&L_OP_ANYCHAR_ML, /* "." multi-line */
2382 &&L_OP_ANYCHAR_STAR, /* ".*" */
2383 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
2384 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2385 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2386
2387 &&L_OP_WORD,
2388 &&L_OP_NOT_WORD,
2389 &&L_OP_WORD_BOUND,
2390 &&L_OP_NOT_WORD_BOUND,
2391# ifdef USE_WORD_BEGIN_END
2392 &&L_OP_WORD_BEGIN,
2393 &&L_OP_WORD_END,
2394# else
2395 &&L_DEFAULT,
2396 &&L_DEFAULT,
2397# endif
2398 &&L_OP_ASCII_WORD,
2399 &&L_OP_NOT_ASCII_WORD,
2400 &&L_OP_ASCII_WORD_BOUND,
2401 &&L_OP_NOT_ASCII_WORD_BOUND,
2402# ifdef USE_WORD_BEGIN_END
2403 &&L_OP_ASCII_WORD_BEGIN,
2404 &&L_OP_ASCII_WORD_END,
2405# else
2406 &&L_DEFAULT,
2407 &&L_DEFAULT,
2408# endif
2409
2410 &&L_OP_BEGIN_BUF,
2411 &&L_OP_END_BUF,
2412 &&L_OP_BEGIN_LINE,
2413 &&L_OP_END_LINE,
2414 &&L_OP_SEMI_END_BUF,
2415 &&L_OP_BEGIN_POSITION,
2416
2417 &&L_OP_BACKREF1,
2418 &&L_OP_BACKREF2,
2419 &&L_OP_BACKREFN,
2420 &&L_OP_BACKREFN_IC,
2421 &&L_OP_BACKREF_MULTI,
2422 &&L_OP_BACKREF_MULTI_IC,
2423# ifdef USE_BACKREF_WITH_LEVEL
2424 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
2425# else
2426 &&L_DEFAULT,
2427# endif
2428 &&L_OP_MEMORY_START,
2429 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
2430 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
2431# ifdef USE_SUBEXP_CALL
2432 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
2433# else
2434 &&L_DEFAULT,
2435# endif
2436 &&L_OP_MEMORY_END,
2437# ifdef USE_SUBEXP_CALL
2438 &&L_OP_MEMORY_END_REC, /* push marker to stack */
2439# else
2440 &&L_DEFAULT,
2441# endif
2442
2443 &&L_OP_KEEP,
2444
2445 &&L_OP_FAIL, /* pop stack and move */
2446 &&L_OP_JUMP,
2447 &&L_OP_PUSH,
2448 &&L_OP_POP,
2449# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2450 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
2451# else
2452 &&L_DEFAULT,
2453# endif
2454 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
2455 &&L_OP_REPEAT, /* {n,m} */
2456 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
2457 &&L_OP_REPEAT_INC,
2458 &&L_OP_REPEAT_INC_NG, /* non greedy */
2459 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
2460 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
2461 &&L_OP_NULL_CHECK_START, /* null loop checker start */
2462 &&L_OP_NULL_CHECK_END, /* null loop checker end */
2463# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2464 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
2465# else
2466 &&L_DEFAULT,
2467# endif
2468# ifdef USE_SUBEXP_CALL
2469 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
2470# else
2471 &&L_DEFAULT,
2472# endif
2473
2474 &&L_OP_PUSH_POS, /* (?=...) start */
2475 &&L_OP_POP_POS, /* (?=...) end */
2476 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
2477 &&L_OP_FAIL_POS, /* (?!...) end */
2478 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
2479 &&L_OP_POP_STOP_BT, /* (?>...) end */
2480 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
2481 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
2482 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
2483 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
2484 &&L_OP_ABSENT, /* (?~...) start of inner loop */
2485 &&L_OP_ABSENT_END, /* (?~...) end */
2486
2487# ifdef USE_SUBEXP_CALL
2488 &&L_OP_CALL, /* \g<name> */
2489 &&L_OP_RETURN,
2490# else
2491 &&L_DEFAULT,
2492 &&L_DEFAULT,
2493# endif
2494 &&L_OP_CONDITION,
2495
2496# ifdef USE_COMBINATION_EXPLOSION_CHECK
2497 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
2498 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
2499 &&L_OP_STATE_CHECK, /* check only */
2500# else
2501 &&L_DEFAULT,
2502 &&L_DEFAULT,
2503 &&L_DEFAULT,
2504# endif
2505# ifdef USE_COMBINATION_EXPLOSION_CHECK
2506 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2507 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2508# else
2509 &&L_DEFAULT,
2510 &&L_DEFAULT,
2511# endif
2512 /* no need: IS_DYNAMIC_OPTION() == 0 */
2513# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2514 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
2515 &&L_OP_SET_OPTION /* set option */
2516# else
2517 &&L_DEFAULT,
2518 &&L_DEFAULT
2519# endif
2520 };
2521#else /* USE_TOKEN_THREADED_VM */
2522
2523# define OP_OFFSET 0
2524# define VM_LOOP \
2525 while (1) { \
2526 OPCODE_EXEC_HOOK; \
2527 pbegin = p; \
2528 sbegin = s; \
2529 switch (*p++) {
2530# define VM_LOOP_END } sprev = sbegin; }
2531# define CASE(x) case x:
2532# define DEFAULT default:
2533# define NEXT break
2534# define JUMP continue; break
2535#endif /* USE_TOKEN_THREADED_VM */
2536
2537
2538#ifdef USE_SUBEXP_CALL
2539/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2540 etc. Additional space is required. */
2541# define ADD_NUMMEM 1
2542#else
2543/* Stack #0 not is used. */
2544# define ADD_NUMMEM 0
2545#endif
2546
2547 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2548
2549 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2550 pop_level = reg->stack_pop_level;
2551 num_mem = reg->num_mem;
2552 repeat_stk = (OnigStackIndex* )alloca_base;
2553
2554 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2555 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2556 {
2557 OnigStackIndex *pp = mem_start_stk;
2558 for (; pp < repeat_stk + n; pp += 2) {
2559 pp[0] = INVALID_STACK_INDEX;
2560 pp[1] = INVALID_STACK_INDEX;
2561 }
2562 }
2563#ifndef USE_SUBEXP_CALL
2564 mem_start_stk--; /* for index start from 1,
2565 mem_start_stk[1]..mem_start_stk[num_mem] */
2566 mem_end_stk--; /* for index start from 1,
2567 mem_end_stk[1]..mem_end_stk[num_mem] */
2568#endif
2569
2570#ifdef ONIG_DEBUG_MATCH
2571 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
2572 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2573 fprintf(stderr, "size: %d, start offset: %d\n",
2574 (int )(end - str), (int )(sstart - str));
2575 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
2576#endif
2577
2578 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
2579 best_len = ONIG_MISMATCH;
2580 s = (UChar* )sstart;
2581 pkeep = (UChar* )sstart;
2582
2583
2584#ifdef ONIG_DEBUG_MATCH
2585# define OPCODE_EXEC_HOOK \
2586 if (s) { \
2587 UChar *op, *q, *bp, buf[50]; \
2588 int len; \
2589 op = p - OP_OFFSET; \
2590 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2591 bp = buf; \
2592 q = s; \
2593 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
2594 for (i = 0; i < 7 && q < end; i++) { \
2595 len = enclen(encode, q, end); \
2596 while (len-- > 0) *bp++ = *q++; \
2597 } \
2598 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2599 } \
2600 xmemcpy(bp, "\"", 1); bp += 1; \
2601 *bp = 0; \
2602 fputs((char* )buf, stderr); \
2603 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2604 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2605 stk - stk_base - 1, \
2606 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2607 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2608 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2609 fprintf(stderr, "\n"); \
2610 }
2611#else
2612# define OPCODE_EXEC_HOOK ((void) 0)
2613#endif
2614
2615#ifdef USE_MATCH_CACHE
2616#ifdef ONIG_DEBUG_MATCH_CACHE
2617#define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask)
2618#define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n")
2619#else
2620#define MATCH_CACHE_DEBUG ((void) 0)
2621#define MATCH_CACHE_DEBUG_HIT ((void) 0)
2622#endif
2623
2624#define MATCH_CACHE_HIT ((void) 0)
2625
2626# define CHECK_MATCH_CACHE do {\
2627 if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\
2628 const OnigCacheOpcode *cache_opcode;\
2629 long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\
2630 if (cache_point >= 0) {\
2631 long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\
2632 long match_cache_point_index = match_cache_point >> 3;\
2633 uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\
2634 MATCH_CACHE_DEBUG;\
2635 if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\
2636 MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\
2637 if (cache_opcode->lookaround_nesting == 0) goto fail;\
2638 else if (cache_opcode->lookaround_nesting < 0) {\
2639 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2640 STACK_STOP_BT_FAIL;\
2641 goto fail;\
2642 }\
2643 else goto fail;\
2644 }\
2645 else {\
2646 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2647 p = cache_opcode->match_addr;\
2648 MOP_OUT;\
2649 JUMP;\
2650 }\
2651 else goto fail;\
2652 }\
2653 }\
2654 STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\
2655 }\
2656 }\
2657} while (0)
2658#else
2659# define CHECK_MATCH_CACHE ((void) 0)
2660#endif
2661
2662 VM_LOOP {
2663 CASE(OP_END) MOP_IN(OP_END);
2664 n = s - sstart;
2665 if (n > best_len) {
2666 OnigRegion* region;
2667#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2668 if (IS_FIND_LONGEST(option)) {
2669 if (n > msa->best_len) {
2670 msa->best_len = n;
2671 msa->best_s = (UChar* )sstart;
2672 }
2673 else
2674 goto end_best_len;
2675 }
2676#endif
2677 best_len = n;
2678 region = msa->region;
2679 if (region) {
2680 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2681 region->end[0] = s - str;
2682 for (i = 1; i <= num_mem; i++) {
2683 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2684 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2685 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2686 else
2687 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2688
2689 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2690 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2691 : (UChar* )((void* )mem_end_stk[i])) - str;
2692 }
2693 else {
2694 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2695 }
2696 }
2697
2698#ifdef USE_CAPTURE_HISTORY
2699 if (reg->capture_history != 0) {
2700 int r;
2701 OnigCaptureTreeNode* node;
2702
2703 if (IS_NULL(region->history_root)) {
2704 region->history_root = node = history_node_new();
2705 CHECK_NULL_RETURN_MEMERR(node);
2706 }
2707 else {
2708 node = region->history_root;
2709 history_tree_clear(node);
2710 }
2711
2712 node->group = 0;
2713 node->beg = ((pkeep > s) ? s : pkeep) - str;
2714 node->end = s - str;
2715
2716 stkp = stk_base;
2717 r = make_capture_history_tree(region->history_root, &stkp,
2718 stk, (UChar* )str, reg);
2719 if (r < 0) {
2720 best_len = r; /* error code */
2721 goto finish;
2722 }
2723 }
2724#endif /* USE_CAPTURE_HISTORY */
2725 } /* if (region) */
2726 } /* n > best_len */
2727
2728#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2729 end_best_len:
2730#endif
2731 MOP_OUT;
2732
2733 if (IS_FIND_CONDITION(option)) {
2734 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2735 best_len = ONIG_MISMATCH;
2736 goto fail; /* for retry */
2737 }
2738 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2739 goto fail; /* for retry */
2740 }
2741 }
2742
2743 /* default behavior: return first-matching result. */
2744 goto finish;
2745
2746 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2747 DATA_ENSURE(1);
2748 if (*p != *s) goto fail;
2749 p++; s++;
2750 MOP_OUT;
2751 NEXT;
2752
2753 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2754 {
2755 int len;
2756 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2757
2758 DATA_ENSURE(1);
2759 len = ONIGENC_MBC_CASE_FOLD(encode,
2760 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2761 case_fold_flag,
2762 &s, end, lowbuf);
2763 DATA_ENSURE(0);
2764 q = lowbuf;
2765 while (len-- > 0) {
2766 if (*p != *q) {
2767 goto fail;
2768 }
2769 p++; q++;
2770 }
2771 }
2772 MOP_OUT;
2773 NEXT;
2774
2775 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2776 DATA_ENSURE(2);
2777 if (*p != *s) goto fail;
2778 p++; s++;
2779 if (*p != *s) goto fail;
2780 sprev = s;
2781 p++; s++;
2782 MOP_OUT;
2783 JUMP;
2784
2785 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2786 DATA_ENSURE(3);
2787 if (*p != *s) goto fail;
2788 p++; s++;
2789 if (*p != *s) goto fail;
2790 p++; s++;
2791 if (*p != *s) goto fail;
2792 sprev = s;
2793 p++; s++;
2794 MOP_OUT;
2795 JUMP;
2796
2797 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2798 DATA_ENSURE(4);
2799 if (*p != *s) goto fail;
2800 p++; s++;
2801 if (*p != *s) goto fail;
2802 p++; s++;
2803 if (*p != *s) goto fail;
2804 p++; s++;
2805 if (*p != *s) goto fail;
2806 sprev = s;
2807 p++; s++;
2808 MOP_OUT;
2809 JUMP;
2810
2811 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2812 DATA_ENSURE(5);
2813 if (*p != *s) goto fail;
2814 p++; s++;
2815 if (*p != *s) goto fail;
2816 p++; s++;
2817 if (*p != *s) goto fail;
2818 p++; s++;
2819 if (*p != *s) goto fail;
2820 p++; s++;
2821 if (*p != *s) goto fail;
2822 sprev = s;
2823 p++; s++;
2824 MOP_OUT;
2825 JUMP;
2826
2827 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2828 GET_LENGTH_INC(tlen, p);
2829 DATA_ENSURE(tlen);
2830 while (tlen-- > 0) {
2831 if (*p++ != *s++) goto fail;
2832 }
2833 sprev = s - 1;
2834 MOP_OUT;
2835 JUMP;
2836
2837 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2838 {
2839 int len;
2840 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2841
2842 GET_LENGTH_INC(tlen, p);
2843 endp = p + tlen;
2844
2845 while (p < endp) {
2846 sprev = s;
2847 DATA_ENSURE(1);
2848 len = ONIGENC_MBC_CASE_FOLD(encode,
2849 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2850 case_fold_flag,
2851 &s, end, lowbuf);
2852 DATA_ENSURE(0);
2853 q = lowbuf;
2854 while (len-- > 0) {
2855 if (*p != *q) goto fail;
2856 p++; q++;
2857 }
2858 }
2859 }
2860
2861 MOP_OUT;
2862 JUMP;
2863
2864 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2865 DATA_ENSURE(2);
2866 if (*p != *s) goto fail;
2867 p++; s++;
2868 if (*p != *s) goto fail;
2869 p++; s++;
2870 MOP_OUT;
2871 NEXT;
2872
2873 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2874 DATA_ENSURE(4);
2875 if (*p != *s) goto fail;
2876 p++; s++;
2877 if (*p != *s) goto fail;
2878 p++; s++;
2879 sprev = s;
2880 if (*p != *s) goto fail;
2881 p++; s++;
2882 if (*p != *s) goto fail;
2883 p++; s++;
2884 MOP_OUT;
2885 JUMP;
2886
2887 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2888 DATA_ENSURE(6);
2889 if (*p != *s) goto fail;
2890 p++; s++;
2891 if (*p != *s) goto fail;
2892 p++; s++;
2893 if (*p != *s) goto fail;
2894 p++; s++;
2895 if (*p != *s) goto fail;
2896 p++; s++;
2897 sprev = s;
2898 if (*p != *s) goto fail;
2899 p++; s++;
2900 if (*p != *s) goto fail;
2901 p++; s++;
2902 MOP_OUT;
2903 JUMP;
2904
2905 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2906 GET_LENGTH_INC(tlen, p);
2907 DATA_ENSURE(tlen * 2);
2908 while (tlen-- > 0) {
2909 if (*p != *s) goto fail;
2910 p++; s++;
2911 if (*p != *s) goto fail;
2912 p++; s++;
2913 }
2914 sprev = s - 2;
2915 MOP_OUT;
2916 JUMP;
2917
2918 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2919 GET_LENGTH_INC(tlen, p);
2920 DATA_ENSURE(tlen * 3);
2921 while (tlen-- > 0) {
2922 if (*p != *s) goto fail;
2923 p++; s++;
2924 if (*p != *s) goto fail;
2925 p++; s++;
2926 if (*p != *s) goto fail;
2927 p++; s++;
2928 }
2929 sprev = s - 3;
2930 MOP_OUT;
2931 JUMP;
2932
2933 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2934 GET_LENGTH_INC(tlen, p); /* mb-len */
2935 GET_LENGTH_INC(tlen2, p); /* string len */
2936 tlen2 *= tlen;
2937 DATA_ENSURE(tlen2);
2938 while (tlen2-- > 0) {
2939 if (*p != *s) goto fail;
2940 p++; s++;
2941 }
2942 sprev = s - tlen;
2943 MOP_OUT;
2944 JUMP;
2945
2946 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2947 DATA_ENSURE(1);
2948 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2949 p += SIZE_BITSET;
2950 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2951 MOP_OUT;
2952 NEXT;
2953
2954 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2955 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2956
2957 cclass_mb:
2958 GET_LENGTH_INC(tlen, p);
2959 {
2960 OnigCodePoint code;
2961 UChar *ss;
2962 int mb_len;
2963
2964 DATA_ENSURE(1);
2965 mb_len = enclen_approx(encode, s, end);
2966 DATA_ENSURE(mb_len);
2967 ss = s;
2968 s += mb_len;
2969 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2970
2971#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2972 if (! onig_is_in_code_range(p, code)) goto fail;
2973#else
2974 q = p;
2975 ALIGNMENT_RIGHT(q);
2976 if (! onig_is_in_code_range(q, code)) goto fail;
2977#endif
2978 }
2979 p += tlen;
2980 MOP_OUT;
2981 NEXT;
2982
2983 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2984 DATA_ENSURE(1);
2985 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2986 p += SIZE_BITSET;
2987 goto cclass_mb;
2988 }
2989 else {
2990 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2991 goto fail;
2992
2993 p += SIZE_BITSET;
2994 GET_LENGTH_INC(tlen, p);
2995 p += tlen;
2996 s++;
2997 }
2998 MOP_OUT;
2999 NEXT;
3000
3001 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
3002 DATA_ENSURE(1);
3003 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
3004 p += SIZE_BITSET;
3005 s += enclen(encode, s, end);
3006 MOP_OUT;
3007 NEXT;
3008
3009 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
3010 DATA_ENSURE(1);
3011 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3012 s++;
3013 GET_LENGTH_INC(tlen, p);
3014 p += tlen;
3015 goto cc_mb_not_success;
3016 }
3017
3018 cclass_mb_not:
3019 GET_LENGTH_INC(tlen, p);
3020 {
3021 OnigCodePoint code;
3022 UChar *ss;
3023 int mb_len = enclen(encode, s, end);
3024
3025 if (! DATA_ENSURE_CHECK(mb_len)) {
3026 DATA_ENSURE(1);
3027 s = (UChar* )end;
3028 p += tlen;
3029 goto cc_mb_not_success;
3030 }
3031
3032 ss = s;
3033 s += mb_len;
3034 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3035
3036#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
3037 if (onig_is_in_code_range(p, code)) goto fail;
3038#else
3039 q = p;
3040 ALIGNMENT_RIGHT(q);
3041 if (onig_is_in_code_range(q, code)) goto fail;
3042#endif
3043 }
3044 p += tlen;
3045
3046 cc_mb_not_success:
3047 MOP_OUT;
3048 NEXT;
3049
3050 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
3051 DATA_ENSURE(1);
3052 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3053 p += SIZE_BITSET;
3054 goto cclass_mb_not;
3055 }
3056 else {
3057 if (BITSET_AT(((BitSetRef )p), *s) != 0)
3058 goto fail;
3059
3060 p += SIZE_BITSET;
3061 GET_LENGTH_INC(tlen, p);
3062 p += tlen;
3063 s++;
3064 }
3065 MOP_OUT;
3066 NEXT;
3067
3068 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
3069 DATA_ENSURE(1);
3070 n = enclen_approx(encode, s, end);
3071 DATA_ENSURE(n);
3072 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3073 s += n;
3074 MOP_OUT;
3075 NEXT;
3076
3077 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
3078 DATA_ENSURE(1);
3079 n = enclen_approx(encode, s, end);
3080 DATA_ENSURE(n);
3081 s += n;
3082 MOP_OUT;
3083 NEXT;
3084
3085 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
3086 while (DATA_ENSURE_CHECK1) {
3087 CHECK_MATCH_CACHE;
3088 STACK_PUSH_ALT(p, s, sprev, pkeep);
3089 n = enclen_approx(encode, s, end);
3090 DATA_ENSURE(n);
3091 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3092 sprev = s;
3093 s += n;
3094 }
3095 MOP_OUT;
3096 JUMP;
3097
3098 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
3099 while (DATA_ENSURE_CHECK1) {
3100 CHECK_MATCH_CACHE;
3101 STACK_PUSH_ALT(p, s, sprev, pkeep);
3102 n = enclen_approx(encode, s, end);
3103 if (n > 1) {
3104 DATA_ENSURE(n);
3105 sprev = s;
3106 s += n;
3107 }
3108 else {
3109 sprev = s;
3110 s++;
3111 }
3112 }
3113 MOP_OUT;
3114 JUMP;
3115
3116 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
3117 while (DATA_ENSURE_CHECK1) {
3118 CHECK_MATCH_CACHE;
3119 if (*p == *s) {
3120 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3121 } else {
3122#ifdef USE_MATCH_CACHE
3123 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3124 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR` simply in this case.*/
3125 msa->num_fails++;
3126#endif
3127 }
3128 n = enclen_approx(encode, s, end);
3129 DATA_ENSURE(n);
3130 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3131 sprev = s;
3132 s += n;
3133 }
3134 p++;
3135 MOP_OUT;
3136 NEXT;
3137
3138 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
3139 while (DATA_ENSURE_CHECK1) {
3140 CHECK_MATCH_CACHE;
3141 if (*p == *s) {
3142 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3143 } else {
3144#ifdef USE_MATCH_CACHE
3145 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3146 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR_ML` simply in this case.*/
3147 msa->num_fails++;
3148#endif
3149 }
3150 n = enclen_approx(encode, s, end);
3151 if (n > 1) {
3152 DATA_ENSURE(n);
3153 sprev = s;
3154 s += n;
3155 }
3156 else {
3157 sprev = s;
3158 s++;
3159 }
3160 }
3161 p++;
3162 MOP_OUT;
3163 NEXT;
3164
3165#ifdef USE_COMBINATION_EXPLOSION_CHECK
3166 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
3167 GET_STATE_CHECK_NUM_INC(mem, p);
3168 while (DATA_ENSURE_CHECK1) {
3169 STATE_CHECK_VAL(scv, mem);
3170 if (scv) goto fail;
3171
3172 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3173 n = enclen_approx(encode, s, end);
3174 DATA_ENSURE(n);
3175 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3176 sprev = s;
3177 s += n;
3178 }
3179 MOP_OUT;
3180 NEXT;
3181
3182 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
3183 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
3184
3185 GET_STATE_CHECK_NUM_INC(mem, p);
3186 while (DATA_ENSURE_CHECK1) {
3187 STATE_CHECK_VAL(scv, mem);
3188 if (scv) goto fail;
3189
3190 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3191 n = enclen_approx(encode, s, end);
3192 if (n > 1) {
3193 DATA_ENSURE(n);
3194 sprev = s;
3195 s += n;
3196 }
3197 else {
3198 sprev = s;
3199 s++;
3200 }
3201 }
3202 MOP_OUT;
3203 NEXT;
3204#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3205
3206 CASE(OP_WORD) MOP_IN(OP_WORD);
3207 DATA_ENSURE(1);
3208 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3209 goto fail;
3210
3211 s += enclen(encode, s, end);
3212 MOP_OUT;
3213 NEXT;
3214
3215 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
3216 DATA_ENSURE(1);
3217 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3218 goto fail;
3219
3220 s += enclen(encode, s, end);
3221 MOP_OUT;
3222 NEXT;
3223
3224 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
3225 DATA_ENSURE(1);
3226 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3227 goto fail;
3228
3229 s += enclen(encode, s, end);
3230 MOP_OUT;
3231 NEXT;
3232
3233 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
3234 DATA_ENSURE(1);
3235 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3236 goto fail;
3237
3238 s += enclen(encode, s, end);
3239 MOP_OUT;
3240 NEXT;
3241
3242 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
3243 if (ON_STR_BEGIN(s)) {
3244 DATA_ENSURE(1);
3245 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3246 goto fail;
3247 }
3248 else if (ON_STR_END(s)) {
3249 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
3250 goto fail;
3251 }
3252 else {
3253 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3254 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
3255 goto fail;
3256 }
3257 MOP_OUT;
3258 JUMP;
3259
3260 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
3261 if (ON_STR_BEGIN(s)) {
3262 DATA_ENSURE(1);
3263 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3264 goto fail;
3265 }
3266 else if (ON_STR_END(s)) {
3267 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3268 goto fail;
3269 }
3270 else {
3271 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3272 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3273 goto fail;
3274 }
3275 MOP_OUT;
3276 JUMP;
3277
3278 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
3279 if (ON_STR_BEGIN(s)) {
3280 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3281 goto fail;
3282 }
3283 else if (ON_STR_END(s)) {
3284 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3285 goto fail;
3286 }
3287 else {
3288 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3289 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3290 goto fail;
3291 }
3292 MOP_OUT;
3293 JUMP;
3294
3295 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3296 if (ON_STR_BEGIN(s)) {
3297 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3298 goto fail;
3299 }
3300 else if (ON_STR_END(s)) {
3301 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3302 goto fail;
3303 }
3304 else {
3305 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3306 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3307 goto fail;
3308 }
3309 MOP_OUT;
3310 JUMP;
3311
3312#ifdef USE_WORD_BEGIN_END
3313 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3314 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3315 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3316 MOP_OUT;
3317 JUMP;
3318 }
3319 }
3320 goto fail;
3321
3322 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3323 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3324 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3325 MOP_OUT;
3326 JUMP;
3327 }
3328 }
3329 goto fail;
3330
3331 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3332 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3333 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3334 MOP_OUT;
3335 JUMP;
3336 }
3337 }
3338 goto fail;
3339
3340 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3341 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3342 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3343 MOP_OUT;
3344 JUMP;
3345 }
3346 }
3347 goto fail;
3348#endif
3349
3350 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3351 if (! ON_STR_BEGIN(s)) goto fail;
3352 if (IS_NOTBOS(msa->options)) goto fail;
3353
3354 MOP_OUT;
3355 JUMP;
3356
3357 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3358 if (! ON_STR_END(s)) goto fail;
3359 if (IS_NOTEOS(msa->options)) goto fail;
3360
3361 MOP_OUT;
3362 JUMP;
3363
3364 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3365 if (ON_STR_BEGIN(s)) {
3366 if (IS_NOTBOL(msa->options)) goto fail;
3367 MOP_OUT;
3368 JUMP;
3369 }
3370 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3371#ifdef USE_CRNL_AS_LINE_TERMINATOR
3372 && !(IS_NEWLINE_CRLF(option)
3373 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3374#endif
3375 && !ON_STR_END(s)) {
3376 MOP_OUT;
3377 JUMP;
3378 }
3379 goto fail;
3380
3381 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3382 if (ON_STR_END(s)) {
3383#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3384 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3385#endif
3386 if (IS_NOTEOL(msa->options)) goto fail;
3387 MOP_OUT;
3388 JUMP;
3389#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3390 }
3391#endif
3392 }
3393 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3394 MOP_OUT;
3395 JUMP;
3396 }
3397 goto fail;
3398
3399 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3400 if (ON_STR_END(s)) {
3401#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3402 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3403#endif
3404 if (IS_NOTEOL(msa->options)) goto fail;
3405 MOP_OUT;
3406 JUMP;
3407#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3408 }
3409#endif
3410 }
3411 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3412 UChar* ss = s + enclen(encode, s, end);
3413 if (ON_STR_END(ss)) {
3414 MOP_OUT;
3415 JUMP;
3416 }
3417#ifdef USE_CRNL_AS_LINE_TERMINATOR
3418 else if (IS_NEWLINE_CRLF(option)
3419 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3420 ss += enclen(encode, ss, end);
3421 if (ON_STR_END(ss)) {
3422 MOP_OUT;
3423 JUMP;
3424 }
3425 }
3426#endif
3427 }
3428 goto fail;
3429
3430 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3431 if (s != msa->gpos)
3432 goto fail;
3433
3434 MOP_OUT;
3435 JUMP;
3436
3437 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3438 GET_MEMNUM_INC(mem, p);
3439 STACK_PUSH_MEM_START(mem, s);
3440 MOP_OUT;
3441 JUMP;
3442
3443 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3444 GET_MEMNUM_INC(mem, p);
3445 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3446 mem_end_stk[mem] = INVALID_STACK_INDEX;
3447 MOP_OUT;
3448 JUMP;
3449
3450 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3451 GET_MEMNUM_INC(mem, p);
3452 STACK_PUSH_MEM_END(mem, s);
3453 MOP_OUT;
3454 JUMP;
3455
3456 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3457 GET_MEMNUM_INC(mem, p);
3458 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3459 MOP_OUT;
3460 JUMP;
3461
3462 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3463 pkeep = s;
3464 MOP_OUT;
3465 JUMP;
3466
3467#ifdef USE_SUBEXP_CALL
3468 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3469 GET_MEMNUM_INC(mem, p);
3470 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3471 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3472 STACK_PUSH_MEM_END(mem, s);
3473 MOP_OUT;
3474 JUMP;
3475
3476 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3477 GET_MEMNUM_INC(mem, p);
3478 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3479 STACK_GET_MEM_START(mem, stkp);
3480
3481 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3482 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3483 else
3484 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3485
3486 STACK_PUSH_MEM_END_MARK(mem);
3487 MOP_OUT;
3488 JUMP;
3489#endif
3490
3491 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3492 mem = 1;
3493 goto backref;
3494
3495 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3496 mem = 2;
3497 goto backref;
3498
3499 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3500 GET_MEMNUM_INC(mem, p);
3501 backref:
3502 {
3503 int len;
3504 UChar *pstart, *pend;
3505
3506 /* if you want to remove following line,
3507 you should check in parse and compile time. */
3508 if (mem > num_mem) goto fail;
3509 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3510 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3511
3512 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3513 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3514 else
3515 pstart = (UChar* )((void* )mem_start_stk[mem]);
3516
3517 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3518 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3519 : (UChar* )((void* )mem_end_stk[mem]));
3520 n = pend - pstart;
3521 DATA_ENSURE(n);
3522 sprev = s;
3523 STRING_CMP(pstart, s, n);
3524 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3525 sprev += len;
3526
3527 MOP_OUT;
3528 JUMP;
3529 }
3530
3531 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3532 GET_MEMNUM_INC(mem, p);
3533 {
3534 int len;
3535 UChar *pstart, *pend;
3536
3537 /* if you want to remove following line,
3538 you should check in parse and compile time. */
3539 if (mem > num_mem) goto fail;
3540 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3541 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3542
3543 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3544 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3545 else
3546 pstart = (UChar* )((void* )mem_start_stk[mem]);
3547
3548 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3549 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3550 : (UChar* )((void* )mem_end_stk[mem]));
3551 n = pend - pstart;
3552 DATA_ENSURE(n);
3553 sprev = s;
3554 STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
3555 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3556 sprev += len;
3557
3558 MOP_OUT;
3559 JUMP;
3560 }
3561 NEXT;
3562
3563 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3564 {
3565 int len, is_fail;
3566 UChar *pstart, *pend, *swork;
3567
3568 GET_LENGTH_INC(tlen, p);
3569 for (i = 0; i < tlen; i++) {
3570 GET_MEMNUM_INC(mem, p);
3571
3572 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3573 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3574
3575 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3576 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3577 else
3578 pstart = (UChar* )((void* )mem_start_stk[mem]);
3579
3580 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3581 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3582 : (UChar* )((void* )mem_end_stk[mem]));
3583 n = pend - pstart;
3584 DATA_ENSURE_CONTINUE(n);
3585 sprev = s;
3586 swork = s;
3587 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3588 if (is_fail) continue;
3589 s = swork;
3590 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3591 sprev += len;
3592
3593 p += (SIZE_MEMNUM * (tlen - i - 1));
3594 break; /* success */
3595 }
3596 if (i == tlen) goto fail;
3597 MOP_OUT;
3598 JUMP;
3599 }
3600 NEXT;
3601
3602 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3603 {
3604 int len, is_fail;
3605 UChar *pstart, *pend, *swork;
3606
3607 GET_LENGTH_INC(tlen, p);
3608 for (i = 0; i < tlen; i++) {
3609 GET_MEMNUM_INC(mem, p);
3610
3611 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3612 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3613
3614 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3615 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3616 else
3617 pstart = (UChar* )((void* )mem_start_stk[mem]);
3618
3619 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3620 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3621 : (UChar* )((void* )mem_end_stk[mem]));
3622 n = pend - pstart;
3623 DATA_ENSURE_CONTINUE(n);
3624 sprev = s;
3625 swork = s;
3626 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3627 if (is_fail) continue;
3628 s = swork;
3629 while (sprev + (len = enclen(encode, sprev, end)) < s)
3630 sprev += len;
3631
3632 p += (SIZE_MEMNUM * (tlen - i - 1));
3633 break; /* success */
3634 }
3635 if (i == tlen) goto fail;
3636 MOP_OUT;
3637 JUMP;
3638 }
3639
3640#ifdef USE_BACKREF_WITH_LEVEL
3641 CASE(OP_BACKREF_WITH_LEVEL)
3642 {
3643 int len;
3644 OnigOptionType ic;
3645 LengthType level;
3646
3647 GET_OPTION_INC(ic, p);
3648 GET_LENGTH_INC(level, p);
3649 GET_LENGTH_INC(tlen, p);
3650
3651 sprev = s;
3652 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3653 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3654 while (sprev + (len = enclen(encode, sprev, end)) < s)
3655 sprev += len;
3656
3657 p += (SIZE_MEMNUM * tlen);
3658 }
3659 else
3660 goto fail;
3661
3662 MOP_OUT;
3663 JUMP;
3664 }
3665
3666#endif
3667
3668#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
3669 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3670 GET_OPTION_INC(option, p);
3671 STACK_PUSH_ALT(p, s, sprev, pkeep);
3672 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3673 MOP_OUT;
3674 JUMP;
3675
3676 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3677 GET_OPTION_INC(option, p);
3678 MOP_OUT;
3679 JUMP;
3680#endif
3681
3682 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3683 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3684 STACK_PUSH_NULL_CHECK_START(mem, s);
3685 MOP_OUT;
3686 JUMP;
3687
3688 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3689 {
3690 int isnull;
3691
3692 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3693 STACK_NULL_CHECK(isnull, mem, s);
3694 if (isnull) {
3695#ifdef ONIG_DEBUG_MATCH
3696 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
3697 (int )mem, (uintptr_t )s, s);
3698#endif
3699 null_check_found:
3700 /* empty loop founded, skip next instruction */
3701 switch (*p++) {
3702 case OP_JUMP:
3703 case OP_PUSH:
3704 p += SIZE_RELADDR;
3705 break;
3706 case OP_REPEAT_INC:
3707 case OP_REPEAT_INC_NG:
3708 case OP_REPEAT_INC_SG:
3709 case OP_REPEAT_INC_NG_SG:
3710 p += SIZE_MEMNUM;
3711 break;
3712 default:
3713 goto unexpected_bytecode_error;
3714 break;
3715 }
3716 }
3717 }
3718 MOP_OUT;
3719 JUMP;
3720
3721#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3722 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3723 {
3724 int isnull;
3725
3726 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3727 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
3728 if (isnull) {
3729# ifdef ONIG_DEBUG_MATCH
3730 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
3731 (int )mem, (uintptr_t )s, s);
3732# endif
3733 if (isnull == -1) goto fail;
3734 goto null_check_found;
3735 }
3736 }
3737 MOP_OUT;
3738 JUMP;
3739#endif
3740
3741#ifdef USE_SUBEXP_CALL
3742 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3743 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3744 {
3745 int isnull;
3746
3747 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3748# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3749 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3750# else
3751 STACK_NULL_CHECK_REC(isnull, mem, s);
3752# endif
3753 if (isnull) {
3754# ifdef ONIG_DEBUG_MATCH
3755 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
3756 (int )mem, (uintptr_t )s, s);
3757# endif
3758 if (isnull == -1) goto fail;
3759 goto null_check_found;
3760 }
3761 else {
3762 STACK_PUSH_NULL_CHECK_END(mem);
3763 }
3764 }
3765 MOP_OUT;
3766 JUMP;
3767#endif
3768
3769 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3770 GET_RELADDR_INC(addr, p);
3771 p += addr;
3772 MOP_OUT;
3773 CHECK_INTERRUPT_IN_MATCH_AT;
3774 JUMP;
3775
3776 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3777 GET_RELADDR_INC(addr, p);
3778 CHECK_MATCH_CACHE;
3779 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3780 MOP_OUT;
3781 JUMP;
3782
3783#ifdef USE_COMBINATION_EXPLOSION_CHECK
3784 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3785 GET_STATE_CHECK_NUM_INC(mem, p);
3786 STATE_CHECK_VAL(scv, mem);
3787 if (scv) goto fail;
3788
3789 GET_RELADDR_INC(addr, p);
3790 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3791 MOP_OUT;
3792 JUMP;
3793
3794 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3795 GET_STATE_CHECK_NUM_INC(mem, p);
3796 GET_RELADDR_INC(addr, p);
3797 STATE_CHECK_VAL(scv, mem);
3798 if (scv) {
3799 p += addr;
3800 }
3801 else {
3802 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3803 }
3804 MOP_OUT;
3805 JUMP;
3806
3807 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3808 GET_STATE_CHECK_NUM_INC(mem, p);
3809 STATE_CHECK_VAL(scv, mem);
3810 if (scv) goto fail;
3811
3812 STACK_PUSH_STATE_CHECK(s, mem);
3813 MOP_OUT;
3814 JUMP;
3815#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3816
3817 CASE(OP_POP) MOP_IN(OP_POP);
3818 STACK_POP_ONE;
3819#ifdef USE_MATCH_CACHE
3820 /* We need to increment num_fails here, for invoking a cache optimization correctly, */
3821 /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3822 msa->num_fails++;
3823#endif
3824 MOP_OUT;
3825 JUMP;
3826
3827#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3828 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3829 GET_RELADDR_INC(addr, p);
3830 if (*p == *s && DATA_ENSURE_CHECK1) {
3831 p++;
3832 CHECK_MATCH_CACHE;
3833 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3834 MOP_OUT;
3835 JUMP;
3836 }
3837 p += (addr + 1);
3838 MOP_OUT;
3839 JUMP;
3840#endif
3841
3842 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3843 GET_RELADDR_INC(addr, p);
3844 CHECK_MATCH_CACHE;
3845 if (*p == *s) {
3846 p++;
3847 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3848 MOP_OUT;
3849 JUMP;
3850 }
3851 p++;
3852 INC_NUM_FAILS;
3853 MOP_OUT;
3854 JUMP;
3855
3856 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3857 {
3858 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3859 GET_RELADDR_INC(addr, p);
3860
3861 STACK_ENSURE(1);
3862 repeat_stk[mem] = GET_STACK_INDEX(stk);
3863 STACK_PUSH_REPEAT(mem, p);
3864
3865 if (reg->repeat_range[mem].lower == 0) {
3866 CHECK_MATCH_CACHE;
3867 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3868 }
3869 }
3870 MOP_OUT;
3871 JUMP;
3872
3873 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3874 {
3875 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3876 GET_RELADDR_INC(addr, p);
3877
3878 STACK_ENSURE(1);
3879 repeat_stk[mem] = GET_STACK_INDEX(stk);
3880 STACK_PUSH_REPEAT(mem, p);
3881
3882 if (reg->repeat_range[mem].lower == 0) {
3883 CHECK_MATCH_CACHE;
3884 STACK_PUSH_ALT(p, s, sprev, pkeep);
3885 p += addr;
3886 }
3887 }
3888 MOP_OUT;
3889 JUMP;
3890
3891 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3892 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3893 si = repeat_stk[mem];
3894 stkp = STACK_AT(si);
3895
3896 repeat_inc:
3897 stkp->u.repeat.count++;
3898 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3899 /* end of repeat. Nothing to do. */
3900 }
3901 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3902#ifdef USE_MATCH_CACHE
3903 if (*pbegin == OP_REPEAT_INC) {
3904#undef MATCH_CACHE_HIT
3905#define MATCH_CACHE_HIT stkp->u.repeat.count--;
3906 CHECK_MATCH_CACHE;
3907#undef MATCH_CACHE_HIT
3908#define MATCH_CACHE_HIT ((void) 0)
3909 }
3910#endif
3911 STACK_PUSH_ALT(p, s, sprev, pkeep);
3912 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3913 }
3914 else {
3915 p = stkp->u.repeat.pcode;
3916 }
3917 STACK_PUSH_REPEAT_INC(si);
3918 MOP_OUT;
3919 CHECK_INTERRUPT_IN_MATCH_AT;
3920 JUMP;
3921
3922 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3923 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3924 STACK_GET_REPEAT(mem, stkp);
3925 si = GET_STACK_INDEX(stkp);
3926 goto repeat_inc;
3927
3928 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3929 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3930 si = repeat_stk[mem];
3931 stkp = STACK_AT(si);
3932
3933 repeat_inc_ng:
3934 stkp->u.repeat.count++;
3935 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3936 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3937 UChar* pcode = stkp->u.repeat.pcode;
3938
3939 STACK_PUSH_REPEAT_INC(si);
3940 if (*pbegin == OP_REPEAT_INC_NG) {
3941 CHECK_MATCH_CACHE;
3942 }
3943 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3944 }
3945 else {
3946 p = stkp->u.repeat.pcode;
3947 STACK_PUSH_REPEAT_INC(si);
3948 }
3949 }
3950 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3951 STACK_PUSH_REPEAT_INC(si);
3952 }
3953 MOP_OUT;
3954 CHECK_INTERRUPT_IN_MATCH_AT;
3955 JUMP;
3956
3957 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3958 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3959 STACK_GET_REPEAT(mem, stkp);
3960 si = GET_STACK_INDEX(stkp);
3961 goto repeat_inc_ng;
3962
3963 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3964 STACK_PUSH_POS(s, sprev, pkeep);
3965 MOP_OUT;
3966 JUMP;
3967
3968 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3969 {
3970 STACK_POS_END(stkp);
3971 s = stkp->u.state.pstr;
3972 sprev = stkp->u.state.pstr_prev;
3973 }
3974 MOP_OUT;
3975 JUMP;
3976
3977 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3978 GET_RELADDR_INC(addr, p);
3979 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3980 MOP_OUT;
3981 JUMP;
3982
3983 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3984 STACK_POP_TIL_POS_NOT;
3985 goto fail;
3986
3987 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3988 STACK_PUSH_STOP_BT;
3989 MOP_OUT;
3990 JUMP;
3991
3992 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3993 STACK_STOP_BT_END;
3994 MOP_OUT;
3995 JUMP;
3996
3997 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3998 GET_LENGTH_INC(tlen, p);
3999 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4000 if (IS_NULL(s)) goto fail;
4001 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4002 MOP_OUT;
4003 JUMP;
4004
4005 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
4006 GET_RELADDR_INC(addr, p);
4007 GET_LENGTH_INC(tlen, p);
4008 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4009 if (IS_NULL(q)) {
4010 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
4011 If you want to change to fail, replace following line. */
4012 p += addr;
4013 /* goto fail; */
4014 }
4015 else {
4016 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
4017 s = q;
4018 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4019 }
4020 MOP_OUT;
4021 JUMP;
4022
4023 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
4024 STACK_POP_TIL_LOOK_BEHIND_NOT;
4025 goto fail;
4026
4027 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
4028 /* Save the absent-start-pos and the original end-pos. */
4029 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
4030 MOP_OUT;
4031 JUMP;
4032
4033 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
4034 {
4035 const UChar* aend = ABSENT_END_POS;
4036 UChar* absent;
4037 UChar* selfp = p - 1;
4038
4039 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
4040 GET_RELADDR_INC(addr, p);
4041#ifdef ONIG_DEBUG_MATCH
4042 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
4043#endif
4044 if ((absent > aend) && (s > absent)) {
4045 /* An empty match occurred in (?~...) at the start point.
4046 * Never match. */
4047 STACK_POP;
4048 goto fail;
4049 }
4050 else if ((s >= aend) && (s > absent)) {
4051 if (s > aend) {
4052 /* Only one (or less) character matched in the last iteration.
4053 * This is not a possible point. */
4054 goto fail;
4055 }
4056 /* All possible points were found. Try matching after (?~...). */
4057 DATA_ENSURE(0);
4058 p += addr;
4059 }
4060 else if (s == end) {
4061 /* At the end of the string, just match with it */
4062 DATA_ENSURE(0);
4063 p += addr;
4064 }
4065 else {
4066 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
4067 n = enclen(encode, s, end);
4068 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
4069 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
4070 STACK_PUSH_ABSENT;
4071 ABSENT_END_POS = aend;
4072 }
4073 }
4074 MOP_OUT;
4075 JUMP;
4076
4077 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
4078 /* The pattern inside (?~...) was matched.
4079 * Set the end-pos temporary and go to next iteration. */
4080 if (sprev < ABSENT_END_POS)
4081 ABSENT_END_POS = sprev;
4082#ifdef ONIG_DEBUG_MATCH
4083 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
4084#endif
4085 STACK_POP_TIL_ABSENT;
4086 goto fail;
4087
4088#ifdef USE_SUBEXP_CALL
4089 CASE(OP_CALL) MOP_IN(OP_CALL);
4090 GET_ABSADDR_INC(addr, p);
4091 STACK_PUSH_CALL_FRAME(p);
4092 p = reg->p + addr;
4093 MOP_OUT;
4094 JUMP;
4095
4096 CASE(OP_RETURN) MOP_IN(OP_RETURN);
4097 STACK_RETURN(p);
4098 STACK_PUSH_RETURN;
4099 MOP_OUT;
4100 JUMP;
4101#endif
4102
4103 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
4104 GET_MEMNUM_INC(mem, p);
4105 GET_RELADDR_INC(addr, p);
4106 if ((mem > num_mem) ||
4107 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
4108 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
4109 p += addr;
4110 }
4111 MOP_OUT;
4112 JUMP;
4113
4114 CASE(OP_FINISH)
4115 goto finish;
4116
4117 CASE(OP_FAIL)
4118 if (0) {
4119 /* fall */
4120 fail:
4121 MOP_OUT;
4122 }
4123 MOP_IN(OP_FAIL);
4124 STACK_POP;
4125 p = stk->u.state.pcode;
4126 s = stk->u.state.pstr;
4127 sprev = stk->u.state.pstr_prev;
4128 pkeep = stk->u.state.pkeep;
4129
4130#ifdef USE_MATCH_CACHE
4131 if (
4132 msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED &&
4133 ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes
4134 ) {
4135 if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) {
4136 msa->match_cache_status = MATCH_CACHE_STATUS_INIT;
4137 OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes);
4138 if (r < 0) goto bytecode_error;
4139 }
4140 if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) {
4141 msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED;
4142 goto fail_match_cache;
4143 }
4144 if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) {
4145 goto fail_match_cache;
4146 }
4147 if (msa->cache_opcodes == NULL) {
4148 msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED;
4149 OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode));
4150 if (cache_opcodes == NULL) {
4151 return ONIGERR_MEMORY;
4152 }
4153 OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points);
4154 if (r < 0) {
4155 if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
4156 else goto bytecode_error;
4157 }
4158 msa->cache_opcodes = cache_opcodes;
4159#ifdef ONIG_DEBUG_MATCH_CACHE
4160 fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes);
4161 fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points);
4162 fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes);
4163 for (int i = 0; i < msa->num_cache_opcodes; i++) {
4164 fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr);
4165 }
4166#endif
4167 }
4168 if (msa->match_cache_buf == NULL) {
4169 size_t length = (end - str) + 1;
4170 size_t num_match_cache_points = (size_t)msa->num_cache_points * length;
4171#ifdef ONIG_DEBUG_MATCH_CACHE
4172 fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length);
4173#endif
4174 /* Overflow check */
4175 if (num_match_cache_points / length != (size_t)msa->num_cache_points) {
4176 return ONIGERR_MEMORY;
4177 }
4178 if (num_match_cache_points >= LONG_MAX_LIMIT) {
4179 return ONIGERR_MEMORY;
4180 }
4181 size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1;
4182 uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t));
4183 if (match_cache_buf == NULL) {
4184 return ONIGERR_MEMORY;
4185 }
4186 xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t));
4187 msa->match_cache_buf = match_cache_buf;
4188 }
4189 }
4190 fail_match_cache:
4191#endif
4192
4193#ifdef USE_COMBINATION_EXPLOSION_CHECK
4194 if (stk->u.state.state_check != 0) {
4195 stk->type = STK_STATE_CHECK_MARK;
4196 stk++;
4197 }
4198#endif
4199
4200 MOP_OUT;
4201 CHECK_INTERRUPT_IN_MATCH_AT;
4202 JUMP;
4203
4204 DEFAULT
4205 goto bytecode_error;
4206 } VM_LOOP_END
4207
4208 finish:
4209 STACK_SAVE;
4210 xfree(xmalloc_base);
4211 return best_len;
4212
4213#ifdef ONIG_DEBUG
4214 stack_error:
4215 STACK_SAVE;
4216 xfree(xmalloc_base);
4217 return ONIGERR_STACK_BUG;
4218#endif
4219
4220 bytecode_error:
4221 STACK_SAVE;
4222 xfree(xmalloc_base);
4223 return ONIGERR_UNDEFINED_BYTECODE;
4224
4225 unexpected_bytecode_error:
4226 STACK_SAVE;
4227 xfree(xmalloc_base);
4228 return ONIGERR_UNEXPECTED_BYTECODE;
4229
4230 timeout:
4231 STACK_SAVE;
4232 xfree(xmalloc_base);
4233 return ONIGERR_TIMEOUT;
4234}
4235
4236
4237static UChar*
4238slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4239 const UChar* text, const UChar* text_end, UChar* text_range)
4240{
4241 UChar *t, *p, *s, *end;
4242
4243 end = (UChar* )text_end;
4244 end -= target_end - target - 1;
4245 if (end > text_range)
4246 end = text_range;
4247
4248 s = (UChar* )text;
4249
4250 if (enc->max_enc_len == enc->min_enc_len) {
4251 int n = enc->max_enc_len;
4252
4253 while (s < end) {
4254 if (*s == *target) {
4255 p = s + 1;
4256 t = target + 1;
4257 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4258 return s;
4259 }
4260 s += n;
4261 }
4262 return (UChar* )NULL;
4263 }
4264 while (s < end) {
4265 if (*s == *target) {
4266 p = s + 1;
4267 t = target + 1;
4268 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4269 return s;
4270 }
4271 s += enclen(enc, s, text_end);
4272 }
4273
4274 return (UChar* )NULL;
4275}
4276
4277static int
4278str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4279 const UChar* t, const UChar* tend,
4280 const UChar* p, const UChar* end)
4281{
4282 int lowlen;
4283 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4284
4285 while (t < tend) {
4286 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4287 q = lowbuf;
4288 while (lowlen > 0) {
4289 if (*t++ != *q++) return 0;
4290 lowlen--;
4291 }
4292 }
4293
4294 return 1;
4295}
4296
4297static UChar*
4298slow_search_ic(OnigEncoding enc, int case_fold_flag,
4299 UChar* target, UChar* target_end,
4300 const UChar* text, const UChar* text_end, UChar* text_range)
4301{
4302 UChar *s, *end;
4303
4304 end = (UChar* )text_end;
4305 end -= target_end - target - 1;
4306 if (end > text_range)
4307 end = text_range;
4308
4309 s = (UChar* )text;
4310
4311 while (s < end) {
4312 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4313 s, text_end))
4314 return s;
4315
4316 s += enclen(enc, s, text_end);
4317 }
4318
4319 return (UChar* )NULL;
4320}
4321
4322static UChar*
4323slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4324 const UChar* text, const UChar* adjust_text,
4325 const UChar* text_end, const UChar* text_start)
4326{
4327 UChar *t, *p, *s;
4328
4329 s = (UChar* )text_end;
4330 s -= (target_end - target);
4331 if (s > text_start)
4332 s = (UChar* )text_start;
4333 else
4334 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4335
4336 while (s >= text) {
4337 if (*s == *target) {
4338 p = s + 1;
4339 t = target + 1;
4340 while (t < target_end) {
4341 if (*t != *p++)
4342 break;
4343 t++;
4344 }
4345 if (t == target_end)
4346 return s;
4347 }
4348 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4349 }
4350
4351 return (UChar* )NULL;
4352}
4353
4354static UChar*
4355slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4356 UChar* target, UChar* target_end,
4357 const UChar* text, const UChar* adjust_text,
4358 const UChar* text_end, const UChar* text_start)
4359{
4360 UChar *s;
4361
4362 s = (UChar* )text_end;
4363 s -= (target_end - target);
4364 if (s > text_start)
4365 s = (UChar* )text_start;
4366 else
4367 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4368
4369 while (s >= text) {
4370 if (str_lower_case_match(enc, case_fold_flag,
4371 target, target_end, s, text_end))
4372 return s;
4373
4374 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4375 }
4376
4377 return (UChar* )NULL;
4378}
4379
4380/* Sunday's quick search applied to a multibyte string */
4381static UChar*
4382bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4383 const UChar* text, const UChar* text_end,
4384 const UChar* text_range)
4385{
4386 const UChar *s, *se, *t, *p, *end;
4387 const UChar *tail;
4388 ptrdiff_t skip, tlen1;
4389 OnigEncoding enc = reg->enc;
4390
4391# ifdef ONIG_DEBUG_SEARCH
4392 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4393 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4394# endif
4395
4396 tail = target_end - 1;
4397 tlen1 = tail - target;
4398 end = text_range;
4399 if (end + tlen1 > text_end)
4400 end = text_end - tlen1;
4401
4402 s = text;
4403
4404 if (IS_NULL(reg->int_map)) {
4405 while (s < end) {
4406 p = se = s + tlen1;
4407 t = tail;
4408 while (*p == *t) {
4409 if (t == target) return (UChar* )s;
4410 p--; t--;
4411 }
4412 if (s + 1 >= end) break;
4413 skip = reg->map[se[1]];
4414 t = s;
4415 do {
4416 s += enclen(enc, s, end);
4417 } while ((s - t) < skip && s < end);
4418 }
4419 }
4420 else {
4421# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4422 while (s < end) {
4423 p = se = s + tlen1;
4424 t = tail;
4425 while (*p == *t) {
4426 if (t == target) return (UChar* )s;
4427 p--; t--;
4428 }
4429 if (s + 1 >= end) break;
4430 skip = reg->int_map[se[1]];
4431 t = s;
4432 do {
4433 s += enclen(enc, s, end);
4434 } while ((s - t) < skip && s < end);
4435 }
4436# endif
4437 }
4438
4439 return (UChar* )NULL;
4440}
4441
4442/* Sunday's quick search */
4443static UChar*
4444bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4445 const UChar* text, const UChar* text_end, const UChar* text_range)
4446{
4447 const UChar *s, *t, *p, *end;
4448 const UChar *tail;
4449 ptrdiff_t tlen1;
4450
4451# ifdef ONIG_DEBUG_SEARCH
4452 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4453 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4454# endif
4455
4456 tail = target_end - 1;
4457 tlen1 = tail - target;
4458 end = text_range + tlen1;
4459 if (end > text_end)
4460 end = text_end;
4461
4462 s = text + tlen1;
4463 if (IS_NULL(reg->int_map)) {
4464 while (s < end) {
4465 p = s;
4466 t = tail;
4467 while (*p == *t) {
4468 if (t == target) return (UChar* )p;
4469 p--; t--;
4470 }
4471 if (s + 1 >= end) break;
4472 s += reg->map[s[1]];
4473 }
4474 }
4475 else { /* see int_map[] */
4476# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4477 while (s < end) {
4478 p = s;
4479 t = tail;
4480 while (*p == *t) {
4481 if (t == target) return (UChar* )p;
4482 p--; t--;
4483 }
4484 if (s + 1 >= end) break;
4485 s += reg->int_map[s[1]];
4486 }
4487# endif
4488 }
4489 return (UChar* )NULL;
4490}
4491
4492/* Sunday's quick search applied to a multibyte string (ignore case) */
4493static UChar*
4494bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4495 const UChar* text, const UChar* text_end,
4496 const UChar* text_range)
4497{
4498 const UChar *s, *se, *t, *end;
4499 const UChar *tail;
4500 ptrdiff_t skip, tlen1;
4501 OnigEncoding enc = reg->enc;
4502 int case_fold_flag = reg->case_fold_flag;
4503
4504# ifdef ONIG_DEBUG_SEARCH
4505 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4506 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4507# endif
4508
4509 tail = target_end - 1;
4510 tlen1 = tail - target;
4511 end = text_range;
4512 if (end + tlen1 > text_end)
4513 end = text_end - tlen1;
4514
4515 s = text;
4516
4517 if (IS_NULL(reg->int_map)) {
4518 while (s < end) {
4519 se = s + tlen1;
4520 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4521 s, se + 1))
4522 return (UChar* )s;
4523 if (s + 1 >= end) break;
4524 skip = reg->map[se[1]];
4525 t = s;
4526 do {
4527 s += enclen(enc, s, end);
4528 } while ((s - t) < skip && s < end);
4529 }
4530 }
4531 else {
4532# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4533 while (s < end) {
4534 se = s + tlen1;
4535 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4536 s, se + 1))
4537 return (UChar* )s;
4538 if (s + 1 >= end) break;
4539 skip = reg->int_map[se[1]];
4540 t = s;
4541 do {
4542 s += enclen(enc, s, end);
4543 } while ((s - t) < skip && s < end);
4544 }
4545# endif
4546 }
4547
4548 return (UChar* )NULL;
4549}
4550
4551/* Sunday's quick search (ignore case) */
4552static UChar*
4553bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4554 const UChar* text, const UChar* text_end, const UChar* text_range)
4555{
4556 const UChar *s, *p, *end;
4557 const UChar *tail;
4558 ptrdiff_t tlen1;
4559 OnigEncoding enc = reg->enc;
4560 int case_fold_flag = reg->case_fold_flag;
4561
4562# ifdef ONIG_DEBUG_SEARCH
4563 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4564 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4565# endif
4566
4567 tail = target_end - 1;
4568 tlen1 = tail - target;
4569 end = text_range + tlen1;
4570 if (end > text_end)
4571 end = text_end;
4572
4573 s = text + tlen1;
4574 if (IS_NULL(reg->int_map)) {
4575 while (s < end) {
4576 p = s - tlen1;
4577 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4578 p, s + 1))
4579 return (UChar* )p;
4580 if (s + 1 >= end) break;
4581 s += reg->map[s[1]];
4582 }
4583 }
4584 else { /* see int_map[] */
4585# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4586 while (s < end) {
4587 p = s - tlen1;
4588 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4589 p, s + 1))
4590 return (UChar* )p;
4591 if (s + 1 >= end) break;
4592 s += reg->int_map[s[1]];
4593 }
4594# endif
4595 }
4596 return (UChar* )NULL;
4597}
4598
4599#ifdef USE_INT_MAP_BACKWARD
4600static int
4601set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4602 int** skip)
4603{
4604 int i, len;
4605
4606 if (IS_NULL(*skip)) {
4607 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4608 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4609 }
4610
4611 len = (int )(end - s);
4612 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4613 (*skip)[i] = len;
4614
4615 for (i = len - 1; i > 0; i--)
4616 (*skip)[s[i]] = i;
4617
4618 return 0;
4619}
4620
4621static UChar*
4622bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4623 const UChar* text, const UChar* adjust_text,
4624 const UChar* text_end, const UChar* text_start)
4625{
4626 const UChar *s, *t, *p;
4627
4628 s = text_end - (target_end - target);
4629 if (text_start < s)
4630 s = text_start;
4631 else
4632 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4633
4634 while (s >= text) {
4635 p = s;
4636 t = target;
4637 while (t < target_end && *p == *t) {
4638 p++; t++;
4639 }
4640 if (t == target_end)
4641 return (UChar* )s;
4642
4643 s -= reg->int_map_backward[*s];
4644 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4645 }
4646
4647 return (UChar* )NULL;
4648}
4649#endif
4650
4651static UChar*
4652map_search(OnigEncoding enc, UChar map[],
4653 const UChar* text, const UChar* text_range, const UChar* text_end)
4654{
4655 const UChar *s = text;
4656
4657 while (s < text_range) {
4658 if (map[*s]) return (UChar* )s;
4659
4660 s += enclen(enc, s, text_end);
4661 }
4662 return (UChar* )NULL;
4663}
4664
4665static UChar*
4666map_search_backward(OnigEncoding enc, UChar map[],
4667 const UChar* text, const UChar* adjust_text,
4668 const UChar* text_start, const UChar* text_end)
4669{
4670 const UChar *s = text_start;
4671
4672 while (s >= text) {
4673 if (map[*s]) return (UChar* )s;
4674
4675 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4676 }
4677 return (UChar* )NULL;
4678}
4679
4680extern OnigPosition
4681onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4682 OnigOptionType option)
4683{
4684 ptrdiff_t r;
4685 UChar *prev;
4686 OnigMatchArg msa;
4687
4688 MATCH_ARG_INIT(msa, option, region, at, at);
4689#ifdef USE_COMBINATION_EXPLOSION_CHECK
4690 {
4691 ptrdiff_t offset = at - str;
4692 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4693 }
4694#endif
4695
4696 if (region) {
4697 r = onig_region_resize_clear(region, reg->num_mem + 1);
4698 }
4699 else
4700 r = 0;
4701
4702 if (r == 0) {
4703 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4704 r = match_at(reg, str, end,
4705#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4706 end,
4707#endif
4708 at, prev, &msa);
4709 }
4710
4711 MATCH_ARG_FREE(msa);
4712 return r;
4713}
4714
4715static int
4716forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4717 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4718{
4719 UChar *p, *pprev = (UChar* )NULL;
4720 size_t input_len = end - str;
4721
4722#ifdef ONIG_DEBUG_SEARCH
4723 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4724 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4725#endif
4726
4727 if (reg->dmin > input_len) {
4728 return 0;
4729 }
4730
4731 p = s;
4732 if (reg->dmin != 0) {
4733 if ((OnigDistance)(end - p) <= reg->dmin) return 0; /* fail */
4734 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4735 p += reg->dmin;
4736 }
4737 else {
4738 UChar *q = p + reg->dmin;
4739
4740 while (p < q) p += enclen(reg->enc, p, end);
4741 }
4742 }
4743
4744 retry:
4745 switch (reg->optimize) {
4746 case ONIG_OPTIMIZE_EXACT:
4747 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4748 break;
4749 case ONIG_OPTIMIZE_EXACT_IC:
4750 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4751 reg->exact, reg->exact_end, p, end, range);
4752 break;
4753
4754 case ONIG_OPTIMIZE_EXACT_BM:
4755 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4756 break;
4757
4758 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4759 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4760 break;
4761
4762 case ONIG_OPTIMIZE_EXACT_BM_IC:
4763 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4764 break;
4765
4766 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4767 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4768 break;
4769
4770 case ONIG_OPTIMIZE_MAP:
4771 p = map_search(reg->enc, reg->map, p, range, end);
4772 break;
4773 }
4774
4775 if (p && p < range) {
4776 if ((OnigDistance)(p - s) < reg->dmin) {
4777 retry_gate:
4778 pprev = p;
4779 p += enclen(reg->enc, p, end);
4780 goto retry;
4781 }
4782
4783 if (reg->sub_anchor) {
4784 UChar* prev;
4785
4786 switch (reg->sub_anchor) {
4787 case ANCHOR_BEGIN_LINE:
4788 if (!ON_STR_BEGIN(p)) {
4789 prev = onigenc_get_prev_char_head(reg->enc,
4790 (pprev ? pprev : str), p, end);
4791 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
4792 goto retry_gate;
4793 }
4794 break;
4795
4796 case ANCHOR_END_LINE:
4797 if (ON_STR_END(p)) {
4798#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4799 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4800 (pprev ? pprev : str), p);
4801 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
4802 goto retry_gate;
4803#endif
4804 }
4805 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
4806 goto retry_gate;
4807 break;
4808 }
4809 }
4810
4811 if (reg->dmax == 0) {
4812 *low = p;
4813 if (low_prev) {
4814 if (*low > s)
4815 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
4816 else
4817 *low_prev = onigenc_get_prev_char_head(reg->enc,
4818 (pprev ? pprev : str), p, end);
4819 }
4820 *high = p;
4821 }
4822 else {
4823 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4824 if ((OnigDistance)(p - str) < reg->dmax) {
4825 *low = (UChar* )str;
4826 if (low_prev)
4827 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4828 }
4829 else {
4830 *low = p - reg->dmax;
4831 if (*low > s) {
4832 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4833 *low, end, (const UChar** )low_prev);
4834 if (low_prev && IS_NULL(*low_prev))
4835 *low_prev = onigenc_get_prev_char_head(reg->enc,
4836 (pprev ? pprev : s), *low, end);
4837 }
4838 else {
4839 if (low_prev)
4840 *low_prev = onigenc_get_prev_char_head(reg->enc,
4841 (pprev ? pprev : str), *low, end);
4842 }
4843 }
4844 }
4845 /* no needs to adjust *high, *high is used as range check only */
4846 if ((OnigDistance)(p - str) < reg->dmin)
4847 *high = (UChar* )str;
4848 else
4849 *high = p - reg->dmin;
4850 }
4851
4852#ifdef ONIG_DEBUG_SEARCH
4853 fprintf(stderr,
4854 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4855 *low - str, *high - str, reg->dmin, reg->dmax);
4856#endif
4857 return 1; /* success */
4858 }
4859
4860 return 0; /* fail */
4861}
4862
4863#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4864
4865static int
4866backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4867 UChar* s, const UChar* range, UChar* adjrange,
4868 UChar** low, UChar** high)
4869{
4870 UChar *p;
4871 size_t input_len = end - str;
4872
4873 if (reg->dmin > input_len) {
4874 return 0;
4875 }
4876
4877 p = s;
4878
4879 retry:
4880 switch (reg->optimize) {
4881 case ONIG_OPTIMIZE_EXACT:
4882 exact_method:
4883 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4884 range, adjrange, end, p);
4885 break;
4886
4887 case ONIG_OPTIMIZE_EXACT_IC:
4888 case ONIG_OPTIMIZE_EXACT_BM_IC:
4889 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4890 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4891 reg->exact, reg->exact_end,
4892 range, adjrange, end, p);
4893 break;
4894
4895 case ONIG_OPTIMIZE_EXACT_BM:
4896 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4897#ifdef USE_INT_MAP_BACKWARD
4898 if (IS_NULL(reg->int_map_backward)) {
4899 int r;
4900 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4901 goto exact_method;
4902
4903 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4904 &(reg->int_map_backward));
4905 if (r) return r;
4906 }
4907 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4908 end, p);
4909#else
4910 goto exact_method;
4911#endif
4912 break;
4913
4914 case ONIG_OPTIMIZE_MAP:
4915 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4916 break;
4917 }
4918
4919 if (p) {
4920 if (reg->sub_anchor) {
4921 UChar* prev;
4922
4923 switch (reg->sub_anchor) {
4924 case ANCHOR_BEGIN_LINE:
4925 if (!ON_STR_BEGIN(p)) {
4926 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4927 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4928 p = prev;
4929 goto retry;
4930 }
4931 }
4932 break;
4933
4934 case ANCHOR_END_LINE:
4935 if (ON_STR_END(p)) {
4936#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4937 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4938 if (IS_NULL(prev)) goto fail;
4939 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4940 p = prev;
4941 goto retry;
4942 }
4943#endif
4944 }
4945 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4946 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4947 if (IS_NULL(p)) goto fail;
4948 goto retry;
4949 }
4950 break;
4951 }
4952 }
4953
4954 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4955 if ((OnigDistance)(p - str) < reg->dmax)
4956 *low = (UChar* )str;
4957 else
4958 *low = p - reg->dmax;
4959
4960 if (reg->dmin != 0) {
4961 if ((OnigDistance)(p - str) < reg->dmin)
4962 *high = (UChar* )str;
4963 else
4964 *high = p - reg->dmin;
4965 }
4966 else {
4967 *high = p;
4968 }
4969
4970 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4971 }
4972
4973#ifdef ONIG_DEBUG_SEARCH
4974 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4975 (int )(*low - str), (int )(*high - str));
4976#endif
4977 return 1; /* success */
4978 }
4979
4980 fail:
4981#ifdef ONIG_DEBUG_SEARCH
4982 fprintf(stderr, "backward_search_range: fail.\n");
4983#endif
4984 return 0; /* fail */
4985}
4986
4987
4988extern OnigPosition
4989onig_search(regex_t* reg, const UChar* str, const UChar* end,
4990 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4991{
4992 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4993}
4994
4995extern OnigPosition
4996onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4997 const UChar* global_pos,
4998 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4999{
5000 ptrdiff_t r;
5001 UChar *s, *prev;
5002 OnigMatchArg msa;
5003#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5004 const UChar *orig_start = start;
5005 const UChar *orig_range = range;
5006#endif
5007
5008#ifdef ONIG_DEBUG_SEARCH
5009 fprintf(stderr,
5010 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
5011 (uintptr_t )str, str, end - str, start - str, range - str);
5012#endif
5013
5014 if (region) {
5015 r = onig_region_resize_clear(region, reg->num_mem + 1);
5016 if (r) goto finish_no_msa;
5017 }
5018
5019 if (start > end || start < str) goto mismatch_no_msa;
5020
5021
5022#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5023# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5024# define MATCH_AND_RETURN_CHECK(upper_range) \
5025 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5026 switch (r) { \
5027 case ONIG_MISMATCH: \
5028 break; \
5029 case ONIGERR_TIMEOUT: \
5030 goto timeout; \
5031 default: \
5032 if (r >= 0) { \
5033 if (! IS_FIND_LONGEST(reg->options)) { \
5034 goto match; \
5035 }\
5036 }\
5037 else goto finish; /* error */ \
5038 }
5039# else
5040# define MATCH_AND_RETURN_CHECK(upper_range) \
5041 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5042 switch (r) { \
5043 case ONIG_MISMATCH: \
5044 break; \
5045 case ONIGERR_TIMEOUT: \
5046 goto timeout; \
5047 default: \
5048 if (r >= 0) { \
5049 goto match; \
5050 }\
5051 else goto finish; /* error */ \
5052 }
5053# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5054#else
5055# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5056# define MATCH_AND_RETURN_CHECK(none) \
5057 r = match_at(reg, str, end, s, prev, &msa);\
5058 switch (r) { \
5059 case ONIG_MISMATCH: \
5060 break; \
5061 case ONIGERR_TIMEOUT: \
5062 goto timeout; \
5063 default: \
5064 if (r >= 0) { \
5065 if (! IS_FIND_LONGEST(reg->options)) { \
5066 goto match; \
5067 } \
5068 } \
5069 else goto finish; /* error */ \
5070 }
5071# else
5072# define MATCH_AND_RETURN_CHECK(none) \
5073 r = match_at(reg, str, end, s, prev, &msa);\
5074 switch (r) { \
5075 case ONIG_MISMATCH: \
5076 break; \
5077 case ONIGERR_TIMEOUT: \
5078 goto timeout; \
5079 default: \
5080 if (r >= 0) { \
5081 goto match; \
5082 } \
5083 else goto finish; /* error */ \
5084 }
5085# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5086#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
5087
5088
5089 /* anchor optimize: resume search range */
5090 if (reg->anchor != 0 && str < end) {
5091 UChar *min_semi_end, *max_semi_end;
5092
5093 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
5094 /* search start-position only */
5095 begin_position:
5096 if (range > start)
5097 {
5098 if (global_pos > start)
5099 {
5100 if (global_pos < range)
5101 range = global_pos + 1;
5102 }
5103 else
5104 range = start + 1;
5105 }
5106 else
5107 range = start;
5108 }
5109 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
5110 /* search str-position only */
5111 if (range > start) {
5112 if (start != str) goto mismatch_no_msa;
5113 range = str + 1;
5114 }
5115 else {
5116 if (range <= str) {
5117 start = str;
5118 range = str;
5119 }
5120 else
5121 goto mismatch_no_msa;
5122 }
5123 }
5124 else if (reg->anchor & ANCHOR_END_BUF) {
5125 min_semi_end = max_semi_end = (UChar* )end;
5126
5127 end_buf:
5128 if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin)
5129 goto mismatch_no_msa;
5130
5131 if (range > start) {
5132 if ((OnigDistance)(min_semi_end - start) > reg->anchor_dmax) {
5133 start = min_semi_end - reg->anchor_dmax;
5134 if (start < end)
5135 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5136 }
5137 if ((OnigDistance)(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5138 if ((OnigDistance)(max_semi_end - str + 1) < reg->anchor_dmin)
5139 goto mismatch_no_msa;
5140 else
5141 range = max_semi_end - reg->anchor_dmin + 1;
5142 }
5143
5144 if (start > range) goto mismatch_no_msa;
5145 /* If start == range, match with empty at end.
5146 Backward search is used. */
5147 }
5148 else {
5149 if ((OnigDistance)(min_semi_end - range) > reg->anchor_dmax) {
5150 range = min_semi_end - reg->anchor_dmax;
5151 }
5152 if ((OnigDistance)(max_semi_end - start) < reg->anchor_dmin) {
5153 if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin)
5154 goto mismatch_no_msa;
5155 else {
5156 start = max_semi_end - reg->anchor_dmin;
5157 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5158 }
5159 }
5160 if (range > start) goto mismatch_no_msa;
5161 }
5162 }
5163 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5164 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5165
5166 max_semi_end = (UChar* )end;
5167 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5168 min_semi_end = pre_end;
5169
5170#ifdef USE_CRNL_AS_LINE_TERMINATOR
5171 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5172 if (IS_NOT_NULL(pre_end) &&
5173 IS_NEWLINE_CRLF(reg->options) &&
5174 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5175 min_semi_end = pre_end;
5176 }
5177#endif
5178 if (min_semi_end > str && start <= min_semi_end) {
5179 goto end_buf;
5180 }
5181 }
5182 else {
5183 min_semi_end = (UChar* )end;
5184 goto end_buf;
5185 }
5186 }
5187 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5188 goto begin_position;
5189 }
5190 }
5191 else if (str == end) { /* empty string */
5192 static const UChar address_for_empty_string[] = "";
5193
5194#ifdef ONIG_DEBUG_SEARCH
5195 fprintf(stderr, "onig_search: empty string.\n");
5196#endif
5197
5198 if (reg->threshold_len == 0) {
5199 start = end = str = address_for_empty_string;
5200 s = (UChar* )start;
5201 prev = (UChar* )NULL;
5202
5203 MATCH_ARG_INIT(msa, option, region, start, start);
5204#ifdef USE_COMBINATION_EXPLOSION_CHECK
5205 msa.state_check_buff = (void* )0;
5206 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
5207#endif
5208 MATCH_AND_RETURN_CHECK(end);
5209 goto mismatch;
5210 }
5211 goto mismatch_no_msa;
5212 }
5213
5214#ifdef ONIG_DEBUG_SEARCH
5215 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5216 (int )(end - str), (int )(start - str), (int )(range - str));
5217#endif
5218
5219 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5220#ifdef USE_COMBINATION_EXPLOSION_CHECK
5221 {
5222 ptrdiff_t offset = (MIN(start, range) - str);
5223 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5224 }
5225#endif
5226
5227 s = (UChar* )start;
5228 if (range > start) { /* forward search */
5229 if (s > str)
5230 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5231 else
5232 prev = (UChar* )NULL;
5233
5234 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5235 UChar *sch_range, *low, *high, *low_prev;
5236
5237 if (reg->dmax != 0) {
5238 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5239 sch_range = (UChar* )end;
5240 else {
5241 if ((OnigDistance)(end - range) < reg->dmax)
5242 sch_range = (UChar* )end;
5243 else {
5244 sch_range = (UChar* )range + reg->dmax;
5245 }
5246 }
5247 }
5248 else
5249 sch_range = (UChar* )range;
5250
5251 if ((end - start) < reg->threshold_len)
5252 goto mismatch;
5253
5254 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5255 do {
5256 if (! forward_search_range(reg, str, end, s, sch_range,
5257 &low, &high, &low_prev)) goto mismatch;
5258 if (s < low) {
5259 s = low;
5260 prev = low_prev;
5261 }
5262 while (s <= high) {
5263 MATCH_AND_RETURN_CHECK(orig_range);
5264 prev = s;
5265 s += enclen(reg->enc, s, end);
5266 }
5267 } while (s < range);
5268 goto mismatch;
5269 }
5270 else { /* check only. */
5271 if (! forward_search_range(reg, str, end, s, sch_range,
5272 &low, &high, (UChar** )NULL)) goto mismatch;
5273
5274 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5275 do {
5276 MATCH_AND_RETURN_CHECK(orig_range);
5277 prev = s;
5278 s += enclen(reg->enc, s, end);
5279
5280 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5281 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5282 && s < range) {
5283 prev = s;
5284 s += enclen(reg->enc, s, end);
5285 }
5286 }
5287 } while (s < range);
5288 goto mismatch;
5289 }
5290 }
5291 }
5292
5293 do {
5294 MATCH_AND_RETURN_CHECK(orig_range);
5295 prev = s;
5296 s += enclen(reg->enc, s, end);
5297 } while (s < range);
5298
5299 if (s == range) { /* because empty match with /$/. */
5300 MATCH_AND_RETURN_CHECK(orig_range);
5301 }
5302 }
5303 else { /* backward search */
5304 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5305 UChar *low, *high, *adjrange, *sch_start;
5306 const UChar *min_range;
5307
5308 if (range < end)
5309 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5310 else
5311 adjrange = (UChar* )end;
5312
5313 if ((OnigDistance)(end - range) > reg->dmin)
5314 min_range = range + reg->dmin;
5315 else
5316 min_range = end;
5317
5318 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5319 end - range >= reg->threshold_len) {
5320 do {
5321 if ((OnigDistance)(end - s) > reg->dmax)
5322 sch_start = s + reg->dmax;
5323 else
5324 sch_start = (UChar* )end;
5325
5326 if (backward_search_range(reg, str, end, sch_start, min_range, adjrange,
5327 &low, &high) <= 0)
5328 goto mismatch;
5329
5330 if (s > high)
5331 s = high;
5332
5333 while (s >= low) {
5334 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5335 MATCH_AND_RETURN_CHECK(orig_start);
5336 s = prev;
5337 }
5338 } while (s >= range);
5339 goto mismatch;
5340 }
5341 else { /* check only. */
5342 if (end - range < reg->threshold_len) goto mismatch;
5343
5344 if (reg->dmax != 0) {
5345 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5346 sch_start = (UChar* )end;
5347 else {
5348 if ((OnigDistance)(end - s) > reg->dmax) {
5349 sch_start = s + reg->dmax;
5350 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5351 start, sch_start, end);
5352 } else
5353 sch_start = (UChar* )end;
5354 }
5355 }
5356 else
5357 sch_start = (UChar* )s;
5358
5359 if (backward_search_range(reg, str, end, sch_start, min_range, adjrange,
5360 &low, &high) <= 0) goto mismatch;
5361 }
5362 }
5363
5364 do {
5365 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5366 MATCH_AND_RETURN_CHECK(orig_start);
5367 s = prev;
5368 } while (s >= range);
5369 }
5370
5371 mismatch:
5372#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5373 if (IS_FIND_LONGEST(reg->options)) {
5374 if (msa.best_len >= 0) {
5375 s = msa.best_s;
5376 goto match;
5377 }
5378 }
5379#endif
5380 r = ONIG_MISMATCH;
5381
5382 finish:
5383 MATCH_ARG_FREE(msa);
5384
5385 /* If result is mismatch and no FIND_NOT_EMPTY option,
5386 then the region is not set in match_at(). */
5387 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5388 onig_region_clear(region);
5389 }
5390
5391#ifdef ONIG_DEBUG
5392 if (r != ONIG_MISMATCH)
5393 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5394#endif
5395 return r;
5396
5397 mismatch_no_msa:
5398 r = ONIG_MISMATCH;
5399 finish_no_msa:
5400#ifdef ONIG_DEBUG
5401 if (r != ONIG_MISMATCH)
5402 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5403#endif
5404 return r;
5405
5406 match:
5407 MATCH_ARG_FREE(msa);
5408 return s - str;
5409
5410timeout:
5411 MATCH_ARG_FREE(msa);
5412 return ONIGERR_TIMEOUT;
5413}
5414
5415extern OnigPosition
5416onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5417 OnigRegion* region, OnigOptionType option,
5418 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5419 void* callback_arg)
5420{
5421 OnigPosition r;
5422 OnigPosition n;
5423 int rs;
5424 const UChar* start;
5425
5426 n = 0;
5427 start = str;
5428 while (1) {
5429 r = onig_search(reg, str, end, start, end, region, option);
5430 if (r >= 0) {
5431 rs = scan_callback(n, r, region, callback_arg);
5432 n++;
5433 if (rs != 0)
5434 return rs;
5435
5436 if (region->end[0] == start - str) {
5437 if (start >= end) break;
5438 start += enclen(reg->enc, start, end);
5439 }
5440 else
5441 start = str + region->end[0];
5442
5443 if (start > end)
5444 break;
5445 }
5446 else if (r == ONIG_MISMATCH) {
5447 break;
5448 }
5449 else { /* error */
5450 return r;
5451 }
5452 }
5453
5454 return n;
5455}
5456
5457extern OnigEncoding
5458onig_get_encoding(const regex_t* reg)
5459{
5460 return reg->enc;
5461}
5462
5463extern OnigOptionType
5464onig_get_options(const regex_t* reg)
5465{
5466 return reg->options;
5467}
5468
5469extern OnigCaseFoldType
5470onig_get_case_fold_flag(const regex_t* reg)
5471{
5472 return reg->case_fold_flag;
5473}
5474
5475extern const OnigSyntaxType*
5476onig_get_syntax(const regex_t* reg)
5477{
5478 return reg->syntax;
5479}
5480
5481extern int
5482onig_number_of_captures(const regex_t* reg)
5483{
5484 return reg->num_mem;
5485}
5486
5487extern int
5488onig_number_of_capture_histories(const regex_t* reg)
5489{
5490#ifdef USE_CAPTURE_HISTORY
5491 int i, n;
5492
5493 n = 0;
5494 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5495 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5496 n++;
5497 }
5498 return n;
5499#else
5500 return 0;
5501#endif
5502}
5503
5504extern void
5505onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
5506{
5507 *to = *from;
5508}
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
Definition defines.h:89
int len
Length of the buffer.
Definition io.h:8
Definition win32.h:723