Ruby 3.5.0dev (2025-01-10 revision 5fab31b15e32622c4b71d1d347a41937e9f9c212)
regexec.c (5fab31b15e32622c4b71d1d347a41937e9f9c212)
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
/* Ruby builds always leave USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
 * undefined; every other build defines it. */
33#ifdef RUBY
34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35#else
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37#endif
38
/* Unless the build already chose a value, USE_TOKEN_THREADED_VM defaults to 1
 * when __GNUC__ is defined and to 0 otherwise. */
39#ifndef USE_TOKEN_THREADED_VM
40# ifdef __GNUC__
41# define USE_TOKEN_THREADED_VM 1
42# else
43# define USE_TOKEN_THREADED_VM 0
44# endif
45#endif
46
#ifdef RUBY
# define ENC_DUMMY_FLAG (1<<24)
/*
 * Returns non-zero when ONIGENC_MBC_MINLEN(enc) is 1 and ENC_DUMMY_FLAG is
 * not set in enc->ruby_encoding_index; returns 0 otherwise.
 */
static inline int
rb_enc_asciicompat(OnigEncoding enc)
{
  return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
}
/*
 * Ruby-specific replacement for ONIGENC_IS_MBC_ASCII_WORD.
 *
 * For encodings accepted by rb_enc_asciicompat() only the first byte at `s`
 * is inspected (alphanumeric or '_'); for all others the character is decoded
 * with ONIGENC_MBC_TO_CODE and tested against ONIGENC_CTYPE_WORD.
 *
 * The `s` argument is parenthesized before being dereferenced so that an
 * expression argument (e.g. `p + 1`) is dereferenced as a whole.
 * `s` may be evaluated more than once; do not pass expressions with side
 * effects.
 */
# undef ONIGENC_IS_MBC_ASCII_WORD
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
    (rb_enc_asciicompat(enc) ? (ISALNUM(*(s)) || *(s)=='_') : \
   onigenc_ascii_is_code_ctype( \
        ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
#endif /* RUBY */
60
#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p decodes to 13 (CR) and the character that
 * follows it decodes to 10 (LF). */
# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
/* Option-aware newline test; forwards to is_mbc_newline_ex(). */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
/*
 * Newline test that honours the CRLF newline option.
 *
 * Without IS_NEWLINE_CRLF(option) the result is simply
 * ONIGENC_IS_MBC_NEWLINE(enc, p, end).
 *
 * With it:
 *  - a 0x0a at p yields 1, except that it yields 0 when check_prev is
 *    non-zero and the preceding character (searched from `start`) is 0x0d;
 *  - otherwise 1 is returned for a 0x0d immediately followed (before `end`)
 *    by 0x0a, or when ONIGENC_IS_MBC_NEWLINE accepts the character at p;
 *  - 0 in every remaining case.
 */
static int
is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
                  const UChar *end, OnigOptionType option, int check_prev)
{
  const UChar *neighbor;

  if (! IS_NEWLINE_CRLF(option))
    return ONIGENC_IS_MBC_NEWLINE(enc, p, end);

  if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
    if (! check_prev)
      return 1;

    /* An LF that directly follows a CR is not reported on its own. */
    neighbor = onigenc_get_prev_char_head(enc, start, p, end);
    if (neighbor != NULL && ONIGENC_MBC_TO_CODE(enc, neighbor, end) == 0x0d)
      return 0;
    return 1;
  }

  neighbor = p + enclen(enc, p, end);
  if (neighbor < end &&
      ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
      ONIGENC_MBC_TO_CODE(enc, neighbor, end) == 0x0a)
    return 1;

  return ONIGENC_IS_MBC_NEWLINE(enc, p, end) ? 1 : 0;
}
#else /* USE_CRNL_AS_LINE_TERMINATOR */
/* Without CRNL support the extra arguments are ignored. */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
129static void
130history_tree_free(OnigCaptureTreeNode* node)
131{
132 history_tree_clear(node);
133 xfree(node);
134}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
227extern OnigCaptureTreeNode*
228onig_get_capture_tree(OnigRegion* region)
229{
230 return region->history_root;
231}
232#endif /* USE_CAPTURE_HISTORY */
233
234#ifdef USE_MATCH_CACHE
235
236/*
237Glossary for "match cache"
238
239"match cache" or "match cache optimization"
240The `Regexp#match` optimization by using a cache.
241
242"cache opcode"
243A cacheable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc).
It corresponds to one or more cache points.
245
246"cache point"
247A cacheable point on matching.
Usually there is a one-to-one correspondence between a cache opcode and a cache point,
but a cache opcode between `OP_REPEAT` and `OP_REPEAT_INC` corresponds to several
cache points, depending on the repetition count.
251
252"match cache point"
253A pair of a cache point and a position on an input string.
254We encode a match cache point to an integer value by the following equation:
255"match cache point" = "position on input string" * "total number of cache points" + "cache point"
256
257"match cache buffer"
258A bit-array for memoizing (recording) match cache points once backtracked.
259*/
260
261static OnigPosition count_num_cache_opcodes_inner(
262 const regex_t* reg,
263 MemNumType current_repeat_mem, int lookaround_nesting,
264 UChar** pp, long* num_cache_opcodes_ptr
265)
266{
267 UChar* p = *pp;
268 UChar* pend = reg->p + reg->used;
269 LengthType len;
270 MemNumType repeat_mem;
271 OnigEncoding enc = reg->enc;
272 long num_cache_opcodes = *num_cache_opcodes_ptr;
273 OnigPosition result;
274
275 while (p < pend) {
276 switch (*p++) {
277 case OP_FINISH:
278 case OP_END:
279 break;
280
281 case OP_EXACT1: p++; break;
282 case OP_EXACT2: p += 2; break;
283 case OP_EXACT3: p += 3; break;
284 case OP_EXACT4: p += 4; break;
285 case OP_EXACT5: p += 5; break;
286 case OP_EXACTN:
287 GET_LENGTH_INC(len, p); p += len; break;
288 case OP_EXACTMB2N1: p += 2; break;
289 case OP_EXACTMB2N2: p += 4; break;
290 case OP_EXACTMB2N3: p += 6; break;
291 case OP_EXACTMB2N:
292 GET_LENGTH_INC(len, p); p += len * 2; break;
293 case OP_EXACTMB3N:
294 GET_LENGTH_INC(len, p); p += len * 3; break;
295 case OP_EXACTMBN:
296 {
297 int mb_len;
298 GET_LENGTH_INC(mb_len, p);
299 GET_LENGTH_INC(len, p);
300 p += mb_len * len;
301 }
302 break;
303
304 case OP_EXACT1_IC:
305 len = enclen(enc, p, pend); p += len; break;
306 case OP_EXACTN_IC:
307 GET_LENGTH_INC(len, p); p += len; break;
308
309 case OP_CCLASS:
310 case OP_CCLASS_NOT:
311 p += SIZE_BITSET; break;
312 case OP_CCLASS_MB:
313 case OP_CCLASS_MB_NOT:
314 GET_LENGTH_INC(len, p); p += len; break;
315 case OP_CCLASS_MIX:
316 case OP_CCLASS_MIX_NOT:
317 p += SIZE_BITSET;
318 GET_LENGTH_INC(len, p);
319 p += len;
320 break;
321
322 case OP_ANYCHAR:
323 case OP_ANYCHAR_ML:
324 break;
325 case OP_ANYCHAR_STAR:
326 case OP_ANYCHAR_ML_STAR:
327 num_cache_opcodes++; break;
328 case OP_ANYCHAR_STAR_PEEK_NEXT:
329 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
330 p++; num_cache_opcodes++; break;
331
332 case OP_WORD:
333 case OP_NOT_WORD:
334 case OP_WORD_BOUND:
335 case OP_NOT_WORD_BOUND:
336 case OP_WORD_BEGIN:
337 case OP_WORD_END:
338 break;
339
340 case OP_ASCII_WORD:
341 case OP_NOT_ASCII_WORD:
342 case OP_ASCII_WORD_BOUND:
343 case OP_NOT_ASCII_WORD_BOUND:
344 case OP_ASCII_WORD_BEGIN:
345 case OP_ASCII_WORD_END:
346 break;
347
348 case OP_BEGIN_BUF:
349 case OP_END_BUF:
350 case OP_BEGIN_LINE:
351 case OP_END_LINE:
352 case OP_SEMI_END_BUF:
353 case OP_BEGIN_POSITION:
354 break;
355
356 case OP_BACKREF1:
357 case OP_BACKREF2:
358 case OP_BACKREFN:
359 case OP_BACKREFN_IC:
360 case OP_BACKREF_MULTI:
361 case OP_BACKREF_MULTI_IC:
362 case OP_BACKREF_WITH_LEVEL:
363 goto impossible;
364
365 case OP_MEMORY_START:
366 case OP_MEMORY_START_PUSH:
367 case OP_MEMORY_END_PUSH:
368 case OP_MEMORY_END_PUSH_REC:
369 case OP_MEMORY_END:
370 case OP_MEMORY_END_REC:
371 p += SIZE_MEMNUM;
372 // A memory (capture) in look-around is found.
373 if (lookaround_nesting != 0) {
374 goto impossible;
375 }
376 break;
377
378 case OP_KEEP:
379 break;
380
381 case OP_FAIL:
382 break;
383 case OP_JUMP:
384 p += SIZE_RELADDR;
385 break;
386 case OP_PUSH:
387 p += SIZE_RELADDR;
388 num_cache_opcodes++;
389 break;
390 case OP_POP:
391 break;
392 case OP_PUSH_OR_JUMP_EXACT1:
393 case OP_PUSH_IF_PEEK_NEXT:
394 p += SIZE_RELADDR + 1; num_cache_opcodes++; break;
395 case OP_REPEAT:
396 case OP_REPEAT_NG:
397 if (current_repeat_mem != -1) {
398 // A nested OP_REPEAT is not yet supported.
399 goto impossible;
400 }
401 GET_MEMNUM_INC(repeat_mem, p);
402 p += SIZE_RELADDR;
403 if (reg->repeat_range[repeat_mem].lower == 0) {
404 num_cache_opcodes++;
405 }
406 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes);
407 if (result < 0 || num_cache_opcodes < 0) {
408 goto fail;
409 }
410 {
411 OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
412 if (repeat_range->lower < repeat_range->upper) {
413 num_cache_opcodes++;
414 }
415 }
416 break;
417 case OP_REPEAT_INC:
418 case OP_REPEAT_INC_NG:
419 GET_MEMNUM_INC(repeat_mem, p);
420 if (repeat_mem != current_repeat_mem) {
421 // A lone or invalid OP_REPEAT_INC is found.
422 goto impossible;
423 }
424 goto exit;
425 case OP_REPEAT_INC_SG:
426 case OP_REPEAT_INC_NG_SG:
427 goto impossible;
428 case OP_NULL_CHECK_START:
429 p += SIZE_MEMNUM;
430 break;
431 case OP_NULL_CHECK_END:
432 case OP_NULL_CHECK_END_MEMST_PUSH:
433 p += SIZE_MEMNUM;
434 break;
435 case OP_NULL_CHECK_END_MEMST:
436 p += SIZE_MEMNUM;
437 break;
438
439 case OP_PUSH_POS:
440 if (lookaround_nesting < 0) {
441 // A look-around nested in a atomic grouping is found.
442 goto impossible;
443 }
444 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
445 if (result < 0 || num_cache_opcodes < 0) {
446 goto fail;
447 }
448 break;
449 case OP_PUSH_POS_NOT:
450 if (lookaround_nesting < 0) {
451 // A look-around nested in a atomic grouping is found.
452 goto impossible;
453 }
454 p += SIZE_RELADDR;
455 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
456 if (result < 0 || num_cache_opcodes < 0) {
457 goto fail;
458 }
459 break;
460 case OP_PUSH_LOOK_BEHIND_NOT:
461 if (lookaround_nesting < 0) {
462 // A look-around nested in a atomic grouping is found.
463 goto impossible;
464 }
465 p += SIZE_RELADDR;
466 p += SIZE_LENGTH;
467 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
468 if (result < 0 || num_cache_opcodes < 0) {
469 goto fail;
470 }
471 break;
472 case OP_PUSH_STOP_BT:
473 if (lookaround_nesting != 0) {
474 // A nested atomic grouping is found.
475 goto impossible;
476 }
477 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes);
478 if (result < 0 || num_cache_opcodes < 0) {
479 goto fail;
480 }
481 break;
482 case OP_POP_POS:
483 case OP_FAIL_POS:
484 case OP_FAIL_LOOK_BEHIND_NOT:
485 case OP_POP_STOP_BT:
486 goto exit;
487 case OP_LOOK_BEHIND:
488 p += SIZE_LENGTH;
489 break;
490
491 case OP_PUSH_ABSENT_POS:
492 case OP_ABSENT_END:
493 case OP_ABSENT:
494 goto impossible;
495
496 case OP_CALL:
497 case OP_RETURN:
498 goto impossible;
499
500 case OP_CONDITION:
501 goto impossible;
502
503 case OP_STATE_CHECK_PUSH:
504 case OP_STATE_CHECK_PUSH_OR_JUMP:
505 case OP_STATE_CHECK:
506 case OP_STATE_CHECK_ANYCHAR_STAR:
507 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
508 goto impossible;
509
510 case OP_SET_OPTION_PUSH:
511 case OP_SET_OPTION:
512 p += SIZE_OPTION;
513 break;
514
515 default:
516 goto bytecode_error;
517 }
518 }
519
520exit:
521 *pp = p;
522 *num_cache_opcodes_ptr = num_cache_opcodes;
523 return 0;
524
525fail:
526 *num_cache_opcodes_ptr = num_cache_opcodes;
527 return result;
528
529impossible:
530 *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE;
531 return 0;
532
533bytecode_error:
534 return ONIGERR_UNDEFINED_BYTECODE;
535}
536
537/* count the total number of cache opcodes for allocating a match cache buffer. */
538static OnigPosition
539count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
540{
541 UChar* p = reg->p;
542 *num_cache_opcodes_ptr = 0;
543 OnigPosition result = count_num_cache_opcodes_inner(reg, -1, 0, &p, num_cache_opcodes_ptr);
544 if (result == 0 && *num_cache_opcodes_ptr >= 0 && p != reg->p + reg->used) {
545 return ONIGERR_UNDEFINED_BYTECODE;
546 }
547
548 return result;
549}
550
551static OnigPosition
552init_cache_opcodes_inner(
553 const regex_t* reg,
554 MemNumType current_repeat_mem, int lookaround_nesting,
555 OnigCacheOpcode** cache_opcodes_ptr, UChar** pp, long* num_cache_points_ptr
556)
557{
558 UChar* p = *pp;
559 UChar* pend = reg->p + reg->used;
560 UChar* pbegin;
561 LengthType len;
562 MemNumType repeat_mem;
563 OnigEncoding enc = reg->enc;
564 long cache_point = *num_cache_points_ptr;
565 OnigCacheOpcode *cache_opcodes = *cache_opcodes_ptr;
566 OnigPosition result;
567
568# define INC_CACHE_OPCODES do {\
569 cache_opcodes->addr = pbegin;\
570 cache_opcodes->cache_point = cache_point;\
571 cache_opcodes->outer_repeat_mem = current_repeat_mem;\
572 cache_opcodes->num_cache_points_at_outer_repeat = 0;\
573 cache_opcodes->num_cache_points_in_outer_repeat = 0;\
574 cache_opcodes->lookaround_nesting = lookaround_nesting;\
575 cache_opcodes->match_addr = NULL;\
576 cache_point += lookaround_nesting != 0 ? 2 : 1;\
577 cache_opcodes++;\
578 } while (0)
579
580 while (p < pend) {
581 pbegin = p;
582 switch (*p++) {
583 case OP_FINISH:
584 case OP_END:
585 break;
586
587 case OP_EXACT1: p++; break;
588 case OP_EXACT2: p += 2; break;
589 case OP_EXACT3: p += 3; break;
590 case OP_EXACT4: p += 4; break;
591 case OP_EXACT5: p += 5; break;
592 case OP_EXACTN:
593 GET_LENGTH_INC(len, p); p += len; break;
594 case OP_EXACTMB2N1: p += 2; break;
595 case OP_EXACTMB2N2: p += 4; break;
596 case OP_EXACTMB2N3: p += 6; break;
597 case OP_EXACTMB2N:
598 GET_LENGTH_INC(len, p); p += len * 2; break;
599 case OP_EXACTMB3N:
600 GET_LENGTH_INC(len, p); p += len * 3; break;
601 case OP_EXACTMBN:
602 {
603 int mb_len;
604 GET_LENGTH_INC(mb_len, p);
605 GET_LENGTH_INC(len, p);
606 p += mb_len * len;
607 }
608 break;
609
610 case OP_EXACT1_IC:
611 len = enclen(enc, p, pend); p += len; break;
612 case OP_EXACTN_IC:
613 GET_LENGTH_INC(len, p); p += len; break;
614
615 case OP_CCLASS:
616 case OP_CCLASS_NOT:
617 p += SIZE_BITSET; break;
618 case OP_CCLASS_MB:
619 case OP_CCLASS_MB_NOT:
620 GET_LENGTH_INC(len, p); p += len; break;
621 case OP_CCLASS_MIX:
622 case OP_CCLASS_MIX_NOT:
623 p += SIZE_BITSET;
624 GET_LENGTH_INC(len, p);
625 p += len;
626 break;
627
628 case OP_ANYCHAR:
629 case OP_ANYCHAR_ML:
630 break;
631 case OP_ANYCHAR_STAR:
632 case OP_ANYCHAR_ML_STAR:
633 INC_CACHE_OPCODES;
634 break;
635 case OP_ANYCHAR_STAR_PEEK_NEXT:
636 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
637 p++;
638 INC_CACHE_OPCODES;
639 break;
640
641 case OP_WORD:
642 case OP_NOT_WORD:
643 case OP_WORD_BOUND:
644 case OP_NOT_WORD_BOUND:
645 case OP_WORD_BEGIN:
646 case OP_WORD_END:
647 break;
648
649 case OP_ASCII_WORD:
650 case OP_NOT_ASCII_WORD:
651 case OP_ASCII_WORD_BOUND:
652 case OP_NOT_ASCII_WORD_BOUND:
653 case OP_ASCII_WORD_BEGIN:
654 case OP_ASCII_WORD_END:
655 break;
656
657 case OP_BEGIN_BUF:
658 case OP_END_BUF:
659 case OP_BEGIN_LINE:
660 case OP_END_LINE:
661 case OP_SEMI_END_BUF:
662 case OP_BEGIN_POSITION:
663 break;
664
665 case OP_BACKREF1:
666 case OP_BACKREF2:
667 case OP_BACKREFN:
668 case OP_BACKREFN_IC:
669 case OP_BACKREF_MULTI:
670 case OP_BACKREF_MULTI_IC:
671 case OP_BACKREF_WITH_LEVEL:
672 goto unexpected_bytecode_error;
673
674 case OP_MEMORY_START:
675 case OP_MEMORY_START_PUSH:
676 case OP_MEMORY_END_PUSH:
677 case OP_MEMORY_END_PUSH_REC:
678 case OP_MEMORY_END:
679 case OP_MEMORY_END_REC:
680 p += SIZE_MEMNUM;
681 if (lookaround_nesting != 0) {
682 goto unexpected_bytecode_error;
683 }
684 break;
685
686 case OP_KEEP:
687 break;
688
689 case OP_FAIL:
690 break;
691 case OP_JUMP:
692 p += SIZE_RELADDR;
693 break;
694 case OP_PUSH:
695 p += SIZE_RELADDR;
696 INC_CACHE_OPCODES;
697 break;
698 case OP_POP:
699 break;
700 case OP_PUSH_OR_JUMP_EXACT1:
701 case OP_PUSH_IF_PEEK_NEXT:
702 p += SIZE_RELADDR + 1;
703 INC_CACHE_OPCODES;
704 break;
705 case OP_REPEAT:
706 case OP_REPEAT_NG:
707 GET_MEMNUM_INC(repeat_mem, p);
708 p += SIZE_RELADDR;
709 if (reg->repeat_range[repeat_mem].lower == 0) {
710 INC_CACHE_OPCODES;
711 }
712 {
713 long num_cache_points_in_repeat = 0;
714 long num_cache_points_at_repeat = cache_point;
715 OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes;
716 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat);
717 if (result != 0) {
718 goto fail;
719 }
720 OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
721 if (repeat_range->lower < repeat_range->upper) {
722 INC_CACHE_OPCODES;
723 cache_point -= lookaround_nesting != 0 ? 2 : 1;
724 }
725 int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower;
726 cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds;
727 for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) {
728 cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;
729 cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat;
730 }
731 }
732 break;
733 case OP_REPEAT_INC:
734 case OP_REPEAT_INC_NG:
735 p += SIZE_MEMNUM;
736 goto exit;
737 case OP_REPEAT_INC_SG:
738 case OP_REPEAT_INC_NG_SG:
739 goto unexpected_bytecode_error;
740 case OP_NULL_CHECK_START:
741 p += SIZE_MEMNUM;
742 break;
743 case OP_NULL_CHECK_END:
744 case OP_NULL_CHECK_END_MEMST_PUSH:
745 p += SIZE_MEMNUM;
746 break;
747 case OP_NULL_CHECK_END_MEMST:
748 p += SIZE_MEMNUM;
749 break;
750
751 case OP_PUSH_POS:
752 lookaround:
753 {
754 OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes;
755 result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point);
756 if (result != 0) {
757 goto fail;
758 }
759 UChar* match_addr = p - 1;
760 for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) {
761 if (cache_opcodes_in_lookaround->match_addr == NULL) {
762 cache_opcodes_in_lookaround->match_addr = match_addr;
763 }
764 }
765 }
766 break;
767 case OP_PUSH_POS_NOT:
768 p += SIZE_RELADDR;
769 goto lookaround;
770 case OP_PUSH_LOOK_BEHIND_NOT:
771 p += SIZE_RELADDR;
772 p += SIZE_LENGTH;
773 goto lookaround;
774 case OP_PUSH_STOP_BT:
775 {
776 OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes;
777 result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point);
778 if (result != 0) {
779 goto fail;
780 }
781 UChar* match_addr = p - 1;
782 for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) {
783 if (cache_opcodes_in_atomic->match_addr == NULL) {
784 cache_opcodes_in_atomic->match_addr = match_addr;
785 }
786 }
787 }
788 break;
789 case OP_POP_POS:
790 case OP_FAIL_POS:
791 case OP_FAIL_LOOK_BEHIND_NOT:
792 case OP_POP_STOP_BT:
793 goto exit;
794 case OP_LOOK_BEHIND:
795 p += SIZE_LENGTH;
796 break;
797
798 case OP_ABSENT_END:
799 case OP_ABSENT:
800 goto unexpected_bytecode_error;
801
802 case OP_CALL:
803 case OP_RETURN:
804 goto unexpected_bytecode_error;
805
806 case OP_CONDITION:
807 goto unexpected_bytecode_error;
808
809 case OP_STATE_CHECK_PUSH:
810 case OP_STATE_CHECK_PUSH_OR_JUMP:
811 case OP_STATE_CHECK:
812 case OP_STATE_CHECK_ANYCHAR_STAR:
813 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
814 goto unexpected_bytecode_error;
815
816 case OP_SET_OPTION_PUSH:
817 case OP_SET_OPTION:
818 p += SIZE_OPTION;
819 break;
820
821 default:
822 goto bytecode_error;
823 }
824 }
825
826exit:
827 *cache_opcodes_ptr = cache_opcodes;
828 *pp = p;
829 *num_cache_points_ptr = cache_point;
830 return 0;
831
832fail:
833 return result;
834
835unexpected_bytecode_error:
836 return ONIGERR_UNEXPECTED_BYTECODE;
837
838bytecode_error:
839 return ONIGERR_UNDEFINED_BYTECODE;
840}
841
842/* collect cache opcodes from the given regex program, and compute the total number of cache points. */
843static OnigPosition
844init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes_ptr, long* num_cache_points_ptr)
845{
846 UChar* p = reg->p;
847 *num_cache_points_ptr = 0;
848 OnigPosition result = init_cache_opcodes_inner(reg, -1, 0, &cache_opcodes_ptr, &p, num_cache_points_ptr);
849 if (result == 0 && p != reg->p + reg->used) {
850 return ONIGERR_UNDEFINED_BYTECODE;
851 }
852
853 return result;
854}
855#else
856static OnigPosition
857count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes)
858{
859 *num_cache_opcodes = NUM_CACHE_OPCODES_IMPOSSIBLE;
860 return 0;
861}
862#endif /* USE_MATCH_CACHE */
863
864extern int
865onig_check_linear_time(OnigRegexType* reg)
866{
867 long num_cache_opcodes = 0;
868 count_num_cache_opcodes(reg, &num_cache_opcodes);
869 return num_cache_opcodes != NUM_CACHE_OPCODES_IMPOSSIBLE;
870}
871
872extern void
873onig_region_clear(OnigRegion* region)
874{
875 int i;
876
877 for (i = 0; i < region->num_regs; i++) {
878 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
879 }
880#ifdef USE_CAPTURE_HISTORY
881 history_root_free(region);
882#endif
883}
884
885extern int
886onig_region_resize(OnigRegion* region, int n)
887{
888 region->num_regs = n;
889
890 if (n < ONIG_NREGION)
891 n = ONIG_NREGION;
892
893 if (region->allocated == 0) {
894 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
895 if (region->beg == 0)
896 return ONIGERR_MEMORY;
897
898 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
899 if (region->end == 0) {
900 xfree(region->beg);
901 return ONIGERR_MEMORY;
902 }
903
904 region->allocated = n;
905 }
906 else if (region->allocated < n) {
907 OnigPosition *tmp;
908
909 region->allocated = 0;
910 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
911 if (tmp == 0) {
912 xfree(region->beg);
913 xfree(region->end);
914 return ONIGERR_MEMORY;
915 }
916 region->beg = tmp;
917 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
918 if (tmp == 0) {
919 xfree(region->beg);
920 xfree(region->end);
921 return ONIGERR_MEMORY;
922 }
923 region->end = tmp;
924
925 region->allocated = n;
926 }
927
928 return 0;
929}
930
931static int
932onig_region_resize_clear(OnigRegion* region, int n)
933{
934 int r;
935
936 r = onig_region_resize(region, n);
937 if (r != 0) return r;
938 onig_region_clear(region);
939 return 0;
940}
941
942extern int
943onig_region_set(OnigRegion* region, int at, int beg, int end)
944{
945 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
946
947 if (at >= region->allocated) {
948 int r = onig_region_resize(region, at + 1);
949 if (r < 0) return r;
950 }
951
952 region->beg[at] = beg;
953 region->end[at] = end;
954 return 0;
955}
956
957extern void
958onig_region_init(OnigRegion* region)
959{
960 region->num_regs = 0;
961 region->allocated = 0;
962 region->beg = (OnigPosition* )0;
963 region->end = (OnigPosition* )0;
964#ifdef USE_CAPTURE_HISTORY
965 region->history_root = (OnigCaptureTreeNode* )0;
966#endif
967}
968
969extern OnigRegion*
970onig_region_new(void)
971{
972 OnigRegion* r;
973
974 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
975 if (r)
976 onig_region_init(r);
977 return r;
978}
979
980extern void
981onig_region_free(OnigRegion* r, int free_self)
982{
983 if (r) {
984 if (r->allocated > 0) {
985 xfree(r->beg);
986 xfree(r->end);
987 }
988#ifdef USE_CAPTURE_HISTORY
989 history_root_free(r);
990#endif
991 if (free_self) {
992 xfree(r);
993 }
994 else {
995 memset(r, 0, sizeof(OnigRegion));
996 }
997 }
998}
999
1000extern void
1001onig_region_copy(OnigRegion* to, const OnigRegion* from)
1002{
1003#define RREGC_SIZE (sizeof(int) * from->num_regs)
1004 int i, r;
1005
1006 if (to == from) return;
1007
1008 r = onig_region_resize(to, from->num_regs);
1009 if (r) return;
1010
1011 for (i = 0; i < from->num_regs; i++) {
1012 to->beg[i] = from->beg[i];
1013 to->end[i] = from->end[i];
1014 }
1015 to->num_regs = from->num_regs;
1016
1017#ifdef USE_CAPTURE_HISTORY
1018 history_root_free(to);
1019
1020 if (IS_NOT_NULL(from->history_root)) {
1021 to->history_root = history_tree_clone(from->history_root);
1022 }
1023#endif
1024}
1025
1026
/* Sentinel value for a stack index.
 * NOTE(review): presumably means "no stack entry" -- confirm at the use sites. */
1028#define INVALID_STACK_INDEX -1
1029
/* Type tags for entries of the matcher's backtrack stack.  The tags are
 * grouped by how the normal POP treats them (see the group comments). */
1030/* stack type */
1031/* used by normal-POP */
1032#define STK_ALT 0x0001
1033#define STK_LOOK_BEHIND_NOT 0x0002
1034#define STK_POS_NOT 0x0003
1035/* handled by normal-POP */
1036#define STK_MEM_START 0x0100
1037#define STK_MEM_END 0x8200
1038#define STK_REPEAT_INC 0x0300
1039#define STK_STATE_CHECK_MARK 0x1000
1040/* avoided by normal-POP */
1041#define STK_NULL_CHECK_START 0x3000
1042#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
1043#define STK_MEM_END_MARK 0x8400
1044#define STK_POS 0x0500 /* used when POP-POS */
1045#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
1046#define STK_REPEAT 0x0700
1047#define STK_CALL_FRAME 0x0800
1048#define STK_RETURN 0x0900
1049#define STK_VOID 0x0a00 /* for fill a blank */
1050#define STK_ABSENT_POS 0x0b00 /* for absent */
1051#define STK_ABSENT 0x0c00 /* absent inner loop marker */
1052#define STK_MATCH_CACHE_POINT 0x0d00 /* for the match cache optimization */
1053#define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00
1054
/* Masks over the tags above:
 *  - STK_MASK_POP_USED selects the low byte, which is non-zero only for the
 *    "used by normal-POP" tags (0x0001..0x0003);
 *  - STK_MASK_TO_VOID_TARGET is that low byte plus the 0x1000 bit of
 *    STK_STATE_CHECK_MARK;
 *  - STK_MASK_MEM_END_OR_MARK is the 0x8000 bit shared by STK_MEM_END and
 *    STK_MEM_END_MARK. */
1055/* stack type check mask */
1056#define STK_MASK_POP_USED 0x00ff
1057#define STK_MASK_TO_VOID_TARGET 0x10ff
1058#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
1059
/* Match-cache fields of a match argument block `msa` (a struct lvalue, not a
 * pointer).  Both macros expand to nothing when USE_MATCH_CACHE is not
 * defined. */
1060#ifdef USE_MATCH_CACHE
/* Puts the match-cache fields into the "not initialized" state: status and
 * opcode count set to their UNINIT values, counters zeroed, both buffers
 * NULL. */
1061#define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\
1062 (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;\
1063 (msa).num_fails = 0;\
1064 (msa).num_cache_opcodes = NUM_CACHE_OPCODES_UNINIT;\
1065 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1066 (msa).num_cache_points = 0;\
1067 (msa).match_cache_buf = (uint8_t*)NULL;\
1068} while(0)
/* Frees the two match-cache buffers and resets their pointers to NULL; the
 * remaining fields are left untouched. */
1069#define MATCH_ARG_FREE_MATCH_CACHE(msa) do {\
1070 xfree((msa).cache_opcodes);\
1071 xfree((msa).match_cache_buf);\
1072 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1073 (msa).match_cache_buf = (uint8_t*)NULL;\
1074} while(0)
1075#else
1076#define MATCH_ARG_INIT_MATCH_CACHE(msa)
1077#define MATCH_ARG_FREE_MATCH_CACHE(msa)
1078#endif
1079
/* MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos)
 * initializes a match argument block `msa` (a struct lvalue): no saved stack
 * (stack_p NULL), the given options/region/start/gpos, counter and end_time
 * zeroed, and the match-cache fields reset via MATCH_ARG_INIT_MATCH_CACHE.
 * The two variants differ only in that the first also sets best_len to
 * ONIG_MISMATCH. */
1080#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1081# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1082 (msa).stack_p = (void* )0;\
1083 (msa).options = (arg_option);\
1084 (msa).region = (arg_region);\
1085 (msa).start = (arg_start);\
1086 (msa).gpos = (arg_gpos);\
1087 (msa).best_len = ONIG_MISMATCH;\
1088 (msa).counter = 0;\
1089 (msa).end_time = 0;\
1090 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1091} while(0)
1092#else
1093# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1094 (msa).stack_p = (void* )0;\
1095 (msa).options = (arg_option);\
1096 (msa).region = (arg_region);\
1097 (msa).start = (arg_start);\
1098 (msa).gpos = (arg_gpos);\
1099 (msa).counter = 0;\
1100 (msa).end_time = 0;\
1101 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1102} while(0)
1103#endif
1104
1105#ifdef USE_COMBINATION_EXPLOSION_CHECK
1106
/* State-check buffers of at least this many bytes come from xmalloc (and are
 * released by MATCH_ARG_FREE); smaller ones come from xalloca. */
1107# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
1108
/* STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
 *
 * Sets up (msa).state_check_buff / (msa).state_check_buff_size.
 *
 * A buffer is created only when state_num > 0 and str_len is at least
 * STATE_CHECK_STRING_THRESHOLD_LEN.  Its size in bytes is
 * ((str_len + 1) * state_num + 7) >> 3, and it is used only if that size is
 * non-zero, larger than the converted offset and below
 * STATE_CHECK_BUFF_MAX_SIZE.  In every other case the buffer pointer is NULL
 * and the size 0.
 *
 * `offset` must be an lvalue: it is overwritten with
 * (offset * state_num) >> 3.  Only the bytes from that offset to the end of
 * the buffer are zeroed.
 *
 * NOTE(review): CHECK_NULL_RETURN_MEMERR presumably returns ONIGERR_MEMORY
 * from the function that expands this macro when xmalloc fails -- confirm
 * against its definition. */
1109# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
1110 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
1111 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
1112 offset = ((offset) * (state_num)) >> 3;\
1113 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
1114 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
1115 (msa).state_check_buff = (void* )xmalloc(size);\
1116 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
1117 }\
1118 else \
1119 (msa).state_check_buff = (void* )xalloca(size);\
1120 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
1121 (size_t )(size - (offset))); \
1122 (msa).state_check_buff_size = size;\
1123 }\
1124 else {\
1125 (msa).state_check_buff = (void* )0;\
1126 (msa).state_check_buff_size = 0;\
1127 }\
1128 }\
1129 else {\
1130 (msa).state_check_buff = (void* )0;\
1131 (msa).state_check_buff_size = 0;\
1132 }\
1133 } while(0)
1134
/* Releases what a match argument block owns: the saved stack, the
 * state-check buffer (only when its size shows it was obtained with xmalloc
 * rather than xalloca) and the match-cache buffers. */
1135# define MATCH_ARG_FREE(msa) do {\
1136 xfree((msa).stack_p);\
1137 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
1138 xfree((msa).state_check_buff);\
1139 }\
1140 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1141} while(0)
1142#else /* USE_COMBINATION_EXPLOSION_CHECK */
/* Without the combination-explosion check there is no state-check buffer;
 * only the saved stack and the match-cache buffers are released. */
1143# define MATCH_ARG_FREE(msa) do {\
1144 xfree((msa).stack_p);\
1145 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1146} while (0)
1147#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1148
1149
1150
/* Largest ptr_num for which STACK_INIT takes the pointer area from xalloca;
 * above it the area comes from xmalloc. */
1151#define MAX_PTR_NUM 100
1152
/* STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)
 *
 * Sets up the pointer area (alloc_addr, ptr_num OnigStackIndex slots) and the
 * backtrack stack.  Besides its arguments the macro reads `msa` (a pointer)
 * and assigns stk_alloc, stk_base, stk and stk_end, all of which must exist
 * in the expanding scope.
 *
 *  - ptr_num > MAX_PTR_NUM: the pointer area is xmalloc'ed and heap_addr is
 *    set to it; the stack is the one saved in msa->stack_p (msa->stack_n
 *    entries) if present, otherwise stack_num entries from xalloca.
 *  - otherwise, with a saved stack: the pointer area comes from xalloca,
 *    heap_addr is NULL and the saved stack is reused.
 *  - otherwise: a single xalloca block holds the pointer area followed by
 *    stack_num stack entries; heap_addr is NULL.
 *
 * NOTE(review): the xmalloc result is not checked for NULL inside this
 * macro. */
1153#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
1154 if (ptr_num > MAX_PTR_NUM) {\
1155 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
1156 heap_addr = alloc_addr;\
1157 if (msa->stack_p) {\
1158 stk_alloc = (OnigStackType* )(msa->stack_p);\
1159 stk_base = stk_alloc;\
1160 stk = stk_base;\
1161 stk_end = stk_base + msa->stack_n;\
1162 } else {\
1163 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
1164 stk_base = stk_alloc;\
1165 stk = stk_base;\
1166 stk_end = stk_base + (stack_num);\
1167 }\
1168 } else if (msa->stack_p) {\
1169 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
1170 heap_addr = NULL;\
1171 stk_alloc = (OnigStackType* )(msa->stack_p);\
1172 stk_base = stk_alloc;\
1173 stk = stk_base;\
1174 stk_end = stk_base + msa->stack_n;\
1175 }\
1176 else {\
1177 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
1178 + sizeof(OnigStackType) * (stack_num));\
1179 heap_addr = NULL;\
1180 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
1181 stk_base = stk_alloc;\
1182 stk = stk_base;\
1183 stk_end = stk_base + (stack_num);\
1184 }\
1185} while(0)
1186
/* Remember the current stack in `msa` when it no longer lives at its initial
 * allocation (stk_base != stk_alloc): store its base in msa->stack_p and its
 * capacity in entries in msa->stack_n.  Uses `stk_base`, `stk_alloc`,
 * `stk_end` and `msa` from the expanding scope. */
#define STACK_SAVE do{\
  if (stk_base != stk_alloc) {\
    msa->stack_p = stk_base;\
    msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
  };\
} while(0)
1193
1194static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1195
1196extern unsigned int
1197onig_get_match_stack_limit_size(void)
1198{
1199 return MatchStackLimitSize;
1200}
1201
1202extern int
1203onig_set_match_stack_limit_size(unsigned int size)
1204{
1205 MatchStackLimitSize = size;
1206 return 0;
1207}
1208
1209static int
1210stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
1211 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
1212{
1213 size_t n;
1214 OnigStackType *x, *stk_base, *stk_end, *stk;
1215
1216 stk_base = *arg_stk_base;
1217 stk_end = *arg_stk_end;
1218 stk = *arg_stk;
1219
1220 n = stk_end - stk_base;
1221 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1222 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
1223 if (IS_NULL(x)) {
1224 STACK_SAVE;
1225 return ONIGERR_MEMORY;
1226 }
1227 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
1228 n *= 2;
1229 }
1230 else {
1231 unsigned int limit_size = MatchStackLimitSize;
1232 n *= 2;
1233 if (limit_size != 0 && n > limit_size) {
1234 if ((unsigned int )(stk_end - stk_base) == limit_size)
1235 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1236 else
1237 n = limit_size;
1238 }
1239 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
1240 if (IS_NULL(x)) {
1241 STACK_SAVE;
1242 return ONIGERR_MEMORY;
1243 }
1244 }
1245 *arg_stk = x + (stk - stk_base);
1246 *arg_stk_base = x;
1247 *arg_stk_end = x + n;
1248 return 0;
1249}
1250
/* Make room on the match stack: when fewer than `n` entries remain between
 * `stk` and `stk_end`, call stack_double() once.  On failure the stack is
 * saved into `msa`, `xmalloc_base` is freed and the error code is RETURNED
 * from the enclosing function. */
#define STACK_ENSURE(n) do {\
  if (stk_end - stk < (n)) {\
    int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (r != 0) {\
      STACK_SAVE;\
      xfree(xmalloc_base);\
      return r;\
    }\
  }\
} while(0)

/* Convert between a stack index and a stack entry pointer, relative to the
 * current `stk_base`. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)

/* Push an entry that carries only a type.  null_check is inherited from the
 * entry below (0 for the very first entry). */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  STACK_INC;\
} while(0)

/* True when the entry's type has any STK_MASK_TO_VOID_TARGET bit set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1273
1274#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit position in state_check_buff for string position `s` and state number
 * `snum` (state numbers are 1-based here: snum - 1 is used).  Reads `str`
 * and `num_comb_exp_check` from the expanding scope. */
# define STATE_CHECK_POS(s,snum) \
  (((s) - str) * num_comb_exp_check + ((snum) - 1))
/* Store into `v` the (non-normalized) bit for state `snum` at the current
 * position; 0 when there is no state-check buffer.  Note that the position
 * is taken from the variable `s` of the expanding scope, not from a macro
 * parameter. */
# define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff != NULL) {\
    ptrdiff_t x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
  else (v) = 0;\
} while(0)


/* `else if` arm for the pop loops: when the popped entry is a
 * STK_STATE_CHECK_MARK, set the corresponding bit in state_check_buff. */
# define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8)); \
  }
1291
/*
 * Push macros for builds with USE_COMBINATION_EXPLOSION_CHECK.
 *
 * Every macro below also propagates `null_check` from the entry beneath
 * (0 for the first entry), exactly like the push macros used when the
 * explosion check is disabled.  The STACK_NULL_CHECK* macros start their
 * search at (stk-1)->null_check, so an entry pushed without that field
 * would make them start from an indeterminate index.
 */

/* Push a backtrack state: pattern position, string position, previous
 * character position and \K position.  state_check is cleared. */
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = 0;\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Push a type + pattern position without checking capacity; the caller
 * must already have ensured room for the entry. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Push an STK_ALT state that remembers state number `snum` (or 0 when no
 * state-check buffer is in use). */
# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ALT;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Push a STK_STATE_CHECK_MARK for (s, snum); does nothing when there is no
 * state-check buffer. */
# define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type = STK_STATE_CHECK_MARK;\
    stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
    stk->u.state.pstr = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)
1330
1331#else /* USE_COMBINATION_EXPLOSION_CHECK */
1332
/* Without the combination-explosion check there are no state-check marks,
 * so the extra `else if` arm in the pop loops expands to nothing. */
# define ELSE_IF_STATE_CHECK_MARK(stk)

/* Push a backtrack state: pattern position, string position, previous
 * character position and \K position.  null_check is inherited from the
 * entry below (0 for the first entry). */
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Same as STACK_PUSH but stores only type and pattern position and does NOT
 * call STACK_ENSURE: the caller must already have ensured room. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
1352#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1353
/* Typed shorthands over STACK_PUSH / STACK_PUSH_TYPE. */
#define STACK_PUSH_ALT(pat,s,sprev,keep)     STACK_PUSH(STK_ALT,pat,s,sprev,keep)
#define STACK_PUSH_POS(s,sprev,keep)         STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
#define STACK_PUSH_ABSENT                    STACK_PUSH_TYPE(STK_ABSENT)
#define STACK_PUSH_STOP_BT                   STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
        STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)

/* Push a STK_REPEAT entry for repeat number `id` with pattern position
 * `pat`; its iteration count starts at 0. */
#define STACK_PUSH_REPEAT(id, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.repeat.num    = (id);\
  stk->u.repeat.pcode  = (pat);\
  stk->u.repeat.count  = 0;\
  STACK_INC;\
} while(0)

/* Push a STK_REPEAT_INC entry holding the stack index of the STK_REPEAT it
 * belongs to; the pop loops decrement that entry's count when this is
 * popped. */
#define STACK_PUSH_REPEAT_INC(sindex) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.repeat_inc.si  = (sindex);\
  STACK_INC;\
} while(0)

/* Record the start of capture group `mnum` at string position `s`.  The
 * previous mem_start_stk/mem_end_stk values are saved in the entry so they
 * can be restored on pop; mem_start_stk[mnum] then points at this entry and
 * mem_end_stk[mnum] becomes INVALID_STACK_INDEX. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.mem.num      = (mnum);\
  stk->u.mem.pstr     = (s);\
  stk->u.mem.start    = mem_start_stk[mnum];\
  stk->u.mem.end      = mem_end_stk[mnum];\
  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Record the end of capture group `mnum` at string position `s`.  The
 * previous mem_start_stk/mem_end_stk values are saved in the entry, and
 * mem_end_stk[mnum] then points at this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.mem.num    = (mnum);\
  stk->u.mem.pstr   = (s);\
  stk->u.mem.start  = mem_start_stk[mnum];\
  stk->u.mem.end    = mem_end_stk[mnum];\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push a STK_MEM_END_MARK for group `mnum`; only the group number is
 * stored (no string position, no change to mem_end_stk). */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.mem.num = (mnum);\
  STACK_INC;\
} while(0)
1412
/* Walk down from the stack top to find the STK_MEM_START entry of group
 * `mnum` that is not closed by an end/end-mark entry of the same group seen
 * on the way (tracked with `level`).  On exit `k` points at that entry, or
 * at stk_base if the walk ran out of entries. */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  k = stk;\
  while (k > stk_base) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->u.mem.num == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)

/* Walk up from `k` towards the stack top and extract the outermost
 * start/end string positions of group `mnum`: `start` is taken from the
 * first STK_MEM_START seen at nesting level 0, `end` from the STK_MEM_END
 * that brings the level back to 0.  `k` is advanced in place; `start`/`end`
 * are left untouched if no matching entry is found before `stk`. */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
1446
/* Push a STK_NULL_CHECK_START for check number `cnum` at string position
 * `s`.  Unlike the other push macros, null_check is set to this entry's own
 * stack index, which is what later entries inherit. */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_START;\
  stk->null_check = (OnigStackIndex)(stk - stk_base);\
  stk->u.null_check.num  = (cnum);\
  stk->u.null_check.pstr = (s);\
  STACK_INC;\
} while(0)

/* Push a STK_NULL_CHECK_END for check number `cnum`; null_check is set to
 * this entry's own stack index. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_END;\
  stk->null_check = (OnigStackIndex)(stk - stk_base);\
  stk->u.null_check.num  = (cnum);\
  STACK_INC;\
} while(0)

/* Push a STK_CALL_FRAME remembering the pattern address to return to. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a STK_RETURN marker (balances a STK_CALL_FRAME in the level
 * counting done by STACK_RETURN and STACK_GET_REPEAT). */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  STACK_INC;\
} while(0)

/* Push a STK_ABSENT_POS holding the two string positions `start` and `end`;
 * STACK_POP_ABSENT_POS reads them back. */
#define STACK_PUSH_ABSENT_POS(start, end) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ABSENT_POS;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.absent_pos.abs_pstr = (start);\
  stk->u.absent_pos.end_pstr = (end);\
  STACK_INC;\
} while(0)

/* Push a STK_MATCH_CACHE_POINT holding the byte index and bit mask of a
 * match-cache slot; the MEMOIZE_* macros set that bit when the entry is
 * popped. */
#define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MATCH_CACHE_POINT;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.match_cache_point.index = (match_cache_point_index);\
  stk->u.match_cache_point.mask = (match_cache_point_mask);\
  STACK_INC;\
} while(0)
1496
1497
/* Debug-only underflow guard for stack walks: when pointer `p` has moved
 * below stk_base, print the label `at` to stderr and jump to the
 * `stack_error` label of the enclosing function.  Expands to nothing in
 * non-debug builds. */
#ifdef ONIG_DEBUG
# define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", at);\
    goto stack_error;\
  }
#else
# define STACK_BASE_CHECK(p, at)
#endif
1507
/* Debug trace printed whenever a match-cache point is memoized; expands to
 * a no-op unless ONIG_DEBUG_MATCH_CACHE is defined. */
#ifdef ONIG_DEBUG_MATCH_CACHE
# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", (stkp)->u.match_cache_point.index, (stkp)->u.match_cache_point.mask);
#else
# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
#endif

/*
 * Match-cache bookkeeping used by the stack pop macros.
 *
 *   INC_NUM_FAILS
 *       Count one more failure in msa->num_fails.
 *   MEMOIZE_MATCH_CACHE_POINT
 *       For the entry at `stk`: a plain cache point sets its bit in
 *       msa->match_cache_buf; an atomic cache point is recorded through
 *       memoize_extended_match_cache_point().
 *   MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp)
 *       For entry `stkp`: a plain cache point is turned into STK_VOID and
 *       recorded through memoize_extended_match_cache_point().
 *   MEMOIZE_ATOMIC_MATCH_CACHE_POINT
 *       For the entry at `stk`: a plain cache point is recorded through
 *       memoize_extended_match_cache_point().
 *
 * The two parameterless macros operate on `stk`, so their debug trace
 * reports `stk` as well (they have no `stkp` parameter).  Without
 * USE_MATCH_CACHE all four macros expand to no-ops that accept the same
 * invocation syntax as the real ones.
 */
#ifdef USE_MATCH_CACHE
# define INC_NUM_FAILS msa->num_fails++
# define MEMOIZE_MATCH_CACHE_POINT do {\
  if (stk->type == STK_MATCH_CACHE_POINT) {\
    msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
    MATCH_CACHE_DEBUG_MEMOIZE(stk);\
  } else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
    memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
    MATCH_CACHE_DEBUG_MEMOIZE(stk);\
  }\
 } while(0)
# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
  if ((stkp)->type == STK_MATCH_CACHE_POINT) {\
    (stkp)->type = STK_VOID;\
    memoize_extended_match_cache_point(msa->match_cache_buf, (stkp)->u.match_cache_point.index, (stkp)->u.match_cache_point.mask);\
    MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
  }\
 } while(0)
# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
  if (stk->type == STK_MATCH_CACHE_POINT) {\
    memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
    MATCH_CACHE_DEBUG_MEMOIZE(stk);\
  }\
 } while(0)
#else
# define INC_NUM_FAILS ((void) 0)
# define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT ((void) 0)
#endif
1543
/* Drop exactly one entry from the stack top. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)

/*
 * Pop entries until one whose type has a STK_MASK_POP_USED bit is reached;
 * `stk` is left pointing at that entry.  How much state is undone for the
 * skipped entries depends on `pop_level`:
 *   STACK_POP_LEVEL_FREE      - nothing is restored;
 *   STACK_POP_LEVEL_MEM_START - STK_MEM_START entries restore
 *                               mem_start_stk / mem_end_stk;
 *   anything else             - additionally, STK_REPEAT_INC entries
 *                               decrement their repeat counter and
 *                               STK_MEM_END entries restore
 *                               mem_start_stk / mem_end_stk.
 * At every level, state-check marks are applied (ELSE_IF_STATE_CHECK_MARK)
 * and MEMOIZE_MATCH_CACHE_POINT is run on each skipped entry.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      ELSE_IF_STATE_CHECK_MARK(stk);\
      MEMOIZE_MATCH_CACHE_POINT;\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
      MEMOIZE_MATCH_CACHE_POINT;\
    }\
    break;\
  default:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      else if (stk->type == STK_REPEAT_INC) {\
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
      }\
      else if (stk->type == STK_MEM_END) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
      MEMOIZE_MATCH_CACHE_POINT;\
    }\
    break;\
  }\
} while(0)
1595
/* Pop down to the nearest STK_POS_NOT entry (`stk` ends up pointing at it).
 * Skipped entries undo capture-group and repeat-counter state, count a
 * failure for every to-void target, apply state-check marks and memoize
 * lookaround cache points. */
#define STACK_POP_TIL_POS_NOT  do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
    if (stk->type == STK_POS_NOT) break;\
    else if (stk->type == STK_MEM_START) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    else if (stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (IS_TO_VOID_TARGET(stk)) {\
      INC_NUM_FAILS;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
    MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\
  }\
} while(0)

/* Pop down to the nearest STK_LOOK_BEHIND_NOT entry, undoing capture-group
 * and repeat-counter state and applying state-check marks on the way. */
#define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break;\
    else if (stk->type == STK_MEM_START) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    else if (stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)

/* Pop down to the nearest STK_ABSENT entry, undoing capture-group and
 * repeat-counter state and applying state-check marks on the way. */
#define STACK_POP_TIL_ABSENT  do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
    if (stk->type == STK_ABSENT) break;\
    else if (stk->type == STK_MEM_START) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    else if (stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)

/* Pop one entry and read back the two positions stored by
 * STACK_PUSH_ABSENT_POS.  The entry's type is not checked here. */
#define STACK_POP_ABSENT_POS(start, end) do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
  (start) = stk->u.absent_pos.abs_pstr;\
  (end) = stk->u.absent_pos.end_pstr;\
} while(0)
1666
/* Walk down from the stack top to the nearest STK_POS entry WITHOUT moving
 * `stk`.  Every to-void target on the way is counted as a failure and
 * turned into STK_VOID, lookaround cache points are memoized, and the
 * STK_POS entry itself is voided.  `k` is left pointing at that entry. */
#define STACK_POS_END(k) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      INC_NUM_FAILS;\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
    MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
  }\
} while(0)

/* Walk down from the stack top to the nearest STK_STOP_BT entry WITHOUT
 * moving `stk`.  To-void targets are counted as failures and voided, plain
 * match-cache points are converted to STK_ATOMIC_MATCH_CACHE_POINT, and the
 * STK_STOP_BT entry itself is voided. */
#define STACK_STOP_BT_END do {\
  OnigStackType *k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      INC_NUM_FAILS;\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
    else if (k->type == STK_MATCH_CACHE_POINT) {\
      k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
    }\
  }\
} while(0)
1702
/* Pop entries (moving `stk`) down to the nearest STK_STOP_BT entry, which is
 * turned into STK_VOID; `stk` is left pointing at it.  Every entry skipped
 * on the way is passed through MEMOIZE_ATOMIC_MATCH_CACHE_POINT.  The debug
 * label names this macro so that an underflow report identifies the right
 * call site. */
#define STACK_STOP_BT_FAIL do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_STOP_BT_FAIL"); \
    if (stk->type == STK_STOP_BT) {\
      stk->type = STK_VOID;\
      break;\
    }\
    MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
  }\
} while(0)
1714
/*
 * Null (empty-loop) checks.
 *
 * All four macros search downwards for the STK_NULL_CHECK_START entry with
 * number `id`.  The search does not start at the stack top but at the index
 * stored in (stk-1)->null_check, i.e. the most recent null-check entry
 * recorded by the push macros.  `stk` itself is never moved.
 */

/* isnull = 1 when the string position saved by the matching
 * STK_NULL_CHECK_START equals `s`, else 0. */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        (isnull) = (k->u.null_check.pstr == (s));\
        break;\
      }\
    }\
  }\
} while(0)

/* Like STACK_NULL_CHECK, but nesting-aware: every STK_NULL_CHECK_END seen on
 * the way raises `level`, and a START with number `id` only counts when
 * `level` is back to 0.
 * NOTE(review): END entries raise the level regardless of their number,
 * whereas STACK_NULL_CHECK_MEMST_REC only counts ENDs whose number equals
 * `id` -- confirm the difference is intended. */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else level--;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      level++;\
    }\
  }\
} while(0)

/* Null check that also looks at capture groups.  If the saved position
 * differs from `s`, isnull = 0.  Otherwise every STK_MEM_START between the
 * START entry and the stack top is inspected:
 *   - a group whose saved end is INVALID_STACK_INDEX, or whose start and
 *     end positions differ, gives isnull = 0 and stops the scan;
 *   - a group that is empty but whose end position is not `s` gives
 *     isnull = -1 (the scan continues);
 *   - otherwise isnull stays 1.
 * The group's end position is read through the stack when the group's bit
 * is set in reg->bt_mem_end, and taken directly from u.mem.end otherwise. */
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (k->u.null_check.pstr != (s)) {\
          (isnull) = 0;\
          break;\
        }\
        else {\
          UChar* endp;\
          (isnull) = 1;\
          while (k < stk) {\
            if (k->type == STK_MEM_START) {\
              if (k->u.mem.end == INVALID_STACK_INDEX) {\
                (isnull) = 0; break;\
              }\
              if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
                endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
              else\
                endp = (UChar* )k->u.mem.end;\
              if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                (isnull) = 0; break;\
              }\
              else if (endp != s) {\
                (isnull) = -1; /* empty, but position changed */ \
              }\
            }\
            k++;\
          }\
          break;\
        }\
      }\
    }\
  }\
} while(0)

/* Nesting-aware variant of STACK_NULL_CHECK_MEMST: STK_NULL_CHECK_END
 * entries with number `id` raise `level`, and a matching START is only
 * evaluated when `level` is 0 (otherwise it lowers the level). */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          if (k->u.null_check.pstr != (s)) {\
            (isnull) = 0;\
            break;\
          }\
          else {\
            UChar* endp;\
            (isnull) = 1;\
            while (k < stk) {\
              if (k->type == STK_MEM_START) {\
                if (k->u.mem.end == INVALID_STACK_INDEX) {\
                  (isnull) = 0; break;\
                }\
                if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
                  endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
                else\
                  endp = (UChar* )k->u.mem.end;\
                if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                  (isnull) = 0; break;\
                }\
                else if (endp != s) {\
                  (isnull) = -1; /* empty, but position changed */ \
                }\
              }\
              k++;\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
1836
/* Find the STK_REPEAT entry with number `id`, searching down from the stack
 * top; `k` is left pointing at it.  STK_RETURN raises and STK_CALL_FRAME
 * lowers `level`, and repeat entries are only considered while `level`
 * is 0. */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT) {\
      if (level == 0) {\
        if (k->u.repeat.num == (id)) {\
          break;\
        }\
      }\
    }\
    else if (k->type == STK_CALL_FRAME) level--;\
    else if (k->type == STK_RETURN)     level++;\
  }\
} while(0)

/* Find the return address for the current call: search down from the stack
 * top for the STK_CALL_FRAME that is not balanced by a STK_RETURN seen on
 * the way, and store its ret_addr in `addr`. */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  OnigStackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      else level--;\
    }\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while(0)
1872
1873
/* Compare `len` bytes at s1 and s2; jumps to the `fail` label of the
 * enclosing function on the first mismatch.  Both pointers and `len` are
 * modified in place (s1/s2 advance, len counts down). */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)

/* Case-insensitive counterpart built on string_cmp_ic(): jumps to `fail`
 * when the comparison reports a mismatch.  Uses `encode` from the
 * expanding scope; `ps2` is a pointer to the second string pointer and is
 * advanced by string_cmp_ic() on success. */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
    goto fail; \
} while(0)
1884
1885static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1886 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1887{
1888 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1889 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1890 UChar *p1, *p2, *end1, *s2;
1891 int len1, len2;
1892
1893 s2 = *ps2;
1894 end1 = s1 + mblen;
1895 while (s1 < end1) {
1896 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1897 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1898 if (len1 != len2) return 0;
1899 p1 = buf1;
1900 p2 = buf2;
1901 while (len1-- > 0) {
1902 if (*p1 != *p2) return 0;
1903 p1++;
1904 p2++;
1905 }
1906 }
1907
1908 *ps2 = s2;
1909 return 1;
1910}
1911
/* Like STRING_CMP, but reports the outcome in `is_fail` (1 on mismatch,
 * 0 on equality) instead of jumping.  s1, s2 and len are modified in
 * place. */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  while (len-- > 0) {\
    if (*s1++ != *s2++) {\
      is_fail = 1; break;\
    }\
  }\
} while(0)

/* Like STRING_CMP_IC, but reports the outcome in `is_fail` (1 on mismatch,
 * 0 on equality) instead of jumping. */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
    is_fail = 1; \
  else \
    is_fail = 0; \
} while(0)
1927
1928
/* Position predicates relative to the subject string [str, end). */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)
/* Input-availability checks on the current position `s`.  The limit is
 * `right_range` when USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is
 * defined and `end` otherwise.
 *   DATA_ENSURE_CHECK1       - at least one byte is left
 *   DATA_ENSURE_CHECK(n)     - at least n bytes are left
 *   DATA_ENSURE(n)           - `goto fail` unless n bytes are left
 *   DATA_ENSURE_CONTINUE(n)  - `continue` unless n bytes are left
 *   ABSENT_END_POS           - the limit itself
 * DATA_ENSURE and DATA_ENSURE_CONTINUE are bare `if` statements, so do not
 * follow them with an `else`. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# define DATA_ENSURE_CHECK1    (s < right_range)
# define DATA_ENSURE_CHECK(n)  (s + (n) <= right_range)
# define DATA_ENSURE(n)        if (s + (n) > right_range) goto fail
# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
# define ABSENT_END_POS        right_range
#else
# define DATA_ENSURE_CHECK1    (s < end)
# define DATA_ENSURE_CHECK(n)  (s + (n) <= end)
# define DATA_ENSURE(n)        if (s + (n) > end) goto fail
# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
# define ABSENT_END_POS        end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1945
1946int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
1947
1948static inline int
1949enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
1950{
1951 if (enc->max_enc_len == enc->min_enc_len) {
1952 return (p < e ? enc->min_enc_len : 0);
1953 }
1954 else {
1955 return onigenc_mbclen_approximate(p, e, enc);
1956 }
1957}
1958
1959
1960#ifdef USE_CAPTURE_HISTORY
1961static int
1962make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1963 OnigStackType* stk_top, UChar* str, regex_t* reg)
1964{
1965 int n, r;
1966 OnigCaptureTreeNode* child;
1967 OnigStackType* k = *kp;
1968
1969 while (k < stk_top) {
1970 if (k->type == STK_MEM_START) {
1971 n = k->u.mem.num;
1972 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1973 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1974 child = history_node_new();
1975 CHECK_NULL_RETURN_MEMERR(child);
1976 child->group = n;
1977 child->beg = k->u.mem.pstr - str;
1978 r = history_tree_add_child(node, child);
1979 if (r != 0) {
1980 history_tree_free(child);
1981 return r;
1982 }
1983 *kp = (k + 1);
1984 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1985 if (r != 0) return r;
1986
1987 k = *kp;
1988 child->end = k->u.mem.pstr - str;
1989 }
1990 }
1991 else if (k->type == STK_MEM_END) {
1992 if (k->u.mem.num == node->group) {
1993 node->end = k->u.mem.pstr - str;
1994 *kp = k;
1995 return 0;
1996 }
1997 }
1998 k++;
1999 }
2000
2001 return 1; /* 1: root node ending. */
2002}
2003#endif /* USE_CAPTURE_HISTORY */
2004
2005#ifdef USE_BACKREF_WITH_LEVEL
2006static int
2007mem_is_in_memp(int mem, int num, UChar* memp)
2008{
2009 int i;
2010 MemNumType m;
2011
2012 for (i = 0; i < num; i++) {
2013 GET_MEMNUM_INC(m, memp);
2014 if (mem == (int )m) return 1;
2015 }
2016 return 0;
2017}
2018
2019static int backref_match_at_nested_level(regex_t* reg,
2020 OnigStackType* top, OnigStackType* stk_base,
2021 int ignore_case, int case_fold_flag,
2022 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
2023{
2024 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2025 int level;
2026 OnigStackType* k;
2027
2028 level = 0;
2029 k = top;
2030 k--;
2031 while (k >= stk_base) {
2032 if (k->type == STK_CALL_FRAME) {
2033 level--;
2034 }
2035 else if (k->type == STK_RETURN) {
2036 level++;
2037 }
2038 else if (level == nest) {
2039 if (k->type == STK_MEM_START) {
2040 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2041 pstart = k->u.mem.pstr;
2042 if (pend != NULL_UCHARP) {
2043 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2044 p = pstart;
2045 ss = *s;
2046
2047 if (ignore_case != 0) {
2048 if (string_cmp_ic(reg->enc, case_fold_flag,
2049 pstart, &ss, pend - pstart, send) == 0)
2050 return 0; /* or goto next_mem; */
2051 }
2052 else {
2053 while (p < pend) {
2054 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2055 }
2056 }
2057
2058 *s = ss;
2059 return 1;
2060 }
2061 }
2062 }
2063 else if (k->type == STK_MEM_END) {
2064 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2065 pend = k->u.mem.pstr;
2066 }
2067 }
2068 }
2069 k--;
2070 }
2071
2072 return 0;
2073}
2074#endif /* USE_BACKREF_WITH_LEVEL */
2075
2076
2077#ifdef ONIG_DEBUG_STATISTICS
2078
/* Timing back end for the opcode statistics.  Each variant provides the
 * timestamp variables `ts`/`te`, GETTIME(t) to take a timestamp and
 * TIMEDIFF(te, ts) to turn two timestamps into an elapsed value. */
# ifdef _WIN32
#  include <windows.h>
static LARGE_INTEGER ts, te, freq;
#  define GETTIME(t)      QueryPerformanceCounter(&(t))
/* Elapsed microseconds, using the counter frequency stored in `freq`. */
#  define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
                            * 1000000 / freq.QuadPart)
# else /* _WIN32 */

#  define USE_TIMEOFDAY

#  ifdef USE_TIMEOFDAY
#   ifdef HAVE_SYS_TIME_H
#    include <sys/time.h>
#   endif
#   ifdef HAVE_UNISTD_H
#    include <unistd.h>
#   endif
static struct timeval ts, te;
#   define GETTIME(t)      gettimeofday(&(t), (struct timezone* )0)
/* Elapsed microseconds between two struct timeval values. */
#   define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
                            (((te).tv_sec - (ts).tv_sec)*1000000))
#  else /* USE_TIMEOFDAY */
#   ifdef HAVE_SYS_TIMES_H
#    include <sys/times.h>
#   endif
static struct tms ts, te;
#   define GETTIME(t)       times(&(t))
/* Difference of the tms_utime fields (in the units times() reports). */
#   define TIMEDIFF(te,ts)  ((te).tms_utime - (ts).tms_utime)
#  endif /* USE_TIMEOFDAY */

# endif /* _WIN32 */

/* Per-opcode statistics, indexed by opcode value. */
static int OpCounter[256];        /* how often each opcode was entered */
static int OpPrevCounter[256];    /* how often each opcode preceded OpPrevTarget */
static unsigned long OpTime[256]; /* accumulated TIMEDIFF per opcode */
static int OpCurr = OP_FINISH;    /* opcode most recently entered */
static int OpPrevTarget = OP_FAIL;/* opcode whose predecessors are counted */
static int MaxStackDepth = 0;     /* high-water mark maintained by STACK_INC */

/* Enter `opcode`: if it is OpPrevTarget, credit the opcode that ran just
 * before it; then make it current, count it and start the timer. */
# define MOP_IN(opcode) do {\
  if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)

/* Leave the current opcode: stop the timer and add the elapsed time to its
 * total. */
# define MOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2129
2130extern void
2131onig_statistics_init(void)
2132{
2133 int i;
2134 for (i = 0; i < 256; i++) {
2135 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2136 }
2137 MaxStackDepth = 0;
2138# ifdef _WIN32
2139 QueryPerformanceFrequency(&freq);
2140# endif
2141}
2142
2143extern void
2144onig_print_statistics(FILE* f)
2145{
2146 int i;
2147 fprintf(f, " count prev time\n");
2148 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2149 fprintf(f, "%8d: %8d: %10lu: %s\n",
2150 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2151 }
2152 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2153}
2154
/* Advance the stack top by one entry.  The statistics build also tracks the
 * deepest stack seen in MaxStackDepth. */
# define STACK_INC do {\
  stk++;\
  if (stk - stk_base > MaxStackDepth) \
    MaxStackDepth = stk - stk_base;\
} while(0)

#else /* ONIG_DEBUG_STATISTICS */
/* Advance the stack top by one entry. */
# define STACK_INC     stk++

/* Statistics hooks compile away when statistics are disabled. */
# define MOP_IN(opcode)
# define MOP_OUT
2166#endif /* ONIG_DEBUG_STATISTICS */
2167
2168
2169#ifdef ONIG_DEBUG_MATCH
2170static const char *
2171stack_type_str(int stack_type)
2172{
2173 switch (stack_type) {
2174 case STK_ALT: return "Alt ";
2175 case STK_LOOK_BEHIND_NOT: return "LBNot ";
2176 case STK_POS_NOT: return "PosNot";
2177 case STK_MEM_START: return "MemS ";
2178 case STK_MEM_END: return "MemE ";
2179 case STK_REPEAT_INC: return "RepInc";
2180 case STK_STATE_CHECK_MARK: return "StChMk";
2181 case STK_NULL_CHECK_START: return "NulChS";
2182 case STK_NULL_CHECK_END: return "NulChE";
2183 case STK_MEM_END_MARK: return "MemEMk";
2184 case STK_POS: return "Pos ";
2185 case STK_STOP_BT: return "StopBt";
2186 case STK_REPEAT: return "Rep ";
2187 case STK_CALL_FRAME: return "Call ";
2188 case STK_RETURN: return "Ret ";
2189 case STK_VOID: return "Void ";
2190 case STK_ABSENT_POS: return "AbsPos";
2191 case STK_ABSENT: return "Absent";
2192 case STK_MATCH_CACHE_POINT: return "MCache";
2193 default: return " ";
2194 }
2195}
2196#endif
2197#ifdef USE_MATCH_CACHE
2198
2199static long
2200bsearch_cache_opcodes(const OnigCacheOpcode *cache_opcodes, long num_cache_opcodes, const UChar* p)
2201{
2202 long l = 0, r = num_cache_opcodes - 1, m = 0;
2203
2204 while (l <= r) {
2205 m = (l + r) / 2;
2206 if (cache_opcodes[m].addr == p) break;
2207 if (cache_opcodes[m].addr < p) l = m + 1;
2208 else r = m - 1;
2209 }
2210 return m;
2211}
2212
2213static long
2214find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk, const OnigCacheOpcode **cache_opcode_ptr)
2215{
2216 long m;
2217 const OnigCacheOpcode* cache_opcode;
2218 const OnigRepeatRange* range;
2219 const OnigStackType *stkp;
2220 int count = 0;
2221 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
2222 long cache_point;
2223 long num_cache_points_at_outer_repeat;
2224 long num_cache_points_in_outer_repeat;
2225
2226 m = bsearch_cache_opcodes(cache_opcodes, num_cache_opcodes, p);
2227
2228 if (!(0 <= m && m < num_cache_opcodes && cache_opcodes[m].addr == p)) {
2229 return -1;
2230 }
2231
2232 cache_opcode = &cache_opcodes[m];
2233 *cache_opcode_ptr = &cache_opcodes[m];
2234 cache_point = cache_opcode->cache_point;
2235 if (cache_opcode->outer_repeat_mem == -1) {
2236 return cache_point;
2237 }
2238
2239 num_cache_points_at_outer_repeat = cache_opcode->num_cache_points_at_outer_repeat;
2240 num_cache_points_in_outer_repeat = cache_opcode->num_cache_points_in_outer_repeat;
2241
2242 range = &reg->repeat_range[cache_opcode->outer_repeat_mem];
2243
2244 stkp = &stk[repeat_stk[cache_opcode->outer_repeat_mem]];
2245 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
2246
2247 if (count < range->lower) {
2248 return num_cache_points_at_outer_repeat +
2249 num_cache_points_in_outer_repeat * count +
2250 cache_point;
2251 }
2252
2253 if (range->upper == 0x7fffffff) {
2254 return num_cache_points_at_outer_repeat +
2255 num_cache_points_in_outer_repeat * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) +
2256 cache_point;
2257 }
2258
2259 return num_cache_points_at_outer_repeat +
2260 num_cache_points_in_outer_repeat * (range->lower - 1) +
2261 (num_cache_points_in_outer_repeat + 1) * (count - range->lower + 1) +
2262 cache_point;
2263}
2264
/*
 * Test the bit that immediately follows the one selected by
 * match_cache_point_mask in byte match_cache_point_index of match_cache_buf.
 * When the mask selects the top bit (0x80) the following bit is bit 0 of the
 * next byte; otherwise it is the next-higher bit of the same byte.
 *
 * Returns 1 when that bit is set, 0 otherwise.
 */
static int check_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask) {
  const uint8_t *slot = match_cache_buf + match_cache_point_index;

  if ((match_cache_point_mask & 0x80) != 0) {
    /* The extension bit spills over into the next byte. */
    return (slot[1] & 0x01) != 0;
  }
  return (slot[0] & (match_cache_point_mask << 1)) != 0;
}
2272
/*
 * Set, in match_cache_buf, both the bit selected by match_cache_point_mask in
 * byte match_cache_point_index and the bit that immediately follows it.
 * When the mask selects the top bit (0x80) the following bit is bit 0 of the
 * next byte; otherwise it is the next-higher bit of the same byte.
 */
static void memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask) {
  uint8_t *slot = &match_cache_buf[match_cache_point_index];

  if ((match_cache_point_mask & 0x80) == 0) {
    /* Both bits live in the same byte. */
    *slot |= (uint8_t)(match_cache_point_mask | (match_cache_point_mask << 1));
    return;
  }

  /* The extension bit spills over into the next byte. */
  slot[0] |= match_cache_point_mask;
  slot[1] |= 0x01;
}
2281
2282#endif /* USE_MATCH_CACHE */
2283
2284/* match data(str - end) from position (sstart). */
2285/* if sstart == str then set sprev to NULL. */
2286static OnigPosition
2287match_at(regex_t* reg, const UChar* str, const UChar* end,
2288#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2289 const UChar* right_range,
2290#endif
2291 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
2292{
2293 static const UChar FinishCode[] = { OP_FINISH };
2294
2295 int i, num_mem, pop_level;
2296 ptrdiff_t n, best_len;
2297 LengthType tlen, tlen2;
2298 MemNumType mem;
2299 RelAddrType addr;
2300 OnigOptionType option = reg->options;
2301 OnigEncoding encode = reg->enc;
2302 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2303 UChar *s, *q, *sbegin;
2304 UChar *p = reg->p;
2305 UChar *pbegin = p;
2306 UChar *pkeep;
2307 char *alloca_base;
2308 char *xmalloc_base = NULL;
2309 OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end;
2310 OnigStackType *stkp; /* used as any purpose. */
2311 OnigStackIndex si;
2312 OnigStackIndex *repeat_stk;
2313 OnigStackIndex *mem_start_stk, *mem_end_stk;
2314#ifdef USE_COMBINATION_EXPLOSION_CHECK
2315 int scv;
2316 unsigned char* state_check_buff = msa->state_check_buff;
2317 int num_comb_exp_check = reg->num_comb_exp_check;
2318#endif
2319
2320#if USE_TOKEN_THREADED_VM
2321# define OP_OFFSET 1
2322# define VM_LOOP JUMP;
2323# define VM_LOOP_END
2324# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2325# define DEFAULT L_DEFAULT:
2326# define NEXT sprev = sbegin; JUMP
2327# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2328
2329 RB_GNUC_EXTENSION static const void *oplabels[] = {
2330 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
2331 &&L_OP_END, /* pattern code terminator (success end) */
2332
2333 &&L_OP_EXACT1, /* single byte, N = 1 */
2334 &&L_OP_EXACT2, /* single byte, N = 2 */
2335 &&L_OP_EXACT3, /* single byte, N = 3 */
2336 &&L_OP_EXACT4, /* single byte, N = 4 */
2337 &&L_OP_EXACT5, /* single byte, N = 5 */
2338 &&L_OP_EXACTN, /* single byte */
2339 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
2340 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
2341 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
2342 &&L_OP_EXACTMB2N, /* mb-length = 2 */
2343 &&L_OP_EXACTMB3N, /* mb-length = 3 */
2344 &&L_OP_EXACTMBN, /* other length */
2345
2346 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
2347 &&L_OP_EXACTN_IC, /* single byte, ignore case */
2348
2349 &&L_OP_CCLASS,
2350 &&L_OP_CCLASS_MB,
2351 &&L_OP_CCLASS_MIX,
2352 &&L_OP_CCLASS_NOT,
2353 &&L_OP_CCLASS_MB_NOT,
2354 &&L_OP_CCLASS_MIX_NOT,
2355
2356 &&L_OP_ANYCHAR, /* "." */
2357 &&L_OP_ANYCHAR_ML, /* "." multi-line */
2358 &&L_OP_ANYCHAR_STAR, /* ".*" */
2359 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
2360 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2361 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2362
2363 &&L_OP_WORD,
2364 &&L_OP_NOT_WORD,
2365 &&L_OP_WORD_BOUND,
2366 &&L_OP_NOT_WORD_BOUND,
2367# ifdef USE_WORD_BEGIN_END
2368 &&L_OP_WORD_BEGIN,
2369 &&L_OP_WORD_END,
2370# else
2371 &&L_DEFAULT,
2372 &&L_DEFAULT,
2373# endif
2374 &&L_OP_ASCII_WORD,
2375 &&L_OP_NOT_ASCII_WORD,
2376 &&L_OP_ASCII_WORD_BOUND,
2377 &&L_OP_NOT_ASCII_WORD_BOUND,
2378# ifdef USE_WORD_BEGIN_END
2379 &&L_OP_ASCII_WORD_BEGIN,
2380 &&L_OP_ASCII_WORD_END,
2381# else
2382 &&L_DEFAULT,
2383 &&L_DEFAULT,
2384# endif
2385
2386 &&L_OP_BEGIN_BUF,
2387 &&L_OP_END_BUF,
2388 &&L_OP_BEGIN_LINE,
2389 &&L_OP_END_LINE,
2390 &&L_OP_SEMI_END_BUF,
2391 &&L_OP_BEGIN_POSITION,
2392
2393 &&L_OP_BACKREF1,
2394 &&L_OP_BACKREF2,
2395 &&L_OP_BACKREFN,
2396 &&L_OP_BACKREFN_IC,
2397 &&L_OP_BACKREF_MULTI,
2398 &&L_OP_BACKREF_MULTI_IC,
2399# ifdef USE_BACKREF_WITH_LEVEL
2400 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
2401# else
2402 &&L_DEFAULT,
2403# endif
2404 &&L_OP_MEMORY_START,
2405 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
2406 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
2407# ifdef USE_SUBEXP_CALL
2408 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
2409# else
2410 &&L_DEFAULT,
2411# endif
2412 &&L_OP_MEMORY_END,
2413# ifdef USE_SUBEXP_CALL
2414 &&L_OP_MEMORY_END_REC, /* push marker to stack */
2415# else
2416 &&L_DEFAULT,
2417# endif
2418
2419 &&L_OP_KEEP,
2420
2421 &&L_OP_FAIL, /* pop stack and move */
2422 &&L_OP_JUMP,
2423 &&L_OP_PUSH,
2424 &&L_OP_POP,
2425# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2426 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
2427# else
2428 &&L_DEFAULT,
2429# endif
2430 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
2431 &&L_OP_REPEAT, /* {n,m} */
2432 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
2433 &&L_OP_REPEAT_INC,
2434 &&L_OP_REPEAT_INC_NG, /* non greedy */
2435 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
2436 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
2437 &&L_OP_NULL_CHECK_START, /* null loop checker start */
2438 &&L_OP_NULL_CHECK_END, /* null loop checker end */
2439# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2440 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
2441# else
2442 &&L_DEFAULT,
2443# endif
2444# ifdef USE_SUBEXP_CALL
2445 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
2446# else
2447 &&L_DEFAULT,
2448# endif
2449
2450 &&L_OP_PUSH_POS, /* (?=...) start */
2451 &&L_OP_POP_POS, /* (?=...) end */
2452 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
2453 &&L_OP_FAIL_POS, /* (?!...) end */
2454 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
2455 &&L_OP_POP_STOP_BT, /* (?>...) end */
2456 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
2457 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
2458 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
2459 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
2460 &&L_OP_ABSENT, /* (?~...) start of inner loop */
2461 &&L_OP_ABSENT_END, /* (?~...) end */
2462
2463# ifdef USE_SUBEXP_CALL
2464 &&L_OP_CALL, /* \g<name> */
2465 &&L_OP_RETURN,
2466# else
2467 &&L_DEFAULT,
2468 &&L_DEFAULT,
2469# endif
2470 &&L_OP_CONDITION,
2471
2472# ifdef USE_COMBINATION_EXPLOSION_CHECK
2473 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
2474 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
2475 &&L_OP_STATE_CHECK, /* check only */
2476# else
2477 &&L_DEFAULT,
2478 &&L_DEFAULT,
2479 &&L_DEFAULT,
2480# endif
2481# ifdef USE_COMBINATION_EXPLOSION_CHECK
2482 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2483 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2484# else
2485 &&L_DEFAULT,
2486 &&L_DEFAULT,
2487# endif
2488 /* no need: IS_DYNAMIC_OPTION() == 0 */
2489# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2490 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
2491 &&L_OP_SET_OPTION /* set option */
2492# else
2493 &&L_DEFAULT,
2494 &&L_DEFAULT
2495# endif
2496 };
2497#else /* USE_TOKEN_THREADED_VM */
2498
2499# define OP_OFFSET 0
2500# define VM_LOOP \
2501 while (1) { \
2502 OPCODE_EXEC_HOOK; \
2503 pbegin = p; \
2504 sbegin = s; \
2505 switch (*p++) {
2506# define VM_LOOP_END } sprev = sbegin; }
2507# define CASE(x) case x:
2508# define DEFAULT default:
2509# define NEXT break
2510# define JUMP continue; break
2511#endif /* USE_TOKEN_THREADED_VM */
2512
2513
2514#ifdef USE_SUBEXP_CALL
2515/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2516 etc. Additional space is required. */
2517# define ADD_NUMMEM 1
2518#else
2519/* Stack #0 not is used. */
2520# define ADD_NUMMEM 0
2521#endif
2522
2523 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2524
2525 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2526 pop_level = reg->stack_pop_level;
2527 num_mem = reg->num_mem;
2528 repeat_stk = (OnigStackIndex* )alloca_base;
2529
2530 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2531 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2532 {
2533 OnigStackIndex *pp = mem_start_stk;
2534 for (; pp < repeat_stk + n; pp += 2) {
2535 pp[0] = INVALID_STACK_INDEX;
2536 pp[1] = INVALID_STACK_INDEX;
2537 }
2538 }
2539#ifndef USE_SUBEXP_CALL
2540 mem_start_stk--; /* for index start from 1,
2541 mem_start_stk[1]..mem_start_stk[num_mem] */
2542 mem_end_stk--; /* for index start from 1,
2543 mem_end_stk[1]..mem_end_stk[num_mem] */
2544#endif
2545
2546#ifdef ONIG_DEBUG_MATCH
2547 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
2548 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2549 fprintf(stderr, "size: %d, start offset: %d\n",
2550 (int )(end - str), (int )(sstart - str));
2551 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
2552#endif
2553
2554 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
2555 best_len = ONIG_MISMATCH;
2556 s = (UChar* )sstart;
2557 pkeep = (UChar* )sstart;
2558
2559
2560#ifdef ONIG_DEBUG_MATCH
2561# define OPCODE_EXEC_HOOK \
2562 if (s) { \
2563 UChar *op, *q, *bp, buf[50]; \
2564 int len; \
2565 op = p - OP_OFFSET; \
2566 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2567 bp = buf; \
2568 q = s; \
2569 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
2570 for (i = 0; i < 7 && q < end; i++) { \
2571 len = enclen(encode, q, end); \
2572 while (len-- > 0) *bp++ = *q++; \
2573 } \
2574 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2575 } \
2576 xmemcpy(bp, "\"", 1); bp += 1; \
2577 *bp = 0; \
2578 fputs((char* )buf, stderr); \
2579 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2580 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2581 stk - stk_base - 1, \
2582 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2583 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2584 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2585 fprintf(stderr, "\n"); \
2586 }
2587#else
2588# define OPCODE_EXEC_HOOK ((void) 0)
2589#endif
2590
2591#ifdef USE_MATCH_CACHE
2592#ifdef ONIG_DEBUG_MATCH_CACHE
2593#define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask)
2594#define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n")
2595#else
2596#define MATCH_CACHE_DEBUG ((void) 0)
2597#define MATCH_CACHE_DEBUG_HIT ((void) 0)
2598#endif
2599
2600#define MATCH_CACHE_HIT ((void) 0)
2601
2602# define CHECK_MATCH_CACHE do {\
2603 if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\
2604 const OnigCacheOpcode *cache_opcode;\
2605 long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\
2606 if (cache_point >= 0) {\
2607 long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\
2608 long match_cache_point_index = match_cache_point >> 3;\
2609 uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\
2610 MATCH_CACHE_DEBUG;\
2611 if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\
2612 MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\
2613 if (cache_opcode->lookaround_nesting == 0) goto fail;\
2614 else if (cache_opcode->lookaround_nesting < 0) {\
2615 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2616 STACK_STOP_BT_FAIL;\
2617 goto fail;\
2618 } else goto fail;\
2619 } else {\
2620 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2621 p = cache_opcode->match_addr;\
2622 MOP_OUT;\
2623 JUMP;\
2624 } else goto fail;\
2625 }\
2626 }\
2627 STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\
2628 }\
2629 }\
2630} while (0)
2631#else
2632# define CHECK_MATCH_CACHE ((void) 0)
2633#endif
2634
2635 VM_LOOP {
2636 CASE(OP_END) MOP_IN(OP_END);
2637 n = s - sstart;
2638 if (n > best_len) {
2639 OnigRegion* region;
2640#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2641 if (IS_FIND_LONGEST(option)) {
2642 if (n > msa->best_len) {
2643 msa->best_len = n;
2644 msa->best_s = (UChar* )sstart;
2645 }
2646 else
2647 goto end_best_len;
2648 }
2649#endif
2650 best_len = n;
2651 region = msa->region;
2652 if (region) {
2653 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2654 region->end[0] = s - str;
2655 for (i = 1; i <= num_mem; i++) {
2656 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2657 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2658 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2659 else
2660 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2661
2662 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2663 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2664 : (UChar* )((void* )mem_end_stk[i])) - str;
2665 }
2666 else {
2667 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2668 }
2669 }
2670
2671#ifdef USE_CAPTURE_HISTORY
2672 if (reg->capture_history != 0) {
2673 int r;
2674 OnigCaptureTreeNode* node;
2675
2676 if (IS_NULL(region->history_root)) {
2677 region->history_root = node = history_node_new();
2678 CHECK_NULL_RETURN_MEMERR(node);
2679 }
2680 else {
2681 node = region->history_root;
2682 history_tree_clear(node);
2683 }
2684
2685 node->group = 0;
2686 node->beg = ((pkeep > s) ? s : pkeep) - str;
2687 node->end = s - str;
2688
2689 stkp = stk_base;
2690 r = make_capture_history_tree(region->history_root, &stkp,
2691 stk, (UChar* )str, reg);
2692 if (r < 0) {
2693 best_len = r; /* error code */
2694 goto finish;
2695 }
2696 }
2697#endif /* USE_CAPTURE_HISTORY */
2698 } /* if (region) */
2699 } /* n > best_len */
2700
2701#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2702 end_best_len:
2703#endif
2704 MOP_OUT;
2705
2706 if (IS_FIND_CONDITION(option)) {
2707 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2708 best_len = ONIG_MISMATCH;
2709 goto fail; /* for retry */
2710 }
2711 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2712 goto fail; /* for retry */
2713 }
2714 }
2715
2716 /* default behavior: return first-matching result. */
2717 goto finish;
2718 NEXT;
2719
2720 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2721 DATA_ENSURE(1);
2722 if (*p != *s) goto fail;
2723 p++; s++;
2724 MOP_OUT;
2725 NEXT;
2726
2727 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2728 {
2729 int len;
2730 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2731
2732 DATA_ENSURE(1);
2733 len = ONIGENC_MBC_CASE_FOLD(encode,
2734 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2735 case_fold_flag,
2736 &s, end, lowbuf);
2737 DATA_ENSURE(0);
2738 q = lowbuf;
2739 while (len-- > 0) {
2740 if (*p != *q) {
2741 goto fail;
2742 }
2743 p++; q++;
2744 }
2745 }
2746 MOP_OUT;
2747 NEXT;
2748
2749 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2750 DATA_ENSURE(2);
2751 if (*p != *s) goto fail;
2752 p++; s++;
2753 if (*p != *s) goto fail;
2754 sprev = s;
2755 p++; s++;
2756 MOP_OUT;
2757 JUMP;
2758
2759 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2760 DATA_ENSURE(3);
2761 if (*p != *s) goto fail;
2762 p++; s++;
2763 if (*p != *s) goto fail;
2764 p++; s++;
2765 if (*p != *s) goto fail;
2766 sprev = s;
2767 p++; s++;
2768 MOP_OUT;
2769 JUMP;
2770
2771 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2772 DATA_ENSURE(4);
2773 if (*p != *s) goto fail;
2774 p++; s++;
2775 if (*p != *s) goto fail;
2776 p++; s++;
2777 if (*p != *s) goto fail;
2778 p++; s++;
2779 if (*p != *s) goto fail;
2780 sprev = s;
2781 p++; s++;
2782 MOP_OUT;
2783 JUMP;
2784
2785 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2786 DATA_ENSURE(5);
2787 if (*p != *s) goto fail;
2788 p++; s++;
2789 if (*p != *s) goto fail;
2790 p++; s++;
2791 if (*p != *s) goto fail;
2792 p++; s++;
2793 if (*p != *s) goto fail;
2794 p++; s++;
2795 if (*p != *s) goto fail;
2796 sprev = s;
2797 p++; s++;
2798 MOP_OUT;
2799 JUMP;
2800
2801 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2802 GET_LENGTH_INC(tlen, p);
2803 DATA_ENSURE(tlen);
2804 while (tlen-- > 0) {
2805 if (*p++ != *s++) goto fail;
2806 }
2807 sprev = s - 1;
2808 MOP_OUT;
2809 JUMP;
2810
2811 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2812 {
2813 int len;
2814 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2815
2816 GET_LENGTH_INC(tlen, p);
2817 endp = p + tlen;
2818
2819 while (p < endp) {
2820 sprev = s;
2821 DATA_ENSURE(1);
2822 len = ONIGENC_MBC_CASE_FOLD(encode,
2823 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2824 case_fold_flag,
2825 &s, end, lowbuf);
2826 DATA_ENSURE(0);
2827 q = lowbuf;
2828 while (len-- > 0) {
2829 if (*p != *q) goto fail;
2830 p++; q++;
2831 }
2832 }
2833 }
2834
2835 MOP_OUT;
2836 JUMP;
2837
2838 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2839 DATA_ENSURE(2);
2840 if (*p != *s) goto fail;
2841 p++; s++;
2842 if (*p != *s) goto fail;
2843 p++; s++;
2844 MOP_OUT;
2845 NEXT;
2846
2847 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2848 DATA_ENSURE(4);
2849 if (*p != *s) goto fail;
2850 p++; s++;
2851 if (*p != *s) goto fail;
2852 p++; s++;
2853 sprev = s;
2854 if (*p != *s) goto fail;
2855 p++; s++;
2856 if (*p != *s) goto fail;
2857 p++; s++;
2858 MOP_OUT;
2859 JUMP;
2860
2861 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2862 DATA_ENSURE(6);
2863 if (*p != *s) goto fail;
2864 p++; s++;
2865 if (*p != *s) goto fail;
2866 p++; s++;
2867 if (*p != *s) goto fail;
2868 p++; s++;
2869 if (*p != *s) goto fail;
2870 p++; s++;
2871 sprev = s;
2872 if (*p != *s) goto fail;
2873 p++; s++;
2874 if (*p != *s) goto fail;
2875 p++; s++;
2876 MOP_OUT;
2877 JUMP;
2878
2879 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2880 GET_LENGTH_INC(tlen, p);
2881 DATA_ENSURE(tlen * 2);
2882 while (tlen-- > 0) {
2883 if (*p != *s) goto fail;
2884 p++; s++;
2885 if (*p != *s) goto fail;
2886 p++; s++;
2887 }
2888 sprev = s - 2;
2889 MOP_OUT;
2890 JUMP;
2891
2892 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2893 GET_LENGTH_INC(tlen, p);
2894 DATA_ENSURE(tlen * 3);
2895 while (tlen-- > 0) {
2896 if (*p != *s) goto fail;
2897 p++; s++;
2898 if (*p != *s) goto fail;
2899 p++; s++;
2900 if (*p != *s) goto fail;
2901 p++; s++;
2902 }
2903 sprev = s - 3;
2904 MOP_OUT;
2905 JUMP;
2906
2907 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2908 GET_LENGTH_INC(tlen, p); /* mb-len */
2909 GET_LENGTH_INC(tlen2, p); /* string len */
2910 tlen2 *= tlen;
2911 DATA_ENSURE(tlen2);
2912 while (tlen2-- > 0) {
2913 if (*p != *s) goto fail;
2914 p++; s++;
2915 }
2916 sprev = s - tlen;
2917 MOP_OUT;
2918 JUMP;
2919
2920 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2921 DATA_ENSURE(1);
2922 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2923 p += SIZE_BITSET;
2924 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2925 MOP_OUT;
2926 NEXT;
2927
2928 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2929 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2930
2931 cclass_mb:
2932 GET_LENGTH_INC(tlen, p);
2933 {
2934 OnigCodePoint code;
2935 UChar *ss;
2936 int mb_len;
2937
2938 DATA_ENSURE(1);
2939 mb_len = enclen_approx(encode, s, end);
2940 DATA_ENSURE(mb_len);
2941 ss = s;
2942 s += mb_len;
2943 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2944
2945#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2946 if (! onig_is_in_code_range(p, code)) goto fail;
2947#else
2948 q = p;
2949 ALIGNMENT_RIGHT(q);
2950 if (! onig_is_in_code_range(q, code)) goto fail;
2951#endif
2952 }
2953 p += tlen;
2954 MOP_OUT;
2955 NEXT;
2956
2957 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2958 DATA_ENSURE(1);
2959 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2960 p += SIZE_BITSET;
2961 goto cclass_mb;
2962 }
2963 else {
2964 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2965 goto fail;
2966
2967 p += SIZE_BITSET;
2968 GET_LENGTH_INC(tlen, p);
2969 p += tlen;
2970 s++;
2971 }
2972 MOP_OUT;
2973 NEXT;
2974
2975 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2976 DATA_ENSURE(1);
2977 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2978 p += SIZE_BITSET;
2979 s += enclen(encode, s, end);
2980 MOP_OUT;
2981 NEXT;
2982
2983 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2984 DATA_ENSURE(1);
2985 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2986 s++;
2987 GET_LENGTH_INC(tlen, p);
2988 p += tlen;
2989 goto cc_mb_not_success;
2990 }
2991
2992 cclass_mb_not:
2993 GET_LENGTH_INC(tlen, p);
2994 {
2995 OnigCodePoint code;
2996 UChar *ss;
2997 int mb_len = enclen(encode, s, end);
2998
2999 if (! DATA_ENSURE_CHECK(mb_len)) {
3000 DATA_ENSURE(1);
3001 s = (UChar* )end;
3002 p += tlen;
3003 goto cc_mb_not_success;
3004 }
3005
3006 ss = s;
3007 s += mb_len;
3008 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3009
3010#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
3011 if (onig_is_in_code_range(p, code)) goto fail;
3012#else
3013 q = p;
3014 ALIGNMENT_RIGHT(q);
3015 if (onig_is_in_code_range(q, code)) goto fail;
3016#endif
3017 }
3018 p += tlen;
3019
3020 cc_mb_not_success:
3021 MOP_OUT;
3022 NEXT;
3023
3024 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
3025 DATA_ENSURE(1);
3026 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3027 p += SIZE_BITSET;
3028 goto cclass_mb_not;
3029 }
3030 else {
3031 if (BITSET_AT(((BitSetRef )p), *s) != 0)
3032 goto fail;
3033
3034 p += SIZE_BITSET;
3035 GET_LENGTH_INC(tlen, p);
3036 p += tlen;
3037 s++;
3038 }
3039 MOP_OUT;
3040 NEXT;
3041
3042 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
3043 DATA_ENSURE(1);
3044 n = enclen_approx(encode, s, end);
3045 DATA_ENSURE(n);
3046 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3047 s += n;
3048 MOP_OUT;
3049 NEXT;
3050
3051 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
3052 DATA_ENSURE(1);
3053 n = enclen_approx(encode, s, end);
3054 DATA_ENSURE(n);
3055 s += n;
3056 MOP_OUT;
3057 NEXT;
3058
3059 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
3060 while (DATA_ENSURE_CHECK1) {
3061 CHECK_MATCH_CACHE;
3062 STACK_PUSH_ALT(p, s, sprev, pkeep);
3063 n = enclen_approx(encode, s, end);
3064 DATA_ENSURE(n);
3065 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3066 sprev = s;
3067 s += n;
3068 }
3069 MOP_OUT;
3070 JUMP;
3071
3072 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
3073 while (DATA_ENSURE_CHECK1) {
3074 CHECK_MATCH_CACHE;
3075 STACK_PUSH_ALT(p, s, sprev, pkeep);
3076 n = enclen_approx(encode, s, end);
3077 if (n > 1) {
3078 DATA_ENSURE(n);
3079 sprev = s;
3080 s += n;
3081 }
3082 else {
3083 sprev = s;
3084 s++;
3085 }
3086 }
3087 MOP_OUT;
3088 JUMP;
3089
3090 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
3091 while (DATA_ENSURE_CHECK1) {
3092 CHECK_MATCH_CACHE;
3093 if (*p == *s) {
3094 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3095 } else {
3096#ifdef USE_MATCH_CACHE
3097 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3098 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR` simply in this case.*/
3099 msa->num_fails++;
3100#endif
3101 }
3102 n = enclen_approx(encode, s, end);
3103 DATA_ENSURE(n);
3104 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3105 sprev = s;
3106 s += n;
3107 }
3108 p++;
3109 MOP_OUT;
3110 NEXT;
3111
3112 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
3113 while (DATA_ENSURE_CHECK1) {
3114 CHECK_MATCH_CACHE;
3115 if (*p == *s) {
3116 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3117 } else {
3118#ifdef USE_MATCH_CACHE
3119 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3120 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR_ML` simply in this case.*/
3121 msa->num_fails++;
3122#endif
3123 }
3124 n = enclen_approx(encode, s, end);
3125 if (n > 1) {
3126 DATA_ENSURE(n);
3127 sprev = s;
3128 s += n;
3129 }
3130 else {
3131 sprev = s;
3132 s++;
3133 }
3134 }
3135 p++;
3136 MOP_OUT;
3137 NEXT;
3138
3139#ifdef USE_COMBINATION_EXPLOSION_CHECK
3140 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
3141 GET_STATE_CHECK_NUM_INC(mem, p);
3142 while (DATA_ENSURE_CHECK1) {
3143 STATE_CHECK_VAL(scv, mem);
3144 if (scv) goto fail;
3145
3146 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3147 n = enclen_approx(encode, s, end);
3148 DATA_ENSURE(n);
3149 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3150 sprev = s;
3151 s += n;
3152 }
3153 MOP_OUT;
3154 NEXT;
3155
3156 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
3157 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
3158
3159 GET_STATE_CHECK_NUM_INC(mem, p);
3160 while (DATA_ENSURE_CHECK1) {
3161 STATE_CHECK_VAL(scv, mem);
3162 if (scv) goto fail;
3163
3164 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3165 n = enclen_approx(encode, s, end);
3166 if (n > 1) {
3167 DATA_ENSURE(n);
3168 sprev = s;
3169 s += n;
3170 }
3171 else {
3172 sprev = s;
3173 s++;
3174 }
3175 }
3176 MOP_OUT;
3177 NEXT;
3178#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3179
3180 CASE(OP_WORD) MOP_IN(OP_WORD);
3181 DATA_ENSURE(1);
3182 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3183 goto fail;
3184
3185 s += enclen(encode, s, end);
3186 MOP_OUT;
3187 NEXT;
3188
3189 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
3190 DATA_ENSURE(1);
3191 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3192 goto fail;
3193
3194 s += enclen(encode, s, end);
3195 MOP_OUT;
3196 NEXT;
3197
3198 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
3199 DATA_ENSURE(1);
3200 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3201 goto fail;
3202
3203 s += enclen(encode, s, end);
3204 MOP_OUT;
3205 NEXT;
3206
3207 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
3208 DATA_ENSURE(1);
3209 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3210 goto fail;
3211
3212 s += enclen(encode, s, end);
3213 MOP_OUT;
3214 NEXT;
3215
3216 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
3217 if (ON_STR_BEGIN(s)) {
3218 DATA_ENSURE(1);
3219 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3220 goto fail;
3221 }
3222 else if (ON_STR_END(s)) {
3223 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
3224 goto fail;
3225 }
3226 else {
3227 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3228 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
3229 goto fail;
3230 }
3231 MOP_OUT;
3232 JUMP;
3233
3234 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
3235 if (ON_STR_BEGIN(s)) {
3236 DATA_ENSURE(1);
3237 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3238 goto fail;
3239 }
3240 else if (ON_STR_END(s)) {
3241 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3242 goto fail;
3243 }
3244 else {
3245 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3246 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3247 goto fail;
3248 }
3249 MOP_OUT;
3250 JUMP;
3251
3252 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
3253 if (ON_STR_BEGIN(s)) {
3254 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3255 goto fail;
3256 }
3257 else if (ON_STR_END(s)) {
3258 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3259 goto fail;
3260 }
3261 else {
3262 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3263 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3264 goto fail;
3265 }
3266 MOP_OUT;
3267 JUMP;
3268
3269 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3270 if (ON_STR_BEGIN(s)) {
3271 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3272 goto fail;
3273 }
3274 else if (ON_STR_END(s)) {
3275 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3276 goto fail;
3277 }
3278 else {
3279 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3280 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3281 goto fail;
3282 }
3283 MOP_OUT;
3284 JUMP;
3285
3286#ifdef USE_WORD_BEGIN_END
3287 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3288 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3289 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3290 MOP_OUT;
3291 JUMP;
3292 }
3293 }
3294 goto fail;
3295 NEXT;
3296
3297 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3298 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3299 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3300 MOP_OUT;
3301 JUMP;
3302 }
3303 }
3304 goto fail;
3305 NEXT;
3306
3307 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3308 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3309 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3310 MOP_OUT;
3311 JUMP;
3312 }
3313 }
3314 goto fail;
3315 NEXT;
3316
3317 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3318 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3319 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3320 MOP_OUT;
3321 JUMP;
3322 }
3323 }
3324 goto fail;
3325 NEXT;
3326#endif
3327
3328 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3329 if (! ON_STR_BEGIN(s)) goto fail;
3330 if (IS_NOTBOS(msa->options)) goto fail;
3331
3332 MOP_OUT;
3333 JUMP;
3334
3335 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3336 if (! ON_STR_END(s)) goto fail;
3337 if (IS_NOTEOS(msa->options)) goto fail;
3338
3339 MOP_OUT;
3340 JUMP;
3341
3342 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3343 if (ON_STR_BEGIN(s)) {
3344 if (IS_NOTBOL(msa->options)) goto fail;
3345 MOP_OUT;
3346 JUMP;
3347 }
3348 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3349#ifdef USE_CRNL_AS_LINE_TERMINATOR
3350 && !(IS_NEWLINE_CRLF(option)
3351 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3352#endif
3353 && !ON_STR_END(s)) {
3354 MOP_OUT;
3355 JUMP;
3356 }
3357 goto fail;
3358 NEXT;
3359
3360 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3361 if (ON_STR_END(s)) {
3362#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3363 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3364#endif
3365 if (IS_NOTEOL(msa->options)) goto fail;
3366 MOP_OUT;
3367 JUMP;
3368#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3369 }
3370#endif
3371 }
3372 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3373 MOP_OUT;
3374 JUMP;
3375 }
3376 goto fail;
3377 NEXT;
3378
3379 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3380 if (ON_STR_END(s)) {
3381#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3382 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3383#endif
3384 if (IS_NOTEOL(msa->options)) goto fail;
3385 MOP_OUT;
3386 JUMP;
3387#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3388 }
3389#endif
3390 }
3391 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3392 UChar* ss = s + enclen(encode, s, end);
3393 if (ON_STR_END(ss)) {
3394 MOP_OUT;
3395 JUMP;
3396 }
3397#ifdef USE_CRNL_AS_LINE_TERMINATOR
3398 else if (IS_NEWLINE_CRLF(option)
3399 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3400 ss += enclen(encode, ss, end);
3401 if (ON_STR_END(ss)) {
3402 MOP_OUT;
3403 JUMP;
3404 }
3405 }
3406#endif
3407 }
3408 goto fail;
3409 NEXT;
3410
3411 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3412 if (s != msa->gpos)
3413 goto fail;
3414
3415 MOP_OUT;
3416 JUMP;
3417
3418 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3419 GET_MEMNUM_INC(mem, p);
3420 STACK_PUSH_MEM_START(mem, s);
3421 MOP_OUT;
3422 JUMP;
3423
3424 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3425 GET_MEMNUM_INC(mem, p);
3426 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3427 mem_end_stk[mem] = INVALID_STACK_INDEX;
3428 MOP_OUT;
3429 JUMP;
3430
3431 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3432 GET_MEMNUM_INC(mem, p);
3433 STACK_PUSH_MEM_END(mem, s);
3434 MOP_OUT;
3435 JUMP;
3436
3437 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3438 GET_MEMNUM_INC(mem, p);
3439 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3440 MOP_OUT;
3441 JUMP;
3442
3443 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3444 pkeep = s;
3445 MOP_OUT;
3446 JUMP;
3447
3448#ifdef USE_SUBEXP_CALL
3449 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3450 GET_MEMNUM_INC(mem, p);
3451 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3452 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3453 STACK_PUSH_MEM_END(mem, s);
3454 MOP_OUT;
3455 JUMP;
3456
3457 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3458 GET_MEMNUM_INC(mem, p);
3459 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3460 STACK_GET_MEM_START(mem, stkp);
3461
3462 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3463 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3464 else
3465 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3466
3467 STACK_PUSH_MEM_END_MARK(mem);
3468 MOP_OUT;
3469 JUMP;
3470#endif
3471
3472 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3473 mem = 1;
3474 goto backref;
3475 NEXT;
3476
3477 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3478 mem = 2;
3479 goto backref;
3480 NEXT;
3481
3482 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3483 GET_MEMNUM_INC(mem, p);
3484 backref:
3485 {
3486 int len;
3487 UChar *pstart, *pend;
3488
3489 /* if you want to remove following line,
3490 you should check in parse and compile time. */
3491 if (mem > num_mem) goto fail;
3492 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3493 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3494
3495 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3496 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3497 else
3498 pstart = (UChar* )((void* )mem_start_stk[mem]);
3499
3500 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3501 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3502 : (UChar* )((void* )mem_end_stk[mem]));
3503 n = pend - pstart;
3504 DATA_ENSURE(n);
3505 sprev = s;
3506 STRING_CMP(pstart, s, n);
3507 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3508 sprev += len;
3509
3510 MOP_OUT;
3511 JUMP;
3512 }
3513
3514 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3515 GET_MEMNUM_INC(mem, p);
3516 {
3517 int len;
3518 UChar *pstart, *pend;
3519
3520 /* if you want to remove following line,
3521 you should check in parse and compile time. */
3522 if (mem > num_mem) goto fail;
3523 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3524 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3525
3526 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3527 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3528 else
3529 pstart = (UChar* )((void* )mem_start_stk[mem]);
3530
3531 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3532 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3533 : (UChar* )((void* )mem_end_stk[mem]));
3534 n = pend - pstart;
3535 DATA_ENSURE(n);
3536 sprev = s;
3537 STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
3538 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3539 sprev += len;
3540
3541 MOP_OUT;
3542 JUMP;
3543 }
3544 NEXT;
3545
3546 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3547 {
3548 int len, is_fail;
3549 UChar *pstart, *pend, *swork;
3550
3551 GET_LENGTH_INC(tlen, p);
3552 for (i = 0; i < tlen; i++) {
3553 GET_MEMNUM_INC(mem, p);
3554
3555 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3556 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3557
3558 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3559 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3560 else
3561 pstart = (UChar* )((void* )mem_start_stk[mem]);
3562
3563 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3564 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3565 : (UChar* )((void* )mem_end_stk[mem]));
3566 n = pend - pstart;
3567 DATA_ENSURE_CONTINUE(n);
3568 sprev = s;
3569 swork = s;
3570 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3571 if (is_fail) continue;
3572 s = swork;
3573 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3574 sprev += len;
3575
3576 p += (SIZE_MEMNUM * (tlen - i - 1));
3577 break; /* success */
3578 }
3579 if (i == tlen) goto fail;
3580 MOP_OUT;
3581 JUMP;
3582 }
3583 NEXT;
3584
3585 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3586 {
3587 int len, is_fail;
3588 UChar *pstart, *pend, *swork;
3589
3590 GET_LENGTH_INC(tlen, p);
3591 for (i = 0; i < tlen; i++) {
3592 GET_MEMNUM_INC(mem, p);
3593
3594 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3595 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3596
3597 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3598 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3599 else
3600 pstart = (UChar* )((void* )mem_start_stk[mem]);
3601
3602 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3603 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3604 : (UChar* )((void* )mem_end_stk[mem]));
3605 n = pend - pstart;
3606 DATA_ENSURE_CONTINUE(n);
3607 sprev = s;
3608 swork = s;
3609 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3610 if (is_fail) continue;
3611 s = swork;
3612 while (sprev + (len = enclen(encode, sprev, end)) < s)
3613 sprev += len;
3614
3615 p += (SIZE_MEMNUM * (tlen - i - 1));
3616 break; /* success */
3617 }
3618 if (i == tlen) goto fail;
3619 MOP_OUT;
3620 JUMP;
3621 }
3622
3623#ifdef USE_BACKREF_WITH_LEVEL
3624 CASE(OP_BACKREF_WITH_LEVEL)
3625 {
3626 int len;
3627 OnigOptionType ic;
3628 LengthType level;
3629
3630 GET_OPTION_INC(ic, p);
3631 GET_LENGTH_INC(level, p);
3632 GET_LENGTH_INC(tlen, p);
3633
3634 sprev = s;
3635 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3636 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3637 while (sprev + (len = enclen(encode, sprev, end)) < s)
3638 sprev += len;
3639
3640 p += (SIZE_MEMNUM * tlen);
3641 }
3642 else
3643 goto fail;
3644
3645 MOP_OUT;
3646 JUMP;
3647 }
3648
3649#endif
3650
3651#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
3652 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3653 GET_OPTION_INC(option, p);
3654 STACK_PUSH_ALT(p, s, sprev, pkeep);
3655 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3656 MOP_OUT;
3657 JUMP;
3658
3659 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3660 GET_OPTION_INC(option, p);
3661 MOP_OUT;
3662 JUMP;
3663#endif
3664
3665 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3666 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3667 STACK_PUSH_NULL_CHECK_START(mem, s);
3668 MOP_OUT;
3669 JUMP;
3670
3671 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3672 {
3673 int isnull;
3674
3675 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3676 STACK_NULL_CHECK(isnull, mem, s);
3677 if (isnull) {
3678#ifdef ONIG_DEBUG_MATCH
3679 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
3680 (int )mem, (uintptr_t )s, s);
3681#endif
3682 null_check_found:
3683 /* empty loop founded, skip next instruction */
3684 switch (*p++) {
3685 case OP_JUMP:
3686 case OP_PUSH:
3687 p += SIZE_RELADDR;
3688 break;
3689 case OP_REPEAT_INC:
3690 case OP_REPEAT_INC_NG:
3691 case OP_REPEAT_INC_SG:
3692 case OP_REPEAT_INC_NG_SG:
3693 p += SIZE_MEMNUM;
3694 break;
3695 default:
3696 goto unexpected_bytecode_error;
3697 break;
3698 }
3699 }
3700 }
3701 MOP_OUT;
3702 JUMP;
3703
3704#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3705 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3706 {
3707 int isnull;
3708
3709 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3710 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
3711 if (isnull) {
3712# ifdef ONIG_DEBUG_MATCH
3713 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
3714 (int )mem, (uintptr_t )s, s);
3715# endif
3716 if (isnull == -1) goto fail;
3717 goto null_check_found;
3718 }
3719 }
3720 MOP_OUT;
3721 JUMP;
3722#endif
3723
3724#ifdef USE_SUBEXP_CALL
3725 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3726 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3727 {
3728 int isnull;
3729
3730 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3731# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3732 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3733# else
3734 STACK_NULL_CHECK_REC(isnull, mem, s);
3735# endif
3736 if (isnull) {
3737# ifdef ONIG_DEBUG_MATCH
3738 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
3739 (int )mem, (uintptr_t )s, s);
3740# endif
3741 if (isnull == -1) goto fail;
3742 goto null_check_found;
3743 }
3744 else {
3745 STACK_PUSH_NULL_CHECK_END(mem);
3746 }
3747 }
3748 MOP_OUT;
3749 JUMP;
3750#endif
3751
3752 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3753 GET_RELADDR_INC(addr, p);
3754 p += addr;
3755 MOP_OUT;
3756 CHECK_INTERRUPT_IN_MATCH_AT;
3757 JUMP;
3758
3759 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3760 GET_RELADDR_INC(addr, p);
3761 CHECK_MATCH_CACHE;
3762 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3763 MOP_OUT;
3764 JUMP;
3765
3766#ifdef USE_COMBINATION_EXPLOSION_CHECK
3767 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3768 GET_STATE_CHECK_NUM_INC(mem, p);
3769 STATE_CHECK_VAL(scv, mem);
3770 if (scv) goto fail;
3771
3772 GET_RELADDR_INC(addr, p);
3773 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3774 MOP_OUT;
3775 JUMP;
3776
3777 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3778 GET_STATE_CHECK_NUM_INC(mem, p);
3779 GET_RELADDR_INC(addr, p);
3780 STATE_CHECK_VAL(scv, mem);
3781 if (scv) {
3782 p += addr;
3783 }
3784 else {
3785 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3786 }
3787 MOP_OUT;
3788 JUMP;
3789
3790 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3791 GET_STATE_CHECK_NUM_INC(mem, p);
3792 STATE_CHECK_VAL(scv, mem);
3793 if (scv) goto fail;
3794
3795 STACK_PUSH_STATE_CHECK(s, mem);
3796 MOP_OUT;
3797 JUMP;
3798#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3799
3800 CASE(OP_POP) MOP_IN(OP_POP);
3801 STACK_POP_ONE;
3802#ifdef USE_MATCH_CACHE
3803 /* We need to increment num_fails here, for invoking a cache optimization correctly, */
3804 /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3805 msa->num_fails++;
3806#endif
3807 MOP_OUT;
3808 JUMP;
3809
3810#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3811 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3812 GET_RELADDR_INC(addr, p);
3813 if (*p == *s && DATA_ENSURE_CHECK1) {
3814 p++;
3815 CHECK_MATCH_CACHE;
3816 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3817 MOP_OUT;
3818 JUMP;
3819 }
3820 p += (addr + 1);
3821 MOP_OUT;
3822 JUMP;
3823#endif
3824
3825 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3826 GET_RELADDR_INC(addr, p);
3827 CHECK_MATCH_CACHE;
3828 if (*p == *s) {
3829 p++;
3830 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3831 MOP_OUT;
3832 JUMP;
3833 }
3834 p++;
3835 INC_NUM_FAILS;
3836 MOP_OUT;
3837 JUMP;
3838
3839 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3840 {
3841 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3842 GET_RELADDR_INC(addr, p);
3843
3844 STACK_ENSURE(1);
3845 repeat_stk[mem] = GET_STACK_INDEX(stk);
3846 STACK_PUSH_REPEAT(mem, p);
3847
3848 if (reg->repeat_range[mem].lower == 0) {
3849 CHECK_MATCH_CACHE;
3850 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3851 }
3852 }
3853 MOP_OUT;
3854 JUMP;
3855
3856 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3857 {
3858 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3859 GET_RELADDR_INC(addr, p);
3860
3861 STACK_ENSURE(1);
3862 repeat_stk[mem] = GET_STACK_INDEX(stk);
3863 STACK_PUSH_REPEAT(mem, p);
3864
3865 if (reg->repeat_range[mem].lower == 0) {
3866 CHECK_MATCH_CACHE;
3867 STACK_PUSH_ALT(p, s, sprev, pkeep);
3868 p += addr;
3869 }
3870 }
3871 MOP_OUT;
3872 JUMP;
3873
3874 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3875 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3876 si = repeat_stk[mem];
3877 stkp = STACK_AT(si);
3878
3879 repeat_inc:
3880 stkp->u.repeat.count++;
3881 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3882 /* end of repeat. Nothing to do. */
3883 }
3884 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3885#ifdef USE_MATCH_CACHE
3886 if (*pbegin == OP_REPEAT_INC) {
3887#undef MATCH_CACHE_HIT
3888#define MATCH_CACHE_HIT stkp->u.repeat.count--;
3889 CHECK_MATCH_CACHE;
3890#undef MATCH_CACHE_HIT
3891#define MATCH_CACHE_HIT ((void) 0)
3892 }
3893#endif
3894 STACK_PUSH_ALT(p, s, sprev, pkeep);
3895 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3896 }
3897 else {
3898 p = stkp->u.repeat.pcode;
3899 }
3900 STACK_PUSH_REPEAT_INC(si);
3901 MOP_OUT;
3902 CHECK_INTERRUPT_IN_MATCH_AT;
3903 JUMP;
3904
3905 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3906 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3907 STACK_GET_REPEAT(mem, stkp);
3908 si = GET_STACK_INDEX(stkp);
3909 goto repeat_inc;
3910 NEXT;
3911
3912 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3913 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3914 si = repeat_stk[mem];
3915 stkp = STACK_AT(si);
3916
3917 repeat_inc_ng:
3918 stkp->u.repeat.count++;
3919 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3920 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3921 UChar* pcode = stkp->u.repeat.pcode;
3922
3923 STACK_PUSH_REPEAT_INC(si);
3924 if (*pbegin == OP_REPEAT_INC_NG) {
3925 CHECK_MATCH_CACHE;
3926 }
3927 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3928 }
3929 else {
3930 p = stkp->u.repeat.pcode;
3931 STACK_PUSH_REPEAT_INC(si);
3932 }
3933 }
3934 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3935 STACK_PUSH_REPEAT_INC(si);
3936 }
3937 MOP_OUT;
3938 CHECK_INTERRUPT_IN_MATCH_AT;
3939 JUMP;
3940
3941 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3942 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3943 STACK_GET_REPEAT(mem, stkp);
3944 si = GET_STACK_INDEX(stkp);
3945 goto repeat_inc_ng;
3946 NEXT;
3947
3948 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3949 STACK_PUSH_POS(s, sprev, pkeep);
3950 MOP_OUT;
3951 JUMP;
3952
3953 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3954 {
3955 STACK_POS_END(stkp);
3956 s = stkp->u.state.pstr;
3957 sprev = stkp->u.state.pstr_prev;
3958 }
3959 MOP_OUT;
3960 JUMP;
3961
3962 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3963 GET_RELADDR_INC(addr, p);
3964 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3965 MOP_OUT;
3966 JUMP;
3967
3968 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3969 STACK_POP_TIL_POS_NOT;
3970 goto fail;
3971 NEXT;
3972
3973 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3974 STACK_PUSH_STOP_BT;
3975 MOP_OUT;
3976 JUMP;
3977
3978 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3979 STACK_STOP_BT_END;
3980 MOP_OUT;
3981 JUMP;
3982
3983 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3984 GET_LENGTH_INC(tlen, p);
3985 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3986 if (IS_NULL(s)) goto fail;
3987 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3988 MOP_OUT;
3989 JUMP;
3990
3991 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3992 GET_RELADDR_INC(addr, p);
3993 GET_LENGTH_INC(tlen, p);
3994 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3995 if (IS_NULL(q)) {
3996 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3997 If you want to change to fail, replace following line. */
3998 p += addr;
3999 /* goto fail; */
4000 }
4001 else {
4002 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
4003 s = q;
4004 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4005 }
4006 MOP_OUT;
4007 JUMP;
4008
4009 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
4010 STACK_POP_TIL_LOOK_BEHIND_NOT;
4011 goto fail;
4012 NEXT;
4013
4014 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
4015 /* Save the absent-start-pos and the original end-pos. */
4016 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
4017 MOP_OUT;
4018 JUMP;
4019
4020 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
4021 {
4022 const UChar* aend = ABSENT_END_POS;
4023 UChar* absent;
4024 UChar* selfp = p - 1;
4025
4026 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
4027 GET_RELADDR_INC(addr, p);
4028#ifdef ONIG_DEBUG_MATCH
4029 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
4030#endif
4031 if ((absent > aend) && (s > absent)) {
4032 /* An empty match occurred in (?~...) at the start point.
4033 * Never match. */
4034 STACK_POP;
4035 goto fail;
4036 }
4037 else if ((s >= aend) && (s > absent)) {
4038 if (s > aend) {
4039 /* Only one (or less) character matched in the last iteration.
4040 * This is not a possible point. */
4041 goto fail;
4042 }
4043 /* All possible points were found. Try matching after (?~...). */
4044 DATA_ENSURE(0);
4045 p += addr;
4046 }
4047 else if (s == end) {
4048 /* At the end of the string, just match with it */
4049 DATA_ENSURE(0);
4050 p += addr;
4051 }
4052 else {
4053 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
4054 n = enclen(encode, s, end);
4055 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
4056 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
4057 STACK_PUSH_ABSENT;
4058 ABSENT_END_POS = aend;
4059 }
4060 }
4061 MOP_OUT;
4062 JUMP;
4063
4064 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
4065 /* The pattern inside (?~...) was matched.
4066 * Set the end-pos temporary and go to next iteration. */
4067 if (sprev < ABSENT_END_POS)
4068 ABSENT_END_POS = sprev;
4069#ifdef ONIG_DEBUG_MATCH
4070 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
4071#endif
4072 STACK_POP_TIL_ABSENT;
4073 goto fail;
4074 NEXT;
4075
4076#ifdef USE_SUBEXP_CALL
4077 CASE(OP_CALL) MOP_IN(OP_CALL);
4078 GET_ABSADDR_INC(addr, p);
4079 STACK_PUSH_CALL_FRAME(p);
4080 p = reg->p + addr;
4081 MOP_OUT;
4082 JUMP;
4083
4084 CASE(OP_RETURN) MOP_IN(OP_RETURN);
4085 STACK_RETURN(p);
4086 STACK_PUSH_RETURN;
4087 MOP_OUT;
4088 JUMP;
4089#endif
4090
4091 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
4092 GET_MEMNUM_INC(mem, p);
4093 GET_RELADDR_INC(addr, p);
4094 if ((mem > num_mem) ||
4095 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
4096 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
4097 p += addr;
4098 }
4099 MOP_OUT;
4100 JUMP;
4101
4102 CASE(OP_FINISH)
4103 goto finish;
4104 NEXT;
4105
4106 CASE(OP_FAIL)
4107 if (0) {
4108 /* fall */
4109 fail:
4110 MOP_OUT;
4111 }
4112 MOP_IN(OP_FAIL);
4113 STACK_POP;
4114 p = stk->u.state.pcode;
4115 s = stk->u.state.pstr;
4116 sprev = stk->u.state.pstr_prev;
4117 pkeep = stk->u.state.pkeep;
4118
4119#ifdef USE_MATCH_CACHE
4120 if (
4121 msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED &&
4122 ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes
4123 ) {
4124 if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) {
4125 msa->match_cache_status = MATCH_CACHE_STATUS_INIT;
4126 OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes);
4127 if (r < 0) goto bytecode_error;
4128 }
4129 if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) {
4130 msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED;
4131 goto fail_match_cache;
4132 }
4133 if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) {
4134 goto fail_match_cache;
4135 }
4136 if (msa->cache_opcodes == NULL) {
4137 msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED;
4138 OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode));
4139 if (cache_opcodes == NULL) {
4140 return ONIGERR_MEMORY;
4141 }
4142 OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points);
4143 if (r < 0) {
4144 if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
4145 else goto bytecode_error;
4146 }
4147 msa->cache_opcodes = cache_opcodes;
4148#ifdef ONIG_DEBUG_MATCH_CACHE
4149 fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes);
4150 fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points);
4151 fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes);
4152 for (int i = 0; i < msa->num_cache_opcodes; i++) {
4153 fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr);
4154 }
4155#endif
4156 }
4157 if (msa->match_cache_buf == NULL) {
4158 size_t length = (end - str) + 1;
4159 size_t num_match_cache_points = (size_t)msa->num_cache_points * length;
4160#ifdef ONIG_DEBUG_MATCH_CACHE
4161 fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length);
4162#endif
4163 /* Overflow check */
4164 if (num_match_cache_points / length != (size_t)msa->num_cache_points) {
4165 return ONIGERR_MEMORY;
4166 }
4167 if (num_match_cache_points >= LONG_MAX_LIMIT) {
4168 return ONIGERR_MEMORY;
4169 }
4170 size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1;
4171 uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t));
4172 if (match_cache_buf == NULL) {
4173 return ONIGERR_MEMORY;
4174 }
4175 xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t));
4176 msa->match_cache_buf = match_cache_buf;
4177 }
4178 }
4179 fail_match_cache:
4180#endif
4181
4182#ifdef USE_COMBINATION_EXPLOSION_CHECK
4183 if (stk->u.state.state_check != 0) {
4184 stk->type = STK_STATE_CHECK_MARK;
4185 stk++;
4186 }
4187#endif
4188
4189 MOP_OUT;
4190 CHECK_INTERRUPT_IN_MATCH_AT;
4191 JUMP;
4192
4193 DEFAULT
4194 goto bytecode_error;
4195 } VM_LOOP_END
4196
4197 finish:
4198 STACK_SAVE;
4199 xfree(xmalloc_base);
4200 return best_len;
4201
4202#ifdef ONIG_DEBUG
4203 stack_error:
4204 STACK_SAVE;
4205 xfree(xmalloc_base);
4206 return ONIGERR_STACK_BUG;
4207#endif
4208
4209 bytecode_error:
4210 STACK_SAVE;
4211 xfree(xmalloc_base);
4212 return ONIGERR_UNDEFINED_BYTECODE;
4213
4214 unexpected_bytecode_error:
4215 STACK_SAVE;
4216 xfree(xmalloc_base);
4217 return ONIGERR_UNEXPECTED_BYTECODE;
4218
4219 timeout:
4220 STACK_SAVE;
4221 xfree(xmalloc_base);
4222 return ONIGERR_TIMEOUT;
4223}
4224
4225
4226static UChar*
4227slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4228 const UChar* text, const UChar* text_end, UChar* text_range)
4229{
4230 UChar *t, *p, *s, *end;
4231
4232 end = (UChar* )text_end;
4233 end -= target_end - target - 1;
4234 if (end > text_range)
4235 end = text_range;
4236
4237 s = (UChar* )text;
4238
4239 if (enc->max_enc_len == enc->min_enc_len) {
4240 int n = enc->max_enc_len;
4241
4242 while (s < end) {
4243 if (*s == *target) {
4244 p = s + 1;
4245 t = target + 1;
4246 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4247 return s;
4248 }
4249 s += n;
4250 }
4251 return (UChar* )NULL;
4252 }
4253 while (s < end) {
4254 if (*s == *target) {
4255 p = s + 1;
4256 t = target + 1;
4257 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4258 return s;
4259 }
4260 s += enclen(enc, s, text_end);
4261 }
4262
4263 return (UChar* )NULL;
4264}
4265
4266static int
4267str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4268 const UChar* t, const UChar* tend,
4269 const UChar* p, const UChar* end)
4270{
4271 int lowlen;
4272 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4273
4274 while (t < tend) {
4275 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4276 q = lowbuf;
4277 while (lowlen > 0) {
4278 if (*t++ != *q++) return 0;
4279 lowlen--;
4280 }
4281 }
4282
4283 return 1;
4284}
4285
4286static UChar*
4287slow_search_ic(OnigEncoding enc, int case_fold_flag,
4288 UChar* target, UChar* target_end,
4289 const UChar* text, const UChar* text_end, UChar* text_range)
4290{
4291 UChar *s, *end;
4292
4293 end = (UChar* )text_end;
4294 end -= target_end - target - 1;
4295 if (end > text_range)
4296 end = text_range;
4297
4298 s = (UChar* )text;
4299
4300 while (s < end) {
4301 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4302 s, text_end))
4303 return s;
4304
4305 s += enclen(enc, s, text_end);
4306 }
4307
4308 return (UChar* )NULL;
4309}
4310
4311static UChar*
4312slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4313 const UChar* text, const UChar* adjust_text,
4314 const UChar* text_end, const UChar* text_start)
4315{
4316 UChar *t, *p, *s;
4317
4318 s = (UChar* )text_end;
4319 s -= (target_end - target);
4320 if (s > text_start)
4321 s = (UChar* )text_start;
4322 else
4323 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4324
4325 while (s >= text) {
4326 if (*s == *target) {
4327 p = s + 1;
4328 t = target + 1;
4329 while (t < target_end) {
4330 if (*t != *p++)
4331 break;
4332 t++;
4333 }
4334 if (t == target_end)
4335 return s;
4336 }
4337 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4338 }
4339
4340 return (UChar* )NULL;
4341}
4342
4343static UChar*
4344slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4345 UChar* target, UChar* target_end,
4346 const UChar* text, const UChar* adjust_text,
4347 const UChar* text_end, const UChar* text_start)
4348{
4349 UChar *s;
4350
4351 s = (UChar* )text_end;
4352 s -= (target_end - target);
4353 if (s > text_start)
4354 s = (UChar* )text_start;
4355 else
4356 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4357
4358 while (s >= text) {
4359 if (str_lower_case_match(enc, case_fold_flag,
4360 target, target_end, s, text_end))
4361 return s;
4362
4363 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4364 }
4365
4366 return (UChar* )NULL;
4367}
4368
4369#ifndef USE_SUNDAY_QUICK_SEARCH
4370/* Boyer-Moore-Horspool search applied to a multibyte string */
4371static UChar*
4372bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4373 const UChar* text, const UChar* text_end,
4374 const UChar* text_range)
4375{
4376 const UChar *s, *se, *t, *p, *end;
4377 const UChar *tail;
4378 ptrdiff_t skip, tlen1;
4379
4380# ifdef ONIG_DEBUG_SEARCH
4381 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4382 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4383# endif
4384
4385 tail = target_end - 1;
4386 tlen1 = tail - target;
4387 end = text_range;
4388 if (end + tlen1 > text_end)
4389 end = text_end - tlen1;
4390
4391 s = text;
4392
4393 if (IS_NULL(reg->int_map)) {
4394 while (s < end) {
4395 p = se = s + tlen1;
4396 t = tail;
4397 while (*p == *t) {
4398 if (t == target) return (UChar* )s;
4399 p--; t--;
4400 }
4401 skip = reg->map[*se];
4402 t = s;
4403 do {
4404 s += enclen(reg->enc, s, end);
4405 } while ((s - t) < skip && s < end);
4406 }
4407 }
4408 else {
4409# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4410 while (s < end) {
4411 p = se = s + tlen1;
4412 t = tail;
4413 while (*p == *t) {
4414 if (t == target) return (UChar* )s;
4415 p--; t--;
4416 }
4417 skip = reg->int_map[*se];
4418 t = s;
4419 do {
4420 s += enclen(reg->enc, s, end);
4421 } while ((s - t) < skip && s < end);
4422 }
4423# endif
4424 }
4425
4426 return (UChar* )NULL;
4427}
4428
4429/* Boyer-Moore-Horspool search */
4430static UChar*
4431bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4432 const UChar* text, const UChar* text_end, const UChar* text_range)
4433{
4434 const UChar *s, *t, *p, *end;
4435 const UChar *tail;
4436
4437# ifdef ONIG_DEBUG_SEARCH
4438 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4439 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4440# endif
4441
4442 end = text_range + (target_end - target) - 1;
4443 if (end > text_end)
4444 end = text_end;
4445
4446 tail = target_end - 1;
4447 s = text + (target_end - target) - 1;
4448 if (IS_NULL(reg->int_map)) {
4449 while (s < end) {
4450 p = s;
4451 t = tail;
4452# ifdef ONIG_DEBUG_SEARCH
4453 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
4454 (intptr_t )(s - text), s);
4455# endif
4456 while (*p == *t) {
4457 if (t == target) return (UChar* )p;
4458 p--; t--;
4459 }
4460 s += reg->map[*s];
4461 }
4462 }
4463 else { /* see int_map[] */
4464# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4465 while (s < end) {
4466 p = s;
4467 t = tail;
4468 while (*p == *t) {
4469 if (t == target) return (UChar* )p;
4470 p--; t--;
4471 }
4472 s += reg->int_map[*s];
4473 }
4474# endif
4475 }
4476 return (UChar* )NULL;
4477}
4478
4479/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
4480static UChar*
4481bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4482 const UChar* text, const UChar* text_end,
4483 const UChar* text_range)
4484{
4485 const UChar *s, *se, *t, *end;
4486 const UChar *tail;
4487 ptrdiff_t skip, tlen1;
4488 OnigEncoding enc = reg->enc;
4489 int case_fold_flag = reg->case_fold_flag;
4490
4491# ifdef ONIG_DEBUG_SEARCH
4492 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4493 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4494# endif
4495
4496 tail = target_end - 1;
4497 tlen1 = tail - target;
4498 end = text_range;
4499 if (end + tlen1 > text_end)
4500 end = text_end - tlen1;
4501
4502 s = text;
4503
4504 if (IS_NULL(reg->int_map)) {
4505 while (s < end) {
4506 se = s + tlen1;
4507 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4508 s, se + 1))
4509 return (UChar* )s;
4510 skip = reg->map[*se];
4511 t = s;
4512 do {
4513 s += enclen(reg->enc, s, end);
4514 } while ((s - t) < skip && s < end);
4515 }
4516 }
4517 else {
4518# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4519 while (s < end) {
4520 se = s + tlen1;
4521 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4522 s, se + 1))
4523 return (UChar* )s;
4524 skip = reg->int_map[*se];
4525 t = s;
4526 do {
4527 s += enclen(reg->enc, s, end);
4528 } while ((s - t) < skip && s < end);
4529 }
4530# endif
4531 }
4532
4533 return (UChar* )NULL;
4534}
4535
4536/* Boyer-Moore-Horspool search (ignore case) */
4537static UChar*
4538bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4539 const UChar* text, const UChar* text_end, const UChar* text_range)
4540{
4541 const UChar *s, *p, *end;
4542 const UChar *tail;
4543 OnigEncoding enc = reg->enc;
4544 int case_fold_flag = reg->case_fold_flag;
4545
4546# ifdef ONIG_DEBUG_SEARCH
4547 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4548 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4549# endif
4550
4551 end = text_range + (target_end - target) - 1;
4552 if (end > text_end)
4553 end = text_end;
4554
4555 tail = target_end - 1;
4556 s = text + (target_end - target) - 1;
4557 if (IS_NULL(reg->int_map)) {
4558 while (s < end) {
4559 p = s - (target_end - target) + 1;
4560 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4561 p, s + 1))
4562 return (UChar* )p;
4563 s += reg->map[*s];
4564 }
4565 }
4566 else { /* see int_map[] */
4567# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4568 while (s < end) {
4569 p = s - (target_end - target) + 1;
4570 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4571 p, s + 1))
4572 return (UChar* )p;
4573 s += reg->int_map[*s];
4574 }
4575# endif
4576 }
4577 return (UChar* )NULL;
4578}
4579
4580#else /* USE_SUNDAY_QUICK_SEARCH */
4581
4582/* Sunday's quick search applied to a multibyte string */
4583static UChar*
4584bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4585 const UChar* text, const UChar* text_end,
4586 const UChar* text_range)
4587{
4588 const UChar *s, *se, *t, *p, *end;
4589 const UChar *tail;
4590 ptrdiff_t skip, tlen1;
4591 OnigEncoding enc = reg->enc;
4592
4593# ifdef ONIG_DEBUG_SEARCH
4594 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4595 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4596# endif
4597
4598 tail = target_end - 1;
4599 tlen1 = tail - target;
4600 end = text_range;
4601 if (end + tlen1 > text_end)
4602 end = text_end - tlen1;
4603
4604 s = text;
4605
4606 if (IS_NULL(reg->int_map)) {
4607 while (s < end) {
4608 p = se = s + tlen1;
4609 t = tail;
4610 while (*p == *t) {
4611 if (t == target) return (UChar* )s;
4612 p--; t--;
4613 }
4614 if (s + 1 >= end) break;
4615 skip = reg->map[se[1]];
4616 t = s;
4617 do {
4618 s += enclen(enc, s, end);
4619 } while ((s - t) < skip && s < end);
4620 }
4621 }
4622 else {
4623# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4624 while (s < end) {
4625 p = se = s + tlen1;
4626 t = tail;
4627 while (*p == *t) {
4628 if (t == target) return (UChar* )s;
4629 p--; t--;
4630 }
4631 if (s + 1 >= end) break;
4632 skip = reg->int_map[se[1]];
4633 t = s;
4634 do {
4635 s += enclen(enc, s, end);
4636 } while ((s - t) < skip && s < end);
4637 }
4638# endif
4639 }
4640
4641 return (UChar* )NULL;
4642}
4643
4644/* Sunday's quick search */
4645static UChar*
4646bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4647 const UChar* text, const UChar* text_end, const UChar* text_range)
4648{
4649 const UChar *s, *t, *p, *end;
4650 const UChar *tail;
4651 ptrdiff_t tlen1;
4652
4653# ifdef ONIG_DEBUG_SEARCH
4654 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4655 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4656# endif
4657
4658 tail = target_end - 1;
4659 tlen1 = tail - target;
4660 end = text_range + tlen1;
4661 if (end > text_end)
4662 end = text_end;
4663
4664 s = text + tlen1;
4665 if (IS_NULL(reg->int_map)) {
4666 while (s < end) {
4667 p = s;
4668 t = tail;
4669 while (*p == *t) {
4670 if (t == target) return (UChar* )p;
4671 p--; t--;
4672 }
4673 if (s + 1 >= end) break;
4674 s += reg->map[s[1]];
4675 }
4676 }
4677 else { /* see int_map[] */
4678# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4679 while (s < end) {
4680 p = s;
4681 t = tail;
4682 while (*p == *t) {
4683 if (t == target) return (UChar* )p;
4684 p--; t--;
4685 }
4686 if (s + 1 >= end) break;
4687 s += reg->int_map[s[1]];
4688 }
4689# endif
4690 }
4691 return (UChar* )NULL;
4692}
4693
4694/* Sunday's quick search applied to a multibyte string (ignore case) */
4695static UChar*
4696bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4697 const UChar* text, const UChar* text_end,
4698 const UChar* text_range)
4699{
4700 const UChar *s, *se, *t, *end;
4701 const UChar *tail;
4702 ptrdiff_t skip, tlen1;
4703 OnigEncoding enc = reg->enc;
4704 int case_fold_flag = reg->case_fold_flag;
4705
4706# ifdef ONIG_DEBUG_SEARCH
4707 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4708 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4709# endif
4710
4711 tail = target_end - 1;
4712 tlen1 = tail - target;
4713 end = text_range;
4714 if (end + tlen1 > text_end)
4715 end = text_end - tlen1;
4716
4717 s = text;
4718
4719 if (IS_NULL(reg->int_map)) {
4720 while (s < end) {
4721 se = s + tlen1;
4722 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4723 s, se + 1))
4724 return (UChar* )s;
4725 if (s + 1 >= end) break;
4726 skip = reg->map[se[1]];
4727 t = s;
4728 do {
4729 s += enclen(enc, s, end);
4730 } while ((s - t) < skip && s < end);
4731 }
4732 }
4733 else {
4734# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4735 while (s < end) {
4736 se = s + tlen1;
4737 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4738 s, se + 1))
4739 return (UChar* )s;
4740 if (s + 1 >= end) break;
4741 skip = reg->int_map[se[1]];
4742 t = s;
4743 do {
4744 s += enclen(enc, s, end);
4745 } while ((s - t) < skip && s < end);
4746 }
4747# endif
4748 }
4749
4750 return (UChar* )NULL;
4751}
4752
4753/* Sunday's quick search (ignore case) */
4754static UChar*
4755bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4756 const UChar* text, const UChar* text_end, const UChar* text_range)
4757{
4758 const UChar *s, *p, *end;
4759 const UChar *tail;
4760 ptrdiff_t tlen1;
4761 OnigEncoding enc = reg->enc;
4762 int case_fold_flag = reg->case_fold_flag;
4763
4764# ifdef ONIG_DEBUG_SEARCH
4765 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4766 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4767# endif
4768
4769 tail = target_end - 1;
4770 tlen1 = tail - target;
4771 end = text_range + tlen1;
4772 if (end > text_end)
4773 end = text_end;
4774
4775 s = text + tlen1;
4776 if (IS_NULL(reg->int_map)) {
4777 while (s < end) {
4778 p = s - tlen1;
4779 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4780 p, s + 1))
4781 return (UChar* )p;
4782 if (s + 1 >= end) break;
4783 s += reg->map[s[1]];
4784 }
4785 }
4786 else { /* see int_map[] */
4787# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4788 while (s < end) {
4789 p = s - tlen1;
4790 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4791 p, s + 1))
4792 return (UChar* )p;
4793 if (s + 1 >= end) break;
4794 s += reg->int_map[s[1]];
4795 }
4796# endif
4797 }
4798 return (UChar* )NULL;
4799}
4800#endif /* USE_SUNDAY_QUICK_SEARCH */
4801
4802#ifdef USE_INT_MAP_BACKWARD
4803static int
4804set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4805 int** skip)
4806{
4807 int i, len;
4808
4809 if (IS_NULL(*skip)) {
4810 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4811 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4812 }
4813
4814 len = (int )(end - s);
4815 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4816 (*skip)[i] = len;
4817
4818 for (i = len - 1; i > 0; i--)
4819 (*skip)[s[i]] = i;
4820
4821 return 0;
4822}
4823
4824static UChar*
4825bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4826 const UChar* text, const UChar* adjust_text,
4827 const UChar* text_end, const UChar* text_start)
4828{
4829 const UChar *s, *t, *p;
4830
4831 s = text_end - (target_end - target);
4832 if (text_start < s)
4833 s = text_start;
4834 else
4835 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4836
4837 while (s >= text) {
4838 p = s;
4839 t = target;
4840 while (t < target_end && *p == *t) {
4841 p++; t++;
4842 }
4843 if (t == target_end)
4844 return (UChar* )s;
4845
4846 s -= reg->int_map_backward[*s];
4847 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4848 }
4849
4850 return (UChar* )NULL;
4851}
4852#endif
4853
4854static UChar*
4855map_search(OnigEncoding enc, UChar map[],
4856 const UChar* text, const UChar* text_range, const UChar* text_end)
4857{
4858 const UChar *s = text;
4859
4860 while (s < text_range) {
4861 if (map[*s]) return (UChar* )s;
4862
4863 s += enclen(enc, s, text_end);
4864 }
4865 return (UChar* )NULL;
4866}
4867
4868static UChar*
4869map_search_backward(OnigEncoding enc, UChar map[],
4870 const UChar* text, const UChar* adjust_text,
4871 const UChar* text_start, const UChar* text_end)
4872{
4873 const UChar *s = text_start;
4874
4875 while (s >= text) {
4876 if (map[*s]) return (UChar* )s;
4877
4878 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4879 }
4880 return (UChar* )NULL;
4881}
4882
4883extern OnigPosition
4884onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4885 OnigOptionType option)
4886{
4887 ptrdiff_t r;
4888 UChar *prev;
4889 OnigMatchArg msa;
4890
4891 MATCH_ARG_INIT(msa, option, region, at, at);
4892#ifdef USE_COMBINATION_EXPLOSION_CHECK
4893 {
4894 ptrdiff_t offset = at - str;
4895 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4896 }
4897#endif
4898
4899 if (region) {
4900 r = onig_region_resize_clear(region, reg->num_mem + 1);
4901 }
4902 else
4903 r = 0;
4904
4905 if (r == 0) {
4906 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4907 r = match_at(reg, str, end,
4908#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4909 end,
4910#endif
4911 at, prev, &msa);
4912 }
4913
4914 MATCH_ARG_FREE(msa);
4915 return r;
4916}
4917
4918static int
4919forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4920 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4921{
4922 UChar *p, *pprev = (UChar* )NULL;
4923 size_t input_len = end - str;
4924
4925#ifdef ONIG_DEBUG_SEARCH
4926 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4927 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4928#endif
4929
4930 if (reg->dmin > input_len) {
4931 return 0;
4932 }
4933
4934 p = s;
4935 if (reg->dmin > 0) {
4936 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4937 p += reg->dmin;
4938 }
4939 else {
4940 UChar *q = p + reg->dmin;
4941
4942 if (q >= end) return 0; /* fail */
4943 while (p < q) p += enclen(reg->enc, p, end);
4944 }
4945 }
4946
4947 retry:
4948 switch (reg->optimize) {
4949 case ONIG_OPTIMIZE_EXACT:
4950 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4951 break;
4952 case ONIG_OPTIMIZE_EXACT_IC:
4953 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4954 reg->exact, reg->exact_end, p, end, range);
4955 break;
4956
4957 case ONIG_OPTIMIZE_EXACT_BM:
4958 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4959 break;
4960
4961 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4962 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4963 break;
4964
4965 case ONIG_OPTIMIZE_EXACT_BM_IC:
4966 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4967 break;
4968
4969 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4970 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4971 break;
4972
4973 case ONIG_OPTIMIZE_MAP:
4974 p = map_search(reg->enc, reg->map, p, range, end);
4975 break;
4976 }
4977
4978 if (p && p < range) {
4979 if (p - reg->dmin < s) {
4980 retry_gate:
4981 pprev = p;
4982 p += enclen(reg->enc, p, end);
4983 goto retry;
4984 }
4985
4986 if (reg->sub_anchor) {
4987 UChar* prev;
4988
4989 switch (reg->sub_anchor) {
4990 case ANCHOR_BEGIN_LINE:
4991 if (!ON_STR_BEGIN(p)) {
4992 prev = onigenc_get_prev_char_head(reg->enc,
4993 (pprev ? pprev : str), p, end);
4994 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
4995 goto retry_gate;
4996 }
4997 break;
4998
4999 case ANCHOR_END_LINE:
5000 if (ON_STR_END(p)) {
5001#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5002 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
5003 (pprev ? pprev : str), p);
5004 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
5005 goto retry_gate;
5006#endif
5007 }
5008 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
5009 goto retry_gate;
5010 break;
5011 }
5012 }
5013
5014 if (reg->dmax == 0) {
5015 *low = p;
5016 if (low_prev) {
5017 if (*low > s)
5018 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
5019 else
5020 *low_prev = onigenc_get_prev_char_head(reg->enc,
5021 (pprev ? pprev : str), p, end);
5022 }
5023 }
5024 else {
5025 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5026 if (p < str + reg->dmax) {
5027 *low = (UChar* )str;
5028 if (low_prev)
5029 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
5030 }
5031 else {
5032 *low = p - reg->dmax;
5033 if (*low > s) {
5034 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
5035 *low, end, (const UChar** )low_prev);
5036 if (low_prev && IS_NULL(*low_prev))
5037 *low_prev = onigenc_get_prev_char_head(reg->enc,
5038 (pprev ? pprev : s), *low, end);
5039 }
5040 else {
5041 if (low_prev)
5042 *low_prev = onigenc_get_prev_char_head(reg->enc,
5043 (pprev ? pprev : str), *low, end);
5044 }
5045 }
5046 }
5047 }
5048 /* no needs to adjust *high, *high is used as range check only */
5049 *high = p - reg->dmin;
5050
5051#ifdef ONIG_DEBUG_SEARCH
5052 fprintf(stderr,
5053 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
5054 *low - str, *high - str, reg->dmin, reg->dmax);
5055#endif
5056 return 1; /* success */
5057 }
5058
5059 return 0; /* fail */
5060}
5061
5062#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
5063
5064static int
5065backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
5066 UChar* s, const UChar* range, UChar* adjrange,
5067 UChar** low, UChar** high)
5068{
5069 UChar *p;
5070 size_t input_len = end - str;
5071
5072 if (reg->dmin > input_len) {
5073 return 0;
5074 }
5075
5076 range += reg->dmin;
5077 p = s;
5078
5079 retry:
5080 switch (reg->optimize) {
5081 case ONIG_OPTIMIZE_EXACT:
5082 exact_method:
5083 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5084 range, adjrange, end, p);
5085 break;
5086
5087 case ONIG_OPTIMIZE_EXACT_IC:
5088 case ONIG_OPTIMIZE_EXACT_BM_IC:
5089 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
5090 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
5091 reg->exact, reg->exact_end,
5092 range, adjrange, end, p);
5093 break;
5094
5095 case ONIG_OPTIMIZE_EXACT_BM:
5096 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
5097#ifdef USE_INT_MAP_BACKWARD
5098 if (IS_NULL(reg->int_map_backward)) {
5099 int r;
5100 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
5101 goto exact_method;
5102
5103 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
5104 &(reg->int_map_backward));
5105 if (r) return r;
5106 }
5107 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
5108 end, p);
5109#else
5110 goto exact_method;
5111#endif
5112 break;
5113
5114 case ONIG_OPTIMIZE_MAP:
5115 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
5116 break;
5117 }
5118
5119 if (p) {
5120 if (reg->sub_anchor) {
5121 UChar* prev;
5122
5123 switch (reg->sub_anchor) {
5124 case ANCHOR_BEGIN_LINE:
5125 if (!ON_STR_BEGIN(p)) {
5126 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
5127 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
5128 p = prev;
5129 goto retry;
5130 }
5131 }
5132 break;
5133
5134 case ANCHOR_END_LINE:
5135 if (ON_STR_END(p)) {
5136#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5137 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5138 if (IS_NULL(prev)) goto fail;
5139 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
5140 p = prev;
5141 goto retry;
5142 }
5143#endif
5144 }
5145 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
5146 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
5147 if (IS_NULL(p)) goto fail;
5148 goto retry;
5149 }
5150 break;
5151 }
5152 }
5153
5154 /* no needs to adjust *high, *high is used as range check only */
5155 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5156 *low = p - reg->dmax;
5157 *high = p - reg->dmin;
5158 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
5159 }
5160
5161#ifdef ONIG_DEBUG_SEARCH
5162 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
5163 (int )(*low - str), (int )(*high - str));
5164#endif
5165 return 1; /* success */
5166 }
5167
5168 fail:
5169#ifdef ONIG_DEBUG_SEARCH
5170 fprintf(stderr, "backward_search_range: fail.\n");
5171#endif
5172 return 0; /* fail */
5173}
5174
5175
5176extern OnigPosition
5177onig_search(regex_t* reg, const UChar* str, const UChar* end,
5178 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
5179{
5180 return onig_search_gpos(reg, str, end, start, start, range, region, option);
5181}
5182
/*
 * Search the string [str, end) for a match of reg.
 *
 * Candidate positions run from `start` toward `range`: forward when
 * range > start, backward otherwise.  global_pos is passed to
 * MATCH_ARG_INIT and is also used to narrow the range for patterns
 * anchored with ANCHOR_BEGIN_POSITION.  When `region` is non-NULL it is
 * resized and cleared before searching.
 *
 * Returns the offset of the match start from str (s - str) on success,
 * ONIG_MISMATCH when nothing matches, ONIGERR_TIMEOUT when match_at()
 * reports a timeout, or another error value produced by
 * onig_region_resize_clear() / match_at().
 */
5183extern OnigPosition
5184onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
5185 const UChar* global_pos,
5186 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
5187{
5188 ptrdiff_t r;
5189 UChar *s, *prev;
5190 OnigMatchArg msa;
5191#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5192 const UChar *orig_start = start;
5193 const UChar *orig_range = range;
5194#endif
5195
5196#ifdef ONIG_DEBUG_SEARCH
5197 fprintf(stderr,
5198 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
5199 (uintptr_t )str, str, end - str, start - str, range - str);
5200#endif
5201
/* msa is not initialized yet, so early exits use the *_no_msa labels,
   which skip MATCH_ARG_FREE. */
5202 if (region) {
5203 r = onig_region_resize_clear(region, reg->num_mem + 1);
5204 if (r) goto finish_no_msa;
5205 }
5206
5207 if (start > end || start < str) goto mismatch_no_msa;
5208
5209
/* MATCH_AND_RETURN_CHECK runs match_at() at position s and dispatches on
   the result: ONIG_MISMATCH falls through to the caller's loop,
   ONIGERR_TIMEOUT jumps to `timeout`, r >= 0 jumps to `match` (except
   that with USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE and the FIND_LONGEST
   option the scan continues), and any other negative value jumps to
   `finish`.  The macro argument is only forwarded to match_at() when
   USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined. */
5210#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5211# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5212# define MATCH_AND_RETURN_CHECK(upper_range) \
5213 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5214 switch (r) { \
5215 case ONIG_MISMATCH: \
5216 break; \
5217 case ONIGERR_TIMEOUT: \
5218 goto timeout; \
5219 default: \
5220 if (r >= 0) { \
5221 if (! IS_FIND_LONGEST(reg->options)) { \
5222 goto match; \
5223 }\
5224 }\
5225 else goto finish; /* error */ \
5226 }
5227# else
5228# define MATCH_AND_RETURN_CHECK(upper_range) \
5229 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5230 switch (r) { \
5231 case ONIG_MISMATCH: \
5232 break; \
5233 case ONIGERR_TIMEOUT: \
5234 goto timeout; \
5235 default: \
5236 if (r >= 0) { \
5237 goto match; \
5238 }\
5239 else goto finish; /* error */ \
5240 }
5241# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5242#else
5243# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5244# define MATCH_AND_RETURN_CHECK(none) \
5245 r = match_at(reg, str, end, s, prev, &msa);\
5246 switch (r) { \
5247 case ONIG_MISMATCH: \
5248 break; \
5249 case ONIGERR_TIMEOUT: \
5250 goto timeout; \
5251 default: \
5252 if (r >= 0) { \
5253 if (! IS_FIND_LONGEST(reg->options)) { \
5254 goto match; \
5255 } \
5256 } \
5257 else goto finish; /* error */ \
5258 }
5259# else
5260# define MATCH_AND_RETURN_CHECK(none) \
5261 r = match_at(reg, str, end, s, prev, &msa);\
5262 switch (r) { \
5263 case ONIG_MISMATCH: \
5264 break; \
5265 case ONIGERR_TIMEOUT: \
5266 goto timeout; \
5267 default: \
5268 if (r >= 0) { \
5269 goto match; \
5270 } \
5271 else goto finish; /* error */ \
5272 }
5273# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5274#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
5275
5276
5277 /* anchor optimize: resume search range */
5278 if (reg->anchor != 0 && str < end) {
5279 UChar *min_semi_end, *max_semi_end;
5280
5281 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
5282 /* search start-position only */
5283 begin_position:
5284 if (range > start)
5285 {
5286 if (global_pos > start)
5287 {
5288 if (global_pos < range)
5289 range = global_pos + 1;
5290 }
5291 else
5292 range = start + 1;
5293 }
5294 else
5295 range = start;
5296 }
5297 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
5298 /* search str-position only */
5299 if (range > start) {
5300 if (start != str) goto mismatch_no_msa;
5301 range = str + 1;
5302 }
5303 else {
5304 if (range <= str) {
5305 start = str;
5306 range = str;
5307 }
5308 else
5309 goto mismatch_no_msa;
5310 }
5311 }
5312 else if (reg->anchor & ANCHOR_END_BUF) {
5313 min_semi_end = max_semi_end = (UChar* )end;
5314
/* Shared with the ANCHOR_SEMI_END_BUF case below: clip start/range so
   that the distance to the (semi-)end stays within
   [anchor_dmin, anchor_dmax]. */
5315 end_buf:
5316 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5317 goto mismatch_no_msa;
5318
5319 if (range > start) {
5320 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5321 start = min_semi_end - reg->anchor_dmax;
5322 if (start < end)
5323 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5324 }
5325 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5326 range = max_semi_end - reg->anchor_dmin + 1;
5327 }
5328
5329 if (start > range) goto mismatch_no_msa;
5330 /* If start == range, match with empty at end.
5331 Backward search is used. */
5332 }
5333 else {
5334 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5335 range = min_semi_end - reg->anchor_dmax;
5336 }
5337 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5338 start = max_semi_end - reg->anchor_dmin;
5339 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5340 }
5341 if (range > start) goto mismatch_no_msa;
5342 }
5343 }
5344 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5345 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5346
5347 max_semi_end = (UChar* )end;
5348 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5349 min_semi_end = pre_end;
5350
5351#ifdef USE_CRNL_AS_LINE_TERMINATOR
5352 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5353 if (IS_NOT_NULL(pre_end) &&
5354 IS_NEWLINE_CRLF(reg->options) &&
5355 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5356 min_semi_end = pre_end;
5357 }
5358#endif
5359 if (min_semi_end > str && start <= min_semi_end) {
5360 goto end_buf;
5361 }
5362 }
5363 else {
5364 min_semi_end = (UChar* )end;
5365 goto end_buf;
5366 }
5367 }
5368 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5369 goto begin_position;
5370 }
5371 }
5372 else if (str == end) { /* empty string */
5373 static const UChar address_for_empty_string[] = "";
5374
5375#ifdef ONIG_DEBUG_SEARCH
5376 fprintf(stderr, "onig_search: empty string.\n");
5377#endif
5378
/* Only attempt a match on the empty string when threshold_len is 0;
   str/start/end are redirected to a static empty buffer first. */
5379 if (reg->threshold_len == 0) {
5380 start = end = str = address_for_empty_string;
5381 s = (UChar* )start;
5382 prev = (UChar* )NULL;
5383
5384 MATCH_ARG_INIT(msa, option, region, start, start);
5385#ifdef USE_COMBINATION_EXPLOSION_CHECK
5386 msa.state_check_buff = (void* )0;
5387 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
5388#endif
5389 MATCH_AND_RETURN_CHECK(end);
5390 goto mismatch;
5391 }
5392 goto mismatch_no_msa;
5393 }
5394
5395#ifdef ONIG_DEBUG_SEARCH
5396 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5397 (int )(end - str), (int )(start - str), (int )(range - str));
5398#endif
5399
/* From here on msa is initialized, so exits go through the labels that
   call MATCH_ARG_FREE (mismatch / finish / match / timeout). */
5400 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5401#ifdef USE_COMBINATION_EXPLOSION_CHECK
5402 {
5403 ptrdiff_t offset = (MIN(start, range) - str);
5404 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5405 }
5406#endif
5407
5408 s = (UChar* )start;
5409 if (range > start) { /* forward search */
5410 if (s > str)
5411 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5412 else
5413 prev = (UChar* )NULL;
5414
5415 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5416 UChar *sch_range, *low, *high, *low_prev;
5417
/* sch_range bounds the pre-scan: range extended by dmax, clamped to end
   (or end itself when dmax is infinite). */
5418 sch_range = (UChar* )range;
5419 if (reg->dmax != 0) {
5420 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5421 sch_range = (UChar* )end;
5422 else {
5423 sch_range += reg->dmax;
5424 if (sch_range > end) sch_range = (UChar* )end;
5425 }
5426 }
5427
5428 if ((end - start) < reg->threshold_len)
5429 goto mismatch;
5430
/* Finite dmax: each pre-scan hit yields a [low, high] window of start
   positions, and only those positions are tried with match_at(). */
5431 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5432 do {
5433 if (! forward_search_range(reg, str, end, s, sch_range,
5434 &low, &high, &low_prev)) goto mismatch;
5435 if (s < low) {
5436 s = low;
5437 prev = low_prev;
5438 }
5439 while (s <= high) {
5440 MATCH_AND_RETURN_CHECK(orig_range);
5441 prev = s;
5442 s += enclen(reg->enc, s, end);
5443 }
5444 } while (s < range);
5445 goto mismatch;
5446 }
5447 else { /* check only. */
5448 if (! forward_search_range(reg, str, end, s, sch_range,
5449 &low, &high, (UChar** )NULL)) goto mismatch;
5450
5451 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5452 do {
5453 MATCH_AND_RETURN_CHECK(orig_range);
5454 prev = s;
5455 s += enclen(reg->enc, s, end);
5456
/* Unless the pattern has a look-behind or negative look-ahead anchor
   flag, skip ahead until the previous character is a newline. */
5457 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5458 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5459 && s < range) {
5460 prev = s;
5461 s += enclen(reg->enc, s, end);
5462 }
5463 }
5464 } while (s < range);
5465 goto mismatch;
5466 }
5467 }
5468 }
5469
/* Plain forward scan: try every character position in [s, range). */
5470 do {
5471 MATCH_AND_RETURN_CHECK(orig_range);
5472 prev = s;
5473 s += enclen(reg->enc, s, end);
5474 } while (s < range);
5475
5476 if (s == range) { /* because empty match with /$/. */
5477 MATCH_AND_RETURN_CHECK(orig_range);
5478 }
5479 }
5480 else { /* backward search */
5481 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5482 UChar *low, *high, *adjrange, *sch_start;
5483
5484 if (range < end)
5485 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5486 else
5487 adjrange = (UChar* )end;
5488
5489 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5490 (end - range) >= reg->threshold_len) {
5491 do {
5492 sch_start = s + reg->dmax;
5493 if (sch_start > end) sch_start = (UChar* )end;
5494 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5495 &low, &high) <= 0)
5496 goto mismatch;
5497
5498 if (s > high)
5499 s = high;
5500
5501 while (s >= low) {
5502 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5503 MATCH_AND_RETURN_CHECK(orig_start);
5504 s = prev;
5505 }
5506 } while (s >= range);
5507 goto mismatch;
5508 }
5509 else { /* check only. */
5510 if ((end - range) < reg->threshold_len) goto mismatch;
5511
5512 sch_start = s;
5513 if (reg->dmax != 0) {
5514 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5515 sch_start = (UChar* )end;
5516 else {
5517 sch_start += reg->dmax;
5518 if (sch_start > end) sch_start = (UChar* )end;
5519 else
5520 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5521 start, sch_start, end);
5522 }
5523 }
5524 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5525 &low, &high) <= 0) goto mismatch;
5526 }
5527 }
5528
/* Plain backward scan: try every character position from s down to range. */
5529 do {
5530 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5531 MATCH_AND_RETURN_CHECK(orig_start);
5532 s = prev;
5533 } while (s >= range);
5534 }
5535
5536 mismatch:
5537#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5538 if (IS_FIND_LONGEST(reg->options)) {
5539 if (msa.best_len >= 0) {
5540 s = msa.best_s;
5541 goto match;
5542 }
5543 }
5544#endif
5545 r = ONIG_MISMATCH;
5546
5547 finish:
5548 MATCH_ARG_FREE(msa);
5549
5550 /* If result is mismatch and no FIND_NOT_EMPTY option,
5551 then the region is not set in match_at(). */
5552 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5553 onig_region_clear(region);
5554 }
5555
5556#ifdef ONIG_DEBUG
5557 if (r != ONIG_MISMATCH)
5558 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5559#endif
5560 return r;
5561
5562 mismatch_no_msa:
5563 r = ONIG_MISMATCH;
5564 finish_no_msa:
5565#ifdef ONIG_DEBUG
5566 if (r != ONIG_MISMATCH)
5567 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5568#endif
5569 return r;
5570
5571 match:
5572 MATCH_ARG_FREE(msa);
5573 return s - str;
5574
5575timeout:
5576 MATCH_ARG_FREE(msa);
5577 return ONIGERR_TIMEOUT;
5578}
5579
5580extern OnigPosition
5581onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5582 OnigRegion* region, OnigOptionType option,
5583 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5584 void* callback_arg)
5585{
5586 OnigPosition r;
5587 OnigPosition n;
5588 int rs;
5589 const UChar* start;
5590
5591 n = 0;
5592 start = str;
5593 while (1) {
5594 r = onig_search(reg, str, end, start, end, region, option);
5595 if (r >= 0) {
5596 rs = scan_callback(n, r, region, callback_arg);
5597 n++;
5598 if (rs != 0)
5599 return rs;
5600
5601 if (region->end[0] == start - str) {
5602 if (start >= end) break;
5603 start += enclen(reg->enc, start, end);
5604 }
5605 else
5606 start = str + region->end[0];
5607
5608 if (start > end)
5609 break;
5610 }
5611 else if (r == ONIG_MISMATCH) {
5612 break;
5613 }
5614 else { /* error */
5615 return r;
5616 }
5617 }
5618
5619 return n;
5620}
5621
5622extern OnigEncoding
5623onig_get_encoding(const regex_t* reg)
5624{
5625 return reg->enc;
5626}
5627
5628extern OnigOptionType
5629onig_get_options(const regex_t* reg)
5630{
5631 return reg->options;
5632}
5633
5634extern OnigCaseFoldType
5635onig_get_case_fold_flag(const regex_t* reg)
5636{
5637 return reg->case_fold_flag;
5638}
5639
5640extern const OnigSyntaxType*
5641onig_get_syntax(const regex_t* reg)
5642{
5643 return reg->syntax;
5644}
5645
5646extern int
5647onig_number_of_captures(const regex_t* reg)
5648{
5649 return reg->num_mem;
5650}
5651
5652extern int
5653onig_number_of_capture_histories(const regex_t* reg)
5654{
5655#ifdef USE_CAPTURE_HISTORY
5656 int i, n;
5657
5658 n = 0;
5659 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5660 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5661 n++;
5662 }
5663 return n;
5664#else
5665 return 0;
5666#endif
5667}
5668
5669extern void
5670onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
5671{
5672 *to = *from;
5673}
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
Definition defines.h:89
int len
Length of the buffer.
Definition io.h:8
Definition win32.h:701