Ruby 3.5.0dev (2025-05-22 revision 7b10660974dcdd7882a48a62cfc2a32c4a17aa85)
regexec.c (7b10660974dcdd7882a48a62cfc2a32c4a17aa85)
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
33#ifdef RUBY
34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35#else
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37#endif
38
39#ifndef USE_TOKEN_THREADED_VM
40# ifdef __GNUC__
41# define USE_TOKEN_THREADED_VM 1
42# else
43# define USE_TOKEN_THREADED_VM 0
44# endif
45#endif
46
47#ifdef RUBY
48# define ENC_DUMMY_FLAG (1<<24)
49static inline int
50rb_enc_asciicompat(OnigEncoding enc)
51{
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
53}
54# undef ONIGENC_IS_MBC_ASCII_WORD
55# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59#endif /* RUBY */
60
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
62# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
65# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67static int
68is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
70{
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
79 }
80 else
81 return 1;
82 }
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
92 }
93 }
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
96 }
97}
98#else /* USE_CRNL_AS_LINE_TERMINATOR */
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
129static void
130history_tree_free(OnigCaptureTreeNode* node)
131{
132 history_tree_clear(node);
133 xfree(node);
134}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
227extern OnigCaptureTreeNode*
228onig_get_capture_tree(OnigRegion* region)
229{
230 return region->history_root;
231}
232#endif /* USE_CAPTURE_HISTORY */
233
234#ifdef USE_MATCH_CACHE
235
236/*
237Glossary for "match cache"
238
239"match cache" or "match cache optimization"
240The `Regexp#match` optimization by using a cache.
241
242"cache opcode"
243A cacheable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc).
244It is corresponding to some cache points.
245
246"cache point"
247A cacheable point on matching.
248Usually, one-to-one corresponding between a cache opcode and a cache point exists,
249but cache opcodes between `OP_REPEAT` and `OP_REPEAT_INC` have some corresponding
250cache points depending on repetition counts.
251
252"match cache point"
253A pair of a cache point and a position on an input string.
254We encode a match cache point to an integer value by the following equation:
255"match cache point" = "position on input string" * "total number of cache points" + "cache point"
256
257"match cache buffer"
258A bit-array for memoizing (recording) match cache points once backtracked.
259*/
260
261static OnigPosition count_num_cache_opcodes_inner(
262 const regex_t* reg,
263 MemNumType current_repeat_mem, int lookaround_nesting,
264 UChar** pp, long* num_cache_opcodes_ptr
265)
266{
267 UChar* p = *pp;
268 UChar* pend = reg->p + reg->used;
269 LengthType len;
270 MemNumType repeat_mem;
271 OnigEncoding enc = reg->enc;
272 long num_cache_opcodes = *num_cache_opcodes_ptr;
273 OnigPosition result;
274
275 while (p < pend) {
276 switch (*p++) {
277 case OP_FINISH:
278 case OP_END:
279 break;
280
281 case OP_EXACT1: p++; break;
282 case OP_EXACT2: p += 2; break;
283 case OP_EXACT3: p += 3; break;
284 case OP_EXACT4: p += 4; break;
285 case OP_EXACT5: p += 5; break;
286 case OP_EXACTN:
287 GET_LENGTH_INC(len, p); p += len; break;
288 case OP_EXACTMB2N1: p += 2; break;
289 case OP_EXACTMB2N2: p += 4; break;
290 case OP_EXACTMB2N3: p += 6; break;
291 case OP_EXACTMB2N:
292 GET_LENGTH_INC(len, p); p += len * 2; break;
293 case OP_EXACTMB3N:
294 GET_LENGTH_INC(len, p); p += len * 3; break;
295 case OP_EXACTMBN:
296 {
297 int mb_len;
298 GET_LENGTH_INC(mb_len, p);
299 GET_LENGTH_INC(len, p);
300 p += mb_len * len;
301 }
302 break;
303
304 case OP_EXACT1_IC:
305 len = enclen(enc, p, pend); p += len; break;
306 case OP_EXACTN_IC:
307 GET_LENGTH_INC(len, p); p += len; break;
308
309 case OP_CCLASS:
310 case OP_CCLASS_NOT:
311 p += SIZE_BITSET; break;
312 case OP_CCLASS_MB:
313 case OP_CCLASS_MB_NOT:
314 GET_LENGTH_INC(len, p); p += len; break;
315 case OP_CCLASS_MIX:
316 case OP_CCLASS_MIX_NOT:
317 p += SIZE_BITSET;
318 GET_LENGTH_INC(len, p);
319 p += len;
320 break;
321
322 case OP_ANYCHAR:
323 case OP_ANYCHAR_ML:
324 break;
325 case OP_ANYCHAR_STAR:
326 case OP_ANYCHAR_ML_STAR:
327 num_cache_opcodes++; break;
328 case OP_ANYCHAR_STAR_PEEK_NEXT:
329 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
330 p++; num_cache_opcodes++; break;
331
332 case OP_WORD:
333 case OP_NOT_WORD:
334 case OP_WORD_BOUND:
335 case OP_NOT_WORD_BOUND:
336 case OP_WORD_BEGIN:
337 case OP_WORD_END:
338 break;
339
340 case OP_ASCII_WORD:
341 case OP_NOT_ASCII_WORD:
342 case OP_ASCII_WORD_BOUND:
343 case OP_NOT_ASCII_WORD_BOUND:
344 case OP_ASCII_WORD_BEGIN:
345 case OP_ASCII_WORD_END:
346 break;
347
348 case OP_BEGIN_BUF:
349 case OP_END_BUF:
350 case OP_BEGIN_LINE:
351 case OP_END_LINE:
352 case OP_SEMI_END_BUF:
353 case OP_BEGIN_POSITION:
354 break;
355
356 case OP_BACKREF1:
357 case OP_BACKREF2:
358 case OP_BACKREFN:
359 case OP_BACKREFN_IC:
360 case OP_BACKREF_MULTI:
361 case OP_BACKREF_MULTI_IC:
362 case OP_BACKREF_WITH_LEVEL:
363 goto impossible;
364
365 case OP_MEMORY_START:
366 case OP_MEMORY_START_PUSH:
367 case OP_MEMORY_END_PUSH:
368 case OP_MEMORY_END_PUSH_REC:
369 case OP_MEMORY_END:
370 case OP_MEMORY_END_REC:
371 p += SIZE_MEMNUM;
372 // A memory (capture) in look-around is found.
373 if (lookaround_nesting != 0) {
374 goto impossible;
375 }
376 break;
377
378 case OP_KEEP:
379 break;
380
381 case OP_FAIL:
382 break;
383 case OP_JUMP:
384 p += SIZE_RELADDR;
385 break;
386 case OP_PUSH:
387 p += SIZE_RELADDR;
388 num_cache_opcodes++;
389 break;
390 case OP_POP:
391 break;
392 case OP_PUSH_OR_JUMP_EXACT1:
393 case OP_PUSH_IF_PEEK_NEXT:
394 p += SIZE_RELADDR + 1; num_cache_opcodes++; break;
395 case OP_REPEAT:
396 case OP_REPEAT_NG:
397 if (current_repeat_mem != -1) {
398 // A nested OP_REPEAT is not yet supported.
399 goto impossible;
400 }
401 GET_MEMNUM_INC(repeat_mem, p);
402 p += SIZE_RELADDR;
403 if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
404 long dummy_num_cache_opcodes = 0;
405 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes);
406 if (result < 0 || dummy_num_cache_opcodes < 0) {
407 goto fail;
408 }
409 } else {
410 if (reg->repeat_range[repeat_mem].lower == 0) {
411 num_cache_opcodes++;
412 }
413 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes);
414 if (result < 0 || num_cache_opcodes < 0) {
415 goto fail;
416 }
417 OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
418 if (repeat_range->lower < repeat_range->upper) {
419 num_cache_opcodes++;
420 }
421 }
422 break;
423 case OP_REPEAT_INC:
424 case OP_REPEAT_INC_NG:
425 GET_MEMNUM_INC(repeat_mem, p);
426 if (repeat_mem != current_repeat_mem) {
427 // A lone or invalid OP_REPEAT_INC is found.
428 goto impossible;
429 }
430 goto exit;
431 case OP_REPEAT_INC_SG:
432 case OP_REPEAT_INC_NG_SG:
433 goto impossible;
434 case OP_NULL_CHECK_START:
435 p += SIZE_MEMNUM;
436 break;
437 case OP_NULL_CHECK_END:
438 case OP_NULL_CHECK_END_MEMST_PUSH:
439 p += SIZE_MEMNUM;
440 break;
441 case OP_NULL_CHECK_END_MEMST:
442 p += SIZE_MEMNUM;
443 break;
444
445 case OP_PUSH_POS:
446 if (lookaround_nesting < 0) {
447 // A look-around nested in a atomic grouping is found.
448 goto impossible;
449 }
450 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
451 if (result < 0 || num_cache_opcodes < 0) {
452 goto fail;
453 }
454 break;
455 case OP_PUSH_POS_NOT:
456 if (lookaround_nesting < 0) {
457 // A look-around nested in a atomic grouping is found.
458 goto impossible;
459 }
460 p += SIZE_RELADDR;
461 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
462 if (result < 0 || num_cache_opcodes < 0) {
463 goto fail;
464 }
465 break;
466 case OP_PUSH_LOOK_BEHIND_NOT:
467 if (lookaround_nesting < 0) {
468 // A look-around nested in a atomic grouping is found.
469 goto impossible;
470 }
471 p += SIZE_RELADDR;
472 p += SIZE_LENGTH;
473 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
474 if (result < 0 || num_cache_opcodes < 0) {
475 goto fail;
476 }
477 break;
478 case OP_PUSH_STOP_BT:
479 if (lookaround_nesting != 0) {
480 // A nested atomic grouping is found.
481 goto impossible;
482 }
483 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes);
484 if (result < 0 || num_cache_opcodes < 0) {
485 goto fail;
486 }
487 break;
488 case OP_POP_POS:
489 case OP_FAIL_POS:
490 case OP_FAIL_LOOK_BEHIND_NOT:
491 case OP_POP_STOP_BT:
492 goto exit;
493 case OP_LOOK_BEHIND:
494 p += SIZE_LENGTH;
495 break;
496
497 case OP_PUSH_ABSENT_POS:
498 case OP_ABSENT_END:
499 case OP_ABSENT:
500 goto impossible;
501
502 case OP_CALL:
503 case OP_RETURN:
504 goto impossible;
505
506 case OP_CONDITION:
507 goto impossible;
508
509 case OP_STATE_CHECK_PUSH:
510 case OP_STATE_CHECK_PUSH_OR_JUMP:
511 case OP_STATE_CHECK:
512 case OP_STATE_CHECK_ANYCHAR_STAR:
513 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
514 goto impossible;
515
516 case OP_SET_OPTION_PUSH:
517 case OP_SET_OPTION:
518 p += SIZE_OPTION;
519 break;
520
521 default:
522 goto bytecode_error;
523 }
524 }
525
526exit:
527 *pp = p;
528 *num_cache_opcodes_ptr = num_cache_opcodes;
529 return 0;
530
531fail:
532 *num_cache_opcodes_ptr = num_cache_opcodes;
533 return result;
534
535impossible:
536 *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE;
537 return 0;
538
539bytecode_error:
540 return ONIGERR_UNDEFINED_BYTECODE;
541}
542
543/* count the total number of cache opcodes for allocating a match cache buffer. */
544static OnigPosition
545count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
546{
547 UChar* p = reg->p;
548 *num_cache_opcodes_ptr = 0;
549 OnigPosition result = count_num_cache_opcodes_inner(reg, -1, 0, &p, num_cache_opcodes_ptr);
550 if (result == 0 && *num_cache_opcodes_ptr >= 0 && p != reg->p + reg->used) {
551 return ONIGERR_UNDEFINED_BYTECODE;
552 }
553
554 return result;
555}
556
557static OnigPosition
558init_cache_opcodes_inner(
559 const regex_t* reg,
560 MemNumType current_repeat_mem, int lookaround_nesting,
561 OnigCacheOpcode** cache_opcodes_ptr, UChar** pp, long* num_cache_points_ptr
562)
563{
564 UChar* p = *pp;
565 UChar* pend = reg->p + reg->used;
566 UChar* pbegin;
567 LengthType len;
568 MemNumType repeat_mem;
569 OnigEncoding enc = reg->enc;
570 long cache_point = *num_cache_points_ptr;
571 OnigCacheOpcode *cache_opcodes = *cache_opcodes_ptr;
572 OnigPosition result;
573
574# define INC_CACHE_OPCODES if (cache_opcodes != NULL) {\
575 cache_opcodes->addr = pbegin;\
576 cache_opcodes->cache_point = cache_point;\
577 cache_opcodes->outer_repeat_mem = current_repeat_mem;\
578 cache_opcodes->num_cache_points_at_outer_repeat = 0;\
579 cache_opcodes->num_cache_points_in_outer_repeat = 0;\
580 cache_opcodes->lookaround_nesting = lookaround_nesting;\
581 cache_opcodes->match_addr = NULL;\
582 cache_point += lookaround_nesting != 0 ? 2 : 1;\
583 cache_opcodes++;\
584 }
585
586 while (p < pend) {
587 pbegin = p;
588 switch (*p++) {
589 case OP_FINISH:
590 case OP_END:
591 break;
592
593 case OP_EXACT1: p++; break;
594 case OP_EXACT2: p += 2; break;
595 case OP_EXACT3: p += 3; break;
596 case OP_EXACT4: p += 4; break;
597 case OP_EXACT5: p += 5; break;
598 case OP_EXACTN:
599 GET_LENGTH_INC(len, p); p += len; break;
600 case OP_EXACTMB2N1: p += 2; break;
601 case OP_EXACTMB2N2: p += 4; break;
602 case OP_EXACTMB2N3: p += 6; break;
603 case OP_EXACTMB2N:
604 GET_LENGTH_INC(len, p); p += len * 2; break;
605 case OP_EXACTMB3N:
606 GET_LENGTH_INC(len, p); p += len * 3; break;
607 case OP_EXACTMBN:
608 {
609 int mb_len;
610 GET_LENGTH_INC(mb_len, p);
611 GET_LENGTH_INC(len, p);
612 p += mb_len * len;
613 }
614 break;
615
616 case OP_EXACT1_IC:
617 len = enclen(enc, p, pend); p += len; break;
618 case OP_EXACTN_IC:
619 GET_LENGTH_INC(len, p); p += len; break;
620
621 case OP_CCLASS:
622 case OP_CCLASS_NOT:
623 p += SIZE_BITSET; break;
624 case OP_CCLASS_MB:
625 case OP_CCLASS_MB_NOT:
626 GET_LENGTH_INC(len, p); p += len; break;
627 case OP_CCLASS_MIX:
628 case OP_CCLASS_MIX_NOT:
629 p += SIZE_BITSET;
630 GET_LENGTH_INC(len, p);
631 p += len;
632 break;
633
634 case OP_ANYCHAR:
635 case OP_ANYCHAR_ML:
636 break;
637 case OP_ANYCHAR_STAR:
638 case OP_ANYCHAR_ML_STAR:
639 INC_CACHE_OPCODES;
640 break;
641 case OP_ANYCHAR_STAR_PEEK_NEXT:
642 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
643 p++;
644 INC_CACHE_OPCODES;
645 break;
646
647 case OP_WORD:
648 case OP_NOT_WORD:
649 case OP_WORD_BOUND:
650 case OP_NOT_WORD_BOUND:
651 case OP_WORD_BEGIN:
652 case OP_WORD_END:
653 break;
654
655 case OP_ASCII_WORD:
656 case OP_NOT_ASCII_WORD:
657 case OP_ASCII_WORD_BOUND:
658 case OP_NOT_ASCII_WORD_BOUND:
659 case OP_ASCII_WORD_BEGIN:
660 case OP_ASCII_WORD_END:
661 break;
662
663 case OP_BEGIN_BUF:
664 case OP_END_BUF:
665 case OP_BEGIN_LINE:
666 case OP_END_LINE:
667 case OP_SEMI_END_BUF:
668 case OP_BEGIN_POSITION:
669 break;
670
671 case OP_BACKREF1:
672 case OP_BACKREF2:
673 case OP_BACKREFN:
674 case OP_BACKREFN_IC:
675 case OP_BACKREF_MULTI:
676 case OP_BACKREF_MULTI_IC:
677 case OP_BACKREF_WITH_LEVEL:
678 goto unexpected_bytecode_error;
679
680 case OP_MEMORY_START:
681 case OP_MEMORY_START_PUSH:
682 case OP_MEMORY_END_PUSH:
683 case OP_MEMORY_END_PUSH_REC:
684 case OP_MEMORY_END:
685 case OP_MEMORY_END_REC:
686 p += SIZE_MEMNUM;
687 if (lookaround_nesting != 0) {
688 goto unexpected_bytecode_error;
689 }
690 break;
691
692 case OP_KEEP:
693 break;
694
695 case OP_FAIL:
696 break;
697 case OP_JUMP:
698 p += SIZE_RELADDR;
699 break;
700 case OP_PUSH:
701 p += SIZE_RELADDR;
702 INC_CACHE_OPCODES;
703 break;
704 case OP_POP:
705 break;
706 case OP_PUSH_OR_JUMP_EXACT1:
707 case OP_PUSH_IF_PEEK_NEXT:
708 p += SIZE_RELADDR + 1;
709 INC_CACHE_OPCODES;
710 break;
711 case OP_REPEAT:
712 case OP_REPEAT_NG:
713 GET_MEMNUM_INC(repeat_mem, p);
714 p += SIZE_RELADDR;
715 if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
716 long dummy_num_cache_points = 0;
717 OnigCacheOpcode* dummy_cache_opcodes = NULL;
718 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points);
719 if (result != 0) {
720 goto fail;
721 }
722 } else {
723 if (reg->repeat_range[repeat_mem].lower == 0) {
724 INC_CACHE_OPCODES;
725 }
726 {
727 long num_cache_points_in_repeat = 0;
728 long num_cache_points_at_repeat = cache_point;
729 OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes;
730 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat);
731 if (result != 0) {
732 goto fail;
733 }
734 OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
735 if (repeat_range->lower < repeat_range->upper) {
736 INC_CACHE_OPCODES;
737 cache_point -= lookaround_nesting != 0 ? 2 : 1;
738 }
739 int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower;
740 cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds;
741 for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) {
742 cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;
743 cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat;
744 }
745 }
746 }
747 break;
748 case OP_REPEAT_INC:
749 case OP_REPEAT_INC_NG:
750 p += SIZE_MEMNUM;
751 goto exit;
752 case OP_REPEAT_INC_SG:
753 case OP_REPEAT_INC_NG_SG:
754 goto unexpected_bytecode_error;
755 case OP_NULL_CHECK_START:
756 p += SIZE_MEMNUM;
757 break;
758 case OP_NULL_CHECK_END:
759 case OP_NULL_CHECK_END_MEMST_PUSH:
760 p += SIZE_MEMNUM;
761 break;
762 case OP_NULL_CHECK_END_MEMST:
763 p += SIZE_MEMNUM;
764 break;
765
766 case OP_PUSH_POS:
767 lookaround:
768 {
769 OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes;
770 result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point);
771 if (result != 0) {
772 goto fail;
773 }
774 UChar* match_addr = p - 1;
775 for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) {
776 if (cache_opcodes_in_lookaround->match_addr == NULL) {
777 cache_opcodes_in_lookaround->match_addr = match_addr;
778 }
779 }
780 }
781 break;
782 case OP_PUSH_POS_NOT:
783 p += SIZE_RELADDR;
784 goto lookaround;
785 case OP_PUSH_LOOK_BEHIND_NOT:
786 p += SIZE_RELADDR;
787 p += SIZE_LENGTH;
788 goto lookaround;
789 case OP_PUSH_STOP_BT:
790 {
791 OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes;
792 result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point);
793 if (result != 0) {
794 goto fail;
795 }
796 UChar* match_addr = p - 1;
797 for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) {
798 if (cache_opcodes_in_atomic->match_addr == NULL) {
799 cache_opcodes_in_atomic->match_addr = match_addr;
800 }
801 }
802 }
803 break;
804 case OP_POP_POS:
805 case OP_FAIL_POS:
806 case OP_FAIL_LOOK_BEHIND_NOT:
807 case OP_POP_STOP_BT:
808 goto exit;
809 case OP_LOOK_BEHIND:
810 p += SIZE_LENGTH;
811 break;
812
813 case OP_ABSENT_END:
814 case OP_ABSENT:
815 goto unexpected_bytecode_error;
816
817 case OP_CALL:
818 case OP_RETURN:
819 goto unexpected_bytecode_error;
820
821 case OP_CONDITION:
822 goto unexpected_bytecode_error;
823
824 case OP_STATE_CHECK_PUSH:
825 case OP_STATE_CHECK_PUSH_OR_JUMP:
826 case OP_STATE_CHECK:
827 case OP_STATE_CHECK_ANYCHAR_STAR:
828 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
829 goto unexpected_bytecode_error;
830
831 case OP_SET_OPTION_PUSH:
832 case OP_SET_OPTION:
833 p += SIZE_OPTION;
834 break;
835
836 default:
837 goto bytecode_error;
838 }
839 }
840
841exit:
842 *cache_opcodes_ptr = cache_opcodes;
843 *pp = p;
844 *num_cache_points_ptr = cache_point;
845 return 0;
846
847fail:
848 return result;
849
850unexpected_bytecode_error:
851 return ONIGERR_UNEXPECTED_BYTECODE;
852
853bytecode_error:
854 return ONIGERR_UNDEFINED_BYTECODE;
855}
856
857/* collect cache opcodes from the given regex program, and compute the total number of cache points. */
858static OnigPosition
859init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes_ptr, long* num_cache_points_ptr)
860{
861 UChar* p = reg->p;
862 *num_cache_points_ptr = 0;
863 OnigPosition result = init_cache_opcodes_inner(reg, -1, 0, &cache_opcodes_ptr, &p, num_cache_points_ptr);
864 if (result == 0 && p != reg->p + reg->used) {
865 return ONIGERR_UNDEFINED_BYTECODE;
866 }
867
868 return result;
869}
870#else
871static OnigPosition
872count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes)
873{
874 *num_cache_opcodes = NUM_CACHE_OPCODES_IMPOSSIBLE;
875 return 0;
876}
877#endif /* USE_MATCH_CACHE */
878
879extern int
880onig_check_linear_time(OnigRegexType* reg)
881{
882 long num_cache_opcodes = 0;
883 count_num_cache_opcodes(reg, &num_cache_opcodes);
884 return num_cache_opcodes != NUM_CACHE_OPCODES_IMPOSSIBLE;
885}
886
887extern void
888onig_region_clear(OnigRegion* region)
889{
890 int i;
891
892 for (i = 0; i < region->num_regs; i++) {
893 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
894 }
895#ifdef USE_CAPTURE_HISTORY
896 history_root_free(region);
897#endif
898}
899
900extern int
901onig_region_resize(OnigRegion* region, int n)
902{
903 region->num_regs = n;
904
905 if (n < ONIG_NREGION)
906 n = ONIG_NREGION;
907
908 if (region->allocated == 0) {
909 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
910 if (region->beg == 0)
911 return ONIGERR_MEMORY;
912
913 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
914 if (region->end == 0) {
915 xfree(region->beg);
916 return ONIGERR_MEMORY;
917 }
918
919 region->allocated = n;
920 }
921 else if (region->allocated < n) {
922 OnigPosition *tmp;
923
924 region->allocated = 0;
925 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
926 if (tmp == 0) {
927 xfree(region->beg);
928 xfree(region->end);
929 return ONIGERR_MEMORY;
930 }
931 region->beg = tmp;
932 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
933 if (tmp == 0) {
934 xfree(region->beg);
935 xfree(region->end);
936 return ONIGERR_MEMORY;
937 }
938 region->end = tmp;
939
940 region->allocated = n;
941 }
942
943 return 0;
944}
945
946static int
947onig_region_resize_clear(OnigRegion* region, int n)
948{
949 int r;
950
951 r = onig_region_resize(region, n);
952 if (r != 0) return r;
953 onig_region_clear(region);
954 return 0;
955}
956
957extern int
958onig_region_set(OnigRegion* region, int at, int beg, int end)
959{
960 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
961
962 if (at >= region->allocated) {
963 int r = onig_region_resize(region, at + 1);
964 if (r < 0) return r;
965 }
966
967 region->beg[at] = beg;
968 region->end[at] = end;
969 return 0;
970}
971
972extern void
973onig_region_init(OnigRegion* region)
974{
975 region->num_regs = 0;
976 region->allocated = 0;
977 region->beg = (OnigPosition* )0;
978 region->end = (OnigPosition* )0;
979#ifdef USE_CAPTURE_HISTORY
980 region->history_root = (OnigCaptureTreeNode* )0;
981#endif
982}
983
984extern OnigRegion*
985onig_region_new(void)
986{
987 OnigRegion* r;
988
989 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
990 if (r)
991 onig_region_init(r);
992 return r;
993}
994
995extern void
996onig_region_free(OnigRegion* r, int free_self)
997{
998 if (r) {
999 if (r->allocated > 0) {
1000 xfree(r->beg);
1001 xfree(r->end);
1002 }
1003#ifdef USE_CAPTURE_HISTORY
1004 history_root_free(r);
1005#endif
1006 if (free_self) {
1007 xfree(r);
1008 }
1009 else {
1010 memset(r, 0, sizeof(OnigRegion));
1011 }
1012 }
1013}
1014
1015extern void
1016onig_region_copy(OnigRegion* to, const OnigRegion* from)
1017{
1018#define RREGC_SIZE (sizeof(int) * from->num_regs)
1019 int i, r;
1020
1021 if (to == from) return;
1022
1023 r = onig_region_resize(to, from->num_regs);
1024 if (r) return;
1025
1026 for (i = 0; i < from->num_regs; i++) {
1027 to->beg[i] = from->beg[i];
1028 to->end[i] = from->end[i];
1029 }
1030 to->num_regs = from->num_regs;
1031
1032#ifdef USE_CAPTURE_HISTORY
1033 history_root_free(to);
1034
1035 if (IS_NOT_NULL(from->history_root)) {
1036 to->history_root = history_tree_clone(from->history_root);
1037 }
1038#endif
1039}
1040
1041
1043#define INVALID_STACK_INDEX -1
1044
1045/* stack type */
1046/* used by normal-POP */
1047#define STK_ALT 0x0001
1048#define STK_LOOK_BEHIND_NOT 0x0002
1049#define STK_POS_NOT 0x0003
1050/* handled by normal-POP */
1051#define STK_MEM_START 0x0100
1052#define STK_MEM_END 0x8200
1053#define STK_REPEAT_INC 0x0300
1054#define STK_STATE_CHECK_MARK 0x1000
1055/* avoided by normal-POP */
1056#define STK_NULL_CHECK_START 0x3000
1057#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
1058#define STK_MEM_END_MARK 0x8400
1059#define STK_POS 0x0500 /* used when POP-POS */
1060#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
1061#define STK_REPEAT 0x0700
1062#define STK_CALL_FRAME 0x0800
1063#define STK_RETURN 0x0900
1064#define STK_VOID 0x0a00 /* for fill a blank */
1065#define STK_ABSENT_POS 0x0b00 /* for absent */
1066#define STK_ABSENT 0x0c00 /* absent inner loop marker */
1067#define STK_MATCH_CACHE_POINT 0x0d00 /* for the match cache optimization */
1068#define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00
1069
1070/* stack type check mask */
1071#define STK_MASK_POP_USED 0x00ff
1072#define STK_MASK_TO_VOID_TARGET 0x10ff
1073#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
1074
1075#ifdef USE_MATCH_CACHE
1076#define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\
1077 (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;\
1078 (msa).num_fails = 0;\
1079 (msa).num_cache_opcodes = NUM_CACHE_OPCODES_UNINIT;\
1080 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1081 (msa).num_cache_points = 0;\
1082 (msa).match_cache_buf = (uint8_t*)NULL;\
1083} while(0)
1084#define MATCH_ARG_FREE_MATCH_CACHE(msa) do {\
1085 xfree((msa).cache_opcodes);\
1086 xfree((msa).match_cache_buf);\
1087 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1088 (msa).match_cache_buf = (uint8_t*)NULL;\
1089} while(0)
1090#else
1091#define MATCH_ARG_INIT_MATCH_CACHE(msa)
1092#define MATCH_ARG_FREE_MATCH_CACHE(msa)
1093#endif
1094
1095#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1096# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1097 (msa).stack_p = (void* )0;\
1098 (msa).options = (arg_option);\
1099 (msa).region = (arg_region);\
1100 (msa).start = (arg_start);\
1101 (msa).gpos = (arg_gpos);\
1102 (msa).best_len = ONIG_MISMATCH;\
1103 (msa).counter = 0;\
1104 (msa).end_time = 0;\
1105 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1106} while(0)
1107#else
1108# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1109 (msa).stack_p = (void* )0;\
1110 (msa).options = (arg_option);\
1111 (msa).region = (arg_region);\
1112 (msa).start = (arg_start);\
1113 (msa).gpos = (arg_gpos);\
1114 (msa).counter = 0;\
1115 (msa).end_time = 0;\
1116 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1117} while(0)
1118#endif
1119
1120#ifdef USE_COMBINATION_EXPLOSION_CHECK
1121
1122# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
1123
1124# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
1125 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
1126 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
1127 offset = ((offset) * (state_num)) >> 3;\
1128 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
1129 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
1130 (msa).state_check_buff = (void* )xmalloc(size);\
1131 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
1132 }\
1133 else \
1134 (msa).state_check_buff = (void* )xalloca(size);\
1135 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
1136 (size_t )(size - (offset))); \
1137 (msa).state_check_buff_size = size;\
1138 }\
1139 else {\
1140 (msa).state_check_buff = (void* )0;\
1141 (msa).state_check_buff_size = 0;\
1142 }\
1143 }\
1144 else {\
1145 (msa).state_check_buff = (void* )0;\
1146 (msa).state_check_buff_size = 0;\
1147 }\
1148 } while(0)
1149
1150# define MATCH_ARG_FREE(msa) do {\
1151 xfree((msa).stack_p);\
1152 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
1153 xfree((msa).state_check_buff);\
1154 }\
1155 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1156} while(0)
1157#else /* USE_COMBINATION_EXPLOSION_CHECK */
1158# define MATCH_ARG_FREE(msa) do {\
1159 xfree((msa).stack_p);\
1160 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1161} while (0)
1162#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1163
1164
1165
1166#define MAX_PTR_NUM 100
1167
1168#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
1169 if (ptr_num > MAX_PTR_NUM) {\
1170 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
1171 heap_addr = alloc_addr;\
1172 if (msa->stack_p) {\
1173 stk_alloc = (OnigStackType* )(msa->stack_p);\
1174 stk_base = stk_alloc;\
1175 stk = stk_base;\
1176 stk_end = stk_base + msa->stack_n;\
1177 }\
1178 else {\
1179 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
1180 stk_base = stk_alloc;\
1181 stk = stk_base;\
1182 stk_end = stk_base + (stack_num);\
1183 }\
1184 }\
1185 else if (msa->stack_p) {\
1186 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
1187 heap_addr = NULL;\
1188 stk_alloc = (OnigStackType* )(msa->stack_p);\
1189 stk_base = stk_alloc;\
1190 stk = stk_base;\
1191 stk_end = stk_base + msa->stack_n;\
1192 }\
1193 else {\
1194 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
1195 + sizeof(OnigStackType) * (stack_num));\
1196 heap_addr = NULL;\
1197 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
1198 stk_base = stk_alloc;\
1199 stk = stk_base;\
1200 stk_end = stk_base + (stack_num);\
1201 }\
1202} while(0)
1203
1204#define STACK_SAVE do{\
1205 if (stk_base != stk_alloc) {\
1206 msa->stack_p = stk_base;\
1207 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
1208 };\
1209} while(0)
1210
1211static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1212
1213extern unsigned int
1214onig_get_match_stack_limit_size(void)
1215{
1216 return MatchStackLimitSize;
1217}
1218
1219extern int
1220onig_set_match_stack_limit_size(unsigned int size)
1221{
1222 MatchStackLimitSize = size;
1223 return 0;
1224}
1225
1226static int
1227stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
1228 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
1229{
1230 size_t n;
1231 OnigStackType *x, *stk_base, *stk_end, *stk;
1232
1233 stk_base = *arg_stk_base;
1234 stk_end = *arg_stk_end;
1235 stk = *arg_stk;
1236
1237 n = stk_end - stk_base;
1238 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1239 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
1240 if (IS_NULL(x)) {
1241 STACK_SAVE;
1242 return ONIGERR_MEMORY;
1243 }
1244 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
1245 n *= 2;
1246 }
1247 else {
1248 unsigned int limit_size = MatchStackLimitSize;
1249 n *= 2;
1250 if (limit_size != 0 && n > limit_size) {
1251 if ((unsigned int )(stk_end - stk_base) == limit_size)
1252 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1253 else
1254 n = limit_size;
1255 }
1256 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
1257 if (IS_NULL(x)) {
1258 STACK_SAVE;
1259 return ONIGERR_MEMORY;
1260 }
1261 }
1262 *arg_stk = x + (stk - stk_base);
1263 *arg_stk_base = x;
1264 *arg_stk_end = x + n;
1265 return 0;
1266}
1267
1268#define STACK_ENSURE(n) do {\
1269 if (stk_end - stk < (n)) {\
1270 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
1271 if (r != 0) {\
1272 STACK_SAVE;\
1273 xfree(xmalloc_base);\
1274 return r;\
1275 }\
1276 }\
1277} while(0)
1278
1279#define STACK_AT(index) (stk_base + (index))
1280#define GET_STACK_INDEX(stk) ((stk) - stk_base)
1281
1282#define STACK_PUSH_TYPE(stack_type) do {\
1283 STACK_ENSURE(1);\
1284 stk->type = (stack_type);\
1285 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1286 STACK_INC;\
1287} while(0)
1288
1289#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1290
1291#ifdef USE_COMBINATION_EXPLOSION_CHECK
1292# define STATE_CHECK_POS(s,snum) \
1293 (((s) - str) * num_comb_exp_check + ((snum) - 1))
1294# define STATE_CHECK_VAL(v,snum) do {\
1295 if (state_check_buff != NULL) {\
1296 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
1297 (v) = state_check_buff[x/8] & (1<<(x%8));\
1298 }\
1299 else (v) = 0;\
1300} while(0)
1301
1302
1303# define ELSE_IF_STATE_CHECK_MARK(stk) \
1304 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
1305 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
1306 state_check_buff[x/8] |= (1<<(x%8)); \
1307 }
1308
1309# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1310 STACK_ENSURE(1);\
1311 stk->type = (stack_type);\
1312 stk->u.state.pcode = (pat);\
1313 stk->u.state.pstr = (s);\
1314 stk->u.state.pstr_prev = (sprev);\
1315 stk->u.state.state_check = 0;\
1316 stk->u.state.pkeep = (keep);\
1317 STACK_INC;\
1318} while(0)
1319
1320# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1321 stk->type = (stack_type);\
1322 stk->u.state.pcode = (pat);\
1323 stk->u.state.state_check = 0;\
1324 STACK_INC;\
1325} while(0)
1326
1327# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
1328 STACK_ENSURE(1);\
1329 stk->type = STK_ALT;\
1330 stk->u.state.pcode = (pat);\
1331 stk->u.state.pstr = (s);\
1332 stk->u.state.pstr_prev = (sprev);\
1333 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
1334 stk->u.state.pkeep = (keep);\
1335 STACK_INC;\
1336} while(0)
1337
1338# define STACK_PUSH_STATE_CHECK(s,snum) do {\
1339 if (state_check_buff != NULL) {\
1340 STACK_ENSURE(1);\
1341 stk->type = STK_STATE_CHECK_MARK;\
1342 stk->u.state.pstr = (s);\
1343 stk->u.state.state_check = (snum);\
1344 STACK_INC;\
1345 }\
1346} while(0)
1347
1348#else /* USE_COMBINATION_EXPLOSION_CHECK */
1349
1350# define ELSE_IF_STATE_CHECK_MARK(stk)
1351
1352# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1353 STACK_ENSURE(1);\
1354 stk->type = (stack_type);\
1355 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1356 stk->u.state.pcode = (pat);\
1357 stk->u.state.pstr = (s);\
1358 stk->u.state.pstr_prev = (sprev);\
1359 stk->u.state.pkeep = (keep);\
1360 STACK_INC;\
1361} while(0)
1362
1363# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1364 stk->type = (stack_type);\
1365 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1366 stk->u.state.pcode = (pat);\
1367 STACK_INC;\
1368} while(0)
1369#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1370
1371#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
1372#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
1373#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
1374#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
1375#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
1376#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
1377 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1378
1379#define STACK_PUSH_REPEAT(id, pat) do {\
1380 STACK_ENSURE(1);\
1381 stk->type = STK_REPEAT;\
1382 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1383 stk->u.repeat.num = (id);\
1384 stk->u.repeat.pcode = (pat);\
1385 stk->u.repeat.count = 0;\
1386 STACK_INC;\
1387} while(0)
1388
1389#define STACK_PUSH_REPEAT_INC(sindex) do {\
1390 STACK_ENSURE(1);\
1391 stk->type = STK_REPEAT_INC;\
1392 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1393 stk->u.repeat_inc.si = (sindex);\
1394 STACK_INC;\
1395} while(0)
1396
1397#define STACK_PUSH_MEM_START(mnum, s) do {\
1398 STACK_ENSURE(1);\
1399 stk->type = STK_MEM_START;\
1400 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1401 stk->u.mem.num = (mnum);\
1402 stk->u.mem.pstr = (s);\
1403 stk->u.mem.start = mem_start_stk[mnum];\
1404 stk->u.mem.end = mem_end_stk[mnum];\
1405 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1406 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1407 STACK_INC;\
1408} while(0)
1409
1410#define STACK_PUSH_MEM_END(mnum, s) do {\
1411 STACK_ENSURE(1);\
1412 stk->type = STK_MEM_END;\
1413 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1414 stk->u.mem.num = (mnum);\
1415 stk->u.mem.pstr = (s);\
1416 stk->u.mem.start = mem_start_stk[mnum];\
1417 stk->u.mem.end = mem_end_stk[mnum];\
1418 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1419 STACK_INC;\
1420} while(0)
1421
1422#define STACK_PUSH_MEM_END_MARK(mnum) do {\
1423 STACK_ENSURE(1);\
1424 stk->type = STK_MEM_END_MARK;\
1425 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1426 stk->u.mem.num = (mnum);\
1427 STACK_INC;\
1428} while(0)
1429
1430#define STACK_GET_MEM_START(mnum, k) do {\
1431 int level = 0;\
1432 k = stk;\
1433 while (k > stk_base) {\
1434 k--;\
1435 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1436 && k->u.mem.num == (mnum)) {\
1437 level++;\
1438 }\
1439 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1440 if (level == 0) break;\
1441 level--;\
1442 }\
1443 }\
1444} while(0)
1445
1446#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1447 int level = 0;\
1448 while (k < stk) {\
1449 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1450 if (level == 0) (start) = k->u.mem.pstr;\
1451 level++;\
1452 }\
1453 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1454 level--;\
1455 if (level == 0) {\
1456 (end) = k->u.mem.pstr;\
1457 break;\
1458 }\
1459 }\
1460 k++;\
1461 }\
1462} while(0)
1463
1464#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
1465 STACK_ENSURE(1);\
1466 stk->type = STK_NULL_CHECK_START;\
1467 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1468 stk->u.null_check.num = (cnum);\
1469 stk->u.null_check.pstr = (s);\
1470 STACK_INC;\
1471} while(0)
1472
1473#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
1474 STACK_ENSURE(1);\
1475 stk->type = STK_NULL_CHECK_END;\
1476 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1477 stk->u.null_check.num = (cnum);\
1478 STACK_INC;\
1479} while(0)
1480
1481#define STACK_PUSH_CALL_FRAME(pat) do {\
1482 STACK_ENSURE(1);\
1483 stk->type = STK_CALL_FRAME;\
1484 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1485 stk->u.call_frame.ret_addr = (pat);\
1486 STACK_INC;\
1487} while(0)
1488
1489#define STACK_PUSH_RETURN do {\
1490 STACK_ENSURE(1);\
1491 stk->type = STK_RETURN;\
1492 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1493 STACK_INC;\
1494} while(0)
1495
1496#define STACK_PUSH_ABSENT_POS(start, end) do {\
1497 STACK_ENSURE(1);\
1498 stk->type = STK_ABSENT_POS;\
1499 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1500 stk->u.absent_pos.abs_pstr = (start);\
1501 stk->u.absent_pos.end_pstr = (end);\
1502 STACK_INC;\
1503} while(0)
1504
1505#define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\
1506 STACK_ENSURE(1);\
1507 stk->type = STK_MATCH_CACHE_POINT;\
1508 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1509 stk->u.match_cache_point.index = (match_cache_point_index);\
1510 stk->u.match_cache_point.mask = (match_cache_point_mask);\
1511 STACK_INC;\
1512} while(0)
1513
1514
1515#ifdef ONIG_DEBUG
1516# define STACK_BASE_CHECK(p, at) \
1517 if ((p) < stk_base) {\
1518 fprintf(stderr, "at %s\n", at);\
1519 goto stack_error;\
1520 }
1521#else
1522# define STACK_BASE_CHECK(p, at)
1523#endif
1524
1525#ifdef ONIG_DEBUG_MATCH_CACHE
1526# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);
1527#else
1528# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
1529#endif
1530
1531#ifdef USE_MATCH_CACHE
1532# define INC_NUM_FAILS msa->num_fails++
1533# define MEMOIZE_MATCH_CACHE_POINT do {\
1534 if (stk->type == STK_MATCH_CACHE_POINT) {\
1535 msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
1536 MATCH_CACHE_DEBUG_MEMOIZE(stk);\
1537 }\
1538 else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
1539 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1540 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1541 }\
1542 } while(0)
1543# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
1544 if (stkp->type == STK_MATCH_CACHE_POINT) {\
1545 stkp->type = STK_VOID;\
1546 memoize_extended_match_cache_point(msa->match_cache_buf, stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);\
1547 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1548 }\
1549 } while(0)
1550# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
1551 if (stk->type == STK_MATCH_CACHE_POINT) {\
1552 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1553 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1554 }\
1555 } while(0)
1556#else
1557# define INC_NUM_FAILS ((void) 0)
1558# define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
1559# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
1560#endif
1561
1562#define STACK_POP_ONE do {\
1563 stk--;\
1564 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1565} while(0)
1566
1567#define STACK_POP do {\
1568 switch (pop_level) {\
1569 case STACK_POP_LEVEL_FREE:\
1570 while (1) {\
1571 stk--;\
1572 STACK_BASE_CHECK(stk, "STACK_POP"); \
1573 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1574 ELSE_IF_STATE_CHECK_MARK(stk);\
1575 MEMOIZE_MATCH_CACHE_POINT;\
1576 }\
1577 break;\
1578 case STACK_POP_LEVEL_MEM_START:\
1579 while (1) {\
1580 stk--;\
1581 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1582 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1583 else if (stk->type == STK_MEM_START) {\
1584 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1585 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1586 }\
1587 ELSE_IF_STATE_CHECK_MARK(stk);\
1588 MEMOIZE_MATCH_CACHE_POINT;\
1589 }\
1590 break;\
1591 default:\
1592 while (1) {\
1593 stk--;\
1594 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1595 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1596 else if (stk->type == STK_MEM_START) {\
1597 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1598 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1599 }\
1600 else if (stk->type == STK_REPEAT_INC) {\
1601 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1602 }\
1603 else if (stk->type == STK_MEM_END) {\
1604 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1605 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1606 }\
1607 ELSE_IF_STATE_CHECK_MARK(stk);\
1608 MEMOIZE_MATCH_CACHE_POINT;\
1609 }\
1610 break;\
1611 }\
1612} while(0)
1613
1614#define STACK_POP_TIL_POS_NOT do {\
1615 while (1) {\
1616 stk--;\
1617 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
1618 if (stk->type == STK_POS_NOT) break;\
1619 else if (stk->type == STK_MEM_START) {\
1620 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1621 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1622 }\
1623 else if (stk->type == STK_REPEAT_INC) {\
1624 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1625 }\
1626 else if (stk->type == STK_MEM_END) {\
1627 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1628 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1629 }\
1630 else if (IS_TO_VOID_TARGET(stk)) {\
1631 INC_NUM_FAILS;\
1632 }\
1633 ELSE_IF_STATE_CHECK_MARK(stk);\
1634 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\
1635 }\
1636} while(0)
1637
1638#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
1639 while (1) {\
1640 stk--;\
1641 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
1642 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
1643 else if (stk->type == STK_MEM_START) {\
1644 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1645 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1646 }\
1647 else if (stk->type == STK_REPEAT_INC) {\
1648 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1649 }\
1650 else if (stk->type == STK_MEM_END) {\
1651 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1652 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1653 }\
1654 ELSE_IF_STATE_CHECK_MARK(stk);\
1655 }\
1656} while(0)
1657
1658#define STACK_POP_TIL_ABSENT do {\
1659 while (1) {\
1660 stk--;\
1661 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
1662 if (stk->type == STK_ABSENT) break;\
1663 else if (stk->type == STK_MEM_START) {\
1664 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1665 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1666 }\
1667 else if (stk->type == STK_REPEAT_INC) {\
1668 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1669 }\
1670 else if (stk->type == STK_MEM_END) {\
1671 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1672 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1673 }\
1674 ELSE_IF_STATE_CHECK_MARK(stk);\
1675 }\
1676} while(0)
1677
1678#define STACK_POP_ABSENT_POS(start, end) do {\
1679 stk--;\
1680 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
1681 (start) = stk->u.absent_pos.abs_pstr;\
1682 (end) = stk->u.absent_pos.end_pstr;\
1683} while(0)
1684
1685#define STACK_POS_END(k) do {\
1686 k = stk;\
1687 while (1) {\
1688 k--;\
1689 STACK_BASE_CHECK(k, "STACK_POS_END"); \
1690 if (IS_TO_VOID_TARGET(k)) {\
1691 INC_NUM_FAILS;\
1692 k->type = STK_VOID;\
1693 }\
1694 else if (k->type == STK_POS) {\
1695 k->type = STK_VOID;\
1696 break;\
1697 }\
1698 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
1699 }\
1700} while(0)
1701
1702#define STACK_STOP_BT_END do {\
1703 OnigStackType *k = stk;\
1704 while (1) {\
1705 k--;\
1706 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
1707 if (IS_TO_VOID_TARGET(k)) {\
1708 INC_NUM_FAILS;\
1709 k->type = STK_VOID;\
1710 }\
1711 else if (k->type == STK_STOP_BT) {\
1712 k->type = STK_VOID;\
1713 break;\
1714 }\
1715 else if (k->type == STK_MATCH_CACHE_POINT) {\
1716 k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
1717 }\
1718 }\
1719} while(0)
1720
1721#define STACK_STOP_BT_FAIL do {\
1722 while (1) {\
1723 stk--;\
1724 STACK_BASE_CHECK(stk, "STACK_STOP_BT_END"); \
1725 if (stk->type == STK_STOP_BT) {\
1726 stk->type = STK_VOID;\
1727 break;\
1728 }\
1729 MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
1730 }\
1731} while(0)
1732
1733#define STACK_NULL_CHECK(isnull,id,s) do {\
1734 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1735 while (1) {\
1736 k--;\
1737 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
1738 if (k->type == STK_NULL_CHECK_START) {\
1739 if (k->u.null_check.num == (id)) {\
1740 (isnull) = (k->u.null_check.pstr == (s));\
1741 break;\
1742 }\
1743 }\
1744 }\
1745} while(0)
1746
1747#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
1748 int level = 0;\
1749 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1750 while (1) {\
1751 k--;\
1752 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
1753 if (k->type == STK_NULL_CHECK_START) {\
1754 if (k->u.null_check.num == (id)) {\
1755 if (level == 0) {\
1756 (isnull) = (k->u.null_check.pstr == (s));\
1757 break;\
1758 }\
1759 else level--;\
1760 }\
1761 }\
1762 else if (k->type == STK_NULL_CHECK_END) {\
1763 level++;\
1764 }\
1765 }\
1766} while(0)
1767
1768#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
1769 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1770 while (1) {\
1771 k--;\
1772 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
1773 if (k->type == STK_NULL_CHECK_START) {\
1774 if (k->u.null_check.num == (id)) {\
1775 if (k->u.null_check.pstr != (s)) {\
1776 (isnull) = 0;\
1777 break;\
1778 }\
1779 else {\
1780 UChar* endp;\
1781 (isnull) = 1;\
1782 while (k < stk) {\
1783 if (k->type == STK_MEM_START) {\
1784 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1785 (isnull) = 0; break;\
1786 }\
1787 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1788 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1789 else\
1790 endp = (UChar* )k->u.mem.end;\
1791 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1792 (isnull) = 0; break;\
1793 }\
1794 else if (endp != s) {\
1795 (isnull) = -1; /* empty, but position changed */ \
1796 }\
1797 }\
1798 k++;\
1799 }\
1800 break;\
1801 }\
1802 }\
1803 }\
1804 }\
1805} while(0)
1806
1807#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1808 int level = 0;\
1809 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1810 while (1) {\
1811 k--;\
1812 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1813 if (k->type == STK_NULL_CHECK_START) {\
1814 if (k->u.null_check.num == (id)) {\
1815 if (level == 0) {\
1816 if (k->u.null_check.pstr != (s)) {\
1817 (isnull) = 0;\
1818 break;\
1819 }\
1820 else {\
1821 UChar* endp;\
1822 (isnull) = 1;\
1823 while (k < stk) {\
1824 if (k->type == STK_MEM_START) {\
1825 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1826 (isnull) = 0; break;\
1827 }\
1828 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1829 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1830 else\
1831 endp = (UChar* )k->u.mem.end;\
1832 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1833 (isnull) = 0; break;\
1834 }\
1835 else if (endp != s) {\
1836 (isnull) = -1; /* empty, but position changed */ \
1837 }\
1838 }\
1839 k++;\
1840 }\
1841 break;\
1842 }\
1843 }\
1844 else {\
1845 level--;\
1846 }\
1847 }\
1848 }\
1849 else if (k->type == STK_NULL_CHECK_END) {\
1850 if (k->u.null_check.num == (id)) level++;\
1851 }\
1852 }\
1853} while(0)
1854
1855#define STACK_GET_REPEAT(id, k) do {\
1856 int level = 0;\
1857 k = stk;\
1858 while (1) {\
1859 k--;\
1860 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1861 if (k->type == STK_REPEAT) {\
1862 if (level == 0) {\
1863 if (k->u.repeat.num == (id)) {\
1864 break;\
1865 }\
1866 }\
1867 }\
1868 else if (k->type == STK_CALL_FRAME) level--;\
1869 else if (k->type == STK_RETURN) level++;\
1870 }\
1871} while(0)
1872
1873#define STACK_RETURN(addr) do {\
1874 int level = 0;\
1875 OnigStackType* k = stk;\
1876 while (1) {\
1877 k--;\
1878 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1879 if (k->type == STK_CALL_FRAME) {\
1880 if (level == 0) {\
1881 (addr) = k->u.call_frame.ret_addr;\
1882 break;\
1883 }\
1884 else level--;\
1885 }\
1886 else if (k->type == STK_RETURN)\
1887 level++;\
1888 }\
1889} while(0)
1890
1891
1892#define STRING_CMP(s1,s2,len) do {\
1893 while (len-- > 0) {\
1894 if (*s1++ != *s2++) goto fail;\
1895 }\
1896} while(0)
1897
1898#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1899 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1900 goto fail; \
1901} while(0)
1902
1903static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1904 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1905{
1906 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1907 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1908 UChar *p1, *p2, *end1, *s2;
1909 int len1, len2;
1910
1911 s2 = *ps2;
1912 end1 = s1 + mblen;
1913 while (s1 < end1) {
1914 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1915 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1916 if (len1 != len2) return 0;
1917 p1 = buf1;
1918 p2 = buf2;
1919 while (len1-- > 0) {
1920 if (*p1 != *p2) return 0;
1921 p1++;
1922 p2++;
1923 }
1924 }
1925
1926 *ps2 = s2;
1927 return 1;
1928}
1929
1930#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1931 is_fail = 0;\
1932 while (len-- > 0) {\
1933 if (*s1++ != *s2++) {\
1934 is_fail = 1; break;\
1935 }\
1936 }\
1937} while(0)
1938
1939#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1940 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1941 is_fail = 1; \
1942 else \
1943 is_fail = 0; \
1944} while(0)
1945
1946
1947#define IS_EMPTY_STR (str == end)
1948#define ON_STR_BEGIN(s) ((s) == str)
1949#define ON_STR_END(s) ((s) == end)
1950#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1951# define DATA_ENSURE_CHECK1 (s < right_range)
1952# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1953# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1954# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1955# define ABSENT_END_POS right_range
1956#else
1957# define DATA_ENSURE_CHECK1 (s < end)
1958# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1959# define DATA_ENSURE(n) if (s + (n) > end) goto fail
1960# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1961# define ABSENT_END_POS end
1962#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1963
1964int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
1965
1966static inline int
1967enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
1968{
1969 if (enc->max_enc_len == enc->min_enc_len) {
1970 return (p < e ? enc->min_enc_len : 0);
1971 }
1972 else {
1973 return onigenc_mbclen_approximate(p, e, enc);
1974 }
1975}
1976
1977
1978#ifdef USE_CAPTURE_HISTORY
1979static int
1980make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1981 OnigStackType* stk_top, UChar* str, regex_t* reg)
1982{
1983 int n, r;
1984 OnigCaptureTreeNode* child;
1985 OnigStackType* k = *kp;
1986
1987 while (k < stk_top) {
1988 if (k->type == STK_MEM_START) {
1989 n = k->u.mem.num;
1990 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1991 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1992 child = history_node_new();
1993 CHECK_NULL_RETURN_MEMERR(child);
1994 child->group = n;
1995 child->beg = k->u.mem.pstr - str;
1996 r = history_tree_add_child(node, child);
1997 if (r != 0) {
1998 history_tree_free(child);
1999 return r;
2000 }
2001 *kp = (k + 1);
2002 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2003 if (r != 0) return r;
2004
2005 k = *kp;
2006 child->end = k->u.mem.pstr - str;
2007 }
2008 }
2009 else if (k->type == STK_MEM_END) {
2010 if (k->u.mem.num == node->group) {
2011 node->end = k->u.mem.pstr - str;
2012 *kp = k;
2013 return 0;
2014 }
2015 }
2016 k++;
2017 }
2018
2019 return 1; /* 1: root node ending. */
2020}
2021#endif /* USE_CAPTURE_HISTORY */
2022
2023#ifdef USE_BACKREF_WITH_LEVEL
2024static int
2025mem_is_in_memp(int mem, int num, UChar* memp)
2026{
2027 int i;
2028 MemNumType m;
2029
2030 for (i = 0; i < num; i++) {
2031 GET_MEMNUM_INC(m, memp);
2032 if (mem == (int )m) return 1;
2033 }
2034 return 0;
2035}
2036
2037static int backref_match_at_nested_level(regex_t* reg,
2038 OnigStackType* top, OnigStackType* stk_base,
2039 int ignore_case, int case_fold_flag,
2040 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
2041{
2042 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2043 int level;
2044 OnigStackType* k;
2045
2046 level = 0;
2047 k = top;
2048 k--;
2049 while (k >= stk_base) {
2050 if (k->type == STK_CALL_FRAME) {
2051 level--;
2052 }
2053 else if (k->type == STK_RETURN) {
2054 level++;
2055 }
2056 else if (level == nest) {
2057 if (k->type == STK_MEM_START) {
2058 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2059 pstart = k->u.mem.pstr;
2060 if (pend != NULL_UCHARP) {
2061 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2062 p = pstart;
2063 ss = *s;
2064
2065 if (ignore_case != 0) {
2066 if (string_cmp_ic(reg->enc, case_fold_flag,
2067 pstart, &ss, pend - pstart, send) == 0)
2068 return 0; /* or goto next_mem; */
2069 }
2070 else {
2071 while (p < pend) {
2072 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2073 }
2074 }
2075
2076 *s = ss;
2077 return 1;
2078 }
2079 }
2080 }
2081 else if (k->type == STK_MEM_END) {
2082 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2083 pend = k->u.mem.pstr;
2084 }
2085 }
2086 }
2087 k--;
2088 }
2089
2090 return 0;
2091}
2092#endif /* USE_BACKREF_WITH_LEVEL */
2093
2094
2095#ifdef ONIG_DEBUG_STATISTICS
2096
2097# ifdef _WIN32
2098# include <windows.h>
2099static LARGE_INTEGER ts, te, freq;
2100# define GETTIME(t) QueryPerformanceCounter(&(t))
2101# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
2102 * 1000000 / freq.QuadPart)
2103# else /* _WIN32 */
2104
2105# define USE_TIMEOFDAY
2106
2107# ifdef USE_TIMEOFDAY
2108# ifdef HAVE_SYS_TIME_H
2109# include <sys/time.h>
2110# endif
2111# ifdef HAVE_UNISTD_H
2112# include <unistd.h>
2113# endif
2114static struct timeval ts, te;
2115# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2116# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2117 (((te).tv_sec - (ts).tv_sec)*1000000))
2118# else /* USE_TIMEOFDAY */
2119# ifdef HAVE_SYS_TIMES_H
2120# include <sys/times.h>
2121# endif
2122static struct tms ts, te;
2123# define GETTIME(t) times(&(t))
2124# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
2125# endif /* USE_TIMEOFDAY */
2126
2127# endif /* _WIN32 */
2128
2129static int OpCounter[256];
2130static int OpPrevCounter[256];
2131static unsigned long OpTime[256];
2132static int OpCurr = OP_FINISH;
2133static int OpPrevTarget = OP_FAIL;
2134static int MaxStackDepth = 0;
2135
2136# define MOP_IN(opcode) do {\
2137 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2138 OpCurr = opcode;\
2139 OpCounter[opcode]++;\
2140 GETTIME(ts);\
2141} while(0)
2142
2143# define MOP_OUT do {\
2144 GETTIME(te);\
2145 OpTime[OpCurr] += TIMEDIFF(te, ts);\
2146} while(0)
2147
2148extern void
2149onig_statistics_init(void)
2150{
2151 int i;
2152 for (i = 0; i < 256; i++) {
2153 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2154 }
2155 MaxStackDepth = 0;
2156# ifdef _WIN32
2157 QueryPerformanceFrequency(&freq);
2158# endif
2159}
2160
2161extern void
2162onig_print_statistics(FILE* f)
2163{
2164 int i;
2165 fprintf(f, " count prev time\n");
2166 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2167 fprintf(f, "%8d: %8d: %10lu: %s\n",
2168 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2169 }
2170 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2171}
2172
2173# define STACK_INC do {\
2174 stk++;\
2175 if (stk - stk_base > MaxStackDepth) \
2176 MaxStackDepth = stk - stk_base;\
2177} while(0)
2178
2179#else /* ONIG_DEBUG_STATISTICS */
2180# define STACK_INC stk++
2181
2182# define MOP_IN(opcode)
2183# define MOP_OUT
2184#endif /* ONIG_DEBUG_STATISTICS */
2185
2186
2187#ifdef ONIG_DEBUG_MATCH
2188static const char *
2189stack_type_str(int stack_type)
2190{
2191 switch (stack_type) {
2192 case STK_ALT: return "Alt ";
2193 case STK_LOOK_BEHIND_NOT: return "LBNot ";
2194 case STK_POS_NOT: return "PosNot";
2195 case STK_MEM_START: return "MemS ";
2196 case STK_MEM_END: return "MemE ";
2197 case STK_REPEAT_INC: return "RepInc";
2198 case STK_STATE_CHECK_MARK: return "StChMk";
2199 case STK_NULL_CHECK_START: return "NulChS";
2200 case STK_NULL_CHECK_END: return "NulChE";
2201 case STK_MEM_END_MARK: return "MemEMk";
2202 case STK_POS: return "Pos ";
2203 case STK_STOP_BT: return "StopBt";
2204 case STK_REPEAT: return "Rep ";
2205 case STK_CALL_FRAME: return "Call ";
2206 case STK_RETURN: return "Ret ";
2207 case STK_VOID: return "Void ";
2208 case STK_ABSENT_POS: return "AbsPos";
2209 case STK_ABSENT: return "Absent";
2210 case STK_MATCH_CACHE_POINT: return "MCache";
2211 default: return " ";
2212 }
2213}
2214#endif
2215#ifdef USE_MATCH_CACHE
2216
2217static long
2218bsearch_cache_opcodes(const OnigCacheOpcode *cache_opcodes, long num_cache_opcodes, const UChar* p)
2219{
2220 long l = 0, r = num_cache_opcodes - 1, m = 0;
2221
2222 while (l <= r) {
2223 m = (l + r) / 2;
2224 if (cache_opcodes[m].addr == p) break;
2225 if (cache_opcodes[m].addr < p) l = m + 1;
2226 else r = m - 1;
2227 }
2228 return m;
2229}
2230
2231static long
2232find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk, const OnigCacheOpcode **cache_opcode_ptr)
2233{
2234 long m;
2235 const OnigCacheOpcode* cache_opcode;
2236 const OnigRepeatRange* range;
2237 const OnigStackType *stkp;
2238 int count = 0;
2239 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
2240 long cache_point;
2241 long num_cache_points_at_outer_repeat;
2242 long num_cache_points_in_outer_repeat;
2243
2244 m = bsearch_cache_opcodes(cache_opcodes, num_cache_opcodes, p);
2245
2246 if (!(0 <= m && m < num_cache_opcodes && cache_opcodes[m].addr == p)) {
2247 return -1;
2248 }
2249
2250 cache_opcode = &cache_opcodes[m];
2251 *cache_opcode_ptr = &cache_opcodes[m];
2252 cache_point = cache_opcode->cache_point;
2253 if (cache_opcode->outer_repeat_mem == -1) {
2254 return cache_point;
2255 }
2256
2257 num_cache_points_at_outer_repeat = cache_opcode->num_cache_points_at_outer_repeat;
2258 num_cache_points_in_outer_repeat = cache_opcode->num_cache_points_in_outer_repeat;
2259
2260 range = &reg->repeat_range[cache_opcode->outer_repeat_mem];
2261
2262 stkp = &stk[repeat_stk[cache_opcode->outer_repeat_mem]];
2263 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
2264
2265 if (count < range->lower) {
2266 return num_cache_points_at_outer_repeat +
2267 num_cache_points_in_outer_repeat * count +
2268 cache_point;
2269 }
2270
2271 if (range->upper == 0x7fffffff) {
2272 return num_cache_points_at_outer_repeat +
2273 num_cache_points_in_outer_repeat * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) +
2274 cache_point;
2275 }
2276
2277 return num_cache_points_at_outer_repeat +
2278 num_cache_points_in_outer_repeat * (range->lower - 1) +
2279 (num_cache_points_in_outer_repeat + 1) * (count - range->lower + 1) +
2280 cache_point;
2281}
2282
2283static int
2284check_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
2285{
2286 if (match_cache_point_mask & 0x80) {
2287 return (match_cache_buf[match_cache_point_index + 1] & 0x01) > 0;
2288 }
2289 else {
2290 return (match_cache_buf[match_cache_point_index] & (match_cache_point_mask << 1)) > 0;
2291 }
2292}
2293
2294static void
2295memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
2296{
2297 match_cache_buf[match_cache_point_index] |= match_cache_point_mask;
2298 if (match_cache_point_mask & 0x80) {
2299 match_cache_buf[match_cache_point_index + 1] |= 0x01;
2300 }
2301 else {
2302 match_cache_buf[match_cache_point_index] |= match_cache_point_mask << 1;
2303 }
2304}
2305
2306#endif /* USE_MATCH_CACHE */
2307
2308/* match data(str - end) from position (sstart). */
2309/* if sstart == str then set sprev to NULL. */
2310static OnigPosition
2311match_at(regex_t* reg, const UChar* str, const UChar* end,
2312#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2313 const UChar* right_range,
2314#endif
2315 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
2316{
2317 static const UChar FinishCode[] = { OP_FINISH };
2318
2319 int i, num_mem, pop_level;
2320 ptrdiff_t n, best_len;
2321 LengthType tlen, tlen2;
2322 MemNumType mem;
2323 RelAddrType addr;
2324 OnigOptionType option = reg->options;
2325 OnigEncoding encode = reg->enc;
2326 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2327 UChar *s, *q, *sbegin;
2328 UChar *p = reg->p;
2329 UChar *pbegin = p;
2330 UChar *pkeep;
2331 char *alloca_base;
2332 char *xmalloc_base = NULL;
2333 OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end;
2334 OnigStackType *stkp; /* used as any purpose. */
2335 OnigStackIndex si;
2336 OnigStackIndex *repeat_stk;
2337 OnigStackIndex *mem_start_stk, *mem_end_stk;
2338#ifdef USE_COMBINATION_EXPLOSION_CHECK
2339 int scv;
2340 unsigned char* state_check_buff = msa->state_check_buff;
2341 int num_comb_exp_check = reg->num_comb_exp_check;
2342#endif
2343
2344#if USE_TOKEN_THREADED_VM
2345# define OP_OFFSET 1
2346# define VM_LOOP JUMP;
2347# define VM_LOOP_END
2348# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2349# define DEFAULT L_DEFAULT:
2350# define NEXT sprev = sbegin; JUMP
2351# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2352
2353 RB_GNUC_EXTENSION static const void *oplabels[] = {
2354 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
2355 &&L_OP_END, /* pattern code terminator (success end) */
2356
2357 &&L_OP_EXACT1, /* single byte, N = 1 */
2358 &&L_OP_EXACT2, /* single byte, N = 2 */
2359 &&L_OP_EXACT3, /* single byte, N = 3 */
2360 &&L_OP_EXACT4, /* single byte, N = 4 */
2361 &&L_OP_EXACT5, /* single byte, N = 5 */
2362 &&L_OP_EXACTN, /* single byte */
2363 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
2364 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
2365 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
2366 &&L_OP_EXACTMB2N, /* mb-length = 2 */
2367 &&L_OP_EXACTMB3N, /* mb-length = 3 */
2368 &&L_OP_EXACTMBN, /* other length */
2369
2370 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
2371 &&L_OP_EXACTN_IC, /* single byte, ignore case */
2372
2373 &&L_OP_CCLASS,
2374 &&L_OP_CCLASS_MB,
2375 &&L_OP_CCLASS_MIX,
2376 &&L_OP_CCLASS_NOT,
2377 &&L_OP_CCLASS_MB_NOT,
2378 &&L_OP_CCLASS_MIX_NOT,
2379
2380 &&L_OP_ANYCHAR, /* "." */
2381 &&L_OP_ANYCHAR_ML, /* "." multi-line */
2382 &&L_OP_ANYCHAR_STAR, /* ".*" */
2383 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
2384 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2385 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2386
2387 &&L_OP_WORD,
2388 &&L_OP_NOT_WORD,
2389 &&L_OP_WORD_BOUND,
2390 &&L_OP_NOT_WORD_BOUND,
2391# ifdef USE_WORD_BEGIN_END
2392 &&L_OP_WORD_BEGIN,
2393 &&L_OP_WORD_END,
2394# else
2395 &&L_DEFAULT,
2396 &&L_DEFAULT,
2397# endif
2398 &&L_OP_ASCII_WORD,
2399 &&L_OP_NOT_ASCII_WORD,
2400 &&L_OP_ASCII_WORD_BOUND,
2401 &&L_OP_NOT_ASCII_WORD_BOUND,
2402# ifdef USE_WORD_BEGIN_END
2403 &&L_OP_ASCII_WORD_BEGIN,
2404 &&L_OP_ASCII_WORD_END,
2405# else
2406 &&L_DEFAULT,
2407 &&L_DEFAULT,
2408# endif
2409
2410 &&L_OP_BEGIN_BUF,
2411 &&L_OP_END_BUF,
2412 &&L_OP_BEGIN_LINE,
2413 &&L_OP_END_LINE,
2414 &&L_OP_SEMI_END_BUF,
2415 &&L_OP_BEGIN_POSITION,
2416
2417 &&L_OP_BACKREF1,
2418 &&L_OP_BACKREF2,
2419 &&L_OP_BACKREFN,
2420 &&L_OP_BACKREFN_IC,
2421 &&L_OP_BACKREF_MULTI,
2422 &&L_OP_BACKREF_MULTI_IC,
2423# ifdef USE_BACKREF_WITH_LEVEL
2424 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
2425# else
2426 &&L_DEFAULT,
2427# endif
2428 &&L_OP_MEMORY_START,
2429 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
2430 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
2431# ifdef USE_SUBEXP_CALL
2432 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
2433# else
2434 &&L_DEFAULT,
2435# endif
2436 &&L_OP_MEMORY_END,
2437# ifdef USE_SUBEXP_CALL
2438 &&L_OP_MEMORY_END_REC, /* push marker to stack */
2439# else
2440 &&L_DEFAULT,
2441# endif
2442
2443 &&L_OP_KEEP,
2444
2445 &&L_OP_FAIL, /* pop stack and move */
2446 &&L_OP_JUMP,
2447 &&L_OP_PUSH,
2448 &&L_OP_POP,
2449# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2450 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
2451# else
2452 &&L_DEFAULT,
2453# endif
2454 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
2455 &&L_OP_REPEAT, /* {n,m} */
2456 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
2457 &&L_OP_REPEAT_INC,
2458 &&L_OP_REPEAT_INC_NG, /* non greedy */
2459 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
2460 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
2461 &&L_OP_NULL_CHECK_START, /* null loop checker start */
2462 &&L_OP_NULL_CHECK_END, /* null loop checker end */
2463# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2464 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
2465# else
2466 &&L_DEFAULT,
2467# endif
2468# ifdef USE_SUBEXP_CALL
2469 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
2470# else
2471 &&L_DEFAULT,
2472# endif
2473
2474 &&L_OP_PUSH_POS, /* (?=...) start */
2475 &&L_OP_POP_POS, /* (?=...) end */
2476 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
2477 &&L_OP_FAIL_POS, /* (?!...) end */
2478 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
2479 &&L_OP_POP_STOP_BT, /* (?>...) end */
2480 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
2481 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
2482 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
2483 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
2484 &&L_OP_ABSENT, /* (?~...) start of inner loop */
2485 &&L_OP_ABSENT_END, /* (?~...) end */
2486
2487# ifdef USE_SUBEXP_CALL
2488 &&L_OP_CALL, /* \g<name> */
2489 &&L_OP_RETURN,
2490# else
2491 &&L_DEFAULT,
2492 &&L_DEFAULT,
2493# endif
2494 &&L_OP_CONDITION,
2495
2496# ifdef USE_COMBINATION_EXPLOSION_CHECK
2497 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
2498 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
2499 &&L_OP_STATE_CHECK, /* check only */
2500# else
2501 &&L_DEFAULT,
2502 &&L_DEFAULT,
2503 &&L_DEFAULT,
2504# endif
2505# ifdef USE_COMBINATION_EXPLOSION_CHECK
2506 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2507 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2508# else
2509 &&L_DEFAULT,
2510 &&L_DEFAULT,
2511# endif
2512 /* no need: IS_DYNAMIC_OPTION() == 0 */
2513# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2514 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
2515 &&L_OP_SET_OPTION /* set option */
2516# else
2517 &&L_DEFAULT,
2518 &&L_DEFAULT
2519# endif
2520 };
2521#else /* USE_TOKEN_THREADED_VM */
2522
2523# define OP_OFFSET 0
2524# define VM_LOOP \
2525 while (1) { \
2526 OPCODE_EXEC_HOOK; \
2527 pbegin = p; \
2528 sbegin = s; \
2529 switch (*p++) {
2530# define VM_LOOP_END } sprev = sbegin; }
2531# define CASE(x) case x:
2532# define DEFAULT default:
2533# define NEXT break
2534# define JUMP continue; break
2535#endif /* USE_TOKEN_THREADED_VM */
2536
2537
2538#ifdef USE_SUBEXP_CALL
2539/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2540 etc. Additional space is required. */
2541# define ADD_NUMMEM 1
2542#else
2543/* Stack #0 not is used. */
2544# define ADD_NUMMEM 0
2545#endif
2546
2547 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2548
2549 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2550 pop_level = reg->stack_pop_level;
2551 num_mem = reg->num_mem;
2552 repeat_stk = (OnigStackIndex* )alloca_base;
2553
2554 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2555 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2556 {
2557 OnigStackIndex *pp = mem_start_stk;
2558 for (; pp < repeat_stk + n; pp += 2) {
2559 pp[0] = INVALID_STACK_INDEX;
2560 pp[1] = INVALID_STACK_INDEX;
2561 }
2562 }
2563#ifndef USE_SUBEXP_CALL
2564 mem_start_stk--; /* for index start from 1,
2565 mem_start_stk[1]..mem_start_stk[num_mem] */
2566 mem_end_stk--; /* for index start from 1,
2567 mem_end_stk[1]..mem_end_stk[num_mem] */
2568#endif
2569
2570#ifdef ONIG_DEBUG_MATCH
2571 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
2572 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2573 fprintf(stderr, "size: %d, start offset: %d\n",
2574 (int )(end - str), (int )(sstart - str));
2575 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
2576#endif
2577
2578 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
2579 best_len = ONIG_MISMATCH;
2580 s = (UChar* )sstart;
2581 pkeep = (UChar* )sstart;
2582
2583
2584#ifdef ONIG_DEBUG_MATCH
2585# define OPCODE_EXEC_HOOK \
2586 if (s) { \
2587 UChar *op, *q, *bp, buf[50]; \
2588 int len; \
2589 op = p - OP_OFFSET; \
2590 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2591 bp = buf; \
2592 q = s; \
2593 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
2594 for (i = 0; i < 7 && q < end; i++) { \
2595 len = enclen(encode, q, end); \
2596 while (len-- > 0) *bp++ = *q++; \
2597 } \
2598 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2599 } \
2600 xmemcpy(bp, "\"", 1); bp += 1; \
2601 *bp = 0; \
2602 fputs((char* )buf, stderr); \
2603 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2604 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2605 stk - stk_base - 1, \
2606 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2607 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2608 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2609 fprintf(stderr, "\n"); \
2610 }
2611#else
2612# define OPCODE_EXEC_HOOK ((void) 0)
2613#endif
2614
2615#ifdef USE_MATCH_CACHE
2616#ifdef ONIG_DEBUG_MATCH_CACHE
2617#define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask)
2618#define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n")
2619#else
2620#define MATCH_CACHE_DEBUG ((void) 0)
2621#define MATCH_CACHE_DEBUG_HIT ((void) 0)
2622#endif
2623
2624#define MATCH_CACHE_HIT ((void) 0)
2625
2626# define CHECK_MATCH_CACHE do {\
2627 if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\
2628 const OnigCacheOpcode *cache_opcode;\
2629 long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\
2630 if (cache_point >= 0) {\
2631 long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\
2632 long match_cache_point_index = match_cache_point >> 3;\
2633 uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\
2634 MATCH_CACHE_DEBUG;\
2635 if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\
2636 MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\
2637 if (cache_opcode->lookaround_nesting == 0) goto fail;\
2638 else if (cache_opcode->lookaround_nesting < 0) {\
2639 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2640 STACK_STOP_BT_FAIL;\
2641 goto fail;\
2642 }\
2643 else goto fail;\
2644 }\
2645 else {\
2646 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2647 p = cache_opcode->match_addr;\
2648 MOP_OUT;\
2649 JUMP;\
2650 }\
2651 else goto fail;\
2652 }\
2653 }\
2654 STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\
2655 }\
2656 }\
2657} while (0)
2658#else
2659# define CHECK_MATCH_CACHE ((void) 0)
2660#endif
2661
2662 VM_LOOP {
2663 CASE(OP_END) MOP_IN(OP_END);
2664 n = s - sstart;
2665 if (n > best_len) {
2666 OnigRegion* region;
2667#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2668 if (IS_FIND_LONGEST(option)) {
2669 if (n > msa->best_len) {
2670 msa->best_len = n;
2671 msa->best_s = (UChar* )sstart;
2672 }
2673 else
2674 goto end_best_len;
2675 }
2676#endif
2677 best_len = n;
2678 region = msa->region;
2679 if (region) {
2680 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2681 region->end[0] = s - str;
2682 for (i = 1; i <= num_mem; i++) {
2683 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2684 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2685 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2686 else
2687 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2688
2689 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2690 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2691 : (UChar* )((void* )mem_end_stk[i])) - str;
2692 }
2693 else {
2694 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2695 }
2696 }
2697
2698#ifdef USE_CAPTURE_HISTORY
2699 if (reg->capture_history != 0) {
2700 int r;
2701 OnigCaptureTreeNode* node;
2702
2703 if (IS_NULL(region->history_root)) {
2704 region->history_root = node = history_node_new();
2705 CHECK_NULL_RETURN_MEMERR(node);
2706 }
2707 else {
2708 node = region->history_root;
2709 history_tree_clear(node);
2710 }
2711
2712 node->group = 0;
2713 node->beg = ((pkeep > s) ? s : pkeep) - str;
2714 node->end = s - str;
2715
2716 stkp = stk_base;
2717 r = make_capture_history_tree(region->history_root, &stkp,
2718 stk, (UChar* )str, reg);
2719 if (r < 0) {
2720 best_len = r; /* error code */
2721 goto finish;
2722 }
2723 }
2724#endif /* USE_CAPTURE_HISTORY */
2725 } /* if (region) */
2726 } /* n > best_len */
2727
2728#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2729 end_best_len:
2730#endif
2731 MOP_OUT;
2732
2733 if (IS_FIND_CONDITION(option)) {
2734 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2735 best_len = ONIG_MISMATCH;
2736 goto fail; /* for retry */
2737 }
2738 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2739 goto fail; /* for retry */
2740 }
2741 }
2742
2743 /* default behavior: return first-matching result. */
2744 goto finish;
2745 NEXT;
2746
2747 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2748 DATA_ENSURE(1);
2749 if (*p != *s) goto fail;
2750 p++; s++;
2751 MOP_OUT;
2752 NEXT;
2753
2754 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2755 {
2756 int len;
2757 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2758
2759 DATA_ENSURE(1);
2760 len = ONIGENC_MBC_CASE_FOLD(encode,
2761 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2762 case_fold_flag,
2763 &s, end, lowbuf);
2764 DATA_ENSURE(0);
2765 q = lowbuf;
2766 while (len-- > 0) {
2767 if (*p != *q) {
2768 goto fail;
2769 }
2770 p++; q++;
2771 }
2772 }
2773 MOP_OUT;
2774 NEXT;
2775
2776 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2777 DATA_ENSURE(2);
2778 if (*p != *s) goto fail;
2779 p++; s++;
2780 if (*p != *s) goto fail;
2781 sprev = s;
2782 p++; s++;
2783 MOP_OUT;
2784 JUMP;
2785
2786 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2787 DATA_ENSURE(3);
2788 if (*p != *s) goto fail;
2789 p++; s++;
2790 if (*p != *s) goto fail;
2791 p++; s++;
2792 if (*p != *s) goto fail;
2793 sprev = s;
2794 p++; s++;
2795 MOP_OUT;
2796 JUMP;
2797
2798 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2799 DATA_ENSURE(4);
2800 if (*p != *s) goto fail;
2801 p++; s++;
2802 if (*p != *s) goto fail;
2803 p++; s++;
2804 if (*p != *s) goto fail;
2805 p++; s++;
2806 if (*p != *s) goto fail;
2807 sprev = s;
2808 p++; s++;
2809 MOP_OUT;
2810 JUMP;
2811
2812 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2813 DATA_ENSURE(5);
2814 if (*p != *s) goto fail;
2815 p++; s++;
2816 if (*p != *s) goto fail;
2817 p++; s++;
2818 if (*p != *s) goto fail;
2819 p++; s++;
2820 if (*p != *s) goto fail;
2821 p++; s++;
2822 if (*p != *s) goto fail;
2823 sprev = s;
2824 p++; s++;
2825 MOP_OUT;
2826 JUMP;
2827
2828 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2829 GET_LENGTH_INC(tlen, p);
2830 DATA_ENSURE(tlen);
2831 while (tlen-- > 0) {
2832 if (*p++ != *s++) goto fail;
2833 }
2834 sprev = s - 1;
2835 MOP_OUT;
2836 JUMP;
2837
2838 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2839 {
2840 int len;
2841 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2842
2843 GET_LENGTH_INC(tlen, p);
2844 endp = p + tlen;
2845
2846 while (p < endp) {
2847 sprev = s;
2848 DATA_ENSURE(1);
2849 len = ONIGENC_MBC_CASE_FOLD(encode,
2850 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2851 case_fold_flag,
2852 &s, end, lowbuf);
2853 DATA_ENSURE(0);
2854 q = lowbuf;
2855 while (len-- > 0) {
2856 if (*p != *q) goto fail;
2857 p++; q++;
2858 }
2859 }
2860 }
2861
2862 MOP_OUT;
2863 JUMP;
2864
2865 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2866 DATA_ENSURE(2);
2867 if (*p != *s) goto fail;
2868 p++; s++;
2869 if (*p != *s) goto fail;
2870 p++; s++;
2871 MOP_OUT;
2872 NEXT;
2873
2874 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2875 DATA_ENSURE(4);
2876 if (*p != *s) goto fail;
2877 p++; s++;
2878 if (*p != *s) goto fail;
2879 p++; s++;
2880 sprev = s;
2881 if (*p != *s) goto fail;
2882 p++; s++;
2883 if (*p != *s) goto fail;
2884 p++; s++;
2885 MOP_OUT;
2886 JUMP;
2887
2888 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2889 DATA_ENSURE(6);
2890 if (*p != *s) goto fail;
2891 p++; s++;
2892 if (*p != *s) goto fail;
2893 p++; s++;
2894 if (*p != *s) goto fail;
2895 p++; s++;
2896 if (*p != *s) goto fail;
2897 p++; s++;
2898 sprev = s;
2899 if (*p != *s) goto fail;
2900 p++; s++;
2901 if (*p != *s) goto fail;
2902 p++; s++;
2903 MOP_OUT;
2904 JUMP;
2905
2906 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2907 GET_LENGTH_INC(tlen, p);
2908 DATA_ENSURE(tlen * 2);
2909 while (tlen-- > 0) {
2910 if (*p != *s) goto fail;
2911 p++; s++;
2912 if (*p != *s) goto fail;
2913 p++; s++;
2914 }
2915 sprev = s - 2;
2916 MOP_OUT;
2917 JUMP;
2918
2919 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2920 GET_LENGTH_INC(tlen, p);
2921 DATA_ENSURE(tlen * 3);
2922 while (tlen-- > 0) {
2923 if (*p != *s) goto fail;
2924 p++; s++;
2925 if (*p != *s) goto fail;
2926 p++; s++;
2927 if (*p != *s) goto fail;
2928 p++; s++;
2929 }
2930 sprev = s - 3;
2931 MOP_OUT;
2932 JUMP;
2933
2934 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2935 GET_LENGTH_INC(tlen, p); /* mb-len */
2936 GET_LENGTH_INC(tlen2, p); /* string len */
2937 tlen2 *= tlen;
2938 DATA_ENSURE(tlen2);
2939 while (tlen2-- > 0) {
2940 if (*p != *s) goto fail;
2941 p++; s++;
2942 }
2943 sprev = s - tlen;
2944 MOP_OUT;
2945 JUMP;
2946
2947 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2948 DATA_ENSURE(1);
2949 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2950 p += SIZE_BITSET;
2951 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2952 MOP_OUT;
2953 NEXT;
2954
2955 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2956 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2957
2958 cclass_mb:
2959 GET_LENGTH_INC(tlen, p);
2960 {
2961 OnigCodePoint code;
2962 UChar *ss;
2963 int mb_len;
2964
2965 DATA_ENSURE(1);
2966 mb_len = enclen_approx(encode, s, end);
2967 DATA_ENSURE(mb_len);
2968 ss = s;
2969 s += mb_len;
2970 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2971
2972#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2973 if (! onig_is_in_code_range(p, code)) goto fail;
2974#else
2975 q = p;
2976 ALIGNMENT_RIGHT(q);
2977 if (! onig_is_in_code_range(q, code)) goto fail;
2978#endif
2979 }
2980 p += tlen;
2981 MOP_OUT;
2982 NEXT;
2983
2984 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2985 DATA_ENSURE(1);
2986 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2987 p += SIZE_BITSET;
2988 goto cclass_mb;
2989 }
2990 else {
2991 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2992 goto fail;
2993
2994 p += SIZE_BITSET;
2995 GET_LENGTH_INC(tlen, p);
2996 p += tlen;
2997 s++;
2998 }
2999 MOP_OUT;
3000 NEXT;
3001
3002 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
3003 DATA_ENSURE(1);
3004 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
3005 p += SIZE_BITSET;
3006 s += enclen(encode, s, end);
3007 MOP_OUT;
3008 NEXT;
3009
3010 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
3011 DATA_ENSURE(1);
3012 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3013 s++;
3014 GET_LENGTH_INC(tlen, p);
3015 p += tlen;
3016 goto cc_mb_not_success;
3017 }
3018
3019 cclass_mb_not:
3020 GET_LENGTH_INC(tlen, p);
3021 {
3022 OnigCodePoint code;
3023 UChar *ss;
3024 int mb_len = enclen(encode, s, end);
3025
3026 if (! DATA_ENSURE_CHECK(mb_len)) {
3027 DATA_ENSURE(1);
3028 s = (UChar* )end;
3029 p += tlen;
3030 goto cc_mb_not_success;
3031 }
3032
3033 ss = s;
3034 s += mb_len;
3035 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3036
3037#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
3038 if (onig_is_in_code_range(p, code)) goto fail;
3039#else
3040 q = p;
3041 ALIGNMENT_RIGHT(q);
3042 if (onig_is_in_code_range(q, code)) goto fail;
3043#endif
3044 }
3045 p += tlen;
3046
3047 cc_mb_not_success:
3048 MOP_OUT;
3049 NEXT;
3050
3051 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
3052 DATA_ENSURE(1);
3053 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3054 p += SIZE_BITSET;
3055 goto cclass_mb_not;
3056 }
3057 else {
3058 if (BITSET_AT(((BitSetRef )p), *s) != 0)
3059 goto fail;
3060
3061 p += SIZE_BITSET;
3062 GET_LENGTH_INC(tlen, p);
3063 p += tlen;
3064 s++;
3065 }
3066 MOP_OUT;
3067 NEXT;
3068
3069 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
3070 DATA_ENSURE(1);
3071 n = enclen_approx(encode, s, end);
3072 DATA_ENSURE(n);
3073 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3074 s += n;
3075 MOP_OUT;
3076 NEXT;
3077
3078 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
3079 DATA_ENSURE(1);
3080 n = enclen_approx(encode, s, end);
3081 DATA_ENSURE(n);
3082 s += n;
3083 MOP_OUT;
3084 NEXT;
3085
3086 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
3087 while (DATA_ENSURE_CHECK1) {
3088 CHECK_MATCH_CACHE;
3089 STACK_PUSH_ALT(p, s, sprev, pkeep);
3090 n = enclen_approx(encode, s, end);
3091 DATA_ENSURE(n);
3092 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3093 sprev = s;
3094 s += n;
3095 }
3096 MOP_OUT;
3097 JUMP;
3098
3099 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
3100 while (DATA_ENSURE_CHECK1) {
3101 CHECK_MATCH_CACHE;
3102 STACK_PUSH_ALT(p, s, sprev, pkeep);
3103 n = enclen_approx(encode, s, end);
3104 if (n > 1) {
3105 DATA_ENSURE(n);
3106 sprev = s;
3107 s += n;
3108 }
3109 else {
3110 sprev = s;
3111 s++;
3112 }
3113 }
3114 MOP_OUT;
3115 JUMP;
3116
3117 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
3118 while (DATA_ENSURE_CHECK1) {
3119 CHECK_MATCH_CACHE;
3120 if (*p == *s) {
3121 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3122 } else {
3123#ifdef USE_MATCH_CACHE
3124 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3125 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR` simply in this case.*/
3126 msa->num_fails++;
3127#endif
3128 }
3129 n = enclen_approx(encode, s, end);
3130 DATA_ENSURE(n);
3131 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3132 sprev = s;
3133 s += n;
3134 }
3135 p++;
3136 MOP_OUT;
3137 NEXT;
3138
3139 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
3140 while (DATA_ENSURE_CHECK1) {
3141 CHECK_MATCH_CACHE;
3142 if (*p == *s) {
3143 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3144 } else {
3145#ifdef USE_MATCH_CACHE
3146 /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3147 /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR_ML` simply in this case.*/
3148 msa->num_fails++;
3149#endif
3150 }
3151 n = enclen_approx(encode, s, end);
3152 if (n > 1) {
3153 DATA_ENSURE(n);
3154 sprev = s;
3155 s += n;
3156 }
3157 else {
3158 sprev = s;
3159 s++;
3160 }
3161 }
3162 p++;
3163 MOP_OUT;
3164 NEXT;
3165
3166#ifdef USE_COMBINATION_EXPLOSION_CHECK
3167 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
3168 GET_STATE_CHECK_NUM_INC(mem, p);
3169 while (DATA_ENSURE_CHECK1) {
3170 STATE_CHECK_VAL(scv, mem);
3171 if (scv) goto fail;
3172
3173 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3174 n = enclen_approx(encode, s, end);
3175 DATA_ENSURE(n);
3176 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3177 sprev = s;
3178 s += n;
3179 }
3180 MOP_OUT;
3181 NEXT;
3182
3183 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
3184 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
3185
3186 GET_STATE_CHECK_NUM_INC(mem, p);
3187 while (DATA_ENSURE_CHECK1) {
3188 STATE_CHECK_VAL(scv, mem);
3189 if (scv) goto fail;
3190
3191 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3192 n = enclen_approx(encode, s, end);
3193 if (n > 1) {
3194 DATA_ENSURE(n);
3195 sprev = s;
3196 s += n;
3197 }
3198 else {
3199 sprev = s;
3200 s++;
3201 }
3202 }
3203 MOP_OUT;
3204 NEXT;
3205#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3206
3207 CASE(OP_WORD) MOP_IN(OP_WORD);
3208 DATA_ENSURE(1);
3209 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3210 goto fail;
3211
3212 s += enclen(encode, s, end);
3213 MOP_OUT;
3214 NEXT;
3215
3216 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
3217 DATA_ENSURE(1);
3218 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3219 goto fail;
3220
3221 s += enclen(encode, s, end);
3222 MOP_OUT;
3223 NEXT;
3224
3225 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
3226 DATA_ENSURE(1);
3227 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3228 goto fail;
3229
3230 s += enclen(encode, s, end);
3231 MOP_OUT;
3232 NEXT;
3233
3234 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
3235 DATA_ENSURE(1);
3236 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3237 goto fail;
3238
3239 s += enclen(encode, s, end);
3240 MOP_OUT;
3241 NEXT;
3242
3243 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
3244 if (ON_STR_BEGIN(s)) {
3245 DATA_ENSURE(1);
3246 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3247 goto fail;
3248 }
3249 else if (ON_STR_END(s)) {
3250 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
3251 goto fail;
3252 }
3253 else {
3254 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3255 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
3256 goto fail;
3257 }
3258 MOP_OUT;
3259 JUMP;
3260
3261 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
3262 if (ON_STR_BEGIN(s)) {
3263 DATA_ENSURE(1);
3264 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3265 goto fail;
3266 }
3267 else if (ON_STR_END(s)) {
3268 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3269 goto fail;
3270 }
3271 else {
3272 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3273 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3274 goto fail;
3275 }
3276 MOP_OUT;
3277 JUMP;
3278
3279 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
3280 if (ON_STR_BEGIN(s)) {
3281 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3282 goto fail;
3283 }
3284 else if (ON_STR_END(s)) {
3285 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3286 goto fail;
3287 }
3288 else {
3289 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3290 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3291 goto fail;
3292 }
3293 MOP_OUT;
3294 JUMP;
3295
3296 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3297 if (ON_STR_BEGIN(s)) {
3298 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3299 goto fail;
3300 }
3301 else if (ON_STR_END(s)) {
3302 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3303 goto fail;
3304 }
3305 else {
3306 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3307 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3308 goto fail;
3309 }
3310 MOP_OUT;
3311 JUMP;
3312
3313#ifdef USE_WORD_BEGIN_END
3314 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3315 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3316 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3317 MOP_OUT;
3318 JUMP;
3319 }
3320 }
3321 goto fail;
3322 NEXT;
3323
3324 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3325 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3326 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3327 MOP_OUT;
3328 JUMP;
3329 }
3330 }
3331 goto fail;
3332 NEXT;
3333
3334 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3335 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3336 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3337 MOP_OUT;
3338 JUMP;
3339 }
3340 }
3341 goto fail;
3342 NEXT;
3343
3344 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3345 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3346 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3347 MOP_OUT;
3348 JUMP;
3349 }
3350 }
3351 goto fail;
3352 NEXT;
3353#endif
3354
3355 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3356 if (! ON_STR_BEGIN(s)) goto fail;
3357 if (IS_NOTBOS(msa->options)) goto fail;
3358
3359 MOP_OUT;
3360 JUMP;
3361
3362 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3363 if (! ON_STR_END(s)) goto fail;
3364 if (IS_NOTEOS(msa->options)) goto fail;
3365
3366 MOP_OUT;
3367 JUMP;
3368
3369 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3370 if (ON_STR_BEGIN(s)) {
3371 if (IS_NOTBOL(msa->options)) goto fail;
3372 MOP_OUT;
3373 JUMP;
3374 }
3375 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3376#ifdef USE_CRNL_AS_LINE_TERMINATOR
3377 && !(IS_NEWLINE_CRLF(option)
3378 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3379#endif
3380 && !ON_STR_END(s)) {
3381 MOP_OUT;
3382 JUMP;
3383 }
3384 goto fail;
3385 NEXT;
3386
3387 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3388 if (ON_STR_END(s)) {
3389#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3390 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3391#endif
3392 if (IS_NOTEOL(msa->options)) goto fail;
3393 MOP_OUT;
3394 JUMP;
3395#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3396 }
3397#endif
3398 }
3399 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3400 MOP_OUT;
3401 JUMP;
3402 }
3403 goto fail;
3404 NEXT;
3405
3406 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3407 if (ON_STR_END(s)) {
3408#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3409 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3410#endif
3411 if (IS_NOTEOL(msa->options)) goto fail;
3412 MOP_OUT;
3413 JUMP;
3414#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3415 }
3416#endif
3417 }
3418 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3419 UChar* ss = s + enclen(encode, s, end);
3420 if (ON_STR_END(ss)) {
3421 MOP_OUT;
3422 JUMP;
3423 }
3424#ifdef USE_CRNL_AS_LINE_TERMINATOR
3425 else if (IS_NEWLINE_CRLF(option)
3426 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3427 ss += enclen(encode, ss, end);
3428 if (ON_STR_END(ss)) {
3429 MOP_OUT;
3430 JUMP;
3431 }
3432 }
3433#endif
3434 }
3435 goto fail;
3436 NEXT;
3437
3438 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3439 if (s != msa->gpos)
3440 goto fail;
3441
3442 MOP_OUT;
3443 JUMP;
3444
3445 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3446 GET_MEMNUM_INC(mem, p);
3447 STACK_PUSH_MEM_START(mem, s);
3448 MOP_OUT;
3449 JUMP;
3450
3451 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3452 GET_MEMNUM_INC(mem, p);
3453 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3454 mem_end_stk[mem] = INVALID_STACK_INDEX;
3455 MOP_OUT;
3456 JUMP;
3457
3458 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3459 GET_MEMNUM_INC(mem, p);
3460 STACK_PUSH_MEM_END(mem, s);
3461 MOP_OUT;
3462 JUMP;
3463
3464 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3465 GET_MEMNUM_INC(mem, p);
3466 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3467 MOP_OUT;
3468 JUMP;
3469
3470 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3471 pkeep = s;
3472 MOP_OUT;
3473 JUMP;
3474
3475#ifdef USE_SUBEXP_CALL
3476 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3477 GET_MEMNUM_INC(mem, p);
3478 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3479 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3480 STACK_PUSH_MEM_END(mem, s);
3481 MOP_OUT;
3482 JUMP;
3483
3484 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3485 GET_MEMNUM_INC(mem, p);
3486 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3487 STACK_GET_MEM_START(mem, stkp);
3488
3489 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3490 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3491 else
3492 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3493
3494 STACK_PUSH_MEM_END_MARK(mem);
3495 MOP_OUT;
3496 JUMP;
3497#endif
3498
3499 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3500 mem = 1;
3501 goto backref;
3502 NEXT;
3503
3504 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3505 mem = 2;
3506 goto backref;
3507 NEXT;
3508
3509 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3510 GET_MEMNUM_INC(mem, p);
3511 backref:
3512 {
3513 int len;
3514 UChar *pstart, *pend;
3515
3516 /* if you want to remove following line,
3517 you should check in parse and compile time. */
3518 if (mem > num_mem) goto fail;
3519 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3520 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3521
3522 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3523 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3524 else
3525 pstart = (UChar* )((void* )mem_start_stk[mem]);
3526
3527 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3528 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3529 : (UChar* )((void* )mem_end_stk[mem]));
3530 n = pend - pstart;
3531 DATA_ENSURE(n);
3532 sprev = s;
3533 STRING_CMP(pstart, s, n);
3534 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3535 sprev += len;
3536
3537 MOP_OUT;
3538 JUMP;
3539 }
3540
3541 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3542 GET_MEMNUM_INC(mem, p);
3543 {
3544 int len;
3545 UChar *pstart, *pend;
3546
3547 /* if you want to remove following line,
3548 you should check in parse and compile time. */
3549 if (mem > num_mem) goto fail;
3550 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3551 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3552
3553 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3554 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3555 else
3556 pstart = (UChar* )((void* )mem_start_stk[mem]);
3557
3558 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3559 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3560 : (UChar* )((void* )mem_end_stk[mem]));
3561 n = pend - pstart;
3562 DATA_ENSURE(n);
3563 sprev = s;
3564 STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
3565 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3566 sprev += len;
3567
3568 MOP_OUT;
3569 JUMP;
3570 }
3571 NEXT;
3572
3573 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3574 {
3575 int len, is_fail;
3576 UChar *pstart, *pend, *swork;
3577
3578 GET_LENGTH_INC(tlen, p);
3579 for (i = 0; i < tlen; i++) {
3580 GET_MEMNUM_INC(mem, p);
3581
3582 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3583 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3584
3585 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3586 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3587 else
3588 pstart = (UChar* )((void* )mem_start_stk[mem]);
3589
3590 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3591 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3592 : (UChar* )((void* )mem_end_stk[mem]));
3593 n = pend - pstart;
3594 DATA_ENSURE_CONTINUE(n);
3595 sprev = s;
3596 swork = s;
3597 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3598 if (is_fail) continue;
3599 s = swork;
3600 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3601 sprev += len;
3602
3603 p += (SIZE_MEMNUM * (tlen - i - 1));
3604 break; /* success */
3605 }
3606 if (i == tlen) goto fail;
3607 MOP_OUT;
3608 JUMP;
3609 }
3610 NEXT;
3611
3612 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3613 {
3614 int len, is_fail;
3615 UChar *pstart, *pend, *swork;
3616
3617 GET_LENGTH_INC(tlen, p);
3618 for (i = 0; i < tlen; i++) {
3619 GET_MEMNUM_INC(mem, p);
3620
3621 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3622 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3623
3624 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3625 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3626 else
3627 pstart = (UChar* )((void* )mem_start_stk[mem]);
3628
3629 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3630 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3631 : (UChar* )((void* )mem_end_stk[mem]));
3632 n = pend - pstart;
3633 DATA_ENSURE_CONTINUE(n);
3634 sprev = s;
3635 swork = s;
3636 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3637 if (is_fail) continue;
3638 s = swork;
3639 while (sprev + (len = enclen(encode, sprev, end)) < s)
3640 sprev += len;
3641
3642 p += (SIZE_MEMNUM * (tlen - i - 1));
3643 break; /* success */
3644 }
3645 if (i == tlen) goto fail;
3646 MOP_OUT;
3647 JUMP;
3648 }
3649
3650#ifdef USE_BACKREF_WITH_LEVEL
3651 CASE(OP_BACKREF_WITH_LEVEL)
3652 {
3653 int len;
3654 OnigOptionType ic;
3655 LengthType level;
3656
3657 GET_OPTION_INC(ic, p);
3658 GET_LENGTH_INC(level, p);
3659 GET_LENGTH_INC(tlen, p);
3660
3661 sprev = s;
3662 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3663 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3664 while (sprev + (len = enclen(encode, sprev, end)) < s)
3665 sprev += len;
3666
3667 p += (SIZE_MEMNUM * tlen);
3668 }
3669 else
3670 goto fail;
3671
3672 MOP_OUT;
3673 JUMP;
3674 }
3675
3676#endif
3677
3678#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
3679 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3680 GET_OPTION_INC(option, p);
3681 STACK_PUSH_ALT(p, s, sprev, pkeep);
3682 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3683 MOP_OUT;
3684 JUMP;
3685
3686 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3687 GET_OPTION_INC(option, p);
3688 MOP_OUT;
3689 JUMP;
3690#endif
3691
3692 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3693 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3694 STACK_PUSH_NULL_CHECK_START(mem, s);
3695 MOP_OUT;
3696 JUMP;
3697
3698 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3699 {
3700 int isnull;
3701
3702 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3703 STACK_NULL_CHECK(isnull, mem, s);
3704 if (isnull) {
3705#ifdef ONIG_DEBUG_MATCH
3706 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
3707 (int )mem, (uintptr_t )s, s);
3708#endif
3709 null_check_found:
3710 /* empty loop founded, skip next instruction */
3711 switch (*p++) {
3712 case OP_JUMP:
3713 case OP_PUSH:
3714 p += SIZE_RELADDR;
3715 break;
3716 case OP_REPEAT_INC:
3717 case OP_REPEAT_INC_NG:
3718 case OP_REPEAT_INC_SG:
3719 case OP_REPEAT_INC_NG_SG:
3720 p += SIZE_MEMNUM;
3721 break;
3722 default:
3723 goto unexpected_bytecode_error;
3724 break;
3725 }
3726 }
3727 }
3728 MOP_OUT;
3729 JUMP;
3730
3731#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3732 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3733 {
3734 int isnull;
3735
3736 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3737 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
3738 if (isnull) {
3739# ifdef ONIG_DEBUG_MATCH
3740 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
3741 (int )mem, (uintptr_t )s, s);
3742# endif
3743 if (isnull == -1) goto fail;
3744 goto null_check_found;
3745 }
3746 }
3747 MOP_OUT;
3748 JUMP;
3749#endif
3750
3751#ifdef USE_SUBEXP_CALL
3752 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3753 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3754 {
3755 int isnull;
3756
3757 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3758# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3759 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3760# else
3761 STACK_NULL_CHECK_REC(isnull, mem, s);
3762# endif
3763 if (isnull) {
3764# ifdef ONIG_DEBUG_MATCH
3765 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
3766 (int )mem, (uintptr_t )s, s);
3767# endif
3768 if (isnull == -1) goto fail;
3769 goto null_check_found;
3770 }
3771 else {
3772 STACK_PUSH_NULL_CHECK_END(mem);
3773 }
3774 }
3775 MOP_OUT;
3776 JUMP;
3777#endif
3778
3779 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3780 GET_RELADDR_INC(addr, p);
3781 p += addr;
3782 MOP_OUT;
3783 CHECK_INTERRUPT_IN_MATCH_AT;
3784 JUMP;
3785
3786 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3787 GET_RELADDR_INC(addr, p);
3788 CHECK_MATCH_CACHE;
3789 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3790 MOP_OUT;
3791 JUMP;
3792
3793#ifdef USE_COMBINATION_EXPLOSION_CHECK
3794 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3795 GET_STATE_CHECK_NUM_INC(mem, p);
3796 STATE_CHECK_VAL(scv, mem);
3797 if (scv) goto fail;
3798
3799 GET_RELADDR_INC(addr, p);
3800 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3801 MOP_OUT;
3802 JUMP;
3803
3804 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3805 GET_STATE_CHECK_NUM_INC(mem, p);
3806 GET_RELADDR_INC(addr, p);
3807 STATE_CHECK_VAL(scv, mem);
3808 if (scv) {
3809 p += addr;
3810 }
3811 else {
3812 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3813 }
3814 MOP_OUT;
3815 JUMP;
3816
3817 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3818 GET_STATE_CHECK_NUM_INC(mem, p);
3819 STATE_CHECK_VAL(scv, mem);
3820 if (scv) goto fail;
3821
3822 STACK_PUSH_STATE_CHECK(s, mem);
3823 MOP_OUT;
3824 JUMP;
3825#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3826
3827 CASE(OP_POP) MOP_IN(OP_POP);
3828 STACK_POP_ONE;
3829#ifdef USE_MATCH_CACHE
3830 /* We need to increment num_fails here, for invoking a cache optimization correctly, */
3831 /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3832 msa->num_fails++;
3833#endif
3834 MOP_OUT;
3835 JUMP;
3836
3837#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3838 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3839 GET_RELADDR_INC(addr, p);
3840 if (*p == *s && DATA_ENSURE_CHECK1) {
3841 p++;
3842 CHECK_MATCH_CACHE;
3843 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3844 MOP_OUT;
3845 JUMP;
3846 }
3847 p += (addr + 1);
3848 MOP_OUT;
3849 JUMP;
3850#endif
3851
3852 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3853 GET_RELADDR_INC(addr, p);
3854 CHECK_MATCH_CACHE;
3855 if (*p == *s) {
3856 p++;
3857 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3858 MOP_OUT;
3859 JUMP;
3860 }
3861 p++;
3862 INC_NUM_FAILS;
3863 MOP_OUT;
3864 JUMP;
3865
3866 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3867 {
3868 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3869 GET_RELADDR_INC(addr, p);
3870
3871 STACK_ENSURE(1);
3872 repeat_stk[mem] = GET_STACK_INDEX(stk);
3873 STACK_PUSH_REPEAT(mem, p);
3874
3875 if (reg->repeat_range[mem].lower == 0) {
3876 CHECK_MATCH_CACHE;
3877 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3878 }
3879 }
3880 MOP_OUT;
3881 JUMP;
3882
3883 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3884 {
3885 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3886 GET_RELADDR_INC(addr, p);
3887
3888 STACK_ENSURE(1);
3889 repeat_stk[mem] = GET_STACK_INDEX(stk);
3890 STACK_PUSH_REPEAT(mem, p);
3891
3892 if (reg->repeat_range[mem].lower == 0) {
3893 CHECK_MATCH_CACHE;
3894 STACK_PUSH_ALT(p, s, sprev, pkeep);
3895 p += addr;
3896 }
3897 }
3898 MOP_OUT;
3899 JUMP;
3900
3901 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3902 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3903 si = repeat_stk[mem];
3904 stkp = STACK_AT(si);
3905
3906 repeat_inc:
3907 stkp->u.repeat.count++;
3908 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3909 /* end of repeat. Nothing to do. */
3910 }
3911 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3912#ifdef USE_MATCH_CACHE
3913 if (*pbegin == OP_REPEAT_INC) {
3914#undef MATCH_CACHE_HIT
3915#define MATCH_CACHE_HIT stkp->u.repeat.count--;
3916 CHECK_MATCH_CACHE;
3917#undef MATCH_CACHE_HIT
3918#define MATCH_CACHE_HIT ((void) 0)
3919 }
3920#endif
3921 STACK_PUSH_ALT(p, s, sprev, pkeep);
3922 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3923 }
3924 else {
3925 p = stkp->u.repeat.pcode;
3926 }
3927 STACK_PUSH_REPEAT_INC(si);
3928 MOP_OUT;
3929 CHECK_INTERRUPT_IN_MATCH_AT;
3930 JUMP;
3931
3932 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3933 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3934 STACK_GET_REPEAT(mem, stkp);
3935 si = GET_STACK_INDEX(stkp);
3936 goto repeat_inc;
3937 NEXT;
3938
3939 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3940 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3941 si = repeat_stk[mem];
3942 stkp = STACK_AT(si);
3943
3944 repeat_inc_ng:
3945 stkp->u.repeat.count++;
3946 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3947 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3948 UChar* pcode = stkp->u.repeat.pcode;
3949
3950 STACK_PUSH_REPEAT_INC(si);
3951 if (*pbegin == OP_REPEAT_INC_NG) {
3952 CHECK_MATCH_CACHE;
3953 }
3954 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3955 }
3956 else {
3957 p = stkp->u.repeat.pcode;
3958 STACK_PUSH_REPEAT_INC(si);
3959 }
3960 }
3961 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3962 STACK_PUSH_REPEAT_INC(si);
3963 }
3964 MOP_OUT;
3965 CHECK_INTERRUPT_IN_MATCH_AT;
3966 JUMP;
3967
3968 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3969 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3970 STACK_GET_REPEAT(mem, stkp);
3971 si = GET_STACK_INDEX(stkp);
3972 goto repeat_inc_ng;
3973 NEXT;
3974
3975 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3976 STACK_PUSH_POS(s, sprev, pkeep);
3977 MOP_OUT;
3978 JUMP;
3979
3980 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3981 {
3982 STACK_POS_END(stkp);
3983 s = stkp->u.state.pstr;
3984 sprev = stkp->u.state.pstr_prev;
3985 }
3986 MOP_OUT;
3987 JUMP;
3988
3989 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3990 GET_RELADDR_INC(addr, p);
3991 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3992 MOP_OUT;
3993 JUMP;
3994
3995 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3996 STACK_POP_TIL_POS_NOT;
3997 goto fail;
3998 NEXT;
3999
4000 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
4001 STACK_PUSH_STOP_BT;
4002 MOP_OUT;
4003 JUMP;
4004
4005 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
4006 STACK_STOP_BT_END;
4007 MOP_OUT;
4008 JUMP;
4009
4010 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
4011 GET_LENGTH_INC(tlen, p);
4012 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4013 if (IS_NULL(s)) goto fail;
4014 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4015 MOP_OUT;
4016 JUMP;
4017
4018 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
4019 GET_RELADDR_INC(addr, p);
4020 GET_LENGTH_INC(tlen, p);
4021 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4022 if (IS_NULL(q)) {
4023 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
4024 If you want to change to fail, replace following line. */
4025 p += addr;
4026 /* goto fail; */
4027 }
4028 else {
4029 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
4030 s = q;
4031 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4032 }
4033 MOP_OUT;
4034 JUMP;
4035
4036 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
4037 STACK_POP_TIL_LOOK_BEHIND_NOT;
4038 goto fail;
4039 NEXT;
4040
4041 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
4042 /* Save the absent-start-pos and the original end-pos. */
4043 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
4044 MOP_OUT;
4045 JUMP;
4046
4047 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
4048 {
4049 const UChar* aend = ABSENT_END_POS;
4050 UChar* absent;
4051 UChar* selfp = p - 1;
4052
4053 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
4054 GET_RELADDR_INC(addr, p);
4055#ifdef ONIG_DEBUG_MATCH
4056 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
4057#endif
4058 if ((absent > aend) && (s > absent)) {
4059 /* An empty match occurred in (?~...) at the start point.
4060 * Never match. */
4061 STACK_POP;
4062 goto fail;
4063 }
4064 else if ((s >= aend) && (s > absent)) {
4065 if (s > aend) {
4066 /* Only one (or less) character matched in the last iteration.
4067 * This is not a possible point. */
4068 goto fail;
4069 }
4070 /* All possible points were found. Try matching after (?~...). */
4071 DATA_ENSURE(0);
4072 p += addr;
4073 }
4074 else if (s == end) {
4075 /* At the end of the string, just match with it */
4076 DATA_ENSURE(0);
4077 p += addr;
4078 }
4079 else {
4080 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
4081 n = enclen(encode, s, end);
4082 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
4083 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
4084 STACK_PUSH_ABSENT;
4085 ABSENT_END_POS = aend;
4086 }
4087 }
4088 MOP_OUT;
4089 JUMP;
4090
4091 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
4092 /* The pattern inside (?~...) was matched.
4093 * Set the end-pos temporary and go to next iteration. */
4094 if (sprev < ABSENT_END_POS)
4095 ABSENT_END_POS = sprev;
4096#ifdef ONIG_DEBUG_MATCH
4097 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
4098#endif
4099 STACK_POP_TIL_ABSENT;
4100 goto fail;
4101 NEXT;
4102
4103#ifdef USE_SUBEXP_CALL
4104 CASE(OP_CALL) MOP_IN(OP_CALL);
4105 GET_ABSADDR_INC(addr, p);
4106 STACK_PUSH_CALL_FRAME(p);
4107 p = reg->p + addr;
4108 MOP_OUT;
4109 JUMP;
4110
4111 CASE(OP_RETURN) MOP_IN(OP_RETURN);
4112 STACK_RETURN(p);
4113 STACK_PUSH_RETURN;
4114 MOP_OUT;
4115 JUMP;
4116#endif
4117
4118 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
4119 GET_MEMNUM_INC(mem, p);
4120 GET_RELADDR_INC(addr, p);
4121 if ((mem > num_mem) ||
4122 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
4123 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
4124 p += addr;
4125 }
4126 MOP_OUT;
4127 JUMP;
4128
4129 CASE(OP_FINISH)
4130 goto finish;
4131 NEXT;
4132
4133 CASE(OP_FAIL)
4134 if (0) {
4135 /* fall */
4136 fail:
4137 MOP_OUT;
4138 }
4139 MOP_IN(OP_FAIL);
4140 STACK_POP;
4141 p = stk->u.state.pcode;
4142 s = stk->u.state.pstr;
4143 sprev = stk->u.state.pstr_prev;
4144 pkeep = stk->u.state.pkeep;
4145
4146#ifdef USE_MATCH_CACHE
4147 if (
4148 msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED &&
4149 ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes
4150 ) {
4151 if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) {
4152 msa->match_cache_status = MATCH_CACHE_STATUS_INIT;
4153 OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes);
4154 if (r < 0) goto bytecode_error;
4155 }
4156 if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) {
4157 msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED;
4158 goto fail_match_cache;
4159 }
4160 if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) {
4161 goto fail_match_cache;
4162 }
4163 if (msa->cache_opcodes == NULL) {
4164 msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED;
4165 OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode));
4166 if (cache_opcodes == NULL) {
4167 return ONIGERR_MEMORY;
4168 }
4169 OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points);
4170 if (r < 0) {
4171 if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
4172 else goto bytecode_error;
4173 }
4174 msa->cache_opcodes = cache_opcodes;
4175#ifdef ONIG_DEBUG_MATCH_CACHE
4176 fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes);
4177 fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points);
4178 fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes);
4179 for (int i = 0; i < msa->num_cache_opcodes; i++) {
4180 fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr);
4181 }
4182#endif
4183 }
4184 if (msa->match_cache_buf == NULL) {
4185 size_t length = (end - str) + 1;
4186 size_t num_match_cache_points = (size_t)msa->num_cache_points * length;
4187#ifdef ONIG_DEBUG_MATCH_CACHE
4188 fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length);
4189#endif
4190 /* Overflow check */
4191 if (num_match_cache_points / length != (size_t)msa->num_cache_points) {
4192 return ONIGERR_MEMORY;
4193 }
4194 if (num_match_cache_points >= LONG_MAX_LIMIT) {
4195 return ONIGERR_MEMORY;
4196 }
4197 size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1;
4198 uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t));
4199 if (match_cache_buf == NULL) {
4200 return ONIGERR_MEMORY;
4201 }
4202 xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t));
4203 msa->match_cache_buf = match_cache_buf;
4204 }
4205 }
4206 fail_match_cache:
4207#endif
4208
4209#ifdef USE_COMBINATION_EXPLOSION_CHECK
4210 if (stk->u.state.state_check != 0) {
4211 stk->type = STK_STATE_CHECK_MARK;
4212 stk++;
4213 }
4214#endif
4215
4216 MOP_OUT;
4217 CHECK_INTERRUPT_IN_MATCH_AT;
4218 JUMP;
4219
4220 DEFAULT
4221 goto bytecode_error;
4222 } VM_LOOP_END
4223
4224 finish:
4225 STACK_SAVE;
4226 xfree(xmalloc_base);
4227 return best_len;
4228
4229#ifdef ONIG_DEBUG
4230 stack_error:
4231 STACK_SAVE;
4232 xfree(xmalloc_base);
4233 return ONIGERR_STACK_BUG;
4234#endif
4235
4236 bytecode_error:
4237 STACK_SAVE;
4238 xfree(xmalloc_base);
4239 return ONIGERR_UNDEFINED_BYTECODE;
4240
4241 unexpected_bytecode_error:
4242 STACK_SAVE;
4243 xfree(xmalloc_base);
4244 return ONIGERR_UNEXPECTED_BYTECODE;
4245
4246 timeout:
4247 STACK_SAVE;
4248 xfree(xmalloc_base);
4249 return ONIGERR_TIMEOUT;
4250}
4251
4252
4253static UChar*
4254slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4255 const UChar* text, const UChar* text_end, UChar* text_range)
4256{
4257 UChar *t, *p, *s, *end;
4258
4259 end = (UChar* )text_end;
4260 end -= target_end - target - 1;
4261 if (end > text_range)
4262 end = text_range;
4263
4264 s = (UChar* )text;
4265
4266 if (enc->max_enc_len == enc->min_enc_len) {
4267 int n = enc->max_enc_len;
4268
4269 while (s < end) {
4270 if (*s == *target) {
4271 p = s + 1;
4272 t = target + 1;
4273 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4274 return s;
4275 }
4276 s += n;
4277 }
4278 return (UChar* )NULL;
4279 }
4280 while (s < end) {
4281 if (*s == *target) {
4282 p = s + 1;
4283 t = target + 1;
4284 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4285 return s;
4286 }
4287 s += enclen(enc, s, text_end);
4288 }
4289
4290 return (UChar* )NULL;
4291}
4292
4293static int
4294str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4295 const UChar* t, const UChar* tend,
4296 const UChar* p, const UChar* end)
4297{
4298 int lowlen;
4299 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4300
4301 while (t < tend) {
4302 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4303 q = lowbuf;
4304 while (lowlen > 0) {
4305 if (*t++ != *q++) return 0;
4306 lowlen--;
4307 }
4308 }
4309
4310 return 1;
4311}
4312
4313static UChar*
4314slow_search_ic(OnigEncoding enc, int case_fold_flag,
4315 UChar* target, UChar* target_end,
4316 const UChar* text, const UChar* text_end, UChar* text_range)
4317{
4318 UChar *s, *end;
4319
4320 end = (UChar* )text_end;
4321 end -= target_end - target - 1;
4322 if (end > text_range)
4323 end = text_range;
4324
4325 s = (UChar* )text;
4326
4327 while (s < end) {
4328 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4329 s, text_end))
4330 return s;
4331
4332 s += enclen(enc, s, text_end);
4333 }
4334
4335 return (UChar* )NULL;
4336}
4337
4338static UChar*
4339slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4340 const UChar* text, const UChar* adjust_text,
4341 const UChar* text_end, const UChar* text_start)
4342{
4343 UChar *t, *p, *s;
4344
4345 s = (UChar* )text_end;
4346 s -= (target_end - target);
4347 if (s > text_start)
4348 s = (UChar* )text_start;
4349 else
4350 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4351
4352 while (s >= text) {
4353 if (*s == *target) {
4354 p = s + 1;
4355 t = target + 1;
4356 while (t < target_end) {
4357 if (*t != *p++)
4358 break;
4359 t++;
4360 }
4361 if (t == target_end)
4362 return s;
4363 }
4364 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4365 }
4366
4367 return (UChar* )NULL;
4368}
4369
4370static UChar*
4371slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4372 UChar* target, UChar* target_end,
4373 const UChar* text, const UChar* adjust_text,
4374 const UChar* text_end, const UChar* text_start)
4375{
4376 UChar *s;
4377
4378 s = (UChar* )text_end;
4379 s -= (target_end - target);
4380 if (s > text_start)
4381 s = (UChar* )text_start;
4382 else
4383 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4384
4385 while (s >= text) {
4386 if (str_lower_case_match(enc, case_fold_flag,
4387 target, target_end, s, text_end))
4388 return s;
4389
4390 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4391 }
4392
4393 return (UChar* )NULL;
4394}
4395
4396#ifndef USE_SUNDAY_QUICK_SEARCH
4397/* Boyer-Moore-Horspool search applied to a multibyte string */
4398static UChar*
4399bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4400 const UChar* text, const UChar* text_end,
4401 const UChar* text_range)
4402{
4403 const UChar *s, *se, *t, *p, *end;
4404 const UChar *tail;
4405 ptrdiff_t skip, tlen1;
4406
4407# ifdef ONIG_DEBUG_SEARCH
4408 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4409 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4410# endif
4411
4412 tail = target_end - 1;
4413 tlen1 = tail - target;
4414 end = text_range;
4415 if (end + tlen1 > text_end)
4416 end = text_end - tlen1;
4417
4418 s = text;
4419
4420 if (IS_NULL(reg->int_map)) {
4421 while (s < end) {
4422 p = se = s + tlen1;
4423 t = tail;
4424 while (*p == *t) {
4425 if (t == target) return (UChar* )s;
4426 p--; t--;
4427 }
4428 skip = reg->map[*se];
4429 t = s;
4430 do {
4431 s += enclen(reg->enc, s, end);
4432 } while ((s - t) < skip && s < end);
4433 }
4434 }
4435 else {
4436# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4437 while (s < end) {
4438 p = se = s + tlen1;
4439 t = tail;
4440 while (*p == *t) {
4441 if (t == target) return (UChar* )s;
4442 p--; t--;
4443 }
4444 skip = reg->int_map[*se];
4445 t = s;
4446 do {
4447 s += enclen(reg->enc, s, end);
4448 } while ((s - t) < skip && s < end);
4449 }
4450# endif
4451 }
4452
4453 return (UChar* )NULL;
4454}
4455
4456/* Boyer-Moore-Horspool search */
4457static UChar*
4458bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4459 const UChar* text, const UChar* text_end, const UChar* text_range)
4460{
4461 const UChar *s, *t, *p, *end;
4462 const UChar *tail;
4463
4464# ifdef ONIG_DEBUG_SEARCH
4465 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4466 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4467# endif
4468
4469 end = text_range + (target_end - target) - 1;
4470 if (end > text_end)
4471 end = text_end;
4472
4473 tail = target_end - 1;
4474 s = text + (target_end - target) - 1;
4475 if (IS_NULL(reg->int_map)) {
4476 while (s < end) {
4477 p = s;
4478 t = tail;
4479# ifdef ONIG_DEBUG_SEARCH
4480 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
4481 (intptr_t )(s - text), s);
4482# endif
4483 while (*p == *t) {
4484 if (t == target) return (UChar* )p;
4485 p--; t--;
4486 }
4487 s += reg->map[*s];
4488 }
4489 }
4490 else { /* see int_map[] */
4491# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4492 while (s < end) {
4493 p = s;
4494 t = tail;
4495 while (*p == *t) {
4496 if (t == target) return (UChar* )p;
4497 p--; t--;
4498 }
4499 s += reg->int_map[*s];
4500 }
4501# endif
4502 }
4503 return (UChar* )NULL;
4504}
4505
4506/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
4507static UChar*
4508bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4509 const UChar* text, const UChar* text_end,
4510 const UChar* text_range)
4511{
4512 const UChar *s, *se, *t, *end;
4513 const UChar *tail;
4514 ptrdiff_t skip, tlen1;
4515 OnigEncoding enc = reg->enc;
4516 int case_fold_flag = reg->case_fold_flag;
4517
4518# ifdef ONIG_DEBUG_SEARCH
4519 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4520 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4521# endif
4522
4523 tail = target_end - 1;
4524 tlen1 = tail - target;
4525 end = text_range;
4526 if (end + tlen1 > text_end)
4527 end = text_end - tlen1;
4528
4529 s = text;
4530
4531 if (IS_NULL(reg->int_map)) {
4532 while (s < end) {
4533 se = s + tlen1;
4534 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4535 s, se + 1))
4536 return (UChar* )s;
4537 skip = reg->map[*se];
4538 t = s;
4539 do {
4540 s += enclen(reg->enc, s, end);
4541 } while ((s - t) < skip && s < end);
4542 }
4543 }
4544 else {
4545# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4546 while (s < end) {
4547 se = s + tlen1;
4548 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4549 s, se + 1))
4550 return (UChar* )s;
4551 skip = reg->int_map[*se];
4552 t = s;
4553 do {
4554 s += enclen(reg->enc, s, end);
4555 } while ((s - t) < skip && s < end);
4556 }
4557# endif
4558 }
4559
4560 return (UChar* )NULL;
4561}
4562
4563/* Boyer-Moore-Horspool search (ignore case) */
4564static UChar*
4565bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4566 const UChar* text, const UChar* text_end, const UChar* text_range)
4567{
4568 const UChar *s, *p, *end;
4569 const UChar *tail;
4570 OnigEncoding enc = reg->enc;
4571 int case_fold_flag = reg->case_fold_flag;
4572
4573# ifdef ONIG_DEBUG_SEARCH
4574 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4575 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4576# endif
4577
4578 end = text_range + (target_end - target) - 1;
4579 if (end > text_end)
4580 end = text_end;
4581
4582 tail = target_end - 1;
4583 s = text + (target_end - target) - 1;
4584 if (IS_NULL(reg->int_map)) {
4585 while (s < end) {
4586 p = s - (target_end - target) + 1;
4587 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4588 p, s + 1))
4589 return (UChar* )p;
4590 s += reg->map[*s];
4591 }
4592 }
4593 else { /* see int_map[] */
4594# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4595 while (s < end) {
4596 p = s - (target_end - target) + 1;
4597 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4598 p, s + 1))
4599 return (UChar* )p;
4600 s += reg->int_map[*s];
4601 }
4602# endif
4603 }
4604 return (UChar* )NULL;
4605}
4606
4607#else /* USE_SUNDAY_QUICK_SEARCH */
4608
4609/* Sunday's quick search applied to a multibyte string */
4610static UChar*
4611bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4612 const UChar* text, const UChar* text_end,
4613 const UChar* text_range)
4614{
4615 const UChar *s, *se, *t, *p, *end;
4616 const UChar *tail;
4617 ptrdiff_t skip, tlen1;
4618 OnigEncoding enc = reg->enc;
4619
4620# ifdef ONIG_DEBUG_SEARCH
4621 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4622 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4623# endif
4624
4625 tail = target_end - 1;
4626 tlen1 = tail - target;
4627 end = text_range;
4628 if (end + tlen1 > text_end)
4629 end = text_end - tlen1;
4630
4631 s = text;
4632
4633 if (IS_NULL(reg->int_map)) {
4634 while (s < end) {
4635 p = se = s + tlen1;
4636 t = tail;
4637 while (*p == *t) {
4638 if (t == target) return (UChar* )s;
4639 p--; t--;
4640 }
4641 if (s + 1 >= end) break;
4642 skip = reg->map[se[1]];
4643 t = s;
4644 do {
4645 s += enclen(enc, s, end);
4646 } while ((s - t) < skip && s < end);
4647 }
4648 }
4649 else {
4650# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4651 while (s < end) {
4652 p = se = s + tlen1;
4653 t = tail;
4654 while (*p == *t) {
4655 if (t == target) return (UChar* )s;
4656 p--; t--;
4657 }
4658 if (s + 1 >= end) break;
4659 skip = reg->int_map[se[1]];
4660 t = s;
4661 do {
4662 s += enclen(enc, s, end);
4663 } while ((s - t) < skip && s < end);
4664 }
4665# endif
4666 }
4667
4668 return (UChar* )NULL;
4669}
4670
4671/* Sunday's quick search */
4672static UChar*
4673bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4674 const UChar* text, const UChar* text_end, const UChar* text_range)
4675{
4676 const UChar *s, *t, *p, *end;
4677 const UChar *tail;
4678 ptrdiff_t tlen1;
4679
4680# ifdef ONIG_DEBUG_SEARCH
4681 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4682 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4683# endif
4684
4685 tail = target_end - 1;
4686 tlen1 = tail - target;
4687 end = text_range + tlen1;
4688 if (end > text_end)
4689 end = text_end;
4690
4691 s = text + tlen1;
4692 if (IS_NULL(reg->int_map)) {
4693 while (s < end) {
4694 p = s;
4695 t = tail;
4696 while (*p == *t) {
4697 if (t == target) return (UChar* )p;
4698 p--; t--;
4699 }
4700 if (s + 1 >= end) break;
4701 s += reg->map[s[1]];
4702 }
4703 }
4704 else { /* see int_map[] */
4705# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4706 while (s < end) {
4707 p = s;
4708 t = tail;
4709 while (*p == *t) {
4710 if (t == target) return (UChar* )p;
4711 p--; t--;
4712 }
4713 if (s + 1 >= end) break;
4714 s += reg->int_map[s[1]];
4715 }
4716# endif
4717 }
4718 return (UChar* )NULL;
4719}
4720
4721/* Sunday's quick search applied to a multibyte string (ignore case) */
4722static UChar*
4723bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4724 const UChar* text, const UChar* text_end,
4725 const UChar* text_range)
4726{
4727 const UChar *s, *se, *t, *end;
4728 const UChar *tail;
4729 ptrdiff_t skip, tlen1;
4730 OnigEncoding enc = reg->enc;
4731 int case_fold_flag = reg->case_fold_flag;
4732
4733# ifdef ONIG_DEBUG_SEARCH
4734 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4735 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4736# endif
4737
4738 tail = target_end - 1;
4739 tlen1 = tail - target;
4740 end = text_range;
4741 if (end + tlen1 > text_end)
4742 end = text_end - tlen1;
4743
4744 s = text;
4745
4746 if (IS_NULL(reg->int_map)) {
4747 while (s < end) {
4748 se = s + tlen1;
4749 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4750 s, se + 1))
4751 return (UChar* )s;
4752 if (s + 1 >= end) break;
4753 skip = reg->map[se[1]];
4754 t = s;
4755 do {
4756 s += enclen(enc, s, end);
4757 } while ((s - t) < skip && s < end);
4758 }
4759 }
4760 else {
4761# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4762 while (s < end) {
4763 se = s + tlen1;
4764 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4765 s, se + 1))
4766 return (UChar* )s;
4767 if (s + 1 >= end) break;
4768 skip = reg->int_map[se[1]];
4769 t = s;
4770 do {
4771 s += enclen(enc, s, end);
4772 } while ((s - t) < skip && s < end);
4773 }
4774# endif
4775 }
4776
4777 return (UChar* )NULL;
4778}
4779
4780/* Sunday's quick search (ignore case) */
4781static UChar*
4782bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4783 const UChar* text, const UChar* text_end, const UChar* text_range)
4784{
4785 const UChar *s, *p, *end;
4786 const UChar *tail;
4787 ptrdiff_t tlen1;
4788 OnigEncoding enc = reg->enc;
4789 int case_fold_flag = reg->case_fold_flag;
4790
4791# ifdef ONIG_DEBUG_SEARCH
4792 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4793 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4794# endif
4795
4796 tail = target_end - 1;
4797 tlen1 = tail - target;
4798 end = text_range + tlen1;
4799 if (end > text_end)
4800 end = text_end;
4801
4802 s = text + tlen1;
4803 if (IS_NULL(reg->int_map)) {
4804 while (s < end) {
4805 p = s - tlen1;
4806 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4807 p, s + 1))
4808 return (UChar* )p;
4809 if (s + 1 >= end) break;
4810 s += reg->map[s[1]];
4811 }
4812 }
4813 else { /* see int_map[] */
4814# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4815 while (s < end) {
4816 p = s - tlen1;
4817 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4818 p, s + 1))
4819 return (UChar* )p;
4820 if (s + 1 >= end) break;
4821 s += reg->int_map[s[1]];
4822 }
4823# endif
4824 }
4825 return (UChar* )NULL;
4826}
4827#endif /* USE_SUNDAY_QUICK_SEARCH */
4828
4829#ifdef USE_INT_MAP_BACKWARD
4830static int
4831set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4832 int** skip)
4833{
4834 int i, len;
4835
4836 if (IS_NULL(*skip)) {
4837 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4838 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4839 }
4840
4841 len = (int )(end - s);
4842 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4843 (*skip)[i] = len;
4844
4845 for (i = len - 1; i > 0; i--)
4846 (*skip)[s[i]] = i;
4847
4848 return 0;
4849}
4850
4851static UChar*
4852bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4853 const UChar* text, const UChar* adjust_text,
4854 const UChar* text_end, const UChar* text_start)
4855{
4856 const UChar *s, *t, *p;
4857
4858 s = text_end - (target_end - target);
4859 if (text_start < s)
4860 s = text_start;
4861 else
4862 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4863
4864 while (s >= text) {
4865 p = s;
4866 t = target;
4867 while (t < target_end && *p == *t) {
4868 p++; t++;
4869 }
4870 if (t == target_end)
4871 return (UChar* )s;
4872
4873 s -= reg->int_map_backward[*s];
4874 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4875 }
4876
4877 return (UChar* )NULL;
4878}
4879#endif
4880
4881static UChar*
4882map_search(OnigEncoding enc, UChar map[],
4883 const UChar* text, const UChar* text_range, const UChar* text_end)
4884{
4885 const UChar *s = text;
4886
4887 while (s < text_range) {
4888 if (map[*s]) return (UChar* )s;
4889
4890 s += enclen(enc, s, text_end);
4891 }
4892 return (UChar* )NULL;
4893}
4894
4895static UChar*
4896map_search_backward(OnigEncoding enc, UChar map[],
4897 const UChar* text, const UChar* adjust_text,
4898 const UChar* text_start, const UChar* text_end)
4899{
4900 const UChar *s = text_start;
4901
4902 while (s >= text) {
4903 if (map[*s]) return (UChar* )s;
4904
4905 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4906 }
4907 return (UChar* )NULL;
4908}
4909
4910extern OnigPosition
4911onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4912 OnigOptionType option)
4913{
4914 ptrdiff_t r;
4915 UChar *prev;
4916 OnigMatchArg msa;
4917
4918 MATCH_ARG_INIT(msa, option, region, at, at);
4919#ifdef USE_COMBINATION_EXPLOSION_CHECK
4920 {
4921 ptrdiff_t offset = at - str;
4922 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4923 }
4924#endif
4925
4926 if (region) {
4927 r = onig_region_resize_clear(region, reg->num_mem + 1);
4928 }
4929 else
4930 r = 0;
4931
4932 if (r == 0) {
4933 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4934 r = match_at(reg, str, end,
4935#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4936 end,
4937#endif
4938 at, prev, &msa);
4939 }
4940
4941 MATCH_ARG_FREE(msa);
4942 return r;
4943}
4944
4945static int
4946forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4947 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4948{
4949 UChar *p, *pprev = (UChar* )NULL;
4950 size_t input_len = end - str;
4951
4952#ifdef ONIG_DEBUG_SEARCH
4953 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4954 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4955#endif
4956
4957 if (reg->dmin > input_len) {
4958 return 0;
4959 }
4960
4961 p = s;
4962 if (reg->dmin > 0) {
4963 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4964 p += reg->dmin;
4965 }
4966 else {
4967 UChar *q = p + reg->dmin;
4968
4969 if (q >= end) return 0; /* fail */
4970 while (p < q) p += enclen(reg->enc, p, end);
4971 }
4972 }
4973
4974 retry:
4975 switch (reg->optimize) {
4976 case ONIG_OPTIMIZE_EXACT:
4977 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4978 break;
4979 case ONIG_OPTIMIZE_EXACT_IC:
4980 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4981 reg->exact, reg->exact_end, p, end, range);
4982 break;
4983
4984 case ONIG_OPTIMIZE_EXACT_BM:
4985 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4986 break;
4987
4988 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4989 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4990 break;
4991
4992 case ONIG_OPTIMIZE_EXACT_BM_IC:
4993 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4994 break;
4995
4996 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4997 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4998 break;
4999
5000 case ONIG_OPTIMIZE_MAP:
5001 p = map_search(reg->enc, reg->map, p, range, end);
5002 break;
5003 }
5004
5005 if (p && p < range) {
5006 if (p - reg->dmin < s) {
5007 retry_gate:
5008 pprev = p;
5009 p += enclen(reg->enc, p, end);
5010 goto retry;
5011 }
5012
5013 if (reg->sub_anchor) {
5014 UChar* prev;
5015
5016 switch (reg->sub_anchor) {
5017 case ANCHOR_BEGIN_LINE:
5018 if (!ON_STR_BEGIN(p)) {
5019 prev = onigenc_get_prev_char_head(reg->enc,
5020 (pprev ? pprev : str), p, end);
5021 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
5022 goto retry_gate;
5023 }
5024 break;
5025
5026 case ANCHOR_END_LINE:
5027 if (ON_STR_END(p)) {
5028#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5029 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
5030 (pprev ? pprev : str), p);
5031 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
5032 goto retry_gate;
5033#endif
5034 }
5035 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
5036 goto retry_gate;
5037 break;
5038 }
5039 }
5040
5041 if (reg->dmax == 0) {
5042 *low = p;
5043 if (low_prev) {
5044 if (*low > s)
5045 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
5046 else
5047 *low_prev = onigenc_get_prev_char_head(reg->enc,
5048 (pprev ? pprev : str), p, end);
5049 }
5050 }
5051 else {
5052 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5053 if (p < str + reg->dmax) {
5054 *low = (UChar* )str;
5055 if (low_prev)
5056 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
5057 }
5058 else {
5059 *low = p - reg->dmax;
5060 if (*low > s) {
5061 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
5062 *low, end, (const UChar** )low_prev);
5063 if (low_prev && IS_NULL(*low_prev))
5064 *low_prev = onigenc_get_prev_char_head(reg->enc,
5065 (pprev ? pprev : s), *low, end);
5066 }
5067 else {
5068 if (low_prev)
5069 *low_prev = onigenc_get_prev_char_head(reg->enc,
5070 (pprev ? pprev : str), *low, end);
5071 }
5072 }
5073 }
5074 }
5075 /* no needs to adjust *high, *high is used as range check only */
5076 *high = p - reg->dmin;
5077
5078#ifdef ONIG_DEBUG_SEARCH
5079 fprintf(stderr,
5080 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
5081 *low - str, *high - str, reg->dmin, reg->dmax);
5082#endif
5083 return 1; /* success */
5084 }
5085
5086 return 0; /* fail */
5087}
5088
5089#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
5090
5091static int
5092backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
5093 UChar* s, const UChar* range, UChar* adjrange,
5094 UChar** low, UChar** high)
5095{
5096 UChar *p;
5097 size_t input_len = end - str;
5098
5099 if (reg->dmin > input_len) {
5100 return 0;
5101 }
5102
5103 range += reg->dmin;
5104 p = s;
5105
5106 retry:
5107 switch (reg->optimize) {
5108 case ONIG_OPTIMIZE_EXACT:
5109 exact_method:
5110 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5111 range, adjrange, end, p);
5112 break;
5113
5114 case ONIG_OPTIMIZE_EXACT_IC:
5115 case ONIG_OPTIMIZE_EXACT_BM_IC:
5116 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
5117 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
5118 reg->exact, reg->exact_end,
5119 range, adjrange, end, p);
5120 break;
5121
5122 case ONIG_OPTIMIZE_EXACT_BM:
5123 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
5124#ifdef USE_INT_MAP_BACKWARD
5125 if (IS_NULL(reg->int_map_backward)) {
5126 int r;
5127 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
5128 goto exact_method;
5129
5130 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
5131 &(reg->int_map_backward));
5132 if (r) return r;
5133 }
5134 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
5135 end, p);
5136#else
5137 goto exact_method;
5138#endif
5139 break;
5140
5141 case ONIG_OPTIMIZE_MAP:
5142 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
5143 break;
5144 }
5145
5146 if (p) {
5147 if (reg->sub_anchor) {
5148 UChar* prev;
5149
5150 switch (reg->sub_anchor) {
5151 case ANCHOR_BEGIN_LINE:
5152 if (!ON_STR_BEGIN(p)) {
5153 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
5154 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
5155 p = prev;
5156 goto retry;
5157 }
5158 }
5159 break;
5160
5161 case ANCHOR_END_LINE:
5162 if (ON_STR_END(p)) {
5163#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
5164 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
5165 if (IS_NULL(prev)) goto fail;
5166 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
5167 p = prev;
5168 goto retry;
5169 }
5170#endif
5171 }
5172 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
5173 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
5174 if (IS_NULL(p)) goto fail;
5175 goto retry;
5176 }
5177 break;
5178 }
5179 }
5180
5181 /* no needs to adjust *high, *high is used as range check only */
5182 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5183 *low = p - reg->dmax;
5184 *high = p - reg->dmin;
5185 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
5186 }
5187
5188#ifdef ONIG_DEBUG_SEARCH
5189 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
5190 (int )(*low - str), (int )(*high - str));
5191#endif
5192 return 1; /* success */
5193 }
5194
5195 fail:
5196#ifdef ONIG_DEBUG_SEARCH
5197 fprintf(stderr, "backward_search_range: fail.\n");
5198#endif
5199 return 0; /* fail */
5200}
5201
5202
5203extern OnigPosition
5204onig_search(regex_t* reg, const UChar* str, const UChar* end,
5205 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
5206{
5207 return onig_search_gpos(reg, str, end, start, start, range, region, option);
5208}
5209
5210extern OnigPosition
5211onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
5212 const UChar* global_pos,
5213 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
5214{
5215 ptrdiff_t r;
5216 UChar *s, *prev;
5217 OnigMatchArg msa;
5218#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5219 const UChar *orig_start = start;
5220 const UChar *orig_range = range;
5221#endif
5222
5223#ifdef ONIG_DEBUG_SEARCH
5224 fprintf(stderr,
5225 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
5226 (uintptr_t )str, str, end - str, start - str, range - str);
5227#endif
5228
5229 if (region) {
5230 r = onig_region_resize_clear(region, reg->num_mem + 1);
5231 if (r) goto finish_no_msa;
5232 }
5233
5234 if (start > end || start < str) goto mismatch_no_msa;
5235
5236
5237#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5238# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5239# define MATCH_AND_RETURN_CHECK(upper_range) \
5240 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5241 switch (r) { \
5242 case ONIG_MISMATCH: \
5243 break; \
5244 case ONIGERR_TIMEOUT: \
5245 goto timeout; \
5246 default: \
5247 if (r >= 0) { \
5248 if (! IS_FIND_LONGEST(reg->options)) { \
5249 goto match; \
5250 }\
5251 }\
5252 else goto finish; /* error */ \
5253 }
5254# else
5255# define MATCH_AND_RETURN_CHECK(upper_range) \
5256 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5257 switch (r) { \
5258 case ONIG_MISMATCH: \
5259 break; \
5260 case ONIGERR_TIMEOUT: \
5261 goto timeout; \
5262 default: \
5263 if (r >= 0) { \
5264 goto match; \
5265 }\
5266 else goto finish; /* error */ \
5267 }
5268# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5269#else
5270# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5271# define MATCH_AND_RETURN_CHECK(none) \
5272 r = match_at(reg, str, end, s, prev, &msa);\
5273 switch (r) { \
5274 case ONIG_MISMATCH: \
5275 break; \
5276 case ONIGERR_TIMEOUT: \
5277 goto timeout; \
5278 default: \
5279 if (r >= 0) { \
5280 if (! IS_FIND_LONGEST(reg->options)) { \
5281 goto match; \
5282 } \
5283 } \
5284 else goto finish; /* error */ \
5285 }
5286# else
5287# define MATCH_AND_RETURN_CHECK(none) \
5288 r = match_at(reg, str, end, s, prev, &msa);\
5289 switch (r) { \
5290 case ONIG_MISMATCH: \
5291 break; \
5292 case ONIGERR_TIMEOUT: \
5293 goto timeout; \
5294 default: \
5295 if (r >= 0) { \
5296 goto match; \
5297 } \
5298 else goto finish; /* error */ \
5299 }
5300# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5301#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
5302
5303
5304 /* anchor optimize: resume search range */
5305 if (reg->anchor != 0 && str < end) {
5306 UChar *min_semi_end, *max_semi_end;
5307
5308 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
5309 /* search start-position only */
5310 begin_position:
5311 if (range > start)
5312 {
5313 if (global_pos > start)
5314 {
5315 if (global_pos < range)
5316 range = global_pos + 1;
5317 }
5318 else
5319 range = start + 1;
5320 }
5321 else
5322 range = start;
5323 }
5324 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
5325 /* search str-position only */
5326 if (range > start) {
5327 if (start != str) goto mismatch_no_msa;
5328 range = str + 1;
5329 }
5330 else {
5331 if (range <= str) {
5332 start = str;
5333 range = str;
5334 }
5335 else
5336 goto mismatch_no_msa;
5337 }
5338 }
5339 else if (reg->anchor & ANCHOR_END_BUF) {
5340 min_semi_end = max_semi_end = (UChar* )end;
5341
5342 end_buf:
5343 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5344 goto mismatch_no_msa;
5345
5346 if (range > start) {
5347 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5348 start = min_semi_end - reg->anchor_dmax;
5349 if (start < end)
5350 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5351 }
5352 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5353 range = max_semi_end - reg->anchor_dmin + 1;
5354 }
5355
5356 if (start > range) goto mismatch_no_msa;
5357 /* If start == range, match with empty at end.
5358 Backward search is used. */
5359 }
5360 else {
5361 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5362 range = min_semi_end - reg->anchor_dmax;
5363 }
5364 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5365 start = max_semi_end - reg->anchor_dmin;
5366 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5367 }
5368 if (range > start) goto mismatch_no_msa;
5369 }
5370 }
5371 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5372 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5373
5374 max_semi_end = (UChar* )end;
5375 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5376 min_semi_end = pre_end;
5377
5378#ifdef USE_CRNL_AS_LINE_TERMINATOR
5379 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5380 if (IS_NOT_NULL(pre_end) &&
5381 IS_NEWLINE_CRLF(reg->options) &&
5382 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5383 min_semi_end = pre_end;
5384 }
5385#endif
5386 if (min_semi_end > str && start <= min_semi_end) {
5387 goto end_buf;
5388 }
5389 }
5390 else {
5391 min_semi_end = (UChar* )end;
5392 goto end_buf;
5393 }
5394 }
5395 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5396 goto begin_position;
5397 }
5398 }
5399 else if (str == end) { /* empty string */
5400 static const UChar address_for_empty_string[] = "";
5401
5402#ifdef ONIG_DEBUG_SEARCH
5403 fprintf(stderr, "onig_search: empty string.\n");
5404#endif
5405
5406 if (reg->threshold_len == 0) {
5407 start = end = str = address_for_empty_string;
5408 s = (UChar* )start;
5409 prev = (UChar* )NULL;
5410
5411 MATCH_ARG_INIT(msa, option, region, start, start);
5412#ifdef USE_COMBINATION_EXPLOSION_CHECK
5413 msa.state_check_buff = (void* )0;
5414 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
5415#endif
5416 MATCH_AND_RETURN_CHECK(end);
5417 goto mismatch;
5418 }
5419 goto mismatch_no_msa;
5420 }
5421
5422#ifdef ONIG_DEBUG_SEARCH
5423 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5424 (int )(end - str), (int )(start - str), (int )(range - str));
5425#endif
5426
5427 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5428#ifdef USE_COMBINATION_EXPLOSION_CHECK
5429 {
5430 ptrdiff_t offset = (MIN(start, range) - str);
5431 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5432 }
5433#endif
5434
5435 s = (UChar* )start;
5436 if (range > start) { /* forward search */
5437 if (s > str)
5438 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5439 else
5440 prev = (UChar* )NULL;
5441
5442 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5443 UChar *sch_range, *low, *high, *low_prev;
5444
5445 sch_range = (UChar* )range;
5446 if (reg->dmax != 0) {
5447 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5448 sch_range = (UChar* )end;
5449 else {
5450 sch_range += reg->dmax;
5451 if (sch_range > end) sch_range = (UChar* )end;
5452 }
5453 }
5454
5455 if ((end - start) < reg->threshold_len)
5456 goto mismatch;
5457
5458 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5459 do {
5460 if (! forward_search_range(reg, str, end, s, sch_range,
5461 &low, &high, &low_prev)) goto mismatch;
5462 if (s < low) {
5463 s = low;
5464 prev = low_prev;
5465 }
5466 while (s <= high) {
5467 MATCH_AND_RETURN_CHECK(orig_range);
5468 prev = s;
5469 s += enclen(reg->enc, s, end);
5470 }
5471 } while (s < range);
5472 goto mismatch;
5473 }
5474 else { /* check only. */
5475 if (! forward_search_range(reg, str, end, s, sch_range,
5476 &low, &high, (UChar** )NULL)) goto mismatch;
5477
5478 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5479 do {
5480 MATCH_AND_RETURN_CHECK(orig_range);
5481 prev = s;
5482 s += enclen(reg->enc, s, end);
5483
5484 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5485 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5486 && s < range) {
5487 prev = s;
5488 s += enclen(reg->enc, s, end);
5489 }
5490 }
5491 } while (s < range);
5492 goto mismatch;
5493 }
5494 }
5495 }
5496
5497 do {
5498 MATCH_AND_RETURN_CHECK(orig_range);
5499 prev = s;
5500 s += enclen(reg->enc, s, end);
5501 } while (s < range);
5502
5503 if (s == range) { /* because empty match with /$/. */
5504 MATCH_AND_RETURN_CHECK(orig_range);
5505 }
5506 }
5507 else { /* backward search */
5508 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5509 UChar *low, *high, *adjrange, *sch_start;
5510
5511 if (range < end)
5512 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5513 else
5514 adjrange = (UChar* )end;
5515
5516 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5517 (end - range) >= reg->threshold_len) {
5518 do {
5519 sch_start = s + reg->dmax;
5520 if (sch_start > end) sch_start = (UChar* )end;
5521 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5522 &low, &high) <= 0)
5523 goto mismatch;
5524
5525 if (s > high)
5526 s = high;
5527
5528 while (s >= low) {
5529 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5530 MATCH_AND_RETURN_CHECK(orig_start);
5531 s = prev;
5532 }
5533 } while (s >= range);
5534 goto mismatch;
5535 }
5536 else { /* check only. */
5537 if ((end - range) < reg->threshold_len) goto mismatch;
5538
5539 sch_start = s;
5540 if (reg->dmax != 0) {
5541 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5542 sch_start = (UChar* )end;
5543 else {
5544 sch_start += reg->dmax;
5545 if (sch_start > end) sch_start = (UChar* )end;
5546 else
5547 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5548 start, sch_start, end);
5549 }
5550 }
5551 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5552 &low, &high) <= 0) goto mismatch;
5553 }
5554 }
5555
5556 do {
5557 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5558 MATCH_AND_RETURN_CHECK(orig_start);
5559 s = prev;
5560 } while (s >= range);
5561 }
5562
5563 mismatch:
5564#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5565 if (IS_FIND_LONGEST(reg->options)) {
5566 if (msa.best_len >= 0) {
5567 s = msa.best_s;
5568 goto match;
5569 }
5570 }
5571#endif
5572 r = ONIG_MISMATCH;
5573
5574 finish:
5575 MATCH_ARG_FREE(msa);
5576
5577 /* If result is mismatch and no FIND_NOT_EMPTY option,
5578 then the region is not set in match_at(). */
5579 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5580 onig_region_clear(region);
5581 }
5582
5583#ifdef ONIG_DEBUG
5584 if (r != ONIG_MISMATCH)
5585 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5586#endif
5587 return r;
5588
5589 mismatch_no_msa:
5590 r = ONIG_MISMATCH;
5591 finish_no_msa:
5592#ifdef ONIG_DEBUG
5593 if (r != ONIG_MISMATCH)
5594 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5595#endif
5596 return r;
5597
5598 match:
5599 MATCH_ARG_FREE(msa);
5600 return s - str;
5601
5602timeout:
5603 MATCH_ARG_FREE(msa);
5604 return ONIGERR_TIMEOUT;
5605}
5606
5607extern OnigPosition
5608onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5609 OnigRegion* region, OnigOptionType option,
5610 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5611 void* callback_arg)
5612{
5613 OnigPosition r;
5614 OnigPosition n;
5615 int rs;
5616 const UChar* start;
5617
5618 n = 0;
5619 start = str;
5620 while (1) {
5621 r = onig_search(reg, str, end, start, end, region, option);
5622 if (r >= 0) {
5623 rs = scan_callback(n, r, region, callback_arg);
5624 n++;
5625 if (rs != 0)
5626 return rs;
5627
5628 if (region->end[0] == start - str) {
5629 if (start >= end) break;
5630 start += enclen(reg->enc, start, end);
5631 }
5632 else
5633 start = str + region->end[0];
5634
5635 if (start > end)
5636 break;
5637 }
5638 else if (r == ONIG_MISMATCH) {
5639 break;
5640 }
5641 else { /* error */
5642 return r;
5643 }
5644 }
5645
5646 return n;
5647}
5648
5649extern OnigEncoding
5650onig_get_encoding(const regex_t* reg)
5651{
5652 return reg->enc;
5653}
5654
5655extern OnigOptionType
5656onig_get_options(const regex_t* reg)
5657{
5658 return reg->options;
5659}
5660
5661extern OnigCaseFoldType
5662onig_get_case_fold_flag(const regex_t* reg)
5663{
5664 return reg->case_fold_flag;
5665}
5666
5667extern const OnigSyntaxType*
5668onig_get_syntax(const regex_t* reg)
5669{
5670 return reg->syntax;
5671}
5672
5673extern int
5674onig_number_of_captures(const regex_t* reg)
5675{
5676 return reg->num_mem;
5677}
5678
5679extern int
5680onig_number_of_capture_histories(const regex_t* reg)
5681{
5682#ifdef USE_CAPTURE_HISTORY
5683 int i, n;
5684
5685 n = 0;
5686 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5687 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5688 n++;
5689 }
5690 return n;
5691#else
5692 return 0;
5693#endif
5694}
5695
5696extern void
5697onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
5698{
5699 *to = *from;
5700}
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
Definition defines.h:89
int len
Length of the buffer.
Definition io.h:8
Definition win32.h:708