34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
39#ifndef USE_TOKEN_THREADED_VM
41# define USE_TOKEN_THREADED_VM 1
43# define USE_TOKEN_THREADED_VM 0
48# define ENC_DUMMY_FLAG (1<<24)
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
/*
 * Redefinition of ONIGENC_IS_MBC_ASCII_WORD(enc, s, end).
 *
 * When rb_enc_asciicompat(enc) is true the byte at s is tested directly:
 * it is a word character if it is alphanumeric (ISALNUM) or '_'.
 * Otherwise the character at s is decoded with ONIGENC_MBC_TO_CODE and
 * classified by onigenc_ascii_is_code_ctype() against ONIGENC_CTYPE_WORD.
 *
 * The dereference is written *(s) so that an argument such as `ptr + 1`
 * reads the byte at ptr + 1 instead of expanding to `*ptr + 1`.
 * `s` is still evaluated more than once; do not pass an expression with
 * side effects.
 */
# undef ONIGENC_IS_MBC_ASCII_WORD
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
    (rb_enc_asciicompat(enc) ? (ISALNUM(*(s)) || *(s)=='_') : \
     onigenc_ascii_is_code_ctype( \
        ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
/*
 * ONIGENC_IS_MBC_CRNL(enc, p, end): true when the character at p decodes
 * to CR (13) and the character that starts enclen(enc, p, end) bytes
 * later decodes to LF (10).
 *
 * p is parenthesised in the pointer addition so that an argument that is
 * itself an expression (for example a conditional expression) advances
 * as a whole.  The macro does not itself compare the advanced position
 * against end; that is left to ONIGENC_MBC_TO_CODE / the caller.
 * p is evaluated more than once.
 */
# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_MBC_TO_CODE(enc,((p)+enclen(enc,p,end)),end) == 10)
/*
 * ONIGENC_IS_MBC_NEWLINE_EX: newline test that also receives the start of
 * the string, the match options and a check_prev flag; all of them are
 * forwarded unchanged to is_mbc_newline_ex().
 */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
/*
 * is_mbc_newline_ex: newline test at p that takes the CRLF option into
 * account.  Only part of the body is visible in this view, so the notes
 * below describe the visible conditions, not the values each branch
 * returns.
 *
 *   enc         encoding used to decode characters
 *   p           position being tested
 *   start, end  bounds passed to onigenc_get_prev_char_head() and to the
 *               decoding macros
 *   option      examined with IS_NEWLINE_CRLF
 *   check_prev  its use is not visible here -- presumably it gates the
 *               look-behind for CR; TODO confirm
 */
68is_mbc_newline_ex(
OnigEncoding enc,
const UChar *p,
const UChar *start,
69 const UChar *end, OnigOptionType option,
int check_prev)
/* CRLF mode: LF (0x0a) and CR (0x0d) are inspected explicitly. */
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
/* p holds LF: fetch the head of the previous character to see if it is CR. */
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
/* Look one character ahead: CR at p followed by LF at pnext. */
84 const UChar *pnext = p + enclen(enc, p, end);
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
/* Plain encoding-level newline test. */
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
/*
 * Fallback definition of ONIGENC_IS_MBC_NEWLINE_EX: start, option and
 * check_prev are accepted but ignored; the test is just
 * ONIGENC_IS_MBC_NEWLINE(enc, p, end).
 */
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
/*
 * history_tree_clear: return a capture-history node to its empty state.
 * A NULL node is tolerated (the whole body is guarded by IS_NOT_NULL).
 * Every non-NULL child is released with history_tree_free(), all
 * `allocated` slots are set to NULL, and num_childs / beg / end are
 * reset.  Lines between the visible ones are missing from this view, so
 * whether the childs array itself is freed before being set to NULL
 * cannot be confirmed from here.
 */
107history_tree_clear(OnigCaptureTreeNode* node)
111 if (IS_NOT_NULL(node)) {
/* Release the subtrees first. */
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
/* Null every allocated slot, not only the used ones. */
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
125 node->childs = (OnigCaptureTreeNode** )0;
/*
 * history_tree_free: dispose of a capture-history node.  The visible
 * part clears the node (and, through history_tree_clear, its children);
 * the remaining lines of the body are not visible in this view.
 * history_tree_clear() calls back into this function for each child, so
 * the two are mutually recursive.
 */
130history_tree_free(OnigCaptureTreeNode* node)
132 history_tree_clear(node);
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
/*
 * history_node_new: allocate one capture-history node with xmalloc and
 * put it into an empty state (no child array, zero children, beg/end set
 * to ONIG_REGION_NOTPOS).  Not every line of the body is visible in this
 * view, so other fields may be initialised as well.
 */
145static OnigCaptureTreeNode*
146history_node_new(
void)
148 OnigCaptureTreeNode* node;
150 node = (OnigCaptureTreeNode* )
xmalloc(
sizeof(OnigCaptureTreeNode));
/* Allocation-failure check; CHECK_NULL_RETURN is defined elsewhere
 * (presumably it returns NULL from this function -- confirm). */
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
154 node->num_childs = 0;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
/*
 * history_tree_add_child: append child to parent's child array, growing
 * the array when it is full.
 *
 * Growth policy visible here: the first allocation holds
 * HISTORY_TREE_INIT_ALLOC_SIZE (8) pointers; later growth doubles
 * parent->allocated.  New slots are set to NULL.
 * On reallocation failure the parent is cleared with
 * history_tree_clear() and ONIGERR_MEMORY is returned.
 * Some lines of the body (including the final return) are not visible.
 */
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
/* Array full (or not allocated yet): grow before appending. */
167 if (parent->num_childs >= parent->allocated) {
170 if (IS_NULL(parent->childs)) {
/* First allocation. */
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
173 (OnigCaptureTreeNode** )
xmalloc(
sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
/* The xrealloc result goes through tmp, so parent->childs is only
 * overwritten after the reallocation succeeded. */
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
180 (OnigCaptureTreeNode** )
xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
/* Failure path: drop the parent's existing children and report it. */
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
186 parent->childs = tmp;
/* Null the newly added slots. */
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
191 parent->allocated = n;
/* Append. */
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
/*
 * history_tree_clone: recursively copy a capture-history tree.
 * A fresh node is created with history_node_new(), beg/end are copied,
 * and each child is cloned and attached with history_tree_add_child().
 * If cloning a child fails, the partial clone is freed and NULL is
 * returned; the second cleanup path additionally frees the child that
 * could not be attached (its guarding condition on `r` is not visible
 * in this view).  Other lines of the body, including the success return,
 * are also not visible.
 */
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
203 OnigCaptureTreeNode *clone, *child;
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
/* Child could not be cloned: release what was built so far. */
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
216 r = history_tree_add_child(clone, child);
/* Cleanup that releases both the unattached child and the clone. */
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
227extern OnigCaptureTreeNode*
230 return region->history_root;
234#ifdef USE_MATCH_CACHE
/*
 * count_num_cache_opcodes_inner: walk the compiled bytecode and count the
 * opcodes that take part in the match cache.
 *
 *   current_repeat_mem     id of the enclosing repeat, or -1 for none
 *                          (the top-level caller passes -1)
 *   lookaround_nesting     0 at top level; incremented for each nested
 *                          look-around; -1 is passed when entering
 *                          OP_PUSH_STOP_BT
 *   pp                     in/out bytecode cursor
 *   num_cache_opcodes_ptr  in/out counter; the exit paths store either
 *                          the running count or
 *                          NUM_CACHE_OPCODES_IMPOSSIBLE
 *
 * The function recurses for repeats, look-arounds and atomic groups.
 * Large parts of the switch (labels, braces, several case bodies) are not
 * visible in this view; the comments below only describe visible lines.
 */
261static OnigPosition count_num_cache_opcodes_inner(
263 MemNumType current_repeat_mem,
int lookaround_nesting,
264 UChar** pp,
long* num_cache_opcodes_ptr
/* End of the bytecode buffer. */
268 UChar* pend = reg->p + reg->used;
270 MemNumType repeat_mem;
/* Work on a local copy of the counter; it is written back on exit. */
272 long num_cache_opcodes = *num_cache_opcodes_ptr;
/* Opcodes with inline operands: step over the operand bytes. */
281 case OP_EXACT1: p++;
break;
282 case OP_EXACT2: p += 2;
break;
283 case OP_EXACT3: p += 3;
break;
284 case OP_EXACT4: p += 4;
break;
285 case OP_EXACT5: p += 5;
break;
287 GET_LENGTH_INC(
len, p); p +=
len;
break;
288 case OP_EXACTMB2N1: p += 2;
break;
289 case OP_EXACTMB2N2: p += 4;
break;
290 case OP_EXACTMB2N3: p += 6;
break;
292 GET_LENGTH_INC(
len, p); p +=
len * 2;
break;
294 GET_LENGTH_INC(
len, p); p +=
len * 3;
break;
298 GET_LENGTH_INC(mb_len, p);
299 GET_LENGTH_INC(
len, p);
305 len = enclen(enc, p, pend); p +=
len;
break;
307 GET_LENGTH_INC(
len, p); p +=
len;
break;
311 p += SIZE_BITSET;
break;
313 case OP_CCLASS_MB_NOT:
314 GET_LENGTH_INC(
len, p); p +=
len;
break;
316 case OP_CCLASS_MIX_NOT:
318 GET_LENGTH_INC(
len, p);
/* The ANYCHAR*_STAR family each count as one cache opcode. */
325 case OP_ANYCHAR_STAR:
326 case OP_ANYCHAR_ML_STAR:
327 num_cache_opcodes++;
break;
328 case OP_ANYCHAR_STAR_PEEK_NEXT:
329 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
330 p++; num_cache_opcodes++;
break;
335 case OP_NOT_WORD_BOUND:
341 case OP_NOT_ASCII_WORD:
342 case OP_ASCII_WORD_BOUND:
343 case OP_NOT_ASCII_WORD_BOUND:
344 case OP_ASCII_WORD_BEGIN:
345 case OP_ASCII_WORD_END:
352 case OP_SEMI_END_BUF:
353 case OP_BEGIN_POSITION:
360 case OP_BACKREF_MULTI:
361 case OP_BACKREF_MULTI_IC:
362 case OP_BACKREF_WITH_LEVEL:
365 case OP_MEMORY_START:
366 case OP_MEMORY_START_PUSH:
367 case OP_MEMORY_END_PUSH:
368 case OP_MEMORY_END_PUSH_REC:
370 case OP_MEMORY_END_REC:
/* Capture opcodes are treated specially when inside a look-around or
 * atomic group (nesting != 0); the action taken is not visible here. */
373 if (lookaround_nesting != 0) {
392 case OP_PUSH_OR_JUMP_EXACT1:
393 case OP_PUSH_IF_PEEK_NEXT:
394 p += SIZE_RELADDR + 1; num_cache_opcodes++;
break;
/* A repeat met while already inside a repeat is special-cased; the
 * action taken is not visible here. */
397 if (current_repeat_mem != -1) {
401 GET_MEMNUM_INC(repeat_mem, p);
/* A {0,0} repeat is walked with a throw-away counter, so opcodes in its
 * body do not contribute to the total. */
403 if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
404 long dummy_num_cache_opcodes = 0;
405 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes);
406 if (result < 0 || dummy_num_cache_opcodes < 0) {
410 if (reg->repeat_range[repeat_mem].lower == 0) {
/* Ordinary repeat: count its body into the real counter. */
413 result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes);
414 if (result < 0 || num_cache_opcodes < 0) {
418 if (repeat_range->lower < repeat_range->upper) {
424 case OP_REPEAT_INC_NG:
425 GET_MEMNUM_INC(repeat_mem, p);
426 if (repeat_mem != current_repeat_mem) {
431 case OP_REPEAT_INC_SG:
432 case OP_REPEAT_INC_NG_SG:
434 case OP_NULL_CHECK_START:
437 case OP_NULL_CHECK_END:
438 case OP_NULL_CHECK_END_MEMST_PUSH:
441 case OP_NULL_CHECK_END_MEMST:
/* Look-arounds: each one recurses with lookaround_nesting + 1 after a
 * check on a negative nesting value (whose action is not visible). */
446 if (lookaround_nesting < 0) {
450 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
451 if (result < 0 || num_cache_opcodes < 0) {
455 case OP_PUSH_POS_NOT:
456 if (lookaround_nesting < 0) {
461 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
462 if (result < 0 || num_cache_opcodes < 0) {
466 case OP_PUSH_LOOK_BEHIND_NOT:
467 if (lookaround_nesting < 0) {
473 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
474 if (result < 0 || num_cache_opcodes < 0) {
/* Atomic group: the body is walked with nesting set to -1. */
478 case OP_PUSH_STOP_BT:
479 if (lookaround_nesting != 0) {
483 result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes);
484 if (result < 0 || num_cache_opcodes < 0) {
490 case OP_FAIL_LOOK_BEHIND_NOT:
497 case OP_PUSH_ABSENT_POS:
509 case OP_STATE_CHECK_PUSH:
510 case OP_STATE_CHECK_PUSH_OR_JUMP:
512 case OP_STATE_CHECK_ANYCHAR_STAR:
513 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
516 case OP_SET_OPTION_PUSH:
/* Exit paths (their labels are not visible in this view): two store the
 * running count, one stores NUM_CACHE_OPCODES_IMPOSSIBLE, and one
 * returns ONIGERR_UNDEFINED_BYTECODE. */
528 *num_cache_opcodes_ptr = num_cache_opcodes;
532 *num_cache_opcodes_ptr = num_cache_opcodes;
536 *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE;
540 return ONIGERR_UNDEFINED_BYTECODE;
/*
 * count_num_cache_opcodes: top-level entry for the counting walk.
 * The counter is zeroed, then count_num_cache_opcodes_inner() is run with
 * no enclosing repeat (-1) and look-around nesting 0.  If the walk
 * reported success with a non-negative count but the cursor did not end
 * exactly at reg->p + reg->used, ONIGERR_UNDEFINED_BYTECODE is returned.
 * The declaration of p and the final return are not visible in this view.
 */
545count_num_cache_opcodes(
const regex_t* reg,
long* num_cache_opcodes_ptr)
548 *num_cache_opcodes_ptr = 0;
549 OnigPosition result = count_num_cache_opcodes_inner(reg, -1, 0, &p, num_cache_opcodes_ptr);
/* A successful walk must consume the whole bytecode buffer. */
550 if (result == 0 && *num_cache_opcodes_ptr >= 0 && p != reg->p + reg->used) {
551 return ONIGERR_UNDEFINED_BYTECODE;
/*
 * init_cache_opcodes_inner: second walk over the bytecode that fills in
 * the OnigCacheOpcode table and assigns cache-point numbers.
 *
 *   current_repeat_mem    id of the enclosing repeat, or -1 for none
 *   lookaround_nesting    0 at top level, +1 per nested look-around,
 *                         -1 inside an atomic group (OP_PUSH_STOP_BT)
 *   cache_opcodes_ptr     in/out cursor into the table being filled
 *   pp                    in/out bytecode cursor
 *   num_cache_points_ptr  in/out running cache-point number
 *
 * The opcode cases mirror those of count_num_cache_opcodes_inner.
 * Opcodes that the visible code rejects jump to
 * unexpected_bytecode_error (ONIGERR_UNEXPECTED_BYTECODE).
 * Many lines of the switch are not visible in this view; comments below
 * only describe visible lines.
 */
558init_cache_opcodes_inner(
560 MemNumType current_repeat_mem,
int lookaround_nesting,
561 OnigCacheOpcode** cache_opcodes_ptr, UChar** pp,
long* num_cache_points_ptr
565 UChar* pend = reg->p + reg->used;
568 MemNumType repeat_mem;
570 long cache_point = *num_cache_points_ptr;
/* INC_CACHE_OPCODES records the current opcode in the table (skipped
 * when cache_opcodes is NULL) and advances cache_point by 2 inside a
 * look-around/atomic group, by 1 otherwise.  The tail of the macro is
 * not visible in this view. */
574# define INC_CACHE_OPCODES if (cache_opcodes != NULL) {\
575 cache_opcodes->addr = pbegin;\
576 cache_opcodes->cache_point = cache_point;\
577 cache_opcodes->outer_repeat_mem = current_repeat_mem;\
578 cache_opcodes->num_cache_points_at_outer_repeat = 0;\
579 cache_opcodes->num_cache_points_in_outer_repeat = 0;\
580 cache_opcodes->lookaround_nesting = lookaround_nesting;\
581 cache_opcodes->match_addr = NULL;\
582 cache_point += lookaround_nesting != 0 ? 2 : 1;\
593 case OP_EXACT1: p++;
break;
594 case OP_EXACT2: p += 2;
break;
595 case OP_EXACT3: p += 3;
break;
596 case OP_EXACT4: p += 4;
break;
597 case OP_EXACT5: p += 5;
break;
599 GET_LENGTH_INC(
len, p); p +=
len;
break;
600 case OP_EXACTMB2N1: p += 2;
break;
601 case OP_EXACTMB2N2: p += 4;
break;
602 case OP_EXACTMB2N3: p += 6;
break;
604 GET_LENGTH_INC(
len, p); p +=
len * 2;
break;
606 GET_LENGTH_INC(
len, p); p +=
len * 3;
break;
610 GET_LENGTH_INC(mb_len, p);
611 GET_LENGTH_INC(
len, p);
617 len = enclen(enc, p, pend); p +=
len;
break;
619 GET_LENGTH_INC(
len, p); p +=
len;
break;
623 p += SIZE_BITSET;
break;
625 case OP_CCLASS_MB_NOT:
626 GET_LENGTH_INC(
len, p); p +=
len;
break;
628 case OP_CCLASS_MIX_NOT:
630 GET_LENGTH_INC(
len, p);
637 case OP_ANYCHAR_STAR:
638 case OP_ANYCHAR_ML_STAR:
641 case OP_ANYCHAR_STAR_PEEK_NEXT:
642 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
650 case OP_NOT_WORD_BOUND:
656 case OP_NOT_ASCII_WORD:
657 case OP_ASCII_WORD_BOUND:
658 case OP_NOT_ASCII_WORD_BOUND:
659 case OP_ASCII_WORD_BEGIN:
660 case OP_ASCII_WORD_END:
667 case OP_SEMI_END_BUF:
668 case OP_BEGIN_POSITION:
/* Back-references are rejected by this walk. */
675 case OP_BACKREF_MULTI:
676 case OP_BACKREF_MULTI_IC:
677 case OP_BACKREF_WITH_LEVEL:
678 goto unexpected_bytecode_error;
680 case OP_MEMORY_START:
681 case OP_MEMORY_START_PUSH:
682 case OP_MEMORY_END_PUSH:
683 case OP_MEMORY_END_PUSH_REC:
685 case OP_MEMORY_END_REC:
/* Capture opcodes inside a look-around or atomic group are rejected. */
687 if (lookaround_nesting != 0) {
688 goto unexpected_bytecode_error;
706 case OP_PUSH_OR_JUMP_EXACT1:
707 case OP_PUSH_IF_PEEK_NEXT:
708 p += SIZE_RELADDR + 1;
713 GET_MEMNUM_INC(repeat_mem, p);
/* {0,0} repeat: walk the body with throw-away outputs. */
715 if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
716 long dummy_num_cache_points = 0;
718 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points);
723 if (reg->repeat_range[repeat_mem].lower == 0) {
/* Ordinary repeat: cache points inside the body are numbered from 0
 * (num_cache_points_in_repeat); the number in effect when the repeat
 * starts is remembered in num_cache_points_at_repeat. */
727 long num_cache_points_in_repeat = 0;
728 long num_cache_points_at_repeat = cache_point;
730 result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat);
735 if (repeat_range->lower < repeat_range->upper) {
/* Takes back one INC_CACHE_OPCODES-sized step (2 or 1). */
737 cache_point -= lookaround_nesting != 0 ? 2 : 1;
/* An upper bound of 0x7fffffff is counted as a single extra iteration;
 * otherwise the extra iterations are upper - lower. */
739 int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower;
/* Reserve cache points: `lower` copies of the body, plus repeat_bounds
 * copies of (body + one step of 2 or 1). */
740 cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds;
/* Stamp every table entry created inside this repeat with the repeat's
 * starting point and per-iteration size. */
741 for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) {
742 cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;
743 cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat;
749 case OP_REPEAT_INC_NG:
752 case OP_REPEAT_INC_SG:
753 case OP_REPEAT_INC_NG_SG:
754 goto unexpected_bytecode_error;
755 case OP_NULL_CHECK_START:
758 case OP_NULL_CHECK_END:
759 case OP_NULL_CHECK_END_MEMST_PUSH:
762 case OP_NULL_CHECK_END_MEMST:
/* Look-around body: recurse one nesting level deeper, sharing the
 * cache_point counter. */
770 result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point);
/* Entries created inside the look-around that have no match_addr yet
 * get p - 1 (the byte just before the cursor after the recursive walk). */
774 UChar* match_addr = p - 1;
775 for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) {
776 if (cache_opcodes_in_lookaround->match_addr == NULL) {
777 cache_opcodes_in_lookaround->match_addr = match_addr;
782 case OP_PUSH_POS_NOT:
785 case OP_PUSH_LOOK_BEHIND_NOT:
/* Atomic group body: recurse with nesting -1, then record match_addr
 * for its entries in the same way. */
789 case OP_PUSH_STOP_BT:
792 result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point);
796 UChar* match_addr = p - 1;
797 for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) {
798 if (cache_opcodes_in_atomic->match_addr == NULL) {
799 cache_opcodes_in_atomic->match_addr = match_addr;
806 case OP_FAIL_LOOK_BEHIND_NOT:
815 goto unexpected_bytecode_error;
819 goto unexpected_bytecode_error;
822 goto unexpected_bytecode_error;
824 case OP_STATE_CHECK_PUSH:
825 case OP_STATE_CHECK_PUSH_OR_JUMP:
827 case OP_STATE_CHECK_ANYCHAR_STAR:
828 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
829 goto unexpected_bytecode_error;
831 case OP_SET_OPTION_PUSH:
/* Write the advanced table cursor and cache-point number back. */
842 *cache_opcodes_ptr = cache_opcodes;
844 *num_cache_points_ptr = cache_point;
850unexpected_bytecode_error:
851 return ONIGERR_UNEXPECTED_BYTECODE;
854 return ONIGERR_UNDEFINED_BYTECODE;
862 *num_cache_points_ptr = 0;
863 OnigPosition result = init_cache_opcodes_inner(reg, -1, 0, &cache_opcodes_ptr, &p, num_cache_points_ptr);
864 if (result == 0 && p != reg->p + reg->used) {
865 return ONIGERR_UNDEFINED_BYTECODE;
/*
 * Stub variant of count_num_cache_opcodes: it performs no bytecode walk
 * and always reports NUM_CACHE_OPCODES_IMPOSSIBLE.  Presumably this is
 * the definition used when USE_MATCH_CACHE is not defined -- the
 * enclosing preprocessor conditional is not visible here; confirm.
 */
872count_num_cache_opcodes(
regex_t* reg,
long* num_cache_opcodes)
874 *num_cache_opcodes = NUM_CACHE_OPCODES_IMPOSSIBLE;
882 long num_cache_opcodes = 0;
883 count_num_cache_opcodes(reg, &num_cache_opcodes);
884 return num_cache_opcodes != NUM_CACHE_OPCODES_IMPOSSIBLE;
892 for (i = 0; i < region->num_regs; i++) {
893 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
895#ifdef USE_CAPTURE_HISTORY
896 history_root_free(region);
903 region->num_regs = n;
905 if (n < ONIG_NREGION)
908 if (region->allocated == 0) {
909 region->beg = (OnigPosition* )
xmalloc(n *
sizeof(OnigPosition));
910 if (region->beg == 0)
911 return ONIGERR_MEMORY;
913 region->end = (OnigPosition* )
xmalloc(n *
sizeof(OnigPosition));
914 if (region->end == 0) {
916 return ONIGERR_MEMORY;
919 region->allocated = n;
921 else if (region->allocated < n) {
924 region->allocated = 0;
925 tmp = (OnigPosition* )
xrealloc(region->beg, n *
sizeof(OnigPosition));
929 return ONIGERR_MEMORY;
932 tmp = (OnigPosition* )
xrealloc(region->end, n *
sizeof(OnigPosition));
936 return ONIGERR_MEMORY;
940 region->allocated = n;
/*
 * onig_region_resize_clear: resize region to hold n registers and then
 * clear it.  A non-zero result from onig_region_resize() is returned
 * unchanged and the region is not cleared in that case.
 * The declaration of r and the final return are not visible in this view.
 */
947onig_region_resize_clear(
OnigRegion* region,
int n)
951 r = onig_region_resize(region, n);
952 if (r != 0)
return r;
953 onig_region_clear(region);
/*
 * onig_region_set: store the pair (beg, end) in register `at`.
 *
 *   at    register index; a negative value yields
 *         ONIGERR_INVALID_ARGUMENT
 *   beg   value written to region->beg[at]
 *   end   value written to region->end[at]
 *
 * If `at` lies beyond the allocated registers the region is first grown
 * to at + 1 entries with onig_region_resize(); the handling of its result
 * and the final return are not visible in this view.
 */
958onig_region_set(
OnigRegion* region,
int at,
int beg,
int end)
960 if (at < 0)
return ONIGERR_INVALID_ARGUMENT;
/* Grow on demand so that index `at` exists. */
962 if (at >= region->allocated) {
963 int r = onig_region_resize(region, at + 1);
967 region->beg[at] = beg;
968 region->end[at] = end;
975 region->num_regs = 0;
976 region->allocated = 0;
977 region->beg = (OnigPosition* )0;
978 region->end = (OnigPosition* )0;
979#ifdef USE_CAPTURE_HISTORY
980 region->history_root = (OnigCaptureTreeNode* )0;
999 if (r->allocated > 0) {
1003#ifdef USE_CAPTURE_HISTORY
1004 history_root_free(r);
1018#define RREGC_SIZE (sizeof(int) * from->num_regs)
1021 if (to == from)
return;
1023 r = onig_region_resize(to, from->num_regs);
1026 for (i = 0; i < from->num_regs; i++) {
1027 to->beg[i] = from->beg[i];
1028 to->end[i] = from->end[i];
1030 to->num_regs = from->num_regs;
1032#ifdef USE_CAPTURE_HISTORY
1033 history_root_free(to);
1035 if (IS_NOT_NULL(from->history_root)) {
1036 to->history_root = history_tree_clone(from->history_root);
/* Sentinel stored in mem_end_stk[] by STACK_PUSH_MEM_START. */
1043#define INVALID_STACK_INDEX -1
/*
 * Match-stack entry types.
 * The three types below live in the low byte, so they are exactly the
 * ones selected by STK_MASK_POP_USED (0x00ff): the STACK_POP loops stop
 * when they reach one of them.
 */
1047#define STK_ALT 0x0001
1048#define STK_LOOK_BEHIND_NOT 0x0002
1049#define STK_POS_NOT 0x0003
/* Types with special handling in the STACK_POP / STACK_POP_TIL_* loops. */
1051#define STK_MEM_START 0x0100
1052#define STK_MEM_END 0x8200
1053#define STK_REPEAT_INC 0x0300
1054#define STK_STATE_CHECK_MARK 0x1000
/* Remaining entry types. */
1056#define STK_NULL_CHECK_START 0x3000
1057#define STK_NULL_CHECK_END 0x5000
1058#define STK_MEM_END_MARK 0x8400
1059#define STK_POS 0x0500
1060#define STK_STOP_BT 0x0600
1061#define STK_REPEAT 0x0700
1062#define STK_CALL_FRAME 0x0800
1063#define STK_RETURN 0x0900
1064#define STK_VOID 0x0a00
1065#define STK_ABSENT_POS 0x0b00
1066#define STK_ABSENT 0x0c00
1067#define STK_MATCH_CACHE_POINT 0x0d00
1068#define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00
/*
 * Masks over the type values above.
 *   STK_MASK_POP_USED         low byte: STK_ALT, STK_LOOK_BEHIND_NOT,
 *                             STK_POS_NOT
 *   STK_MASK_TO_VOID_TARGET   low byte plus bit 0x1000
 *   STK_MASK_MEM_END_OR_MARK  bit 0x8000: set in STK_MEM_END and
 *                             STK_MEM_END_MARK only
 * NOTE(review): STK_NULL_CHECK_START (0x3000) and STK_NULL_CHECK_END
 * (0x5000) also contain bit 0x1000, so a test against
 * STK_MASK_TO_VOID_TARGET matches them as well as
 * STK_STATE_CHECK_MARK -- confirm that this is intended.
 */
1071#define STK_MASK_POP_USED 0x00ff
1072#define STK_MASK_TO_VOID_TARGET 0x10ff
1073#define STK_MASK_MEM_END_OR_MARK 0x8000
1075#ifdef USE_MATCH_CACHE
/*
 * Match-cache fields of the match argument `msa`.
 *
 * MATCH_ARG_INIT_MATCH_CACHE(msa) puts them into the "not initialised"
 * state: status MATCH_CACHE_STATUS_UNINIT, zero failures, opcode count
 * NUM_CACHE_OPCODES_UNINIT, no opcode table, zero cache points, no
 * buffer.
 * MATCH_ARG_FREE_MATCH_CACHE(msa) xfree()s the opcode table and the cache
 * buffer and resets both pointers to NULL.
 * The last two definitions are the empty variants (presumably for builds
 * without USE_MATCH_CACHE; the #else line is not visible in this view).
 * The closing lines of the do-blocks are not visible either.
 */
1076#define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\
1077 (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;\
1078 (msa).num_fails = 0;\
1079 (msa).num_cache_opcodes = NUM_CACHE_OPCODES_UNINIT;\
1080 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1081 (msa).num_cache_points = 0;\
1082 (msa).match_cache_buf = (uint8_t*)NULL;\
1084#define MATCH_ARG_FREE_MATCH_CACHE(msa) do {\
1085 xfree((msa).cache_opcodes);\
1086 xfree((msa).match_cache_buf);\
1087 (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
1088 (msa).match_cache_buf = (uint8_t*)NULL;\
1091#define MATCH_ARG_INIT_MATCH_CACHE(msa)
1092#define MATCH_ARG_FREE_MATCH_CACHE(msa)
1095#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
/*
 * MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos)
 * Initialise a match argument: no saved stack (stack_p = NULL), the given
 * options / region / start / gpos, end_time = 0, and the match-cache
 * fields via MATCH_ARG_INIT_MATCH_CACHE.
 * Two variants are defined; the first (under
 * USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) additionally sets best_len to
 * ONIG_MISMATCH.  One line of each variant and the closing lines are not
 * visible in this view.
 */
1096# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1097 (msa).stack_p = (void* )0;\
1098 (msa).options = (arg_option);\
1099 (msa).region = (arg_region);\
1100 (msa).start = (arg_start);\
1101 (msa).gpos = (arg_gpos);\
1102 (msa).best_len = ONIG_MISMATCH;\
1104 (msa).end_time = 0;\
1105 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1108# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
1109 (msa).stack_p = (void* )0;\
1110 (msa).options = (arg_option);\
1111 (msa).region = (arg_region);\
1112 (msa).start = (arg_start);\
1113 (msa).gpos = (arg_gpos);\
1115 (msa).end_time = 0;\
1116 MATCH_ARG_INIT_MATCH_CACHE(msa);\
1120#ifdef USE_COMBINATION_EXPLOSION_CHECK
/*
 * Buffers of at least this many bytes are taken from xmalloc; smaller
 * ones from xalloca.  MATCH_ARG_FREE uses the same threshold to decide
 * whether the buffer must be xfree()d.
 */
1122# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
/*
 * STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
 * Set up the state-check bit buffer of msa: one bit per
 * (string position, state) pair, i.e. ((str_len + 1) * state_num + 7) / 8
 * bytes.  The buffer is only created when state_num > 0, the string is
 * at least STATE_CHECK_STRING_THRESHOLD_LEN long, and the size is
 * non-zero, above the (rescaled) offset and below
 * STATE_CHECK_BUFF_MAX_SIZE; in the other visible branches the buffer
 * pointer is set to NULL and its size to 0.
 * Bytes from `offset` onward are zeroed.
 *
 * Side effects on the caller: `offset` is overwritten with a byte offset,
 * and CHECK_NULL_RETURN_MEMERR is applied to the xmalloc result
 * (presumably returning a memory error from the enclosing function --
 * confirm).  Several structural lines of the macro are not visible.
 */
1124# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
1125 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
1126 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
1127 offset = ((offset) * (state_num)) >> 3;\
1128 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
1129 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
1130 (msa).state_check_buff = (void* )xmalloc(size);\
1131 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
1134 (msa).state_check_buff = (void* )xalloca(size);\
1135 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
1136 (size_t )(size - (offset))); \
1137 (msa).state_check_buff_size = size;\
1140 (msa).state_check_buff = (void* )0;\
1141 (msa).state_check_buff_size = 0;\
1145 (msa).state_check_buff = (void* )0;\
1146 (msa).state_check_buff_size = 0;\
/*
 * MATCH_ARG_FREE(msa): release what a match argument owns -- the saved
 * stack (stack_p) and the match-cache data.  The first variant also
 * frees the state-check buffer, but only when its size is at or above
 * STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE, i.e. when
 * STATE_CHECK_BUFF_INIT obtained it from xmalloc rather than xalloca.
 * The second variant has no state-check buffer to free.
 * The closing lines of both macros are not visible in this view.
 */
1150# define MATCH_ARG_FREE(msa) do {\
1151 xfree((msa).stack_p);\
1152 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
1153 xfree((msa).state_check_buff);\
1155 MATCH_ARG_FREE_MATCH_CACHE(msa);\
1158# define MATCH_ARG_FREE(msa) do {\
1159 xfree((msa).stack_p);\
1160 MATCH_ARG_FREE_MATCH_CACHE(msa);\
/* Above this many index slots the index area is taken from the heap. */
1166#define MAX_PTR_NUM 100
/*
 * STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)
 * Set up the index area (alloc_addr, ptr_num OnigStackIndex slots) and
 * the match stack (stk_alloc / stk_base / stk_end).  Visible cases:
 *   - ptr_num > MAX_PTR_NUM: the index area is xmalloc'ed and also
 *     recorded in heap_addr; the stack is the saved one from
 *     msa->stack_p (msa->stack_n entries) when present, otherwise a
 *     fresh xalloca block of stack_num entries.
 *   - otherwise, with a saved stack: the index area is xalloca'ed and the
 *     saved stack is reused.
 *   - otherwise: a single xalloca block holds the index area followed by
 *     the stack.
 * Relies on msa, stk_alloc, stk_base and stk_end being in scope where
 * the macro is expanded.  Some lines of the macro are not visible in
 * this view.
 * NOTE(review): no NULL check of the xmalloc result is visible here;
 * confirm whether xmalloc can return NULL in this build.
 */
1168#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
1169 if (ptr_num > MAX_PTR_NUM) {\
1170 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
1171 heap_addr = alloc_addr;\
1172 if (msa->stack_p) {\
1173 stk_alloc = (OnigStackType* )(msa->stack_p);\
1174 stk_base = stk_alloc;\
1176 stk_end = stk_base + msa->stack_n;\
1179 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
1180 stk_base = stk_alloc;\
1182 stk_end = stk_base + (stack_num);\
1185 else if (msa->stack_p) {\
1186 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
1188 stk_alloc = (OnigStackType* )(msa->stack_p);\
1189 stk_base = stk_alloc;\
1191 stk_end = stk_base + msa->stack_n;\
1194 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
1195 + sizeof(OnigStackType) * (stack_num));\
1197 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
1198 stk_base = stk_alloc;\
1200 stk_end = stk_base + (stack_num);\
/*
 * STACK_SAVE: when the stack in use is no longer the one set up as
 * stk_alloc (stk_base != stk_alloc), remember it in msa->stack_p together
 * with its capacity in msa->stack_n, where STACK_INIT picks it up again.
 * The closing lines of the macro are not visible in this view.
 */
1204#define STACK_SAVE do{\
1205 if (stk_base != stk_alloc) {\
1206 msa->stack_p = stk_base;\
1207 msa->stack_n = stk_end - stk_base; \
/*
 * File-scope limit for the match stack, initialised to
 * DEFAULT_MATCH_STACK_LIMIT_SIZE.  Code that reads it in this file treats
 * 0 as "no limit" (it only compares against the limit when
 * limit_size != 0).
 */
1211static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
/* Return the current match-stack limit. */
1214onig_get_match_stack_limit_size(
void)
1216 return MatchStackLimitSize;
/*
 * Replace the match-stack limit with `size`.  The value is stored in a
 * plain static variable without any synchronisation visible here.
 * The return statement, if any, is not visible in this view.
 */
1220onig_set_match_stack_limit_size(
unsigned int size)
1222 MatchStackLimitSize = size;
1233 stk_base = *arg_stk_base;
1234 stk_end = *arg_stk_end;
1237 n = stk_end - stk_base;
1238 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1242 return ONIGERR_MEMORY;
1248 unsigned int limit_size = MatchStackLimitSize;
1250 if (limit_size != 0 && n > limit_size) {
1251 if ((
unsigned int )(stk_end - stk_base) == limit_size)
1252 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1259 return ONIGERR_MEMORY;
1262 *arg_stk = x + (stk - stk_base);
1264 *arg_stk_end = x + n;
/*
 * STACK_ENSURE(n): make sure at least n free entries remain between stk
 * and stk_end; when they do not, grow the stack with stack_double(),
 * which may move stk_base / stk_end / stk.
 * The handling of a non-zero result is only partly visible: it includes
 * xfree(xmalloc_base) -- presumably on the error path before returning;
 * confirm against the full source.
 */
1268#define STACK_ENSURE(n) do {\
1269 if (stk_end - stk < (n)) {\
1270 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
1273 xfree(xmalloc_base);\
/* Convert a stack index into an entry pointer (relative to stk_base). */
1279#define STACK_AT(index) (stk_base + (index))
/* Convert an entry pointer into its index (relative to stk_base). */
1280#define GET_STACK_INDEX(stk) ((stk) - stk_base)
/*
 * STACK_PUSH_TYPE(stack_type): push an entry that carries only a type.
 * null_check is inherited from the entry below (0 for the bottom entry).
 * Lines of the macro other than the two assignments are not visible.
 */
1282#define STACK_PUSH_TYPE(stack_type) do {\
1284 stk->type = (stack_type);\
1285 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
1289#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1291#ifdef USE_COMBINATION_EXPLOSION_CHECK
/*
 * Bit index of (string position s, state number snum) in the state-check
 * buffer: positions are counted from `str`, each position owns
 * num_comb_exp_check bits, and state numbers start at 1.
 */
1292# define STATE_CHECK_POS(s,snum) \
1293 (((s) - str) * num_comb_exp_check + ((snum) - 1))
/*
 * STATE_CHECK_VAL(v, snum): read the bit for the current position `s`
 * (taken from the expansion site, not a parameter) and state snum into v,
 * provided a buffer exists.  v receives the masked byte, i.e. zero or a
 * non-zero value, not necessarily 1.  The rest of the macro is not
 * visible in this view.
 */
1294# define STATE_CHECK_VAL(v,snum) do {\
1295 if (state_check_buff != NULL) {\
1296 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
1297 (v) = state_check_buff[x/8] & (1<<(x%8));\
/*
 * ELSE_IF_STATE_CHECK_MARK(stk): an `else if` arm for the pop loops; on
 * a STK_STATE_CHECK_MARK entry it sets the bit recorded in that entry.
 * It must follow an `if` at the expansion site.
 */
1303# define ELSE_IF_STATE_CHECK_MARK(stk) \
1304 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
1305 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
1306 state_check_buff[x/8] |= (1<<(x%8)); \
/*
 * State-entry push macros, variants that maintain u.state.state_check.
 * Lines that reserve space and advance stk are not visible in this view.
 *
 * STACK_PUSH: record pattern position, string position, previous string
 * position and keep position; state_check is 0.
 * STACK_PUSH_ENSURED: as above but only type and pcode are visibly set
 * (the name suggests the caller has already ensured space -- confirm).
 * STACK_PUSH_ALT_WITH_STATE_CHECK: an STK_ALT entry whose state_check is
 * snum when a state-check buffer exists, else 0.
 * STACK_PUSH_STATE_CHECK: pushes an STK_STATE_CHECK_MARK for (s, snum),
 * only when a state-check buffer exists.
 */
1309# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1311 stk->type = (stack_type);\
1312 stk->u.state.pcode = (pat);\
1313 stk->u.state.pstr = (s);\
1314 stk->u.state.pstr_prev = (sprev);\
1315 stk->u.state.state_check = 0;\
1316 stk->u.state.pkeep = (keep);\
1320# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1321 stk->type = (stack_type);\
1322 stk->u.state.pcode = (pat);\
1323 stk->u.state.state_check = 0;\
1327# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
1329 stk->type = STK_ALT;\
1330 stk->u.state.pcode = (pat);\
1331 stk->u.state.pstr = (s);\
1332 stk->u.state.pstr_prev = (sprev);\
1333 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
1334 stk->u.state.pkeep = (keep);\
1338# define STACK_PUSH_STATE_CHECK(s,snum) do {\
1339 if (state_check_buff != NULL) {\
1341 stk->type = STK_STATE_CHECK_MARK;\
1342 stk->u.state.pstr = (s);\
1343 stk->u.state.state_check = (snum);\
/*
 * Variants without state-check bookkeeping.
 * ELSE_IF_STATE_CHECK_MARK expands to nothing here.
 * STACK_PUSH and STACK_PUSH_ENSURED fill the same fields as the
 * state-check variants except state_check, and additionally inherit
 * null_check from the entry below (0 for the bottom entry).
 * Lines that reserve space and advance stk are not visible in this view.
 */
1350# define ELSE_IF_STATE_CHECK_MARK(stk)
1352# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1354 stk->type = (stack_type);\
1355 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1356 stk->u.state.pcode = (pat);\
1357 stk->u.state.pstr = (s);\
1358 stk->u.state.pstr_prev = (sprev);\
1359 stk->u.state.pkeep = (keep);\
1363# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1364 stk->type = (stack_type);\
1365 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1366 stk->u.state.pcode = (pat);\
/*
 * Typed shorthands built on STACK_PUSH / STACK_PUSH_TYPE.
 * STACK_PUSH_POS stores no pattern position (NULL_UCHARP);
 * STACK_PUSH_ABSENT and STACK_PUSH_STOP_BT push type-only markers.
 */
1371#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
1372#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
1373#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
1374#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
1375#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
1376#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
1377 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
/*
 * STACK_PUSH_REPEAT(id, pat): push an STK_REPEAT entry for repeat `id`
 * with its pattern position and an iteration count starting at 0.
 * STACK_PUSH_REPEAT_INC(sindex): push an STK_REPEAT_INC entry that refers
 * to the STK_REPEAT entry at stack index sindex; the pop loops use it to
 * decrement that entry's count again.
 * Both inherit null_check from the entry below.  Lines that reserve space
 * and advance stk are not visible in this view.
 */
1379#define STACK_PUSH_REPEAT(id, pat) do {\
1381 stk->type = STK_REPEAT;\
1382 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1383 stk->u.repeat.num = (id);\
1384 stk->u.repeat.pcode = (pat);\
1385 stk->u.repeat.count = 0;\
1389#define STACK_PUSH_REPEAT_INC(sindex) do {\
1391 stk->type = STK_REPEAT_INC;\
1392 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1393 stk->u.repeat_inc.si = (sindex);\
/*
 * Capture-group bookkeeping entries.
 *
 * STACK_PUSH_MEM_START(mnum, s): record that group mnum starts at s.
 * The previous mem_start_stk / mem_end_stk values are saved in the entry
 * so the pop loops can restore them; mem_start_stk[mnum] then points at
 * this entry and mem_end_stk[mnum] becomes INVALID_STACK_INDEX.
 * STACK_PUSH_MEM_END(mnum, s): the same for the end of the group;
 * mem_end_stk[mnum] then points at this entry.
 * STACK_PUSH_MEM_END_MARK(mnum): a marker carrying only the group number.
 * Lines that reserve space and advance stk are not visible in this view.
 */
1397#define STACK_PUSH_MEM_START(mnum, s) do {\
1399 stk->type = STK_MEM_START;\
1400 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1401 stk->u.mem.num = (mnum);\
1402 stk->u.mem.pstr = (s);\
1403 stk->u.mem.start = mem_start_stk[mnum];\
1404 stk->u.mem.end = mem_end_stk[mnum];\
1405 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1406 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1410#define STACK_PUSH_MEM_END(mnum, s) do {\
1412 stk->type = STK_MEM_END;\
1413 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1414 stk->u.mem.num = (mnum);\
1415 stk->u.mem.pstr = (s);\
1416 stk->u.mem.start = mem_start_stk[mnum];\
1417 stk->u.mem.end = mem_end_stk[mnum];\
1418 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1422#define STACK_PUSH_MEM_END_MARK(mnum) do {\
1424 stk->type = STK_MEM_END_MARK;\
1425 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1426 stk->u.mem.num = (mnum);\
/*
 * STACK_GET_MEM_START(mnum, k): scan downward from k (while above
 * stk_base) for the STK_MEM_START entry of group mnum.  Entries with the
 * STK_MASK_MEM_END_OR_MARK bit for the same group are tracked through a
 * `level` counter, and the scan stops at a MEM_START seen at level 0 --
 * presumably to skip nested start/end pairs of the same group; the lines
 * that adjust `level` are not visible in this view.
 *
 * STACK_GET_MEM_RANGE(k, mnum, start, end): scan entries from k and
 * report the string positions of the matching MEM_START (at level 0) and
 * MEM_END of group mnum through `start` and `end`.  Loop structure and
 * level handling are not visible in this view.
 */
1430#define STACK_GET_MEM_START(mnum, k) do {\
1433 while (k > stk_base) {\
1435 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1436 && k->u.mem.num == (mnum)) {\
1439 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1440 if (level == 0) break;\
1446#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1449 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1450 if (level == 0) (start) = k->u.mem.pstr;\
1453 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1456 (end) = k->u.mem.pstr;\
/*
 * Null-check entries.  Unlike the other push macros these do not inherit
 * null_check from the entry below: they store their own stack index in
 * it, so later entries (which do inherit) carry the index of the most
 * recent null-check entry.  STACK_NULL_CHECK starts its scan from that
 * index.
 *
 * STACK_PUSH_NULL_CHECK_START(cnum, s): records check id cnum and the
 * string position s at which the checked loop body was entered.
 * STACK_PUSH_NULL_CHECK_END(cnum): records only the check id.
 * Lines that reserve space and advance stk are not visible in this view.
 */
1464#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
1466 stk->type = STK_NULL_CHECK_START;\
1467 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1468 stk->u.null_check.num = (cnum);\
1469 stk->u.null_check.pstr = (s);\
1473#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
1475 stk->type = STK_NULL_CHECK_END;\
1476 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1477 stk->u.null_check.num = (cnum);\
/*
 * Further push macros; each inherits null_check from the entry below.
 *   STACK_PUSH_CALL_FRAME(pat)     STK_CALL_FRAME with return address pat
 *   STACK_PUSH_RETURN              STK_RETURN, type only
 *   STACK_PUSH_ABSENT_POS(start, end)
 *                                  STK_ABSENT_POS holding two string
 *                                  positions (abs_pstr, end_pstr)
 *   STACK_PUSH_MATCH_CACHE_POINT(index, mask)
 *                                  STK_MATCH_CACHE_POINT holding the
 *                                  cache-buffer index and bit mask that
 *                                  the MEMOIZE_* macros apply later
 * Lines that reserve space and advance stk are not visible in this view.
 */
1481#define STACK_PUSH_CALL_FRAME(pat) do {\
1483 stk->type = STK_CALL_FRAME;\
1484 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1485 stk->u.call_frame.ret_addr = (pat);\
1489#define STACK_PUSH_RETURN do {\
1491 stk->type = STK_RETURN;\
1492 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1496#define STACK_PUSH_ABSENT_POS(start, end) do {\
1498 stk->type = STK_ABSENT_POS;\
1499 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1500 stk->u.absent_pos.abs_pstr = (start);\
1501 stk->u.absent_pos.end_pstr = (end);\
1505#define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\
1507 stk->type = STK_MATCH_CACHE_POINT;\
1508 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1509 stk->u.match_cache_point.index = (match_cache_point_index);\
1510 stk->u.match_cache_point.mask = (match_cache_point_mask);\
/*
 * STACK_BASE_CHECK(p, at): debugging aid.  The first definition reports
 * on stderr, tagged with the string `at`, when p has moved below
 * stk_base (what else it does is not visible in this view).  The second
 * definition is the empty variant used when the check is compiled out;
 * the controlling conditional is not visible here.
 */
1516# define STACK_BASE_CHECK(p, at) \
1517 if ((p) < stk_base) {\
1518 fprintf(stderr, "at %s\n", at);\
1522# define STACK_BASE_CHECK(p, at)
1525#ifdef ONIG_DEBUG_MATCH_CACHE
/*
 * MATCH_CACHE_DEBUG_MEMOIZE(stkp): trace a memoised cache point (its
 * index and mask) on stderr.
 *
 * The definition is a single expression with no trailing semicolon, so
 * it behaves like the ((void) 0) variant: the caller supplies the ';'
 * and the macro can sit in an unbraced if/else.  The parameter is
 * parenthesised so pointer expressions such as `base + 1` work.
 */
# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", (stkp)->u.match_cache_point.index, (stkp)->u.match_cache_point.mask)
1528# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
1531#ifdef USE_MATCH_CACHE
/*
 * Match-cache hooks used by the stack pop macros.
 *
 * INC_NUM_FAILS: count one more failure in msa->num_fails.
 * MEMOIZE_MATCH_CACHE_POINT: for the entry at `stk` -- an
 *   STK_MATCH_CACHE_POINT sets its mask bits directly in
 *   msa->match_cache_buf; an STK_ATOMIC_MATCH_CACHE_POINT goes through
 *   memoize_extended_match_cache_point().
 * MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp): an STK_MATCH_CACHE_POINT at
 *   stkp is turned into STK_VOID and memoised through
 *   memoize_extended_match_cache_point().
 * MEMOIZE_ATOMIC_MATCH_CACHE_POINT: an STK_MATCH_CACHE_POINT at `stk` is
 *   memoised through memoize_extended_match_cache_point().
 * The trailing definitions are the no-op variants.  Closing lines of the
 * macros and the #else between the two groups are not visible here.
 *
 * NOTE(review): MEMOIZE_MATCH_CACHE_POINT (second arm) and
 * MEMOIZE_ATOMIC_MATCH_CACHE_POINT pass `stkp` to
 * MATCH_CACHE_DEBUG_MEMOIZE although neither macro has a `stkp`
 * parameter and both operate on `stk`.  With the debug macro compiled
 * out the argument is discarded; with it enabled the trace would refer
 * to whatever `stkp` is in scope at the expansion site (or fail to
 * compile).  This looks like it should be `stk`.
 */
1532# define INC_NUM_FAILS msa->num_fails++
1533# define MEMOIZE_MATCH_CACHE_POINT do {\
1534 if (stk->type == STK_MATCH_CACHE_POINT) {\
1535 msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
1536 MATCH_CACHE_DEBUG_MEMOIZE(stk);\
1538 else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
1539 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1540 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1543# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
1544 if (stkp->type == STK_MATCH_CACHE_POINT) {\
1545 stkp->type = STK_VOID;\
1546 memoize_extended_match_cache_point(msa->match_cache_buf, stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);\
1547 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1550# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
1551 if (stk->type == STK_MATCH_CACHE_POINT) {\
1552 memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
1553 MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
1557# define INC_NUM_FAILS ((void) 0)
1558# define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
1559# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
/*
 * STACK_POP_ONE: visible part is only the base check; presumably it
 * removes a single entry (the decrement is not visible in this view).
 *
 * STACK_POP: unwind the stack until an entry whose type has a
 * STK_MASK_POP_USED bit (STK_ALT, STK_LOOK_BEHIND_NOT, STK_POS_NOT).
 * How much bookkeeping is undone on the way depends on pop_level:
 *   STACK_POP_LEVEL_FREE       state-check marks and match-cache points
 *                              only
 *   STACK_POP_LEVEL_MEM_START  additionally restores mem_start_stk /
 *                              mem_end_stk from STK_MEM_START entries
 *   third case (label not      additionally undoes STK_REPEAT_INC (the
 *   visible here)              referenced repeat count is decremented)
 *                              and restores from STK_MEM_END entries
 * Loop headers, stk decrements and closing lines are not visible.
 */
1562#define STACK_POP_ONE do {\
1564 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1567#define STACK_POP do {\
1568 switch (pop_level) {\
1569 case STACK_POP_LEVEL_FREE:\
1572 STACK_BASE_CHECK(stk, "STACK_POP"); \
1573 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1574 ELSE_IF_STATE_CHECK_MARK(stk);\
1575 MEMOIZE_MATCH_CACHE_POINT;\
1578 case STACK_POP_LEVEL_MEM_START:\
1581 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1582 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1583 else if (stk->type == STK_MEM_START) {\
1584 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1585 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1587 ELSE_IF_STATE_CHECK_MARK(stk);\
1588 MEMOIZE_MATCH_CACHE_POINT;\
1594 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1595 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1596 else if (stk->type == STK_MEM_START) {\
1597 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1598 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1600 else if (stk->type == STK_REPEAT_INC) {\
1601 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1603 else if (stk->type == STK_MEM_END) {\
1604 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1605 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1607 ELSE_IF_STATE_CHECK_MARK(stk);\
1608 MEMOIZE_MATCH_CACHE_POINT;\
/*
 * STACK_POP_TIL_POS_NOT: unwind until the STK_POS_NOT entry, restoring
 * capture bookkeeping (STK_MEM_START / STK_MEM_END), undoing
 * STK_REPEAT_INC counts, handling state-check marks and memoising
 * match-cache points with the look-around variant.  Entries matching
 * IS_TO_VOID_TARGET get their own arm whose body is not visible in this
 * view; loop header, stk decrement and closing lines are also not
 * visible.
 */
1614#define STACK_POP_TIL_POS_NOT do {\
1617 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
1618 if (stk->type == STK_POS_NOT) break;\
1619 else if (stk->type == STK_MEM_START) {\
1620 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1621 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1623 else if (stk->type == STK_REPEAT_INC) {\
1624 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1626 else if (stk->type == STK_MEM_END) {\
1627 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1628 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1630 else if (IS_TO_VOID_TARGET(stk)) {\
1633 ELSE_IF_STATE_CHECK_MARK(stk);\
1634 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\
/*
 * STACK_POP_TIL_LOOK_BEHIND_NOT: unwind until the STK_LOOK_BEHIND_NOT
 * entry, restoring capture bookkeeping, undoing STK_REPEAT_INC counts and
 * handling state-check marks.  No match-cache memoisation is visible in
 * this macro.  Loop header, stk decrement and closing lines are not
 * visible in this view.
 */
1638#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
1641 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
1642 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
1643 else if (stk->type == STK_MEM_START) {\
1644 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1645 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1647 else if (stk->type == STK_REPEAT_INC) {\
1648 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1650 else if (stk->type == STK_MEM_END) {\
1651 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1652 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1654 ELSE_IF_STATE_CHECK_MARK(stk);\
/*
 * STACK_POP_TIL_ABSENT: unwind until the STK_ABSENT marker, with the same
 * visible bookkeeping as STACK_POP_TIL_LOOK_BEHIND_NOT (capture restore,
 * repeat-count undo, state-check marks).  Loop header, stk decrement and
 * closing lines are not visible in this view.
 */
1658#define STACK_POP_TIL_ABSENT do {\
1661 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
1662 if (stk->type == STK_ABSENT) break;\
1663 else if (stk->type == STK_MEM_START) {\
1664 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1665 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1667 else if (stk->type == STK_REPEAT_INC) {\
1668 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1670 else if (stk->type == STK_MEM_END) {\
1671 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1672 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1674 ELSE_IF_STATE_CHECK_MARK(stk);\
/*
 * STACK_POP_ABSENT_POS(start, end): read the two positions saved in the
 * current stack entry's absent_pos record: abs_pstr is stored into `start`
 * and end_pstr into `end`.
 * NOTE(review): how `stk` is moved to that entry is not visible in this
 * excerpt.
 */
1678#define STACK_POP_ABSENT_POS(start, end) do {\
1680 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
1681 (start) = stk->u.absent_pos.abs_pstr;\
1682 (end) = stk->u.absent_pos.end_pstr;\
/*
 * STACK_POS_END(k): walk stack entries through `k`; entries for which
 * IS_TO_VOID_TARGET(k) holds are retyped to STK_VOID, and an entry of type
 * STK_POS is retyped to STK_VOID as well.
 * MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k) is also applied to `k`.
 * NOTE(review): the initialisation of `k`, the loop, and its exit condition
 * are not visible in this excerpt.
 */
1685#define STACK_POS_END(k) do {\
1689 STACK_BASE_CHECK(k, "STACK_POS_END"); \
1690 if (IS_TO_VOID_TARGET(k)) {\
1692 k->type = STK_VOID;\
1694 else if (k->type == STK_POS) {\
1695 k->type = STK_VOID;\
1698 MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
/*
 * STACK_STOP_BT_END: starting from a local cursor `k` initialised to `stk`,
 * retype stack entries:
 *   - IS_TO_VOID_TARGET(k) entries become STK_VOID;
 *   - an STK_STOP_BT entry becomes STK_VOID;
 *   - an STK_MATCH_CACHE_POINT entry becomes STK_ATOMIC_MATCH_CACHE_POINT.
 * NOTE(review): the loop, the cursor movement and the exit condition are not
 * visible in this excerpt.
 */
1702#define STACK_STOP_BT_END do {\
1703 OnigStackType *k = stk;\
1706 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
1707 if (IS_TO_VOID_TARGET(k)) {\
1709 k->type = STK_VOID;\
1711 else if (k->type == STK_STOP_BT) {\
1712 k->type = STK_VOID;\
1715 else if (k->type == STK_MATCH_CACHE_POINT) {\
1716 k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
/*
 * STACK_STOP_BT_FAIL: operate directly on `stk`; an entry of type
 * STK_STOP_BT is retyped to STK_VOID, and MEMOIZE_ATOMIC_MATCH_CACHE_POINT
 * is applied along the way.
 * The label passed to STACK_BASE_CHECK names this macro, so a check that
 * reports the label points at the right place (it previously carried the
 * label of STACK_STOP_BT_END).
 * NOTE(review): the loop header, the `stk` decrement and the exit statement
 * are not visible in this excerpt.
 */
1721#define STACK_STOP_BT_FAIL do {\
1724 STACK_BASE_CHECK(stk, "STACK_STOP_BT_FAIL"); \
1725 if (stk->type == STK_STOP_BT) {\
1726 stk->type = STK_VOID;\
1729 MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
/*
 * STACK_NULL_CHECK(isnull, id, s): locate the STK_NULL_CHECK_START entry
 * whose number equals `id` and set `isnull` to whether the position saved in
 * that entry (u.null_check.pstr) equals `s`.
 * The cursor `k` starts one entry after STACK_AT((stk-1)->null_check).
 * NOTE(review): the loop and the cursor movement are not visible in this
 * excerpt.
 */
1733#define STACK_NULL_CHECK(isnull,id,s) do {\
1734 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1737 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
1738 if (k->type == STK_NULL_CHECK_START) {\
1739 if (k->u.null_check.num == (id)) {\
1740 (isnull) = (k->u.null_check.pstr == (s));\
/*
 * STACK_NULL_CHECK_REC(isnull, id, s): variant of the null check that also
 * has a branch for STK_NULL_CHECK_END entries (its body is not part of this
 * excerpt).  For a STK_NULL_CHECK_START entry whose number equals `id`,
 * `isnull` is set to whether the saved position equals `s`.
 * The cursor `k` starts one entry after STACK_AT((stk-1)->null_check).
 * NOTE(review): presumably the END branch tracks nesting for recursive
 * calls -- confirm against the complete macro.
 */
1747#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
1749 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1752 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
1753 if (k->type == STK_NULL_CHECK_START) {\
1754 if (k->u.null_check.num == (id)) {\
1756 (isnull) = (k->u.null_check.pstr == (s));\
1762 else if (k->type == STK_NULL_CHECK_END) {\
/*
 * STACK_NULL_CHECK_MEMST(isnull, id, s, reg): null check that additionally
 * looks at capture state.
 *   - For the STK_NULL_CHECK_START entry numbered `id`, the saved position is
 *     compared with `s` (the body of the "differs" branch is not part of this
 *     excerpt).
 *   - For STK_MEM_START entries: an end of INVALID_STACK_INDEX sets
 *     isnull = 0 and breaks.  Otherwise `endp` is taken from the referenced
 *     stack entry when the group's bit is set in reg->bt_mem_end, or from
 *     u.mem.end reinterpreted as a pointer when it is not.  If the start
 *     entry's position differs from `endp`, isnull = 0 and the scan breaks;
 *     a further branch handles endp != s.
 * NOTE(review): `reg` and the `s` in `endp != s` are used unparenthesised, so
 * callers should pass plain identifiers.
 */
1768#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
1769 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1772 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
1773 if (k->type == STK_NULL_CHECK_START) {\
1774 if (k->u.null_check.num == (id)) {\
1775 if (k->u.null_check.pstr != (s)) {\
1783 if (k->type == STK_MEM_START) {\
1784 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1785 (isnull) = 0; break;\
1787 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1788 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1790 endp = (UChar* )k->u.mem.end;\
1791 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1792 (isnull) = 0; break;\
1794 else if (endp != s) {\
/*
 * STACK_NULL_CHECK_MEMST_REC(isnull, id, s, reg): capture-aware null check
 * that also tracks nesting: a STK_NULL_CHECK_END entry numbered `id`
 * increments `level`.
 *   - For the STK_NULL_CHECK_START entry numbered `id`, the saved position is
 *     compared with `s` (the body of the "differs" branch is not part of this
 *     excerpt).
 *   - For STK_MEM_START entries: an end of INVALID_STACK_INDEX sets
 *     isnull = 0 and breaks.  Otherwise `endp` is taken from the referenced
 *     stack entry when the group's bit is set in reg->bt_mem_end, or from
 *     u.mem.end reinterpreted as a pointer when it is not.  If the start
 *     entry's position differs from `endp`, isnull = 0 and the scan breaks;
 *     a further branch handles endp != s.
 * NOTE(review): the declaration of `level` and where it is decremented are
 * not visible in this excerpt.
 */
1807#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1809 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1812 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1813 if (k->type == STK_NULL_CHECK_START) {\
1814 if (k->u.null_check.num == (id)) {\
1816 if (k->u.null_check.pstr != (s)) {\
1824 if (k->type == STK_MEM_START) {\
1825 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1826 (isnull) = 0; break;\
1828 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1829 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1831 endp = (UChar* )k->u.mem.end;\
1832 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1833 (isnull) = 0; break;\
1835 else if (endp != s) {\
1849 else if (k->type == STK_NULL_CHECK_END) {\
1850 if (k->u.null_check.num == (id)) level++;\
/*
 * STACK_GET_REPEAT(id, k): scan stack entries through `k` looking for an
 * STK_REPEAT entry whose number equals `id`.
 * A `level` counter is adjusted while scanning: STK_CALL_FRAME decrements it
 * and STK_RETURN increments it.
 * NOTE(review): the initialisation of `k` and `level`, the scan direction and
 * the action taken on a hit are not visible in this excerpt.
 */
1855#define STACK_GET_REPEAT(id, k) do {\
1860 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1861 if (k->type == STK_REPEAT) {\
1863 if (k->u.repeat.num == (id)) {\
1868 else if (k->type == STK_CALL_FRAME) level--;\
1869 else if (k->type == STK_RETURN) level++;\
/*
 * STACK_RETURN(addr): starting from a local cursor `k` initialised to `stk`,
 * find an STK_CALL_FRAME entry and load its saved return address
 * (u.call_frame.ret_addr) into `addr`.  STK_RETURN entries have their own
 * branch, whose body is not part of this excerpt.
 * NOTE(review): the loop, cursor movement and any nesting counter are not
 * visible here.
 */
1873#define STACK_RETURN(addr) do {\
1875 OnigStackType* k = stk;\
1878 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1879 if (k->type == STK_CALL_FRAME) {\
1881 (addr) = k->u.call_frame.ret_addr;\
1886 else if (k->type == STK_RETURN)\
/*
 * STRING_CMP(s1, s2, len): byte-wise comparison of `len` bytes; jumps to the
 * caller's `fail` label on the first mismatch.
 * The arguments are modified in place: s1 and s2 are advanced and len is
 * counted down, so pass scratch lvalues.
 */
1892#define STRING_CMP(s1,s2,len) do {\
1893 while (len-- > 0) {\
1894 if (*s1++ != *s2++) goto fail;\
/*
 * STRING_CMP_IC(case_fold_flag, s1, ps2, len, text_end): case-insensitive
 * comparison via string_cmp_ic(), using the caller-scope `encode`.
 * NOTE(review): the action taken when string_cmp_ic() returns 0 is not
 * visible in this excerpt.
 */
1898#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1899 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
/*
 * string_cmp_ic: compare two strings under case folding.
 *
 * Each side is case-folded with ONIGENC_MBC_CASE_FOLD into its own buffer
 * (buf1 for s1, buf2 for the second string, both bounded by text_end).
 * The function returns 0 as soon as the folded lengths differ or any folded
 * byte differs.
 *
 * NOTE(review): the initialisation of s2/p1/p2/end1, the outer loop over
 * `mblen`, the write-back through `ps2` and the success return value are not
 * visible in this excerpt.
 */
1903static int string_cmp_ic(
OnigEncoding enc,
int case_fold_flag,
1904 UChar* s1, UChar** ps2, OnigDistance mblen,
const UChar* text_end)
1906 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1907 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1908 UChar *p1, *p2, *end1, *s2;
/* Fold one unit from each side; &s1 / &s2 are passed by address to the fold macro. */
1914 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1915 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
/* Different folded lengths can never be equal. */
1916 if (len1 != len2)
return 0;
/* Same length: compare the folded bytes one by one. */
1919 while (len1-- > 0) {
1920 if (*p1 != *p2)
return 0;
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail): byte-wise comparison of `len`
 * bytes that reports a mismatch by setting `is_fail` to 1 and leaving the
 * loop, instead of jumping to a label.
 * s1, s2 and len are modified in place.
 * NOTE(review): the initial value given to is_fail is not visible in this
 * excerpt.
 */
1930#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1932 while (len-- > 0) {\
1933 if (*s1++ != *s2++) {\
1934 is_fail = 1; break;\
/*
 * STRING_CMP_VALUE_IC(...): case-insensitive counterpart built on
 * string_cmp_ic(), using the caller-scope `encode`.
 * NOTE(review): how the result is stored into is_fail is not visible in this
 * excerpt.
 */
1939#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1940 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
/*
 * Position predicates.  They read the caller-scope variables `str` and `end`:
 *   IS_EMPTY_STR     - `str` and `end` coincide;
 *   ON_STR_BEGIN(s)  - `s` equals `str`;
 *   ON_STR_END(s)    - `s` equals `end`.
 */
1947#define IS_EMPTY_STR (str == end)
1948#define ON_STR_BEGIN(s) ((s) == str)
1949#define ON_STR_END(s) ((s) == end)
/*
 * Bounds checks on the caller-scope cursor `s`.  Two parallel sets are
 * defined: one measured against `right_range` (selected when
 * USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined) and one
 * measured against `end`.
 *   DATA_ENSURE_CHECK1       - at least one byte remains (expression);
 *   DATA_ENSURE_CHECK(n)     - at least n bytes remain (expression);
 *   DATA_ENSURE(n)           - `goto fail` unless n bytes remain;
 *   DATA_ENSURE_CONTINUE(n)  - `continue` unless n bytes remain, so it is
 *                              only usable directly inside a loop;
 *   ABSENT_END_POS           - the limit pointer itself.
 * DATA_ENSURE and DATA_ENSURE_CONTINUE expand to a bare `if` statement (no
 * do/while wrapper): use them as stand-alone statements, never as the body of
 * an if that has an else.
 */
1950#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1951# define DATA_ENSURE_CHECK1 (s < right_range)
1952# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1953# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1954# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1955# define ABSENT_END_POS right_range
1957# define DATA_ENSURE_CHECK1 (s < end)
1958# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1959# define DATA_ENSURE(n) if (s + (n) > end) goto fail
1960# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1961# define ABSENT_END_POS end
/* Declared here; the definition is not part of this excerpt. */
1964int onigenc_mbclen_approximate(
const OnigUChar* p,
const OnigUChar* e,
const struct OnigEncodingTypeST* enc);
/*
 * enclen_approx: byte length of the character at `p`, bounded by `e`.
 *
 * For encodings whose minimum and maximum character lengths are equal, the
 * answer is that fixed length, or 0 when `p` is not before `e`.
 * For every other encoding the work is delegated to
 * onigenc_mbclen_approximate().
 */
1967enclen_approx(
OnigEncoding enc,
const OnigUChar* p,
const OnigUChar* e)
1969 if (enc->max_enc_len == enc->min_enc_len) {
1970 return (p < e ? enc->min_enc_len : 0);
1973 return onigenc_mbclen_approximate(p, e, enc);
1978#ifdef USE_CAPTURE_HISTORY
/*
 * make_capture_history_tree: build the capture-history tree under `node`
 * from the stack entries between the cursor and `stk_top`.
 *
 * For an STK_MEM_START entry whose group number is at most
 * ONIG_MAX_CAPTURE_HISTORY_GROUP and is flagged in reg->capture_history, a
 * child node is allocated, its `beg` is set to the entry's offset from `str`,
 * it is attached with history_tree_add_child(), and the function recurses to
 * fill in the child's own descendants; afterwards the child's `end` is taken
 * from the entry the cursor then points at.
 * For an STK_MEM_END entry whose group equals node->group, node->end is
 * recorded.
 *
 * A non-zero result from the recursive call is returned unchanged, and
 * CHECK_NULL_RETURN_MEMERR handles a failed child allocation.
 * NOTE(review): the remaining parameters, the cursor handling through `kp`,
 * and the final return value are not visible in this excerpt.
 */
1980make_capture_history_tree(OnigCaptureTreeNode* node,
OnigStackType** kp,
1984 OnigCaptureTreeNode* child;
1987 while (k < stk_top) {
1988 if (k->type == STK_MEM_START) {
1990 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1991 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1992 child = history_node_new();
1993 CHECK_NULL_RETURN_MEMERR(child);
1995 child->beg = k->u.mem.pstr - str;
1996 r = history_tree_add_child(node, child);
/* NOTE(review): the condition guarding this free is elided; presumably the add-child failure path. */
1998 history_tree_free(child);
2002 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2003 if (r != 0)
return r;
2006 child->end = k->u.mem.pstr - str;
2009 else if (k->type == STK_MEM_END) {
2010 if (k->u.mem.num == node->group) {
2011 node->end = k->u.mem.pstr - str;
2023#ifdef USE_BACKREF_WITH_LEVEL
/*
 * mem_is_in_memp: test whether group number `mem` appears in the list of
 * `num` group numbers encoded at `memp`.
 * Each number is decoded with GET_MEMNUM_INC; the function returns 1 on the
 * first one equal to `mem`.
 * NOTE(review): the not-found return value is not visible in this excerpt.
 */
2025mem_is_in_memp(
int mem,
int num, UChar* memp)
2030 for (i = 0; i < num; i++) {
2031 GET_MEMNUM_INC(m, memp);
2032 if (mem == (
int )m)
return 1;
/*
 * backref_match_at_nested_level: match a back-reference against the text
 * captured at a given call-nesting level.
 *
 * The stack is scanned downward while `k >= stk_base`.  STK_CALL_FRAME and
 * STK_RETURN entries have their own branches (bodies elided here).  At
 * `level == nest`:
 *   - an STK_MEM_END entry whose group is listed in `memp` (checked with
 *     mem_is_in_memp) supplies `pend`;
 *   - an STK_MEM_START entry whose group is listed supplies `pstart`; if a
 *     `pend` has been seen, the captured text pstart..pend is compared with
 *     the subject at *s.  The function returns 0 when the captured text is
 *     longer than what remains before `send`, or when a byte differs in the
 *     case-sensitive comparison.  With ignore_case set, string_cmp_ic() does
 *     the comparison.
 *
 * NOTE(review): the stack parameters, the level bookkeeping, the update of *s
 * and the success return value are not visible in this excerpt.
 */
2037static int backref_match_at_nested_level(
regex_t* reg,
2039 int ignore_case,
int case_fold_flag,
2040 int nest,
int mem_num, UChar* memp, UChar** s,
const UChar* send)
2042 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2049 while (k >= stk_base) {
2050 if (k->type == STK_CALL_FRAME) {
2053 else if (k->type == STK_RETURN) {
2056 else if (level == nest) {
2057 if (k->type == STK_MEM_START) {
2058 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2059 pstart = k->u.mem.pstr;
2060 if (pend != NULL_UCHARP) {
/* The captured text must fit in what is left of the subject. */
2061 if (pend - pstart > send - *s)
return 0;
2065 if (ignore_case != 0) {
2066 if (string_cmp_ic(reg->enc, case_fold_flag,
2067 pstart, &ss, pend - pstart, send) == 0)
2072 if (*p++ != *ss++)
return 0;
2081 else if (k->type == STK_MEM_END) {
2082 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
2083 pend = k->u.mem.pstr;
/*
 * Timing back ends for the ONIG_DEBUG_STATISTICS build.  Each variant
 * provides the file-scope timestamps `ts` / `te`, GETTIME(t) to sample the
 * clock, and TIMEDIFF(te, ts) to subtract two samples:
 *   - Windows: QueryPerformanceCounter, scaled by 1000000 / freq;
 *   - gettimeofday: difference in microseconds (tv_sec * 1000000 + tv_usec);
 *   - times(): difference of tms_utime.
 * NOTE(review): the preprocessor conditions that select between the variants
 * are only partly visible in this excerpt.
 */
2095#ifdef ONIG_DEBUG_STATISTICS
2098# include <windows.h>
2099static LARGE_INTEGER ts, te, freq;
2100# define GETTIME(t) QueryPerformanceCounter(&(t))
2101# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
2102 * 1000000 / freq.QuadPart)
2105# define USE_TIMEOFDAY
2107# ifdef USE_TIMEOFDAY
2108# ifdef HAVE_SYS_TIME_H
2109# include <sys/time.h>
2111# ifdef HAVE_UNISTD_H
2115# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2116# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2117 (((te).tv_sec - (ts).tv_sec)*1000000))
2119# ifdef HAVE_SYS_TIMES_H
2120# include <sys/times.h>
2122static struct tms ts, te;
2123# define GETTIME(t) times(&(t))
2124# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
/*
 * Per-opcode statistics, indexed by opcode value (256 slots each):
 *   OpCounter      - how many times the opcode was entered;
 *   OpPrevCounter  - how many times the opcode was the one running when
 *                    OpPrevTarget was entered;
 *   OpTime         - accumulated TIMEDIFF for the opcode.
 * OpCurr starts as OP_FINISH, OpPrevTarget is OP_FAIL, and MaxStackDepth
 * records the deepest stack seen.
 */
2129static int OpCounter[256];
2130static int OpPrevCounter[256];
2131static unsigned long OpTime[256];
2132static int OpCurr = OP_FINISH;
2133static int OpPrevTarget = OP_FAIL;
2134static int MaxStackDepth = 0;
/*
 * MOP_IN(opcode) is the entry hook: it bumps OpPrevCounter[OpCurr] when the
 * opcode being entered is OpPrevTarget, and bumps OpCounter[opcode].
 * MOP_OUT is the exit hook: it adds TIMEDIFF(te, ts) to OpTime[OpCurr].
 * NOTE(review): the statements that update OpCurr and sample ts / te are not
 * visible in this excerpt.
 */
2136# define MOP_IN(opcode) do {\
2137 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2139 OpCounter[opcode]++;\
2143# define MOP_OUT do {\
2145 OpTime[OpCurr] += TIMEDIFF(te, ts);\
/*
 * onig_statistics_init: reset all 256 slots of OpCounter, OpPrevCounter and
 * OpTime to zero, and query the performance-counter frequency into `freq`.
 * NOTE(review): the frequency query is presumably compiled only for the
 * Windows timing back end; the guarding #ifdef is not visible here.
 */
2149onig_statistics_init(
void)
2152 for (i = 0; i < 256; i++) {
2153 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2157 QueryPerformanceFrequency(&freq);
/*
 * onig_print_statistics: write the collected statistics to `f`.
 * Prints a header line, then one line per OnigOpInfo entry (the table ends
 * at the first entry whose opcode is negative) with the counter, the
 * "prev" counter, the accumulated time and the opcode name, and finally the
 * maximum stack depth.
 * NOTE(review): the counters are indexed by table position `i`, not by
 * OnigOpInfo[i].opcode; this is only correct if the table is ordered by
 * opcode value -- confirm.
 */
2162onig_print_statistics(
FILE* f)
2165 fprintf(f,
" count prev time\n");
2166 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2167 fprintf(f,
"%8d: %8d: %10lu: %s\n",
2168 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2170 fprintf(f,
"\nmax stack depth: %d\n", MaxStackDepth);
/*
 * STACK_INC comes in two flavours.  The first one also maintains
 * MaxStackDepth, raising it whenever `stk - stk_base` exceeds the recorded
 * maximum; the second is a plain `stk++`.
 * NOTE(review): the preprocessor lines that choose between the two (and the
 * increment inside the first flavour) are not visible in this excerpt;
 * presumably the first belongs to the ONIG_DEBUG_STATISTICS build.
 */
2173# define STACK_INC do {\
2175 if (stk - stk_base > MaxStackDepth) \
2176 MaxStackDepth = stk - stk_base;\
2180# define STACK_INC stk++
/* MOP_IN expands to nothing in this configuration. */
2182# define MOP_IN(opcode)
2187#ifdef ONIG_DEBUG_MATCH
/*
 * stack_type_str: map a stack entry type (STK_*) to a short label for the
 * ONIG_DEBUG_MATCH trace output.  Types without a dedicated case fall
 * through to the default label.
 */
2189stack_type_str(
int stack_type)
2191 switch (stack_type) {
2192 case STK_ALT:
return "Alt ";
2193 case STK_LOOK_BEHIND_NOT:
return "LBNot ";
2194 case STK_POS_NOT:
return "PosNot";
2195 case STK_MEM_START:
return "MemS ";
2196 case STK_MEM_END:
return "MemE ";
2197 case STK_REPEAT_INC:
return "RepInc";
2198 case STK_STATE_CHECK_MARK:
return "StChMk";
2199 case STK_NULL_CHECK_START:
return "NulChS";
2200 case STK_NULL_CHECK_END:
return "NulChE";
2201 case STK_MEM_END_MARK:
return "MemEMk";
2202 case STK_POS:
return "Pos ";
2203 case STK_STOP_BT:
return "StopBt";
2204 case STK_REPEAT:
return "Rep ";
2205 case STK_CALL_FRAME:
return "Call ";
2206 case STK_RETURN:
return "Ret ";
2207 case STK_VOID:
return "Void ";
2208 case STK_ABSENT_POS:
return "AbsPos";
2209 case STK_ABSENT:
return "Absent";
2210 case STK_MATCH_CACHE_POINT:
return "MCache";
/* Any other type gets the blank label. */
2211 default:
return " ";
2215#ifdef USE_MATCH_CACHE
/*
 * bsearch_cache_opcodes: binary search in `cache_opcodes` (an array of
 * `num_cache_opcodes` entries) for the entry whose `addr` equals `p`.
 * The search window is [l, r], starting as [0, num_cache_opcodes - 1]; the
 * loop stops on an exact address hit and moves `l` past `m` when the probed
 * address is below `p`.
 * NOTE(review): the comparison relies on the array being sorted by `addr`.
 * The loop condition, the computation of `m`, the "greater" branch and the
 * return value are not visible in this excerpt.
 */
2218bsearch_cache_opcodes(
const OnigCacheOpcode *cache_opcodes,
long num_cache_opcodes,
const UChar* p)
2220 long l = 0, r = num_cache_opcodes - 1, m = 0;
2224 if (cache_opcodes[m].addr == p)
break;
2225 if (cache_opcodes[m].addr < p) l = m + 1;
/*
 * Body of the cache-point lookup (its signature is not part of this
 * excerpt).
 *
 * The opcode at `p` is looked up with bsearch_cache_opcodes(); a miss is
 * handled by the first `if`.  On a hit, the entry is published through
 * *cache_opcode_ptr and its base cache_point is read.  Entries with
 * outer_repeat_mem == -1 take a separate branch.  Otherwise the
 * cache point is derived from the current iteration count of the enclosing
 * repeat (taken from the stack entry indexed by repeat_stk[]) and that
 * repeat's range in reg->repeat_range[]:
 *   - count below range->lower;
 *   - range->upper == 0x7fffffff;
 *   - the remaining case.
 * For OP_REPEAT_INC / OP_REPEAT_INC_NG the count is taken as one less than
 * the stored value.
 * NOTE(review): the trailing terms of the three return expressions are not
 * visible in this excerpt.
 */
2239 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
2241 long num_cache_points_at_outer_repeat;
2242 long num_cache_points_in_outer_repeat;
2244 m = bsearch_cache_opcodes(cache_opcodes, num_cache_opcodes, p);
/* Reject an out-of-range index or an entry that is not exactly at `p`. */
2246 if (!(0 <= m && m < num_cache_opcodes && cache_opcodes[m].addr == p)) {
2250 cache_opcode = &cache_opcodes[m];
2251 *cache_opcode_ptr = &cache_opcodes[m];
2252 cache_point = cache_opcode->cache_point;
2253 if (cache_opcode->outer_repeat_mem == -1) {
2257 num_cache_points_at_outer_repeat = cache_opcode->num_cache_points_at_outer_repeat;
2258 num_cache_points_in_outer_repeat = cache_opcode->num_cache_points_in_outer_repeat;
/* Address of the enclosing repeat's range inside `reg`. */
2260 range = &reg->repeat_range[cache_opcode->outer_repeat_mem];
2262 stkp = &stk[repeat_stk[cache_opcode->outer_repeat_mem]];
2263 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
2265 if (count < range->lower) {
2266 return num_cache_points_at_outer_repeat +
2267 num_cache_points_in_outer_repeat * count +
2271 if (range->upper == 0x7fffffff) {
2272 return num_cache_points_at_outer_repeat +
2273 num_cache_points_in_outer_repeat * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) +
2277 return num_cache_points_at_outer_repeat +
2278 num_cache_points_in_outer_repeat * (range->lower - 1) +
2279 (num_cache_points_in_outer_repeat + 1) * (count - range->lower + 1) +
/*
 * check_extended_match_cache_point: test the bit that immediately follows
 * the bit selected by (match_cache_point_index, match_cache_point_mask) in
 * `match_cache_buf`.
 * When the mask is the top bit of its byte (0x80) the following bit is bit 0
 * of the next byte; otherwise it is `mask << 1` in the same byte.
 * Returns non-zero when that bit is set.
 */
2284check_extended_match_cache_point(uint8_t *match_cache_buf,
long match_cache_point_index, uint8_t match_cache_point_mask)
2286 if (match_cache_point_mask & 0x80) {
2287 return (match_cache_buf[match_cache_point_index + 1] & 0x01) > 0;
2290 return (match_cache_buf[match_cache_point_index] & (match_cache_point_mask << 1)) > 0;
/*
 * memoize_extended_match_cache_point: set both the bit selected by
 * (match_cache_point_index, match_cache_point_mask) and the bit that
 * immediately follows it in `match_cache_buf`.
 * When the mask is the top bit of its byte (0x80) the following bit is bit 0
 * of the next byte; otherwise it is `mask << 1` in the same byte.
 */
2295memoize_extended_match_cache_point(uint8_t *match_cache_buf,
long match_cache_point_index, uint8_t match_cache_point_mask)
2297 match_cache_buf[match_cache_point_index] |= match_cache_point_mask;
2298 if (match_cache_point_mask & 0x80) {
2299 match_cache_buf[match_cache_point_index + 1] |= 0x01;
2302 match_cache_buf[match_cache_point_index] |= match_cache_point_mask << 1;
2311match_at(
regex_t* reg,
const UChar* str,
const UChar* end,
2312#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2313 const UChar* right_range,
2317 static const UChar FinishCode[] = { OP_FINISH };
2319 int i, num_mem, pop_level;
2320 ptrdiff_t n, best_len;
2321 LengthType tlen, tlen2;
2324 OnigOptionType option = reg->options;
2326 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2327 UChar *s, *q, *sbegin;
2332 char *xmalloc_base = NULL;
2336 OnigStackIndex *repeat_stk;
2337 OnigStackIndex *mem_start_stk, *mem_end_stk;
2338#ifdef USE_COMBINATION_EXPLOSION_CHECK
2340 unsigned char* state_check_buff = msa->state_check_buff;
2341 int num_comb_exp_check = reg->num_comb_exp_check;
2344#if USE_TOKEN_THREADED_VM
2346# define VM_LOOP JUMP;
2348# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2349# define DEFAULT L_DEFAULT:
2350# define NEXT sprev = sbegin; JUMP
2351# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2377 &&L_OP_CCLASS_MB_NOT,
2378 &&L_OP_CCLASS_MIX_NOT,
2382 &&L_OP_ANYCHAR_STAR,
2383 &&L_OP_ANYCHAR_ML_STAR,
2384 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2385 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2390 &&L_OP_NOT_WORD_BOUND,
2391# ifdef USE_WORD_BEGIN_END
2399 &&L_OP_NOT_ASCII_WORD,
2400 &&L_OP_ASCII_WORD_BOUND,
2401 &&L_OP_NOT_ASCII_WORD_BOUND,
2402# ifdef USE_WORD_BEGIN_END
2403 &&L_OP_ASCII_WORD_BEGIN,
2404 &&L_OP_ASCII_WORD_END,
2414 &&L_OP_SEMI_END_BUF,
2415 &&L_OP_BEGIN_POSITION,
2421 &&L_OP_BACKREF_MULTI,
2422 &&L_OP_BACKREF_MULTI_IC,
2423# ifdef USE_BACKREF_WITH_LEVEL
2424 &&L_OP_BACKREF_WITH_LEVEL,
2428 &&L_OP_MEMORY_START,
2429 &&L_OP_MEMORY_START_PUSH,
2430 &&L_OP_MEMORY_END_PUSH,
2431# ifdef USE_SUBEXP_CALL
2432 &&L_OP_MEMORY_END_PUSH_REC,
2437# ifdef USE_SUBEXP_CALL
2438 &&L_OP_MEMORY_END_REC,
2449# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2450 &&L_OP_PUSH_OR_JUMP_EXACT1,
2454 &&L_OP_PUSH_IF_PEEK_NEXT,
2458 &&L_OP_REPEAT_INC_NG,
2459 &&L_OP_REPEAT_INC_SG,
2460 &&L_OP_REPEAT_INC_NG_SG,
2461 &&L_OP_NULL_CHECK_START,
2462 &&L_OP_NULL_CHECK_END,
2463# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2464 &&L_OP_NULL_CHECK_END_MEMST,
2468# ifdef USE_SUBEXP_CALL
2469 &&L_OP_NULL_CHECK_END_MEMST_PUSH,
2476 &&L_OP_PUSH_POS_NOT,
2478 &&L_OP_PUSH_STOP_BT,
2481 &&L_OP_PUSH_LOOK_BEHIND_NOT,
2482 &&L_OP_FAIL_LOOK_BEHIND_NOT,
2483 &&L_OP_PUSH_ABSENT_POS,
2487# ifdef USE_SUBEXP_CALL
2496# ifdef USE_COMBINATION_EXPLOSION_CHECK
2497 &&L_OP_STATE_CHECK_PUSH,
2498 &&L_OP_STATE_CHECK_PUSH_OR_JUMP,
2505# ifdef USE_COMBINATION_EXPLOSION_CHECK
2506 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2507 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2514 &&L_OP_SET_OPTION_PUSH,
2530# define VM_LOOP_END } sprev = sbegin; }
2531# define CASE(x) case x:
2532# define DEFAULT default:
2534# define JUMP continue; break
2538#ifdef USE_SUBEXP_CALL
2541# define ADD_NUMMEM 1
2544# define ADD_NUMMEM 0
2547 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2549 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2550 pop_level = reg->stack_pop_level;
2551 num_mem = reg->num_mem;
2552 repeat_stk = (OnigStackIndex* )alloca_base;
2554 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2555 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2557 OnigStackIndex *pp = mem_start_stk;
2558 for (; pp < repeat_stk + n; pp += 2) {
2559 pp[0] = INVALID_STACK_INDEX;
2560 pp[1] = INVALID_STACK_INDEX;
2563#ifndef USE_SUBEXP_CALL
2570#ifdef ONIG_DEBUG_MATCH
2571 fprintf(stderr,
"match_at: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), start: %"PRIuPTR
" (%p), sprev: %"PRIuPTR
" (%p)\n",
2572 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2573 fprintf(stderr,
"size: %d, start offset: %d\n",
2574 (
int )(end - str), (
int )(sstart - str));
2575 fprintf(stderr,
"\n ofs> str stk:type addr:opcode\n");
2578 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode);
2579 best_len = ONIG_MISMATCH;
2580 s = (UChar* )sstart;
2581 pkeep = (UChar* )sstart;
2584#ifdef ONIG_DEBUG_MATCH
2585# define OPCODE_EXEC_HOOK \
2587 UChar *op, *q, *bp, buf[50]; \
2589 op = p - OP_OFFSET; \
2590 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2593 if (*op != OP_FINISH) { \
2594 for (i = 0; i < 7 && q < end; i++) { \
2595 len = enclen(encode, q, end); \
2596 while (len-- > 0) *bp++ = *q++; \
2598 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2600 xmemcpy(bp, "\"", 1); bp += 1; \
2602 fputs((char* )buf, stderr); \
2603 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2604 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2605 stk - stk_base - 1, \
2606 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2607 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2608 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2609 fprintf(stderr, "\n"); \
2612# define OPCODE_EXEC_HOOK ((void) 0)
2615#ifdef USE_MATCH_CACHE
2616#ifdef ONIG_DEBUG_MATCH_CACHE
2617#define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask)
2618#define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n")
2620#define MATCH_CACHE_DEBUG ((void) 0)
2621#define MATCH_CACHE_DEBUG_HIT ((void) 0)
2624#define MATCH_CACHE_HIT ((void) 0)
2626# define CHECK_MATCH_CACHE do {\
2627 if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\
2628 const OnigCacheOpcode *cache_opcode;\
2629 long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\
2630 if (cache_point >= 0) {\
2631 long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\
2632 long match_cache_point_index = match_cache_point >> 3;\
2633 uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\
2635 if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\
2636 MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\
2637 if (cache_opcode->lookaround_nesting == 0) goto fail;\
2638 else if (cache_opcode->lookaround_nesting < 0) {\
2639 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2640 STACK_STOP_BT_FAIL;\
2646 if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2647 p = cache_opcode->match_addr;\
2654 STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\
2659# define CHECK_MATCH_CACHE ((void) 0)
2663 CASE(OP_END) MOP_IN(OP_END);
2667#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2668 if (IS_FIND_LONGEST(option)) {
2669 if (n > msa->best_len) {
2671 msa->best_s = (UChar* )sstart;
2678 region = msa->region;
2680 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2681 region->end[0] = s - str;
2682 for (i = 1; i <= num_mem; i++) {
2683 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2684 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2685 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2687 region->beg[i] = (UChar* )((
void* )mem_start_stk[i]) - str;
2689 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2690 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2691 : (UChar* )((
void* )mem_end_stk[i])) - str;
2694 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2698#ifdef USE_CAPTURE_HISTORY
2699 if (reg->capture_history != 0) {
2701 OnigCaptureTreeNode* node;
2703 if (IS_NULL(region->history_root)) {
2704 region->history_root = node = history_node_new();
2705 CHECK_NULL_RETURN_MEMERR(node);
2708 node = region->history_root;
2709 history_tree_clear(node);
2713 node->beg = ((pkeep > s) ? s : pkeep) - str;
2714 node->end = s - str;
2717 r = make_capture_history_tree(region->history_root, &stkp,
2718 stk, (UChar* )str, reg);
2728#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2733 if (IS_FIND_CONDITION(option)) {
2734 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2735 best_len = ONIG_MISMATCH;
2738 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2747 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2749 if (*p != *s)
goto fail;
2754 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2757 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2760 len = ONIGENC_MBC_CASE_FOLD(encode,
2776 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2778 if (*p != *s)
goto fail;
2780 if (*p != *s)
goto fail;
2786 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2788 if (*p != *s)
goto fail;
2790 if (*p != *s)
goto fail;
2792 if (*p != *s)
goto fail;
2798 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2800 if (*p != *s)
goto fail;
2802 if (*p != *s)
goto fail;
2804 if (*p != *s)
goto fail;
2806 if (*p != *s)
goto fail;
2812 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2814 if (*p != *s)
goto fail;
2816 if (*p != *s)
goto fail;
2818 if (*p != *s)
goto fail;
2820 if (*p != *s)
goto fail;
2822 if (*p != *s)
goto fail;
2828 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2829 GET_LENGTH_INC(tlen, p);
2831 while (tlen-- > 0) {
2832 if (*p++ != *s++)
goto fail;
2838 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2841 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2843 GET_LENGTH_INC(tlen, p);
2849 len = ONIGENC_MBC_CASE_FOLD(encode,
2856 if (*p != *q)
goto fail;
2865 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2867 if (*p != *s)
goto fail;
2869 if (*p != *s)
goto fail;
2874 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2876 if (*p != *s)
goto fail;
2878 if (*p != *s)
goto fail;
2881 if (*p != *s)
goto fail;
2883 if (*p != *s)
goto fail;
2888 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2890 if (*p != *s)
goto fail;
2892 if (*p != *s)
goto fail;
2894 if (*p != *s)
goto fail;
2896 if (*p != *s)
goto fail;
2899 if (*p != *s)
goto fail;
2901 if (*p != *s)
goto fail;
2906 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2907 GET_LENGTH_INC(tlen, p);
2908 DATA_ENSURE(tlen * 2);
2909 while (tlen-- > 0) {
2910 if (*p != *s)
goto fail;
2912 if (*p != *s)
goto fail;
2919 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2920 GET_LENGTH_INC(tlen, p);
2921 DATA_ENSURE(tlen * 3);
2922 while (tlen-- > 0) {
2923 if (*p != *s)
goto fail;
2925 if (*p != *s)
goto fail;
2927 if (*p != *s)
goto fail;
2934 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2935 GET_LENGTH_INC(tlen, p);
2936 GET_LENGTH_INC(tlen2, p);
2939 while (tlen2-- > 0) {
2940 if (*p != *s)
goto fail;
2947 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2949 if (BITSET_AT(((BitSetRef )p), *s) == 0)
goto fail;
2951 s += enclen(encode, s, end);
2955 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2956 if (! ONIGENC_IS_MBC_HEAD(encode, s, end))
goto fail;
2959 GET_LENGTH_INC(tlen, p);
2966 mb_len = enclen_approx(encode, s, end);
2967 DATA_ENSURE(mb_len);
2970 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2972#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2973 if (! onig_is_in_code_range(p, code))
goto fail;
2977 if (! onig_is_in_code_range(q, code))
goto fail;
2984 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2986 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2991 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2995 GET_LENGTH_INC(tlen, p);
3002 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
3004 if (BITSET_AT(((BitSetRef )p), *s) != 0)
goto fail;
3006 s += enclen(encode, s, end);
3010 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
3012 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3014 GET_LENGTH_INC(tlen, p);
3016 goto cc_mb_not_success;
3020 GET_LENGTH_INC(tlen, p);
3024 int mb_len = enclen(encode, s, end);
3026 if (! DATA_ENSURE_CHECK(mb_len)) {
3030 goto cc_mb_not_success;
3035 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3037#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
3038 if (onig_is_in_code_range(p, code))
goto fail;
3042 if (onig_is_in_code_range(q, code))
goto fail;
3051 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
3053 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3058 if (BITSET_AT(((BitSetRef )p), *s) != 0)
3062 GET_LENGTH_INC(tlen, p);
3069 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
3071 n = enclen_approx(encode, s, end);
3073 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
3078 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
3080 n = enclen_approx(encode, s, end);
3086 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
3087 while (DATA_ENSURE_CHECK1) {
3089 STACK_PUSH_ALT(p, s, sprev, pkeep);
3090 n = enclen_approx(encode, s, end);
3092 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
3099 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
3100 while (DATA_ENSURE_CHECK1) {
3102 STACK_PUSH_ALT(p, s, sprev, pkeep);
3103 n = enclen_approx(encode, s, end);
3117 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
3118 while (DATA_ENSURE_CHECK1) {
3121 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3123#ifdef USE_MATCH_CACHE
3129 n = enclen_approx(encode, s, end);
3131 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
3139 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
3140 while (DATA_ENSURE_CHECK1) {
3143 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3145#ifdef USE_MATCH_CACHE
3151 n = enclen_approx(encode, s, end);
3166#ifdef USE_COMBINATION_EXPLOSION_CHECK
3167 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
3168 GET_STATE_CHECK_NUM_INC(mem, p);
3169 while (DATA_ENSURE_CHECK1) {
3170 STATE_CHECK_VAL(scv, mem);
3173 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3174 n = enclen_approx(encode, s, end);
3176 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
3183 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
3184 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
3186 GET_STATE_CHECK_NUM_INC(mem, p);
3187 while (DATA_ENSURE_CHECK1) {
3188 STATE_CHECK_VAL(scv, mem);
3191 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3192 n = enclen_approx(encode, s, end);
3207 CASE(OP_WORD) MOP_IN(OP_WORD);
3209 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3212 s += enclen(encode, s, end);
3216 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
3218 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3221 s += enclen(encode, s, end);
3225 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
3227 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3230 s += enclen(encode, s, end);
3234 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
3236 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3239 s += enclen(encode, s, end);
3243 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
3244 if (ON_STR_BEGIN(s)) {
3246 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3249 else if (ON_STR_END(s)) {
3250 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
3254 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3255 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
3261 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
3262 if (ON_STR_BEGIN(s)) {
3264 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3267 else if (ON_STR_END(s)) {
3268 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3272 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3273 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3279 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
3280 if (ON_STR_BEGIN(s)) {
3281 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3284 else if (ON_STR_END(s)) {
3285 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3289 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3290 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3296 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3297 if (ON_STR_BEGIN(s)) {
3298 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3301 else if (ON_STR_END(s)) {
3302 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3306 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3307 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3313#ifdef USE_WORD_BEGIN_END
3314 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3315 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3316 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3324 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3325 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3326 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3334 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3335 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3336 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3344 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3345 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3346 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3355 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3356 if (! ON_STR_BEGIN(s))
goto fail;
3357 if (IS_NOTBOS(msa->options))
goto fail;
3362 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3363 if (! ON_STR_END(s))
goto fail;
3364 if (IS_NOTEOS(msa->options))
goto fail;
3369 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3370 if (ON_STR_BEGIN(s)) {
3371 if (IS_NOTBOL(msa->options))
goto fail;
3375 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3376#ifdef USE_CRNL_AS_LINE_TERMINATOR
3377 && !(IS_NEWLINE_CRLF(option)
3378 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3380 && !ON_STR_END(s)) {
3387 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3388 if (ON_STR_END(s)) {
3389#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3390 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3392 if (IS_NOTEOL(msa->options))
goto fail;
3395#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3399 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3406 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3407 if (ON_STR_END(s)) {
3408#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3409 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3411 if (IS_NOTEOL(msa->options))
goto fail;
3414#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3418 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3419 UChar* ss = s + enclen(encode, s, end);
3420 if (ON_STR_END(ss)) {
3424#ifdef USE_CRNL_AS_LINE_TERMINATOR
3425 else if (IS_NEWLINE_CRLF(option)
3426 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3427 ss += enclen(encode, ss, end);
3428 if (ON_STR_END(ss)) {
3438 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3445 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3446 GET_MEMNUM_INC(mem, p);
3447 STACK_PUSH_MEM_START(mem, s);
3451 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3452 GET_MEMNUM_INC(mem, p);
3453 mem_start_stk[mem] = (OnigStackIndex )((
void* )s);
3454 mem_end_stk[mem] = INVALID_STACK_INDEX;
3458 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3459 GET_MEMNUM_INC(mem, p);
3460 STACK_PUSH_MEM_END(mem, s);
3464 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3465 GET_MEMNUM_INC(mem, p);
3466 mem_end_stk[mem] = (OnigStackIndex )((
void* )s);
3470 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3475#ifdef USE_SUBEXP_CALL
3476 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3477 GET_MEMNUM_INC(mem, p);
3478 STACK_GET_MEM_START(mem, stkp);
3479 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3480 STACK_PUSH_MEM_END(mem, s);
3484 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3485 GET_MEMNUM_INC(mem, p);
3486 mem_end_stk[mem] = (OnigStackIndex )((
void* )s);
3487 STACK_GET_MEM_START(mem, stkp);
3489 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3490 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3492 mem_start_stk[mem] = (OnigStackIndex )((
void* )stkp->u.mem.pstr);
3494 STACK_PUSH_MEM_END_MARK(mem);
3499 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3504 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3509 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3510 GET_MEMNUM_INC(mem, p);
3514 UChar *pstart, *pend;
3518 if (mem > num_mem)
goto fail;
3519 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3520 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3522 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3523 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3525 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3527 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3528 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3529 : (UChar* )((
void* )mem_end_stk[mem]));
3533 STRING_CMP(pstart, s, n);
3534 while (sprev + (
len = enclen_approx(encode, sprev, end)) < s)
3541 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3542 GET_MEMNUM_INC(mem, p);
3545 UChar *pstart, *pend;
3549 if (mem > num_mem)
goto fail;
3550 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3551 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3553 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3554 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3556 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3558 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3559 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3560 : (UChar* )((
void* )mem_end_stk[mem]));
3564 STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
3565 while (sprev + (
len = enclen_approx(encode, sprev, end)) < s)
3573 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3576 UChar *pstart, *pend, *swork;
3578 GET_LENGTH_INC(tlen, p);
3579 for (i = 0; i < tlen; i++) {
3580 GET_MEMNUM_INC(mem, p);
3582 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
continue;
3583 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
continue;
3585 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3586 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3588 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3590 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3591 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3592 : (UChar* )((
void* )mem_end_stk[mem]));
3594 DATA_ENSURE_CONTINUE(n);
3597 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3598 if (is_fail)
continue;
3600 while (sprev + (
len = enclen_approx(encode, sprev, end)) < s)
3603 p += (SIZE_MEMNUM * (tlen - i - 1));
3606 if (i == tlen)
goto fail;
3612 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3615 UChar *pstart, *pend, *swork;
3617 GET_LENGTH_INC(tlen, p);
3618 for (i = 0; i < tlen; i++) {
3619 GET_MEMNUM_INC(mem, p);
3621 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
continue;
3622 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
continue;
3624 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3625 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3627 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3629 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3630 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3631 : (UChar* )((
void* )mem_end_stk[mem]));
3633 DATA_ENSURE_CONTINUE(n);
3636 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3637 if (is_fail)
continue;
3639 while (sprev + (
len = enclen(encode, sprev, end)) < s)
3642 p += (SIZE_MEMNUM * (tlen - i - 1));
3645 if (i == tlen)
goto fail;
3650#ifdef USE_BACKREF_WITH_LEVEL
3651 CASE(OP_BACKREF_WITH_LEVEL)
3657 GET_OPTION_INC(ic, p);
3658 GET_LENGTH_INC(level, p);
3659 GET_LENGTH_INC(tlen, p);
3662 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3663 case_fold_flag, (
int )level, (
int )tlen, p, &s, end)) {
3664 while (sprev + (
len = enclen(encode, sprev, end)) < s)
3667 p += (SIZE_MEMNUM * tlen);
3679 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3680 GET_OPTION_INC(option, p);
3681 STACK_PUSH_ALT(p, s, sprev, pkeep);
3682 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3686 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3687 GET_OPTION_INC(option, p);
3692 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3693 GET_MEMNUM_INC(mem, p);
3694 STACK_PUSH_NULL_CHECK_START(mem, s);
3698 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3702 GET_MEMNUM_INC(mem, p);
3703 STACK_NULL_CHECK(isnull, mem, s);
3705#ifdef ONIG_DEBUG_MATCH
3706 fprintf(stderr,
"NULL_CHECK_END: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3707 (
int )mem, (uintptr_t )s, s);
3717 case OP_REPEAT_INC_NG:
3718 case OP_REPEAT_INC_SG:
3719 case OP_REPEAT_INC_NG_SG:
3723 goto unexpected_bytecode_error;
3731#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3732 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3736 GET_MEMNUM_INC(mem, p);
3737 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
3739# ifdef ONIG_DEBUG_MATCH
3740 fprintf(stderr,
"NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3741 (
int )mem, (uintptr_t )s, s);
3743 if (isnull == -1)
goto fail;
3744 goto null_check_found;
3751#ifdef USE_SUBEXP_CALL
3752 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3753 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3757 GET_MEMNUM_INC(mem, p);
3758# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3759 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3761 STACK_NULL_CHECK_REC(isnull, mem, s);
3764# ifdef ONIG_DEBUG_MATCH
3765 fprintf(stderr,
"NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3766 (
int )mem, (uintptr_t )s, s);
3768 if (isnull == -1)
goto fail;
3769 goto null_check_found;
3772 STACK_PUSH_NULL_CHECK_END(mem);
3779 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3780 GET_RELADDR_INC(addr, p);
3783 CHECK_INTERRUPT_IN_MATCH_AT;
3786 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3787 GET_RELADDR_INC(addr, p);
3789 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3793#ifdef USE_COMBINATION_EXPLOSION_CHECK
3794 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3795 GET_STATE_CHECK_NUM_INC(mem, p);
3796 STATE_CHECK_VAL(scv, mem);
3799 GET_RELADDR_INC(addr, p);
3800 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3804 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3805 GET_STATE_CHECK_NUM_INC(mem, p);
3806 GET_RELADDR_INC(addr, p);
3807 STATE_CHECK_VAL(scv, mem);
3812 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3817 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3818 GET_STATE_CHECK_NUM_INC(mem, p);
3819 STATE_CHECK_VAL(scv, mem);
3822 STACK_PUSH_STATE_CHECK(s, mem);
3827 CASE(OP_POP) MOP_IN(OP_POP);
3829#ifdef USE_MATCH_CACHE
3837#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3838 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3839 GET_RELADDR_INC(addr, p);
3840 if (*p == *s && DATA_ENSURE_CHECK1) {
3843 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3852 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3853 GET_RELADDR_INC(addr, p);
3857 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3866 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3868 GET_MEMNUM_INC(mem, p);
3869 GET_RELADDR_INC(addr, p);
3872 repeat_stk[mem] = GET_STACK_INDEX(stk);
3873 STACK_PUSH_REPEAT(mem, p);
3875 if (reg->repeat_range[mem].lower == 0) {
3877 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3883 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3885 GET_MEMNUM_INC(mem, p);
3886 GET_RELADDR_INC(addr, p);
3889 repeat_stk[mem] = GET_STACK_INDEX(stk);
3890 STACK_PUSH_REPEAT(mem, p);
3892 if (reg->repeat_range[mem].lower == 0) {
3894 STACK_PUSH_ALT(p, s, sprev, pkeep);
3901 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3902 GET_MEMNUM_INC(mem, p);
3903 si = repeat_stk[mem];
3904 stkp = STACK_AT(si);
3907 stkp->u.repeat.count++;
3908 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3911 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3912#ifdef USE_MATCH_CACHE
3913 if (*pbegin == OP_REPEAT_INC) {
3914#undef MATCH_CACHE_HIT
3915#define MATCH_CACHE_HIT stkp->u.repeat.count--;
3917#undef MATCH_CACHE_HIT
3918#define MATCH_CACHE_HIT ((void) 0)
3921 STACK_PUSH_ALT(p, s, sprev, pkeep);
3922 p = STACK_AT(si)->u.repeat.pcode;
3925 p = stkp->u.repeat.pcode;
3927 STACK_PUSH_REPEAT_INC(si);
3929 CHECK_INTERRUPT_IN_MATCH_AT;
3932 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3933 GET_MEMNUM_INC(mem, p);
3934 STACK_GET_REPEAT(mem, stkp);
3935 si = GET_STACK_INDEX(stkp);
3939 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3940 GET_MEMNUM_INC(mem, p);
3941 si = repeat_stk[mem];
3942 stkp = STACK_AT(si);
3945 stkp->u.repeat.count++;
3946 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3947 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3948 UChar* pcode = stkp->u.repeat.pcode;
3950 STACK_PUSH_REPEAT_INC(si);
3951 if (*pbegin == OP_REPEAT_INC_NG) {
3954 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3957 p = stkp->u.repeat.pcode;
3958 STACK_PUSH_REPEAT_INC(si);
3961 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3962 STACK_PUSH_REPEAT_INC(si);
3965 CHECK_INTERRUPT_IN_MATCH_AT;
3968 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3969 GET_MEMNUM_INC(mem, p);
3970 STACK_GET_REPEAT(mem, stkp);
3971 si = GET_STACK_INDEX(stkp);
3975 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3976 STACK_PUSH_POS(s, sprev, pkeep);
3980 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3982 STACK_POS_END(stkp);
3983 s = stkp->u.state.pstr;
3984 sprev = stkp->u.state.pstr_prev;
3989 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3990 GET_RELADDR_INC(addr, p);
3991 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3995 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3996 STACK_POP_TIL_POS_NOT;
4000 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
4005 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
4010 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
4011 GET_LENGTH_INC(tlen, p);
4012 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (
int )tlen);
4013 if (IS_NULL(s))
goto fail;
4014 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4018 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
4019 GET_RELADDR_INC(addr, p);
4020 GET_LENGTH_INC(tlen, p);
4021 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (
int )tlen);
4029 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
4031 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4036 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
4037 STACK_POP_TIL_LOOK_BEHIND_NOT;
4041 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
4043 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
4047 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
4049 const UChar* aend = ABSENT_END_POS;
4051 UChar* selfp = p - 1;
4053 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS);
4054 GET_RELADDR_INC(addr, p);
4055#ifdef ONIG_DEBUG_MATCH
4056 fprintf(stderr,
"ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
4058 if ((absent > aend) && (s > absent)) {
4064 else if ((s >= aend) && (s > absent)) {
4074 else if (s == end) {
4080 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
4081 n = enclen(encode, s, end);
4082 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS);
4083 STACK_PUSH_ALT(selfp, s + n, s, pkeep);
4085 ABSENT_END_POS = aend;
4091 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
4094 if (sprev < ABSENT_END_POS)
4095 ABSENT_END_POS = sprev;
4096#ifdef ONIG_DEBUG_MATCH
4097 fprintf(stderr,
"ABSENT_END: end:%p\n", ABSENT_END_POS);
4099 STACK_POP_TIL_ABSENT;
4103#ifdef USE_SUBEXP_CALL
4104 CASE(OP_CALL) MOP_IN(OP_CALL);
4105 GET_ABSADDR_INC(addr, p);
4106 STACK_PUSH_CALL_FRAME(p);
4111 CASE(OP_RETURN) MOP_IN(OP_RETURN);
4118 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
4119 GET_MEMNUM_INC(mem, p);
4120 GET_RELADDR_INC(addr, p);
4121 if ((mem > num_mem) ||
4122 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
4123 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
4141 p = stk->u.state.pcode;
4142 s = stk->u.state.pstr;
4143 sprev = stk->u.state.pstr_prev;
4144 pkeep = stk->u.state.pkeep;
4146#ifdef USE_MATCH_CACHE
4148 msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED &&
4149 ++msa->num_fails >= (
long)(end - str) * msa->num_cache_opcodes
4151 if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) {
4152 msa->match_cache_status = MATCH_CACHE_STATUS_INIT;
4153 OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes);
4154 if (r < 0)
goto bytecode_error;
4156 if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) {
4157 msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED;
4158 goto fail_match_cache;
4160 if (msa->num_fails < (
long)(end - str) * msa->num_cache_opcodes) {
4161 goto fail_match_cache;
4163 if (msa->cache_opcodes == NULL) {
4164 msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED;
4166 if (cache_opcodes == NULL) {
4167 return ONIGERR_MEMORY;
4169 OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points);
4171 if (r == ONIGERR_UNEXPECTED_BYTECODE)
goto unexpected_bytecode_error;
4172 else goto bytecode_error;
4174 msa->cache_opcodes = cache_opcodes;
4175#ifdef ONIG_DEBUG_MATCH_CACHE
4176 fprintf(stderr,
"MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes);
4177 fprintf(stderr,
"MATCH CACHE: #cache points = %ld\n", msa->num_cache_points);
4178 fprintf(stderr,
"MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes);
4179 for (
int i = 0; i < msa->num_cache_opcodes; i++) {
4180 fprintf(stderr,
"MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr);
4184 if (msa->match_cache_buf == NULL) {
4185 size_t length = (end - str) + 1;
4186 size_t num_match_cache_points = (size_t)msa->num_cache_points * length;
4187#ifdef ONIG_DEBUG_MATCH_CACHE
4188 fprintf(stderr,
"MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length);
4191 if (num_match_cache_points / length != (
size_t)msa->num_cache_points) {
4192 return ONIGERR_MEMORY;
4194 if (num_match_cache_points >= LONG_MAX_LIMIT) {
4195 return ONIGERR_MEMORY;
4197 size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1;
4198 uint8_t* match_cache_buf = (uint8_t*)
xmalloc(match_cache_buf_length *
sizeof(uint8_t));
4199 if (match_cache_buf == NULL) {
4200 return ONIGERR_MEMORY;
4202 xmemset(match_cache_buf, 0, match_cache_buf_length *
sizeof(uint8_t));
4203 msa->match_cache_buf = match_cache_buf;
4209#ifdef USE_COMBINATION_EXPLOSION_CHECK
4210 if (stk->u.state.state_check != 0) {
4211 stk->type = STK_STATE_CHECK_MARK;
4217 CHECK_INTERRUPT_IN_MATCH_AT;
4221 goto bytecode_error;
4226 xfree(xmalloc_base);
4232 xfree(xmalloc_base);
4233 return ONIGERR_STACK_BUG;
4238 xfree(xmalloc_base);
4239 return ONIGERR_UNDEFINED_BYTECODE;
4241 unexpected_bytecode_error:
4243 xfree(xmalloc_base);
4244 return ONIGERR_UNEXPECTED_BYTECODE;
4248 xfree(xmalloc_base);
4249 return ONIGERR_TIMEOUT;
/*
 * slow_search: forward scan of text for the byte string [target, target_end).
 *
 * Visible behaviour:
 *  - end is set to text_end minus (target length - 1) and then compared
 *    against text_range (the body of that comparison is not visible here).
 *  - Encodings whose max_enc_len equals min_enc_len take a separate branch
 *    that keeps the character width in n; the other branch advances with
 *    enclen().
 *  - A candidate is screened on its first byte (*s == *target) and then
 *    confirmed with memcmp over the remaining bytes.
 *  - Both branches end in return NULL when no candidate was accepted.
 *
 * NOTE(review): loop headers, the assignments to s/t/p and the success
 * returns are not visible in this listing - confirm against the full file.
 */
4254slow_search(
OnigEncoding enc, UChar* target, UChar* target_end,
4255 const UChar* text,
const UChar* text_end, UChar* text_range)
4257 UChar *t, *p, *s, *end;
4259 end = (UChar* )text_end;
/* Back off by (target length - 1). */
4260 end -= target_end - target - 1;
4261 if (end > text_range)
/* Fixed-width encodings: max and min character widths coincide. */
4266 if (enc->max_enc_len == enc->min_enc_len) {
4267 int n = enc->max_enc_len;
4270 if (*s == *target) {
/* An empty remainder (target_end == t) counts as a match without memcmp. */
4273 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4278 return (UChar* )NULL;
4281 if (*s == *target) {
4284 if (target_end == t || memcmp(t, p, target_end - t) == 0)
/* Variable-width path: step one encoded character at a time. */
4287 s += enclen(enc, s, text_end);
4290 return (UChar* )NULL;
/*
 * str_lower_case_match: compares the bytes starting at t against the
 * case-folded form of the text starting at p.
 *
 * Visible behaviour: ONIGENC_MBC_CASE_FOLD is given &p and writes the folded
 * bytes into lowbuf, returning their count in lowlen; those bytes are then
 * compared one at a time against *t, and the first difference returns 0.
 *
 * NOTE(review): the outer loop, the initialisation of q and the success
 * return are not visible here; presumably t is already case-folded and the
 * function returns non-zero once tend is reached - confirm.
 */
4294str_lower_case_match(
OnigEncoding enc,
int case_fold_flag,
4295 const UChar* t,
const UChar* tend,
4296 const UChar* p,
const UChar* end)
4299 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
/* p is passed by address, so the fold macro may advance it. */
4302 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4304 while (lowlen > 0) {
4305 if (*t++ != *q++)
return 0;
/*
 * Case-insensitive forward scan (presumably slow_search_ic - the line
 * carrying the function name and its leading parameters is not visible
 * here; TODO confirm).
 *
 * Visible behaviour: end is text_end minus (target length - 1) and is
 * compared against text_range; each candidate position is tested with
 * str_lower_case_match(); the scan advances one encoded character at a time
 * via enclen(); NULL is returned at the end.
 */
4315 UChar* target, UChar* target_end,
4316 const UChar* text,
const UChar* text_end, UChar* text_range)
4320 end = (UChar* )text_end;
4321 end -= target_end - target - 1;
4322 if (end > text_range)
4328 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4332 s += enclen(enc, s, text_end);
4335 return (UChar* )NULL;
/*
 * slow_search_backward: backward scan for the byte string
 * [target, target_end).
 *
 * Visible behaviour:
 *  - s starts at text_end minus the target length; another visible
 *    assignment sets s to text_start (the condition selecting between them
 *    is not visible here).
 *  - s is aligned to a character head relative to adjust_text.
 *  - A candidate is screened on its first byte, then compared in a loop
 *    bounded by target_end; reaching target_end is tested afterwards.
 *  - The scan steps to the previous character head each round and returns
 *    NULL at the end.
 *
 * NOTE(review): the loop header, the comparison loop body and the success
 * return are not visible in this listing.
 */
4339slow_search_backward(
OnigEncoding enc, UChar* target, UChar* target_end,
4340 const UChar* text,
const UChar* adjust_text,
4341 const UChar* text_end,
const UChar* text_start)
4345 s = (UChar* )text_end;
4346 s -= (target_end - target);
4348 s = (UChar* )text_start;
/* Never start in the middle of a multibyte character. */
4350 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4353 if (*s == *target) {
4356 while (t < target_end) {
4361 if (t == target_end)
4364 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4367 return (UChar* )NULL;
/*
 * slow_search_backward_ic: case-insensitive counterpart of the backward
 * scan.
 *
 * Visible behaviour: s starts at text_end minus the target length (another
 * visible assignment sets it to text_start), is aligned to a character head
 * relative to adjust_text, and each candidate is tested with
 * str_lower_case_match(); the scan then steps to the previous character
 * head. NULL is returned at the end.
 *
 * NOTE(review): the loop header and the success return are not visible in
 * this listing.
 */
4371slow_search_backward_ic(
OnigEncoding enc,
int case_fold_flag,
4372 UChar* target, UChar* target_end,
4373 const UChar* text,
const UChar* adjust_text,
4374 const UChar* text_end,
const UChar* text_start)
4378 s = (UChar* )text_end;
4379 s -= (target_end - target);
4381 s = (UChar* )text_start;
4383 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4386 if (str_lower_case_match(enc, case_fold_flag,
4387 target, target_end, s, text_end))
4390 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4393 return (UChar* )NULL;
4396#ifndef USE_SUNDAY_QUICK_SEARCH
/*
 * bm_search_notrev (variant preceded by #ifndef USE_SUNDAY_QUICK_SEARCH):
 * skip-table search that advances through the text with enclen().
 *
 * Visible behaviour:
 *  - tail is the last target byte and tlen1 the target length minus one.
 *  - end is pulled back to text_end - tlen1 when end + tlen1 would pass
 *    text_end.
 *  - When reg->int_map is NULL the skip comes from reg->map[*se]; otherwise
 *    (only if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE) from
 *    reg->int_map[*se].
 *  - After a mismatch s advances by whole characters until it has moved at
 *    least skip bytes from t or has reached end.
 *  - t == target signals a full match and returns s; otherwise NULL.
 *
 * NOTE(review): the comparison loop and the assignments to se/t/p/end are
 * not visible in this listing.
 */
4399bm_search_notrev(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4400 const UChar* text,
const UChar* text_end,
4401 const UChar* text_range)
4403 const UChar *s, *se, *t, *p, *end;
4405 ptrdiff_t skip, tlen1;
4407# ifdef ONIG_DEBUG_SEARCH
4408 fprintf(stderr,
"bm_search_notrev: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4409 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4412 tail = target_end - 1;
4413 tlen1 = tail - target;
/* Keep a full target-sized window inside the text. */
4415 if (end + tlen1 > text_end)
4416 end = text_end - tlen1;
4420 if (IS_NULL(reg->int_map)) {
4425 if (t == target)
return (UChar* )s;
4428 skip = reg->map[*se];
4431 s += enclen(reg->enc, s, end);
4432 }
while ((s - t) < skip && s < end);
4436# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4441 if (t == target)
return (UChar* )s;
4444 skip = reg->int_map[*se];
4447 s += enclen(reg->enc, s, end);
4448 }
while ((s - t) < skip && s < end);
4453 return (UChar* )NULL;
/*
 * bm_search (variant preceded by #ifndef USE_SUNDAY_QUICK_SEARCH):
 * skip-table search that jumps by raw byte distances.
 *
 * Visible behaviour:
 *  - end is text_range plus (target length - 1); s starts at text plus
 *    (target length - 1), i.e. on the last byte of the first window.
 *  - t == target signals a full match and returns p.
 *  - In the int_map branch the advance is s += reg->int_map[*s]; the
 *    corresponding advance in the reg->map branch is not visible here.
 *  - NULL is returned when the scan ends without a match.
 *
 * NOTE(review): the loop headers and the backward comparison are not
 * visible in this listing.
 */
4458bm_search(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4459 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4461 const UChar *s, *t, *p, *end;
4464# ifdef ONIG_DEBUG_SEARCH
4465 fprintf(stderr,
"bm_search: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4466 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4469 end = text_range + (target_end - target) - 1;
4473 tail = target_end - 1;
4474 s = text + (target_end - target) - 1;
4475 if (IS_NULL(reg->int_map)) {
4479# ifdef ONIG_DEBUG_SEARCH
4480 fprintf(stderr,
"bm_search_loop: pos: %"PRIdPTR
" %s\n",
4481 (intptr_t )(s - text), s);
4484 if (t == target)
return (UChar* )p;
4491# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4496 if (t == target)
return (UChar* )p;
4499 s += reg->int_map[*s];
4503 return (UChar* )NULL;
/*
 * bm_search_notrev_ic (variant preceded by #ifndef USE_SUNDAY_QUICK_SEARCH):
 * case-insensitive skip-table search that advances with enclen().
 *
 * Visible behaviour:
 *  - tail is the last target byte and tlen1 the target length minus one;
 *    end is pulled back to text_end - tlen1 when the window would overrun.
 *  - Each candidate is tested with str_lower_case_match().
 *  - The skip comes from reg->map[*se] when reg->int_map is NULL, otherwise
 *    (only if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE) from
 *    reg->int_map[*se]; s then advances by whole characters until it has
 *    moved at least skip bytes from t or has reached end.
 *  - NULL is returned when nothing matched.
 *
 * Fix: the ONIG_DEBUG_SEARCH trace used to cast the pointers to int and
 * print them with %d, which truncates addresses where pointers are wider
 * than int. It now uses PRIuPTR with (uintptr_t) casts, exactly like the
 * sibling search functions in this file.
 *
 * NOTE(review): several interior lines are not visible in this listing.
 */
4508bm_search_notrev_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4509 const UChar* text,
const UChar* text_end,
4510 const UChar* text_range)
4512 const UChar *s, *se, *t, *end;
4514 ptrdiff_t skip, tlen1;
4516 int case_fold_flag = reg->case_fold_flag;
4518# ifdef ONIG_DEBUG_SEARCH
4519 fprintf(stderr,
"bm_search_notrev_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4520 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4523 tail = target_end - 1;
4524 tlen1 = tail - target;
/* Keep a full target-sized window inside the text. */
4526 if (end + tlen1 > text_end)
4527 end = text_end - tlen1;
4531 if (IS_NULL(reg->int_map)) {
4534 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4537 skip = reg->map[*se];
4540 s += enclen(reg->enc, s, end);
4541 }
while ((s - t) < skip && s < end);
4545# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4548 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4551 skip = reg->int_map[*se];
4554 s += enclen(reg->enc, s, end);
4555 }
while ((s - t) < skip && s < end);
4560 return (UChar* )NULL;
/*
 * bm_search_ic (variant preceded by #ifndef USE_SUNDAY_QUICK_SEARCH):
 * case-insensitive skip-table search that jumps by raw byte distances.
 *
 * Visible behaviour:
 *  - end is text_range plus (target length - 1); s starts on the last byte
 *    of the first window.
 *  - For each window, p is its first byte (s - target length + 1) and the
 *    window is tested with str_lower_case_match().
 *  - In the int_map branch the advance is s += reg->int_map[*s]; the
 *    corresponding advance in the reg->map branch is not visible here.
 *  - NULL is returned when nothing matched.
 *
 * Fix: the ONIG_DEBUG_SEARCH trace used to cast the pointers to int and
 * print them with %d, which truncates addresses where pointers are wider
 * than int. It now uses PRIuPTR with (uintptr_t) casts, exactly like the
 * sibling search functions in this file.
 *
 * NOTE(review): several interior lines are not visible in this listing.
 */
4565bm_search_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4566 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4568 const UChar *s, *p, *end;
4571 int case_fold_flag = reg->case_fold_flag;
4573# ifdef ONIG_DEBUG_SEARCH
4574 fprintf(stderr,
"bm_search_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4575 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4578 end = text_range + (target_end - target) - 1;
4582 tail = target_end - 1;
4583 s = text + (target_end - target) - 1;
4584 if (IS_NULL(reg->int_map)) {
/* p is the first byte of the window whose last byte is s. */
4586 p = s - (target_end - target) + 1;
4587 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4594# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4596 p = s - (target_end - target) + 1;
4597 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4600 s += reg->int_map[*s];
4604 return (UChar* )NULL;
/*
 * bm_search_notrev (second variant; presumably the alternative branch of
 * the USE_SUNDAY_QUICK_SEARCH conditional - TODO confirm): skip-table search
 * that advances with enclen().
 *
 * Visible differences from the first variant: the skip is looked up from
 * the byte after se (reg->map[se[1]] or reg->int_map[se[1]]), and a guard
 * (s + 1 >= end) leaves the loop before that lookup is made.
 *
 * Otherwise: end is pulled back to text_end - tlen1 when the window would
 * overrun, t == target returns s, and NULL is returned when nothing matched.
 *
 * NOTE(review): the comparison loop and the assignments to se/t/p/end are
 * not visible in this listing.
 */
4611bm_search_notrev(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4612 const UChar* text,
const UChar* text_end,
4613 const UChar* text_range)
4615 const UChar *s, *se, *t, *p, *end;
4617 ptrdiff_t skip, tlen1;
4620# ifdef ONIG_DEBUG_SEARCH
4621 fprintf(stderr,
"bm_search_notrev: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4622 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4625 tail = target_end - 1;
4626 tlen1 = tail - target;
4628 if (end + tlen1 > text_end)
4629 end = text_end - tlen1;
4633 if (IS_NULL(reg->int_map)) {
4638 if (t == target)
return (UChar* )s;
/* Stop before looking one byte past the window. */
4641 if (s + 1 >= end)
break;
4642 skip = reg->map[se[1]];
4645 s += enclen(enc, s, end);
4646 }
while ((s - t) < skip && s < end);
4650# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4655 if (t == target)
return (UChar* )s;
4658 if (s + 1 >= end)
break;
4659 skip = reg->int_map[se[1]];
4662 s += enclen(enc, s, end);
4663 }
while ((s - t) < skip && s < end);
4668 return (UChar* )NULL;
/*
 * bm_search (second variant; presumably the alternative branch of the
 * USE_SUNDAY_QUICK_SEARCH conditional - TODO confirm): skip-table search
 * that jumps by raw byte distances.
 *
 * Visible behaviour:
 *  - tail is the last target byte, tlen1 the target length minus one, and
 *    end is text_range + tlen1.
 *  - t == target signals a full match and returns p.
 *  - The advance is read from the byte after s (reg->map[s[1]] or
 *    reg->int_map[s[1]]), guarded by (s + 1 >= end) which leaves the loop.
 *  - NULL is returned when nothing matched.
 *
 * NOTE(review): the loop headers and the backward comparison are not
 * visible in this listing.
 */
4673bm_search(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4674 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4676 const UChar *s, *t, *p, *end;
4680# ifdef ONIG_DEBUG_SEARCH
4681 fprintf(stderr,
"bm_search: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4682 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4685 tail = target_end - 1;
4686 tlen1 = tail - target;
4687 end = text_range + tlen1;
4692 if (IS_NULL(reg->int_map)) {
4697 if (t == target)
return (UChar* )p;
4700 if (s + 1 >= end)
break;
4701 s += reg->map[s[1]];
4705# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4710 if (t == target)
return (UChar* )p;
4713 if (s + 1 >= end)
break;
4714 s += reg->int_map[s[1]];
4718 return (UChar* )NULL;
/*
 * bm_search_notrev_ic (second variant; presumably the alternative branch of
 * the USE_SUNDAY_QUICK_SEARCH conditional - TODO confirm): case-insensitive
 * skip-table search that advances with enclen().
 *
 * Visible behaviour:
 *  - end is pulled back to text_end - tlen1 when the window would overrun.
 *  - Each candidate is tested with str_lower_case_match().
 *  - The skip is read from the byte after se (reg->map[se[1]] or
 *    reg->int_map[se[1]]), guarded by (s + 1 >= end) which leaves the loop.
 *  - s advances by whole characters until it has moved at least skip bytes
 *    from t or has reached end; NULL is returned when nothing matched.
 *
 * NOTE(review): several interior lines are not visible in this listing.
 */
4723bm_search_notrev_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4724 const UChar* text,
const UChar* text_end,
4725 const UChar* text_range)
4727 const UChar *s, *se, *t, *end;
4729 ptrdiff_t skip, tlen1;
4731 int case_fold_flag = reg->case_fold_flag;
4733# ifdef ONIG_DEBUG_SEARCH
4734 fprintf(stderr,
"bm_search_notrev_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4735 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4738 tail = target_end - 1;
4739 tlen1 = tail - target;
4741 if (end + tlen1 > text_end)
4742 end = text_end - tlen1;
4746 if (IS_NULL(reg->int_map)) {
4749 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4752 if (s + 1 >= end)
break;
4753 skip = reg->map[se[1]];
4756 s += enclen(enc, s, end);
4757 }
while ((s - t) < skip && s < end);
4761# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4764 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4767 if (s + 1 >= end)
break;
4768 skip = reg->int_map[se[1]];
4771 s += enclen(enc, s, end);
4772 }
while ((s - t) < skip && s < end);
4777 return (UChar* )NULL;
/*
 * bm_search_ic (second variant; presumably the alternative branch of the
 * USE_SUNDAY_QUICK_SEARCH conditional - TODO confirm): case-insensitive
 * skip-table search that jumps by raw byte distances.
 *
 * Visible behaviour:
 *  - tail is the last target byte, tlen1 the target length minus one, and
 *    end is text_range + tlen1.
 *  - Each candidate is tested with str_lower_case_match().
 *  - The advance is read from the byte after s (reg->map[s[1]] or
 *    reg->int_map[s[1]]), guarded by (s + 1 >= end) which leaves the loop.
 *  - NULL is returned when nothing matched.
 *
 * NOTE(review): several interior lines are not visible in this listing.
 */
4782bm_search_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4783 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4785 const UChar *s, *p, *end;
4789 int case_fold_flag = reg->case_fold_flag;
4791# ifdef ONIG_DEBUG_SEARCH
4792 fprintf(stderr,
"bm_search_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4793 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4796 tail = target_end - 1;
4797 tlen1 = tail - target;
4798 end = text_range + tlen1;
4803 if (IS_NULL(reg->int_map)) {
4806 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4809 if (s + 1 >= end)
break;
4810 s += reg->map[s[1]];
4814# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4817 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4820 if (s + 1 >= end)
break;
4821 s += reg->int_map[s[1]];
4825 return (UChar* )NULL;
4829#ifdef USE_INT_MAP_BACKWARD
/*
 * set_bm_backward_skip: builds the skip table used by the backward
 * skip-table search for the pattern bytes [s, end).
 *
 * Visible behaviour:
 *  - If *skip is NULL, a table of ONIG_CHAR_TABLE_SIZE ints is allocated
 *    with xmalloc; allocation failure returns ONIGERR_MEMORY.
 *  - len is the pattern length in bytes, narrowed to int.
 *  - One loop visits every table slot and a second walks the pattern from
 *    index len - 1 down to 1.
 *
 * enc is marked ARG_UNUSED.
 *
 * NOTE(review): the remaining parameters, the loop bodies and the final
 * return are not visible in this listing.
 */
4831set_bm_backward_skip(UChar* s, UChar* end,
OnigEncoding enc ARG_UNUSED,
/* Allocate only when the caller has not supplied a table yet. */
4836 if (IS_NULL(*skip)) {
4837 *skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4838 if (IS_NULL(*skip))
return ONIGERR_MEMORY;
4841 len = (int )(end - s);
4842 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4845 for (i =
len - 1; i > 0; i--)
/*
 * bm_search_backward: backward skip-table search for
 * [target, target_end).
 *
 * Visible behaviour:
 *  - s starts at text_end minus the target length and is aligned to a
 *    character head relative to adjust_text.
 *  - The target is compared forward byte by byte (*p against *t); reaching
 *    target_end is tested afterwards.
 *  - On a miss s moves back by reg->int_map_backward[*s] and is re-aligned
 *    to a character head.
 *  - NULL is returned when nothing matched.
 *
 * NOTE(review): the loop header, the handling of text_start and the success
 * return are not visible in this listing.
 */
4852bm_search_backward(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4853 const UChar* text,
const UChar* adjust_text,
4854 const UChar* text_end,
const UChar* text_start)
4856 const UChar *s, *t, *p;
4858 s = text_end - (target_end - target);
4862 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4867 while (t < target_end && *p == *t) {
4870 if (t == target_end)
/* A raw byte jump may land inside a character, so re-align afterwards. */
4873 s -= reg->int_map_backward[*s];
4874 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4877 return (UChar* )NULL;
/*
 * Forward byte-map scan (presumably map_search - the line carrying the
 * function name and the enc/map parameters is not visible here; TODO
 * confirm).
 *
 * Starting at text, returns the first position before text_range whose
 * leading byte has a non-zero entry in map, stepping one encoded character
 * at a time with enclen(); returns NULL if text_range is reached first.
 */
4883 const UChar* text,
const UChar* text_range,
const UChar* text_end)
4885 const UChar *s = text;
4887 while (s < text_range) {
4888 if (map[*s])
return (UChar* )s;
4890 s += enclen(enc, s, text_end);
4892 return (UChar* )NULL;
/*
 * Backward byte-map scan (presumably map_search_backward - the line carrying
 * the function name and the enc/map parameters is not visible here; TODO
 * confirm).
 *
 * Starting at text_start, returns the first position whose leading byte has
 * a non-zero entry in map, stepping to the previous character head relative
 * to adjust_text; returns NULL when the scan ends without a hit.
 *
 * NOTE(review): the loop header, and therefore the exact lower bound of the
 * scan, is not visible in this listing.
 */
4897 const UChar* text,
const UChar* adjust_text,
4898 const UChar* text_start,
const UChar* text_end)
4900 const UChar *s = text_start;
4903 if (map[*s])
return (UChar* )s;
4905 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4907 return (UChar* )NULL;
/*
 * onig_match: attempts a match of reg against [str, end) at position at.
 *
 * Visible behaviour:
 *  - The match argument block msa is initialised with option, region and at
 *    (at is passed for both of the last two arguments).
 *  - With USE_COMBINATION_EXPLOSION_CHECK, the state-check buffer is set up
 *    from the text length, the offset of at and reg->num_comb_exp_check.
 *  - region is resized and cleared to hold reg->num_mem + 1 entries.
 *  - prev is the head of the character before at, and match_at() is called;
 *    its remaining arguments depend on
 *    USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE.
 *  - msa is released with MATCH_ARG_FREE.
 *
 * NOTE(review): the declarations, the handling of r and the return
 * statement are not visible in this listing.
 */
4911onig_match(
regex_t* reg,
const UChar* str,
const UChar* end,
const UChar* at,
OnigRegion* region,
4912 OnigOptionType option)
4918 MATCH_ARG_INIT(msa, option, region, at, at);
4919#ifdef USE_COMBINATION_EXPLOSION_CHECK
4921 ptrdiff_t offset = at - str;
4922 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4927 r = onig_region_resize_clear(region, reg->num_mem + 1);
4933 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4934 r = match_at(reg, str, end,
4935#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4941 MATCH_ARG_FREE(msa);
/*
 * forward_search_range: uses the optimisation data of reg to find the next
 * window [*low, *high] in which a match starting at or after s may begin.
 *
 * Visible behaviour:
 *  - A text shorter than reg->dmin is handled up front.
 *  - When reg->dmin > 0 the probe p is moved forward by dmin; multibyte
 *    encodings advance character by character with enclen() and return 0 if
 *    the target q is at or past end.
 *  - reg->optimize selects the search routine (slow_search, slow_search_ic,
 *    bm_search, bm_search_notrev, bm_search_ic, bm_search_notrev_ic or
 *    map_search).
 *  - A hit p below range is checked against reg->sub_anchor
 *    (ANCHOR_BEGIN_LINE / ANCHOR_END_LINE) using the neighbouring
 *    characters.
 *  - *low, *low_prev and *high are then derived from p, reg->dmin and
 *    reg->dmax. low_prev may be NULL; one visible use tests it before
 *    dereferencing.
 *
 * Fix: the call to onigenc_get_prev_char_head() inside the
 * #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE branch was missing its
 * final end argument. Every other call in this file passes
 * (enc, start, s, end), so that branch could not compile when enabled.
 *
 * NOTE(review): retry labels, several branch bodies and the return
 * statements are not visible in this listing.
 */
4946forward_search_range(
regex_t* reg,
const UChar* str,
const UChar* end, UChar* s,
4947 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4949 UChar *p, *pprev = (UChar* )NULL;
4950 size_t input_len = end - str;
4952#ifdef ONIG_DEBUG_SEARCH
4953 fprintf(stderr,
"forward_search_range: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), s: %"PRIuPTR
" (%p), range: %"PRIuPTR
" (%p)\n",
4954 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4957 if (reg->dmin > input_len) {
4962 if (reg->dmin > 0) {
4963 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4967 UChar *q = p + reg->dmin;
4969 if (q >= end)
return 0;
4970 while (p < q) p += enclen(reg->enc, p, end);
4975 switch (reg->optimize) {
4976 case ONIG_OPTIMIZE_EXACT:
4977 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4979 case ONIG_OPTIMIZE_EXACT_IC:
4980 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4981 reg->exact, reg->exact_end, p, end, range);
4984 case ONIG_OPTIMIZE_EXACT_BM:
4985 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4988 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4989 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4992 case ONIG_OPTIMIZE_EXACT_BM_IC:
4993 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4996 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4997 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
5000 case ONIG_OPTIMIZE_MAP:
5001 p = map_search(reg->enc, reg->map, p, range, end);
5005 if (p && p < range) {
5006 if (p - reg->dmin < s) {
5009 p += enclen(reg->enc, p, end);
5013 if (reg->sub_anchor) {
5016 switch (reg->sub_anchor) {
5017 case ANCHOR_BEGIN_LINE:
5018 if (!ON_STR_BEGIN(p)) {
5019 prev = onigenc_get_prev_char_head(reg->enc,
5020 (pprev ? pprev : str), p, end);
5021 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
5026 case ANCHOR_END_LINE:
5027 if (ON_STR_END(p)) {
5028#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
/* Pass end like every other onigenc_get_prev_char_head() call site. */
5029 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
5030 (pprev ? pprev : str), p, end);
5031 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
5035 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
5041 if (reg->dmax == 0) {
5045 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
5047 *low_prev = onigenc_get_prev_char_head(reg->enc,
5048 (pprev ? pprev : str), p, end);
5052 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
/* Clamp the window start to the beginning of the text. */
5053 if (p < str + reg->dmax) {
5054 *low = (UChar* )str;
5056 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
5059 *low = p - reg->dmax;
5061 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
5062 *low, end, (
const UChar** )low_prev);
5063 if (low_prev && IS_NULL(*low_prev))
5064 *low_prev = onigenc_get_prev_char_head(reg->enc,
5065 (pprev ? pprev : s), *low, end);
5069 *low_prev = onigenc_get_prev_char_head(reg->enc,
5070 (pprev ? pprev : str), *low, end);
5076 *high = p - reg->dmin;
5078#ifdef ONIG_DEBUG_SEARCH
5080 "forward_search_range success: low: %"PRIdPTR
", high: %"PRIdPTR
", dmin: %"PRIdPTR
", dmax: %"PRIdPTR
"\n",
5081 *low - str, *high - str, reg->dmin, reg->dmax);
5089#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
/*
 * backward_search_range: backward counterpart of the forward range search.
 * It uses the optimisation data of reg to locate a candidate p and derive
 * the window [*low, *high].
 *
 * Visible behaviour:
 *  - A text shorter than reg->dmin is handled up front.
 *  - reg->optimize selects the routine. The case-insensitive kinds all use
 *    slow_search_backward_ic. The BM kinds build reg->int_map_backward on
 *    demand via set_bm_backward_skip() and call bm_search_backward() when
 *    USE_INT_MAP_BACKWARD is defined, with a distance check against
 *    BM_BACKWARD_SEARCH_LENGTH_THRESHOLD. The MAP kind uses
 *    map_search_backward.
 *  - A hit is checked against reg->sub_anchor (ANCHOR_BEGIN_LINE /
 *    ANCHOR_END_LINE); for ANCHOR_END_LINE a miss steps p back one
 *    character, and a NULL result goes to fail.
 *  - With a finite reg->dmax, *low = p - dmax and *high = p - dmin, the
 *    latter right-adjusted to a character head.
 *
 * Fix: the call to onigenc_get_prev_char_head() inside the
 * #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE branch was missing its
 * final end argument. Every other call in this file passes
 * (enc, start, s, end), so that branch could not compile when enabled.
 *
 * NOTE(review): the retry/fail labels, several branch bodies and the return
 * statements are not visible in this listing.
 */
5092backward_search_range(
regex_t* reg,
const UChar* str,
const UChar* end,
5093 UChar* s,
const UChar* range, UChar* adjrange,
5094 UChar** low, UChar** high)
5097 size_t input_len = end - str;
5099 if (reg->dmin > input_len) {
5107 switch (reg->optimize) {
5108 case ONIG_OPTIMIZE_EXACT:
5110 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
5111 range, adjrange, end, p);
5114 case ONIG_OPTIMIZE_EXACT_IC:
5115 case ONIG_OPTIMIZE_EXACT_BM_IC:
5116 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
5117 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
5118 reg->exact, reg->exact_end,
5119 range, adjrange, end, p);
5122 case ONIG_OPTIMIZE_EXACT_BM:
5123 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
5124#ifdef USE_INT_MAP_BACKWARD
5125 if (IS_NULL(reg->int_map_backward)) {
5127 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
5130 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
5131 &(reg->int_map_backward));
5134 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
5141 case ONIG_OPTIMIZE_MAP:
5142 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
5147 if (reg->sub_anchor) {
5150 switch (reg->sub_anchor) {
5151 case ANCHOR_BEGIN_LINE:
5152 if (!ON_STR_BEGIN(p)) {
5153 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
5154 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
5161 case ANCHOR_END_LINE:
5162 if (ON_STR_END(p)) {
5163#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
/* Pass end like every other onigenc_get_prev_char_head() call site. */
5164 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
5165 if (IS_NULL(prev))
goto fail;
5166 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
5172 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
5173 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
5174 if (IS_NULL(p))
goto fail;
5182 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5183 *low = p - reg->dmax;
5184 *high = p - reg->dmin;
5185 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
5188#ifdef ONIG_DEBUG_SEARCH
5189 fprintf(stderr,
"backward_search_range: low: %d, high: %d\n",
5190 (
int )(*low - str), (
int )(*high - str));
5196#ifdef ONIG_DEBUG_SEARCH
5197 fprintf(stderr,
"backward_search_range: fail.\n");
/*
 * onig_search: convenience entry point that forwards to onig_search_gpos(),
 * passing start for both the global position and the search start. All
 * other arguments are handed through unchanged and the result of
 * onig_search_gpos() is returned as is.
 */
5204onig_search(
regex_t* reg,
const UChar* str,
const UChar* end,
5205 const UChar* start,
const UChar* range,
OnigRegion* region, OnigOptionType option)
5207 return onig_search_gpos(reg, str, end, start, start, range, region, option);
5211onig_search_gpos(
regex_t* reg,
const UChar* str,
const UChar* end,
5212 const UChar* global_pos,
5213 const UChar* start,
const UChar* range,
OnigRegion* region, OnigOptionType option)
5218#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5219 const UChar *orig_start = start;
5220 const UChar *orig_range = range;
5223#ifdef ONIG_DEBUG_SEARCH
5225 "onig_search (entry point): str: %"PRIuPTR
" (%p), end: %"PRIuPTR
", start: %"PRIuPTR
", range: %"PRIuPTR
"\n",
5226 (uintptr_t )str, str, end - str, start - str, range - str);
5230 r = onig_region_resize_clear(region, reg->num_mem + 1);
5231 if (r)
goto finish_no_msa;
5234 if (start > end || start < str)
goto mismatch_no_msa;
5237#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
5238# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5239# define MATCH_AND_RETURN_CHECK(upper_range) \
5240 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5242 case ONIG_MISMATCH: \
5244 case ONIGERR_TIMEOUT: \
5248 if (! IS_FIND_LONGEST(reg->options)) { \
5255# define MATCH_AND_RETURN_CHECK(upper_range) \
5256 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
5258 case ONIG_MISMATCH: \
5260 case ONIGERR_TIMEOUT: \
5270# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5271# define MATCH_AND_RETURN_CHECK(none) \
5272 r = match_at(reg, str, end, s, prev, &msa);\
5274 case ONIG_MISMATCH: \
5276 case ONIGERR_TIMEOUT: \
5280 if (! IS_FIND_LONGEST(reg->options)) { \
5287# define MATCH_AND_RETURN_CHECK(none) \
5288 r = match_at(reg, str, end, s, prev, &msa);\
5290 case ONIG_MISMATCH: \
5292 case ONIGERR_TIMEOUT: \
5305 if (reg->anchor != 0 && str < end) {
5306 UChar *min_semi_end, *max_semi_end;
5308 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
5313 if (global_pos > start)
5315 if (global_pos < range)
5316 range = global_pos + 1;
5324 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
5326 if (range > start) {
5327 if (start != str)
goto mismatch_no_msa;
5336 goto mismatch_no_msa;
5339 else if (reg->anchor & ANCHOR_END_BUF) {
5340 min_semi_end = max_semi_end = (UChar* )end;
5343 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5344 goto mismatch_no_msa;
5346 if (range > start) {
5347 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5348 start = min_semi_end - reg->anchor_dmax;
5350 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5352 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5353 range = max_semi_end - reg->anchor_dmin + 1;
5356 if (start > range)
goto mismatch_no_msa;
5361 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5362 range = min_semi_end - reg->anchor_dmax;
5364 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5365 start = max_semi_end - reg->anchor_dmin;
5366 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5368 if (range > start)
goto mismatch_no_msa;
5371 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5372 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5374 max_semi_end = (UChar* )end;
5375 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5376 min_semi_end = pre_end;
5378#ifdef USE_CRNL_AS_LINE_TERMINATOR
5379 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5380 if (IS_NOT_NULL(pre_end) &&
5381 IS_NEWLINE_CRLF(reg->options) &&
5382 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5383 min_semi_end = pre_end;
5386 if (min_semi_end > str && start <= min_semi_end) {
5391 min_semi_end = (UChar* )end;
5395 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5396 goto begin_position;
5399 else if (str == end) {
5400 static const UChar address_for_empty_string[] =
"";
5402#ifdef ONIG_DEBUG_SEARCH
5403 fprintf(stderr,
"onig_search: empty string.\n");
5406 if (reg->threshold_len == 0) {
5407 start = end = str = address_for_empty_string;
5409 prev = (UChar* )NULL;
5411 MATCH_ARG_INIT(msa, option, region, start, start);
5412#ifdef USE_COMBINATION_EXPLOSION_CHECK
5413 msa.state_check_buff = (
void* )0;
5414 msa.state_check_buff_size = 0;
5416 MATCH_AND_RETURN_CHECK(end);
5419 goto mismatch_no_msa;
5422#ifdef ONIG_DEBUG_SEARCH
5423 fprintf(stderr,
"onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5424 (
int )(end - str), (
int )(start - str), (
int )(range - str));
5427 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5428#ifdef USE_COMBINATION_EXPLOSION_CHECK
5430 ptrdiff_t offset = (MIN(start, range) - str);
5431 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5436 if (range > start) {
5438 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5440 prev = (UChar* )NULL;
5442 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5443 UChar *sch_range, *low, *high, *low_prev;
5445 sch_range = (UChar* )range;
5446 if (reg->dmax != 0) {
5447 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5448 sch_range = (UChar* )end;
5450 sch_range += reg->dmax;
5451 if (sch_range > end) sch_range = (UChar* )end;
5455 if ((end - start) < reg->threshold_len)
5458 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5460 if (! forward_search_range(reg, str, end, s, sch_range,
5461 &low, &high, &low_prev))
goto mismatch;
5467 MATCH_AND_RETURN_CHECK(orig_range);
5469 s += enclen(reg->enc, s, end);
5471 }
while (s < range);
5475 if (! forward_search_range(reg, str, end, s, sch_range,
5476 &low, &high, (UChar** )NULL))
goto mismatch;
5478 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5480 MATCH_AND_RETURN_CHECK(orig_range);
5482 s += enclen(reg->enc, s, end);
5484 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5485 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5488 s += enclen(reg->enc, s, end);
5491 }
while (s < range);
5498 MATCH_AND_RETURN_CHECK(orig_range);
5500 s += enclen(reg->enc, s, end);
5501 }
while (s < range);
5504 MATCH_AND_RETURN_CHECK(orig_range);
5508 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5509 UChar *low, *high, *adjrange, *sch_start;
5512 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5514 adjrange = (UChar* )end;
5516 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5517 (end - range) >= reg->threshold_len) {
5519 sch_start = s + reg->dmax;
5520 if (sch_start > end) sch_start = (UChar* )end;
5521 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5529 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5530 MATCH_AND_RETURN_CHECK(orig_start);
5533 }
while (s >= range);
5537 if ((end - range) < reg->threshold_len)
goto mismatch;
5540 if (reg->dmax != 0) {
5541 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5542 sch_start = (UChar* )end;
5544 sch_start += reg->dmax;
5545 if (sch_start > end) sch_start = (UChar* )end;
5547 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5548 start, sch_start, end);
5551 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5552 &low, &high) <= 0)
goto mismatch;
5557 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5558 MATCH_AND_RETURN_CHECK(orig_start);
5560 }
while (s >= range);
5564#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5565 if (IS_FIND_LONGEST(reg->options)) {
5566 if (msa.best_len >= 0) {
5575 MATCH_ARG_FREE(msa);
5579 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5580 onig_region_clear(region);
5584 if (r != ONIG_MISMATCH)
5585 fprintf(stderr,
"onig_search: error %"PRIdPTRDIFF
"\n", r);
5593 if (r != ONIG_MISMATCH)
5594 fprintf(stderr,
"onig_search: error %"PRIdPTRDIFF
"\n", r);
5599 MATCH_ARG_FREE(msa);
5603 MATCH_ARG_FREE(msa);
5604 return ONIGERR_TIMEOUT;
/*
 * onig_scan: search str repeatedly with onig_search() and pass every result
 * to scan_callback.
 *
 * NOTE(review): this listing omits several original lines (the embedded line
 * numbers jump), so only the statements visible below are described; the
 * remaining parameters, the loop header and the return paths are not shown.
 */
5608onig_scan(
regex_t* reg,
const UChar* str,
const UChar* end,
5610 int (*scan_callback)(OnigPosition, OnigPosition,
OnigRegion*,
void*),
/* Search from the current start position; the search range ends at end. */
5621 r = onig_search(reg, str, end, start, end, region, option);
/* Report the result to the caller (n is presumably the match counter -- confirm). */
5623 rs = scan_callback(n, r, region, callback_arg);
/*
 * The overall match (group 0) ended exactly at start, so resuming there
 * would not advance: stop once start has reached end, otherwise step over
 * one character.
 */
5628 if (region->end[0] == start - str) {
5629 if (start >= end)
break;
5630 start += enclen(reg->enc, start, end);
/* Presumably the else branch: resume at the end offset of the last match. */
5633 start = str + region->end[0];
/* Separate handling when the search reported ONIG_MISMATCH (body not visible). */
5638 else if (r == ONIG_MISMATCH) {
/*
 * onig_get_encoding: accessor taking a read-only regex object.
 * NOTE(review): return type and body are missing from this listing;
 * presumably returns the encoding stored in reg -- confirm against the full file.
 */
5650onig_get_encoding(
const regex_t* reg)
/*
 * onig_get_options: return the options value stored in the regex object.
 *
 * reg: regex object to query; it is only read.
 * Returns reg->options.
 */
5655extern OnigOptionType
5656onig_get_options(
const regex_t* reg)
5658 return reg->options;
/*
 * onig_get_case_fold_flag: return the case-fold flag stored in the regex object.
 *
 * reg: regex object to query; it is only read.
 * Returns reg->case_fold_flag.
 */
5661extern OnigCaseFoldType
5662onig_get_case_fold_flag(
const regex_t* reg)
5664 return reg->case_fold_flag;
/*
 * onig_get_syntax: accessor taking a read-only regex object.
 * NOTE(review): return type and body are missing from this listing;
 * presumably returns the syntax associated with reg -- confirm against the full file.
 */
5668onig_get_syntax(
const regex_t* reg)
/*
 * onig_number_of_captures: return the num_mem field of the regex object.
 *
 * reg: regex object to query; it is only read.
 * Returns reg->num_mem (presumably the number of capture groups -- confirm).
 */
5674onig_number_of_captures(
const regex_t* reg)
5676 return reg->num_mem;
/*
 * onig_number_of_capture_histories: when USE_CAPTURE_HISTORY is defined,
 * walks bit positions 0..ONIG_MAX_CAPTURE_HISTORY_GROUP (inclusive) of
 * reg->capture_history and tests each bit.
 *
 * NOTE(review): the statement executed for set bits, the non-
 * USE_CAPTURE_HISTORY branch and the return are missing from this listing;
 * presumably the set bits are counted -- confirm against the full file.
 */
5680onig_number_of_capture_histories(
const regex_t* reg)
5682#ifdef USE_CAPTURE_HISTORY
5686 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5687 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
int len
Length of the buffer.