33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(
void)
38 return OnigDefaultCaseFoldFlag;
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 OnigDefaultCaseFoldFlag = case_fold_flag;
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
55str_dup(UChar* s, UChar* end)
57 ptrdiff_t
len = end - s;
74 c = *a; *a = *b; *b = c;
76 if (NTYPE(a) == NT_STR) {
79 size_t len = sn->end - sn->s;
81 sn->end = sn->s +
len;
85 if (NTYPE(b) == NT_STR) {
88 size_t len = sn->end - sn->s;
90 sn->end = sn->s +
len;
96distance_add(OnigDistance d1, OnigDistance d2)
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2)
return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
107distance_multiply(OnigDistance d,
int m)
109 if (m == 0)
return 0;
111 if (d < ONIG_INFINITE_DISTANCE / m)
114 return ONIG_INFINITE_DISTANCE;
118bitset_is_empty(BitSetRef bs)
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0)
return 0;
129bitset_on_num(BitSetRef bs)
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr =
xrealloc(reg->p, reg->used);
155 reg->alloc = reg->used;
159 }
while ((reg = reg->chain) != 0);
163onig_bbuf_init(
BBuf* buf, OnigDistance size)
170 buf->p = (UChar* )
xmalloc(size);
171 if (IS_NULL(buf->p))
return(ONIGERR_MEMORY);
174 buf->alloc = (
unsigned int )size;
180#ifdef USE_SUBEXP_CALL
188 CHECK_NULL_RETURN_MEMERR(p);
190 uslist->alloc = size;
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
224add_opcode(
regex_t* reg,
int opcode)
226 BBUF_ADD1(reg, opcode);
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
232add_state_check_num(
regex_t* reg,
int num)
234 StateCheckNumType n = (StateCheckNumType )num;
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
242add_rel_addr(
regex_t* reg,
int addr)
244 RelAddrType ra = (RelAddrType )addr;
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
251add_abs_addr(
regex_t* reg,
int addr)
253 AbsAddrType ra = (AbsAddrType )addr;
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
262 LengthType l = (LengthType )
len;
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
269add_mem_num(
regex_t* reg,
int num)
271 MemNumType n = (MemNumType )num;
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
279add_pointer(
regex_t* reg,
void* addr)
281 PointerType ptr = (PointerType )addr;
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
289add_option(
regex_t* reg, OnigOptionType option)
291 BBUF_ADD(reg, &option, SIZE_OPTION);
296add_opcode_rel_addr(
regex_t* reg,
int opcode,
int addr)
300 r = add_opcode(reg, opcode);
302 r = add_rel_addr(reg, addr);
307add_bytes(
regex_t* reg, UChar* bytes, OnigDistance
len)
309 BBUF_ADD(reg, bytes,
len);
314add_bitset(
regex_t* reg, BitSetRef bs)
316 BBUF_ADD(reg, bs, SIZE_BITSET);
321add_opcode_option(
regex_t* reg,
int opcode, OnigOptionType option)
325 r = add_opcode(reg, opcode);
327 r = add_option(reg, option);
331static int compile_length_tree(
Node* node,
regex_t* reg);
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
340select_str_opcode(
int mb_len, OnigDistance byte_len,
int ignore_case)
343 OnigDistance str_len = roomof(byte_len, mb_len);
347 case 1: op = OP_EXACT1_IC;
break;
348 default: op = OP_EXACTN_IC;
break;
355 case 1: op = OP_EXACT1;
break;
356 case 2: op = OP_EXACT2;
break;
357 case 3: op = OP_EXACT3;
break;
358 case 4: op = OP_EXACT4;
break;
359 case 5: op = OP_EXACT5;
break;
360 default: op = OP_EXACTN;
break;
366 case 1: op = OP_EXACTMB2N1;
break;
367 case 2: op = OP_EXACTMB2N2;
break;
368 case 3: op = OP_EXACTMB2N3;
break;
369 default: op = OP_EXACTMB2N;
break;
386compile_tree_empty_check(
Node* node,
regex_t* reg,
int empty_info)
389 int saved_num_null_check = reg->num_null_check;
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
394 r = add_mem_num(reg, reg->num_null_check);
396 reg->num_null_check++;
399 r = compile_tree(node, reg);
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
411 r = add_mem_num(reg, saved_num_null_check);
416#ifdef USE_SUBEXP_CALL
422 r = add_opcode(reg, OP_CALL);
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
427 r = add_abs_addr(reg, 0 );
433compile_tree_n_times(
Node* node,
int n,
regex_t* reg)
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
445add_compile_string_length(UChar* s ARG_UNUSED,
int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED,
int ignore_case)
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
453 if (op == OP_EXACTMBN)
len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
457 len += (int )byte_len;
462add_compile_string(UChar* s,
int mb_len, OnigDistance byte_len,
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
475 add_length(reg, byte_len / mb_len);
478 add_bytes(reg, s, byte_len);
484compile_length_string_node(
Node* node,
regex_t* reg)
486 int rlen, r,
len, prev_len, blen, ambig;
492 if (sn->end <= sn->s)
495 ambig = NSTRING_IS_AMBIG(node);
498 prev_len = enclen(enc, p, sn->end);
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (
len == prev_len || ambig) {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
525 if (sn->end <= sn->s)
528 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
534 int r,
len, prev_len, blen, ambig;
536 UChar *p, *prev, *end;
540 if (sn->end <= sn->s)
544 ambig = NSTRING_IS_AMBIG(node);
547 prev_len = enclen(enc, p, end);
552 len = enclen(enc, p, end);
553 if (
len == prev_len || ambig) {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
573 if (sn->end <= sn->s)
576 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
593 r = add_bytes(reg, mbuf->p, mbuf->used);
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
615 len = SIZE_OPCODE + SIZE_BITSET;
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
636 add_opcode(reg, OP_CCLASS);
638 r = add_bitset(reg, cc->bs);
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
645 add_opcode(reg, OP_CCLASS_MB);
647 r = add_multi_byte_cclass(cc->mbuf, reg);
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
653 add_opcode(reg, OP_CCLASS_MIX);
655 r = add_bitset(reg, cc->bs);
657 r = add_multi_byte_cclass(cc->mbuf, reg);
665entry_repeat_range(
regex_t* reg,
int id,
int lower,
int upper)
667#define REPEAT_RANGE_ALLOC 4
671 if (reg->repeat_range_alloc == 0) {
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
677 else if (reg->repeat_range_alloc <=
id) {
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
687 p = reg->repeat_range;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
696compile_range_repeat_node(
QtfrNode* qn,
int target_len,
int empty_info,
700 int num_repeat = reg->num_repeat;
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
704 r = add_mem_num(reg, num_repeat);
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
717#ifdef USE_SUBEXP_CALL
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
727 r = add_mem_num(reg, num_repeat);
732is_anychar_star_quantifier(
QtfrNode* qn)
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
741#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
742#define CKN_ON (ckn > 0)
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
749 int len, mod_tlen, cklen;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
755 if (tlen < 0)
return tlen;
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
776 if (infinite && qn->lower <= 1) {
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0)
796 len = SIZE_OP_JUMP + tlen;
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
806 len = SIZE_OP_PUSH + tlen;
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
820 len += SIZE_OP_STATE_CHECK;
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
835 if (tlen < 0)
return tlen;
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
849 r = add_state_check_num(reg, ckn);
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
868 r = add_state_check_num(reg, ckn);
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
879 if (infinite && qn->lower <= 1) {
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
890 r = add_state_check_num(reg, ckn);
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (
int )SIZE_OP_JUMP
902 + (
int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
914 r = add_state_check_num(reg, ckn);
916 r = add_rel_addr(reg,
917 -(mod_tlen + (
int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) {
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
927 r = compile_tree(qn->target, reg);
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937 r = add_state_check_num(reg, ckn);
939 r = add_rel_addr(reg, tlen);
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
947 r = compile_tree(qn->target, reg);
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
953 r = add_state_check_num(reg, ckn);
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
964 r = compile_tree(qn->target, reg);
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
970 r = add_opcode(reg, OP_STATE_CHECK);
972 r = add_state_check_num(reg, ckn);
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
988 if (tlen < 0)
return tlen;
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1011 len = tlen * qn->lower;
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1028 else if (qn->upper == 0 && qn->is_referred != 0) {
1029 len = SIZE_OP_JUMP + tlen;
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1056 if (tlen < 0)
return tlen;
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH));
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
1147 else if (qn->upper == 0 && qn->is_referred != 0) {
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1150 r = compile_tree(qn->target, reg);
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1164 r = compile_tree(qn->target, reg);
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1173 r = compile_tree(qn->target, reg);
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1186 OnigOptionType prev = reg->options;
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1192 if (tlen < 0)
return tlen;
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
1206 OnigOptionType prev = reg->options;
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1213 r = add_opcode(reg, OP_FAIL);
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0)
return tlen;
1244 switch (node->type) {
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1268 len = SIZE_OP_MEMORY_START;
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1275 case ENCLOSE_STOP_BACKTRACK:
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0)
return tlen;
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1301 tlen = compile_length_tree(NCAR(x), reg);
1302 if (tlen < 0)
return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1306 tlen = compile_length_tree(NCAR(x), reg);
1307 if (tlen < 0)
return tlen;
1309 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1312 return ONIGERR_PARSER_BUG;
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1321 return ONIGERR_TYPE_BUG;
1328static int get_char_length_tree(
Node* node,
regex_t* reg,
int*
len);
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (
int )node->call_addr);
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1357 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1364 r = add_opcode(reg, OP_MEMORY_START);
1366 r = add_mem_num(reg, node->regnum);
1368 r = compile_tree(node->target, reg);
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1380 r = add_mem_num(reg, node->regnum);
1382 r = add_opcode(reg, OP_RETURN);
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1390 r = add_mem_num(reg, node->regnum);
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1398 r = add_opcode(reg, OP_MEMORY_END);
1400 r = add_mem_num(reg, node->regnum);
1404 case ENCLOSE_STOP_BACKTRACK:
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1414 len = compile_length_tree(qn->target, reg);
1417 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_POP + SIZE_OP_JUMP);
1419 r = compile_tree(qn->target, reg);
1421 r = add_opcode(reg, OP_POP);
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((
int )SIZE_OP_PUSH +
len + (
int )SIZE_OP_POP + (
int )SIZE_OP_JUMP));
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1430 r = compile_tree(node->target, reg);
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1441 r = add_mem_num(reg, node->regnum);
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1448 len = compile_length_tree(NCAR(x), reg);
1450 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1452 len2 = compile_length_tree(NCAR(x), reg);
1453 if (len2 < 0)
return len2;
1454 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1457 r = add_rel_addr(reg,
len + SIZE_OP_JUMP);
1459 r = compile_tree(NCAR(x), reg);
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1464 r = compile_tree(NCAR(x), reg);
1467 return ONIGERR_PARSER_BUG;
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1477 r = add_opcode_rel_addr(reg, OP_ABSENT,
len + SIZE_OP_ABSENT_END);
1479 r = compile_tree(node->target, reg);
1481 r = add_opcode(reg, OP_ABSENT_END);
1485 return ONIGERR_TYPE_BUG;
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0)
return tlen;
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF);
break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF);
break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE);
break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE);
break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF);
break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION);
break;
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP);
break;
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1561 r = compile_tree(node->target, reg);
1563 r = add_opcode(reg, OP_POP_POS);
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT,
len + SIZE_OP_FAIL_POS);
1571 r = compile_tree(node->target, reg);
1573 r = add_opcode(reg, OP_FAIL_POS);
1576 case ANCHOR_LOOK_BEHIND:
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1587 r = add_length(reg, n);
1589 r = compile_tree(node->target, reg);
1593 case ANCHOR_LOOK_BEHIND_NOT:
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1606 r = add_length(reg, n);
1608 r = compile_tree(node->target, reg);
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1615 return ONIGERR_TYPE_BUG;
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0)
return r;
1635 }
while (IS_NOT_NULL(node = NCDR(node)));
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0)
return r;
1648 }
while (IS_NOT_NULL(node = NCDR(node)));
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1658 r = compile_length_string_node(node, reg);
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1691#ifdef USE_SUBEXP_CALL
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1710 return ONIGERR_TYPE_BUG;
1726 r = compile_tree(NCAR(node), reg);
1727 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1739 }
while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used +
len;
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_JUMP);
1748 r = compile_tree(NCAR(node), reg);
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1755 }
while (IS_NOT_NULL(node = NCDR(node)));
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1763 r = compile_string_node(node, reg);
1767 r = compile_cclass_node(NCCLASS(node), reg);
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1786 return ONIGERR_TYPE_BUG;
1789 r = add_opcode(reg, op);
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1797 r = add_opcode(reg, OP_ANYCHAR);
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1810 r = add_length(reg, br->nest_level);
1813 goto add_bacref_mems;
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1822 r = add_mem_num(reg, n);
1826 case 1: r = add_opcode(reg, OP_BACKREF1);
break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2);
break;
1829 r = add_opcode(reg, OP_BACKREFN);
1831 r = add_mem_num(reg, n);
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1848#ifdef USE_BACKREF_WITH_LEVEL
1851 r = add_length(reg, br->back_num);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1862#ifdef USE_SUBEXP_CALL
1864 r = compile_call(NCALL(node), reg);
1869 r = compile_quantifier_node(NQTFR(node), reg);
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1877 r = compile_anchor_node(NANCHOR(node), reg);
1882 fprintf(stderr,
"compile_tree: undefined node type %d\n", NTYPE(node));
1890#ifdef USE_NAMED_GROUP
1896 Node* node = *plink;
1898 switch (NTYPE(node)) {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1926 else if (en->regnum != 0) {
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1934 r = noname_disable_map(&(en->target), map, counter);
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1953 int i, pos, n, old_num;
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1964 backs = bn->back_dynamic;
1966 for (i = 0, pos = 0; i < old_num; i++) {
1967 if (backs[i] > num_mem)
return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1984 switch (NTYPE(node)) {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem)
return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2001 r = renumber_by_map(en->target, map, num_mem);
2006 r = renumber_node_backref(node, map, num_mem);
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2022numbered_ref_check(
Node* node)
2026 switch (NTYPE(node)) {
2030 r = numbered_ref_check(NCAR(node));
2031 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2034 r = numbered_ref_check(NQTFR(node)->target);
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2060 int r, i, pos, counter;
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0)
return r;
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0)
return r;
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2094 return onig_renumber_name_table(reg, map);
2098#ifdef USE_SUBEXP_CALL
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en))
return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2120quantifiers_memory_node_info(
Node* node)
2124 switch (NTYPE(node)) {
2130 v = quantifiers_memory_node_info(NCAR(node));
2132 }
while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2136# ifdef USE_SUBEXP_CALL
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC;
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2190get_min_match_length(
Node* node, OnigDistance *min,
ScanEnv* env)
2196 switch (NTYPE(node)) {
2201 Node** nodes = SCANENV_MEM_NODES(env);
2203 if (br->state & NST_RECURSION)
break;
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2213 if (*min > tmin) *min = tmin;
2218#ifdef USE_SUBEXP_CALL
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2243 r = get_min_match_length(x, &tmin, env);
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 }
while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2254 *min = sn->end - sn->s;
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2274 *min = distance_multiply(*min, qn->lower);
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2307 case ENCLOSE_ABSENT:
2322get_max_match_length(
Node* node, OnigDistance *max,
ScanEnv* env)
2328 switch (NTYPE(node)) {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2333 *max = distance_add(*max, tmax);
2334 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2347 *max = sn->end - sn->s;
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2364 Node** nodes = SCANENV_MEM_NODES(env);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2375 if (*max < tmax) *max = tmax;
2380#ifdef USE_SUBEXP_CALL
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2385 *max = ONIG_INFINITE_DISTANCE;
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2399 *max = ONIG_INFINITE_DISTANCE;
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2433 case ENCLOSE_ABSENT:
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2452get_char_length_tree1(
Node* node,
regex_t* reg,
int*
len,
int level)
2459 switch (NTYPE(node)) {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2464 *
len = (int )distance_add(*
len, tlen);
2465 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2486 r = GET_CHAR_LEN_VARLEN;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2511 *
len = (int )distance_multiply(tlen, qn->lower);
2514 r = GET_CHAR_LEN_VARLEN;
2518#ifdef USE_SUBEXP_CALL
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg,
len, level);
2523 r = GET_CHAR_LEN_VARLEN;
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *
len = en->char_len;
2545 r = get_char_length_tree1(en->target, reg,
len, level);
2547 en->char_len = *
len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg,
len, level);
2558 case ENCLOSE_ABSENT:
2569 r = GET_CHAR_LEN_VARLEN;
2579 return get_char_length_tree1(node, reg,
len, 0);
2599 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2600 NCTYPE(y)->not != NCTYPE(x)->not &&
2601 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2611 tmp = x; x = y; y = tmp;
2631 switch (NCTYPE(y)->ctype) {
2632 case ONIGENC_CTYPE_WORD:
2633 if (NCTYPE(y)->not == 0) {
2634 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2635 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2636 if (BITSET_AT(xc->bs, i)) {
2637 if (NCTYPE(y)->ascii_range) {
2638 if (IS_CODE_SB_WORD(reg->enc, i))
return 0;
2641 if (ONIGENC_IS_CODE_WORD(reg->enc, i))
return 0;
2650 if (IS_NOT_NULL(xc->mbuf))
return 0;
2651 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2653 if (NCTYPE(y)->ascii_range)
2654 is_word = IS_CODE_SB_WORD(reg->enc, i);
2656 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2658 if (!IS_NCCLASS_NOT(xc)) {
2659 if (BITSET_AT(xc->bs, i))
2663 if (! BITSET_AT(xc->bs, i))
2682 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2683 v = BITSET_AT(xc->bs, i);
2684 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2685 (v == 0 && IS_NCCLASS_NOT(xc))) {
2686 v = BITSET_AT(yc->bs, i);
2687 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2688 (v == 0 && IS_NCCLASS_NOT(yc)))
2692 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2693 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2712 if (NSTRING_LEN(x) == 0)
2717 switch (NCTYPE(y)->ctype) {
2718 case ONIGENC_CTYPE_WORD:
2719 if (NCTYPE(y)->ascii_range) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2721 return NCTYPE(y)->not;
2723 return !(NCTYPE(y)->not);
2726 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2727 return NCTYPE(y)->not;
2729 return !(NCTYPE(y)->not);
2741 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2742 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2743 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2751 len = NSTRING_LEN(x);
2752 if (
len > NSTRING_LEN(y))
len = NSTRING_LEN(y);
2753 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2758 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i <
len; i++, p++, q++) {
2759 if (*p != *q)
return 1;
2779get_head_value_node(
Node* node,
int exact,
regex_t* reg)
2781 Node* n = NULL_NODE;
2783 switch (NTYPE(node)) {
2787#ifdef USE_SUBEXP_CALL
2800 n = get_head_value_node(NCAR(node), exact, reg);
2807 if (sn->end <= sn->s)
2811 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2822 if (qn->lower > 0) {
2823#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2824 if (IS_NOT_NULL(qn->head_exact))
2828 n = get_head_value_node(qn->target, exact, reg);
2837 case ENCLOSE_OPTION:
2839 OnigOptionType options = reg->options;
2841 reg->options = NENCLOSE(node)->option;
2842 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2843 reg->options = options;
2847 case ENCLOSE_MEMORY:
2848 case ENCLOSE_STOP_BACKTRACK:
2849 case ENCLOSE_CONDITION:
2850 n = get_head_value_node(en->target, exact, reg);
2853 case ENCLOSE_ABSENT:
2860 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2861 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2872check_type_tree(
Node* node,
int type_mask,
int enclose_mask,
int anchor_mask)
2877 if ((NTYPE2BIT(type) & type_mask) == 0)
2884 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2886 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2890 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2897 if ((en->type & enclose_mask) == 0)
2900 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2905 type = NANCHOR(node)->type;
2906 if ((type & anchor_mask) == 0)
2909 if (NANCHOR(node)->target)
2910 r = check_type_tree(NANCHOR(node)->target,
2911 type_mask, enclose_mask, anchor_mask);
2920#ifdef USE_SUBEXP_CALL
2922# define RECURSION_EXIST 1
2923# define RECURSION_INFINITE 2
2926subexp_inf_recursive_check(
Node* node,
ScanEnv* env,
int head)
2941 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2942 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2945 ret = get_min_match_length(NCAR(x), &min, env);
2946 if (ret != 0)
return ret;
2947 if (min != 0) head = 0;
2949 }
while (IS_NOT_NULL(x = NCDR(x)));
2956 r = RECURSION_EXIST;
2958 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2959 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2961 }
while (IS_NOT_NULL(node = NCDR(node)));
2966 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2967 if (r == RECURSION_EXIST) {
2968 if (NQTFR(node)->lower == 0) r = 0;
2976 case ANCHOR_PREC_READ:
2977 case ANCHOR_PREC_READ_NOT:
2978 case ANCHOR_LOOK_BEHIND:
2979 case ANCHOR_LOOK_BEHIND_NOT:
2980 r = subexp_inf_recursive_check(an->target, env, head);
2987 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2991 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2993 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2994 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2996 SET_ENCLOSE_STATUS(node, NST_MARK2);
2997 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2998 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3010subexp_inf_recursive_check_trav(
Node* node,
ScanEnv* env)
3020 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3021 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3025 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3032 case ANCHOR_PREC_READ:
3033 case ANCHOR_PREC_READ_NOT:
3034 case ANCHOR_LOOK_BEHIND:
3035 case ANCHOR_LOOK_BEHIND_NOT:
3036 r = subexp_inf_recursive_check_trav(an->target, env);
3046 if (IS_ENCLOSE_RECURSION(en)) {
3047 SET_ENCLOSE_STATUS(node, NST_MARK1);
3048 r = subexp_inf_recursive_check(en->target, env, 1);
3049 if (r > 0)
return ONIGERR_NEVER_ENDING_RECURSION;
3050 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3052 r = subexp_inf_recursive_check_trav(en->target, env);
3065subexp_recursive_check(
Node* node)
3069 switch (NTYPE(node)) {
3073 r |= subexp_recursive_check(NCAR(node));
3074 }
while (IS_NOT_NULL(node = NCDR(node)));
3078 r = subexp_recursive_check(NQTFR(node)->target);
3085 case ANCHOR_PREC_READ:
3086 case ANCHOR_PREC_READ_NOT:
3087 case ANCHOR_LOOK_BEHIND:
3088 case ANCHOR_LOOK_BEHIND_NOT:
3089 r = subexp_recursive_check(an->target);
3096 r = subexp_recursive_check(NCALL(node)->target);
3097 if (r != 0) SET_CALL_RECURSION(node);
3101 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3103 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3106 SET_ENCLOSE_STATUS(node, NST_MARK2);
3107 r = subexp_recursive_check(NENCLOSE(node)->target);
3108 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3121subexp_recursive_check_trav(
Node* node,
ScanEnv* env)
3123# define FOUND_CALLED_NODE 1
3135 ret = subexp_recursive_check_trav(NCAR(node), env);
3136 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3137 else if (ret < 0)
return ret;
3138 }
while (IS_NOT_NULL(node = NCDR(node)));
3143 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3144 if (NQTFR(node)->upper == 0) {
3145 if (r == FOUND_CALLED_NODE)
3146 NQTFR(node)->is_referred = 1;
3154 case ANCHOR_PREC_READ:
3155 case ANCHOR_PREC_READ_NOT:
3156 case ANCHOR_LOOK_BEHIND:
3157 case ANCHOR_LOOK_BEHIND_NOT:
3158 r = subexp_recursive_check_trav(an->target, env);
3168 if (! IS_ENCLOSE_RECURSION(en)) {
3169 if (IS_ENCLOSE_CALLED(en)) {
3170 SET_ENCLOSE_STATUS(node, NST_MARK1);
3171 r = subexp_recursive_check(en->target);
3172 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3173 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3176 r = subexp_recursive_check_trav(en->target, env);
3177 if (IS_ENCLOSE_CALLED(en))
3178 r |= FOUND_CALLED_NODE;
3199 r = setup_subexp_call(NCAR(node), env);
3200 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3205 r = setup_subexp_call(NCAR(node), env);
3206 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3210 r = setup_subexp_call(NQTFR(node)->target, env);
3213 r = setup_subexp_call(NENCLOSE(node)->target, env);
3219 Node** nodes = SCANENV_MEM_NODES(env);
3221 if (cn->group_num != 0) {
3222 int gnum = cn->group_num;
3224# ifdef USE_NAMED_GROUP
3225 if (env->num_named > 0 &&
3226 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3227 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3228 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3231 if (gnum > env->num_mem) {
3232 onig_scan_env_set_error_string(env,
3233 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3234 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3237# ifdef USE_NAMED_GROUP
3240 cn->target = nodes[cn->group_num];
3241 if (IS_NULL(cn->target)) {
3242 onig_scan_env_set_error_string(env,
3243 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3244 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3246 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3247 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3248 cn->unset_addr_list = env->unset_addr_list;
3250# ifdef USE_NAMED_GROUP
3251# ifdef USE_PERL_SUBEXP_CALL
3252 else if (cn->name == cn->name_end) {
3259 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3262 onig_scan_env_set_error_string(env,
3263 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3264 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3267 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3268 onig_scan_env_set_error_string(env,
3269 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3270 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3273 cn->group_num = refs[0];
3286 case ANCHOR_PREC_READ:
3287 case ANCHOR_PREC_READ_NOT:
3288 case ANCHOR_LOOK_BEHIND:
3289 case ANCHOR_LOOK_BEHIND_NOT:
3290 r = setup_subexp_call(an->target, env);
3309divide_look_behind_alternatives(
Node* node)
3311 Node *head, *np, *insert_node;
3313 int anc_type = an->type;
3317 swap_node(node, head);
3319 NANCHOR(head)->target = np;
3322 while ((np = NCDR(np)) != NULL_NODE) {
3323 insert_node = onig_node_new_anchor(anc_type);
3324 CHECK_NULL_RETURN_MEMERR(insert_node);
3325 NANCHOR(insert_node)->target = NCAR(np);
3326 NCAR(np) = insert_node;
3329 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3332 SET_NTYPE(np, NT_LIST);
3333 }
while ((np = NCDR(np)) != NULL_NODE);
3344 r = get_char_length_tree(an->target, reg, &
len);
3347 else if (r == GET_CHAR_LEN_VARLEN)
3348 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3349 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3350 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3351 r = divide_look_behind_alternatives(node);
3353 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3366 if (type == NT_QTFR) {
3368 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3369#ifdef USE_QTFR_PEEK_NEXT
3370 Node* n = get_head_value_node(next_node, 1, reg);
3372 if (IS_NOT_NULL(n) && NSTR(n)->s[0] !=
'\0') {
3373 qn->next_head_exact = n;
3377 if (qn->lower <= 1) {
3378 int ttype = NTYPE(qn->target);
3379 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3381 x = get_head_value_node(qn->target, 0, reg);
3382 if (IS_NOT_NULL(x)) {
3383 y = get_head_value_node(next_node, 0, reg);
3384 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3385 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3386 CHECK_NULL_RETURN_MEMERR(en);
3387 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3388 swap_node(node, en);
3389 NENCLOSE(node)->target = en;
3396 else if (type == NT_ENCLOSE) {
3398 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3408update_string_node_case_fold(
regex_t* reg,
Node *node)
3410 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3411 UChar *sbuf, *ebuf, *sp;
3413 OnigDistance sbuf_size;
3417 sbuf_size = (end - sn->s) * 2;
3418 sbuf = (UChar* )
xmalloc(sbuf_size);
3419 CHECK_NULL_RETURN_MEMERR(sbuf);
3420 ebuf = sbuf + sbuf_size;
3425 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3426 for (i = 0; i <
len; i++) {
3428 UChar* p = (UChar* )
xrealloc(sbuf, sbuf_size * 2);
3431 return ONIGERR_MEMORY;
3434 sp = sbuf + sbuf_size;
3436 ebuf = sbuf + sbuf_size;
3443 r = onig_node_str_set(node, sbuf, sp);
3450expand_case_fold_make_rem_string(
Node** rnode, UChar *s, UChar *end,
3456 node = onig_node_new_str(s, end);
3457 if (IS_NULL(node))
return ONIGERR_MEMORY;
3459 r = update_string_node_case_fold(reg, node);
3461 onig_node_free(node);
3465 NSTRING_SET_AMBIG(node);
3466 NSTRING_SET_DONT_GET_OPT_INFO(node);
3477 for (i = 0; i < item_num; i++) {
3478 if (items[i].byte_len != slen) {
3481 if (items[i].code_len != 1) {
3490 UChar *p,
int slen, UChar *end,
3493 int r, i, j,
len, varlen;
3494 Node *anode, *var_anode, *snode, *xnode, *an;
3495 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3497 *rnode = var_anode = NULL_NODE;
3500 for (i = 0; i < item_num; i++) {
3501 if (items[i].byte_len != slen) {
3508 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3509 if (IS_NULL(var_anode))
return ONIGERR_MEMORY;
3511 xnode = onig_node_new_list(NULL, NULL);
3512 if (IS_NULL(xnode))
goto mem_err;
3513 NCAR(var_anode) = xnode;
3515 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3516 if (IS_NULL(anode))
goto mem_err;
3517 NCAR(xnode) = anode;
3520 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3521 if (IS_NULL(anode))
return ONIGERR_MEMORY;
3524 snode = onig_node_new_str(p, p + slen);
3525 if (IS_NULL(snode))
goto mem_err;
3527 NCAR(anode) = snode;
3529 for (i = 0; i < item_num; i++) {
3530 snode = onig_node_new_str(NULL, NULL);
3531 if (IS_NULL(snode))
goto mem_err;
3533 for (j = 0; j < items[i].code_len; j++) {
3534 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3540 r = onig_node_str_cat(snode, buf, buf +
len);
3541 if (r != 0)
goto mem_err2;
3544 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3549 if (items[i].byte_len != slen) {
3551 UChar *q = p + items[i].byte_len;
3554 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3560 xnode = onig_node_list_add(NULL_NODE, snode);
3561 if (IS_NULL(xnode)) {
3563 onig_node_free(rem);
3566 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3568 onig_node_free(xnode);
3569 onig_node_free(rem);
3579 NCDR(var_anode) = an;
3592 onig_node_free(snode);
3595 onig_node_free(*rnode);
3597 return ONIGERR_MEMORY;
3603#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3605 int r, n,
len, alt_num;
3607 UChar *start, *end, *p;
3608 Node *top_root, *root, *snode, *prev_node;
3612 if (NSTRING_IS_AMBIG(node))
return 0;
3616 if (start >= end)
return 0;
3619 top_root = root = prev_node = snode = NULL_NODE;
3623 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3630 len = enclen(reg->enc, p, end);
3632 varlen = is_case_fold_variable_len(n, items,
len);
3633 if (n == 0 || varlen == 0) {
3634 if (IS_NULL(snode)) {
3635 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3636 onig_node_free(top_root);
3637 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3638 if (IS_NULL(root)) {
3639 onig_node_free(prev_node);
3644 prev_node = snode = onig_node_new_str(NULL, NULL);
3645 if (IS_NULL(snode))
goto mem_err;
3646 if (IS_NOT_NULL(root)) {
3647 if (IS_NULL(onig_node_list_add(root, snode))) {
3648 onig_node_free(snode);
3654 r = onig_node_str_cat(snode, p, p +
len);
3655 if (r != 0)
goto err;
3659 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION)
break;
3661 if (IS_NOT_NULL(snode)) {
3662 r = update_string_node_case_fold(reg, snode);
3664 NSTRING_SET_AMBIG(snode);
3667 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3668 onig_node_free(top_root);
3669 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3670 if (IS_NULL(root)) {
3671 onig_node_free(prev_node);
3676 r = expand_case_fold_string_alt(n, items, p,
len, end, reg, &prev_node);
3677 if (r < 0)
goto mem_err;
3679 if (IS_NULL(root)) {
3680 top_root = prev_node;
3683 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3684 onig_node_free(prev_node);
3689 root = NCAR(prev_node);
3692 if (IS_NOT_NULL(root)) {
3693 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3694 onig_node_free(prev_node);
3705 if (IS_NOT_NULL(snode)) {
3706 r = update_string_node_case_fold(reg, snode);
3708 NSTRING_SET_AMBIG(snode);
3715 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3716 if (r != 0)
goto mem_err;
3718 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3719 onig_node_free(top_root);
3720 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3721 if (IS_NULL(root)) {
3722 onig_node_free(srem);
3723 onig_node_free(prev_node);
3728 if (IS_NULL(root)) {
3732 if (IS_NULL(onig_node_list_add(root, srem))) {
3733 onig_node_free(srem);
3740 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3741 swap_node(node, top_root);
3742 onig_node_free(top_root);
3749 onig_node_free(top_root);
3754#ifdef USE_COMBINATION_EXPLOSION_CHECK
3756# define CEC_THRES_NUM_BIG_REPEAT 512
3757# define CEC_INFINITE_NUM 0x7fffffff
3759# define CEC_IN_INFINITE_REPEAT (1<<0)
3760# define CEC_IN_FINITE_REPEAT (1<<1)
3761# define CEC_CONT_BIG_REPEAT (1<<2)
3764setup_comb_exp_check(
Node* node,
int state,
ScanEnv* env)
3774 r = setup_comb_exp_check(NCAR(node), r, env);
3775 }
while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3783 ret = setup_comb_exp_check(NCAR(node), state, env);
3785 }
while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3791 int child_state = state;
3794 Node* target = qn->target;
3797 if (! IS_REPEAT_INFINITE(qn->upper)) {
3798 if (qn->upper > 1) {
3800 child_state |= CEC_IN_FINITE_REPEAT;
3803 if (env->backrefed_mem == 0) {
3804 if (NTYPE(qn->target) == NT_ENCLOSE) {
3806 if (en->type == ENCLOSE_MEMORY) {
3807 if (NTYPE(en->target) == NT_QTFR) {
3809 if (IS_REPEAT_INFINITE(q->upper)
3810 && q->greedy == qn->greedy) {
3811 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3813 child_state = state;
3822 if (state & CEC_IN_FINITE_REPEAT) {
3823 qn->comb_exp_check_num = -1;
3826 if (IS_REPEAT_INFINITE(qn->upper)) {
3827 var_num = CEC_INFINITE_NUM;
3828 child_state |= CEC_IN_INFINITE_REPEAT;
3831 var_num = qn->upper - qn->lower;
3834 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3835 add_state |= CEC_CONT_BIG_REPEAT;
3837 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3838 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3839 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3840 if (qn->comb_exp_check_num == 0) {
3841 env->num_comb_exp_check++;
3842 qn->comb_exp_check_num = env->num_comb_exp_check;
3843 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3844 env->comb_exp_max_regnum = env->curr_max_regnum;
3849 r = setup_comb_exp_check(target, child_state, env);
3859 case ENCLOSE_MEMORY:
3861 if (env->curr_max_regnum < en->regnum)
3862 env->curr_max_regnum = en->regnum;
3864 r = setup_comb_exp_check(en->target, state, env);
3869 r = setup_comb_exp_check(en->target, state, env);
3875# ifdef USE_SUBEXP_CALL
3877 if (IS_CALL_RECURSION(NCALL(node)))
3878 env->has_recursion = 1;
3880 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3892#define IN_ALT (1<<0)
3893#define IN_NOT (1<<1)
3894#define IN_REPEAT (1<<2)
3895#define IN_VAR_REPEAT (1<<3)
3896#define IN_CALL (1<<4)
3897#define IN_RECCALL (1<<5)
3918 Node* prev = NULL_NODE;
3920 r = setup_tree(NCAR(node), reg, state, env);
3921 if (IS_NOT_NULL(prev) && r == 0) {
3922 r = next_setup(prev, NCAR(node), reg);
3925 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3931 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3932 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3939 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3940 r = expand_case_fold_string(node, reg);
3948#ifdef USE_SUBEXP_CALL
3957 Node** nodes = SCANENV_MEM_NODES(env);
3960 for (i = 0; i < br->back_num; i++) {
3961 if (p[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
3962 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3963 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3964#ifdef USE_BACKREF_WITH_LEVEL
3965 if (IS_BACKREF_NEST_LEVEL(br)) {
3966 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3969 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3978 Node* target = qn->target;
3980 if ((state & IN_REPEAT) != 0) {
3981 qn->state |= NST_IN_REPEAT;
3984 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3985 r = get_min_match_length(target, &d, env);
3988 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3989#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3990 r = quantifiers_memory_node_info(target);
3993 qn->target_empty_info = r;
3997 r = get_max_match_length(target, &d, env);
3998 if (r == 0 && d == 0) {
4001 if (qn->lower > 1) qn->lower = 1;
4002 if (NTYPE(target) == NT_STR) {
4003 qn->upper = qn->lower = 0;
4011 if (qn->lower != qn->upper)
4012 state |= IN_VAR_REPEAT;
4013 r = setup_tree(target, reg, state, env);
4017#define EXPAND_STRING_MAX_LENGTH 100
4018 if (NTYPE(target) == NT_STR) {
4019 if (qn->lower > 1) {
4020 int i, n = qn->lower;
4021 OnigDistance
len = NSTRING_LEN(target);
4025 np = onig_node_new_str(sn->s, sn->end);
4026 if (IS_NULL(np))
return ONIGERR_MEMORY;
4027 NSTR(np)->flag = sn->flag;
4029 for (i = 1; i < n && (i+1) *
len <= EXPAND_STRING_MAX_LENGTH; i++) {
4030 r = onig_node_str_cat(np, sn->s, sn->end);
4036 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4040 if (! IS_REPEAT_INFINITE(qn->upper))
4043 np1 = onig_node_new_list(np, NULL);
4046 return ONIGERR_MEMORY;
4048 swap_node(np1, node);
4049 np2 = onig_node_list_add(node, np1);
4051 onig_node_free(np1);
4052 return ONIGERR_MEMORY;
4056 swap_node(np, node);
4063#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4064 if (qn->greedy && (qn->target_empty_info != 0)) {
4065 if (NTYPE(target) == NT_QTFR) {
4067 if (IS_NOT_NULL(tqn->head_exact)) {
4068 qn->head_exact = tqn->head_exact;
4069 tqn->head_exact = NULL;
4073 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4085 case ENCLOSE_OPTION:
4087 OnigOptionType options = reg->options;
4088 reg->options = NENCLOSE(node)->option;
4089 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4090 reg->options = options;
4094 case ENCLOSE_MEMORY:
4095 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4096 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4099 if (IS_ENCLOSE_CALLED(en))
4101 if (IS_ENCLOSE_RECURSION(en))
4102 state |= IN_RECCALL;
4103 else if ((state & IN_RECCALL) != 0)
4104 SET_CALL_RECURSION(node);
4105 r = setup_tree(en->target, reg, state, env);
4108 case ENCLOSE_STOP_BACKTRACK:
4110 Node* target = en->target;
4111 r = setup_tree(target, reg, state, env);
4112 if (NTYPE(target) == NT_QTFR) {
4114 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4116 int qtype = NTYPE(tqn->target);
4117 if (IS_NODE_TYPE_SIMPLE(qtype))
4118 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4124 case ENCLOSE_CONDITION:
4125#ifdef USE_NAMED_GROUP
4126 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4127 env->num_named > 0 &&
4128 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4129 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4130 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4133 if (NENCLOSE(node)->regnum > env->num_mem)
4134 return ONIGERR_INVALID_BACKREF;
4135 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4138 case ENCLOSE_ABSENT:
4139 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4150 case ANCHOR_PREC_READ:
4151 r = setup_tree(an->target, reg, state, env);
4153 case ANCHOR_PREC_READ_NOT:
4154 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4158#define ALLOWED_TYPE_IN_LB \
4159 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4160 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4162#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4163#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4165#define ALLOWED_ANCHOR_IN_LB \
4166( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4167 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4168 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4169 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4170#define ALLOWED_ANCHOR_IN_LB_NOT \
4171( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4172 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4173 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4174 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4176 case ANCHOR_LOOK_BEHIND:
4178 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4179 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4180 if (r < 0)
return r;
4181 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4182 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4183 r = setup_tree(an->target, reg, state, env);
4184 if (r != 0)
return r;
4185 r = setup_look_behind(node, reg, env);
4189 case ANCHOR_LOOK_BEHIND_NOT:
4191 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4192 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4193 if (r < 0)
return r;
4194 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4195 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4196 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4197 if (r != 0)
return r;
4198 r = setup_look_behind(node, reg, env);
4212#ifndef USE_SUNDAY_QUICK_SEARCH
4215set_bm_skip(UChar* s, UChar* end,
regex_t* reg,
4216 UChar skip[],
int** int_skip,
int ignore_case)
4218 OnigDistance i,
len;
4219 int clen, flen, n, j, k;
4220 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4225 if (
len < ONIG_CHAR_TABLE_SIZE) {
4226 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )
len;
4229 for (i = 0; i <
len - 1; i += clen) {
4232 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4234 clen = enclen(enc, p, end);
4236 clen = (int )(end - p);
4238 for (j = 0; j < n; j++) {
4239 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4241 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4245 for (j = 0; j < clen; j++) {
4246 skip[s[i + j]] = (UChar )(
len - 1 - i - j);
4247 for (k = 0; k < n; k++) {
4248 skip[buf[k][j]] = (UChar )(
len - 1 - i - j);
4254# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4256 return ONIGERR_TYPE_BUG;
4258 if (IS_NULL(*int_skip)) {
4259 *int_skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4260 if (IS_NULL(*int_skip))
return ONIGERR_MEMORY;
4262 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )
len;
4265 for (i = 0; i <
len - 1; i += clen) {
4268 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4270 clen = enclen(enc, p, end);
4272 clen = (int )(end - p);
4274 for (j = 0; j < n; j++) {
4275 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4277 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4281 for (j = 0; j < clen; j++) {
4282 (*int_skip)[s[i + j]] = (int )(
len - 1 - i - j);
4283 for (k = 0; k < n; k++) {
4284 (*int_skip)[buf[k][j]] = (int )(
len - 1 - i - j);
4297set_bm_skip(UChar* s, UChar* end,
regex_t* reg,
4298 UChar skip[],
int** int_skip,
int ignore_case)
4300 OnigDistance i,
len;
4301 int clen, flen, n, j, k;
4302 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4307 if (
len < ONIG_CHAR_TABLE_SIZE) {
4308 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(
len + 1);
4311 for (i = 0; i <
len; i += clen) {
4314 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4316 clen = enclen(enc, p, end);
4318 clen = (int )(end - p);
4320 for (j = 0; j < n; j++) {
4321 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4323 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4327 for (j = 0; j < clen; j++) {
4328 skip[s[i + j]] = (UChar )(
len - i - j);
4329 for (k = 0; k < n; k++) {
4330 skip[buf[k][j]] = (UChar )(
len - i - j);
4336# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4338 return ONIGERR_TYPE_BUG;
4340 if (IS_NULL(*int_skip)) {
4341 *int_skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4342 if (IS_NULL(*int_skip))
return ONIGERR_MEMORY;
4344 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(
len + 1);
4347 for (i = 0; i <
len; i += clen) {
4350 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4352 clen = enclen(enc, p, end);
4354 clen = (int )(end - p);
4356 for (j = 0; j < n; j++) {
4357 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4359 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4363 for (j = 0; j < clen; j++) {
4364 (*int_skip)[s[i + j]] = (int )(
len - i - j);
4365 for (k = 0; k < n; k++) {
4366 (*int_skip)[buf[k][j]] = (int )(
len - i - j);
4384 OnigOptionType options;
4385 OnigCaseFoldType case_fold_flag;
4401 UChar s[OPT_EXACT_MAXLEN];
4409 UChar map[ONIG_CHAR_TABLE_SIZE];
4427 static const short int ByteValTable[] = {
4428 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4429 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4430 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4431 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4432 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4433 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4434 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4435 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4438 if (i < numberof(ByteValTable)) {
4439 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4442 return (
int )ByteValTable[i];
4452 static const short int dist_vals[] = {
4453 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4454 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4455 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4456 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4457 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4458 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4459 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4460 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4461 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4462 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4467 if (mm->max == ONIG_INFINITE_DISTANCE)
return 0;
4469 d = mm->max - mm->min;
4470 if (d < numberof(dist_vals))
4472 return (
int )dist_vals[d];
4480 if (v2 <= 0)
return -1;
4481 if (v1 <= 0)
return 1;
4483 v1 *= distance_value(d1);
4484 v2 *= distance_value(d2);
4486 if (v2 > v1)
return 1;
4487 if (v2 < v1)
return -1;
4489 if (d2->min < d1->min)
return 1;
4490 if (d2->min > d1->min)
return -1;
4497 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4502set_mml(
MinMaxLen* mml, OnigDistance min, OnigDistance max)
4511 mml->min = mml->max = 0;
4517 to->min = from->min;
4518 to->max = from->max;
4524 to->min = distance_add(to->min, from->min);
4525 to->max = distance_add(to->max, from->max);
4532 to->min = distance_add(to->min,
len);
4533 to->max = distance_add(to->max,
len);
4540 if (to->min > from->min) to->min = from->min;
4541 if (to->max < from->max) to->max = from->max;
4553 anc->left_anchor = 0;
4554 anc->right_anchor = 0;
4565 OnigDistance left_len, OnigDistance right_len)
4567 clear_opt_anc_info(to);
4569 to->left_anchor = left->left_anchor;
4570 if (left_len == 0) {
4571 to->left_anchor |= right->left_anchor;
4574 to->right_anchor = right->right_anchor;
4575 if (right_len == 0) {
4576 to->right_anchor |= left->right_anchor;
4579 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4584is_left_anchor(
int anc)
4586 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4587 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4588 anc == ANCHOR_PREC_READ_NOT)
4597 if ((to->left_anchor & anc) != 0)
return 1;
4599 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4605 if (is_left_anchor(anc))
4606 to->left_anchor |= anc;
4608 to->right_anchor |= anc;
4614 if (is_left_anchor(anc))
4615 to->left_anchor &= ~anc;
4617 to->right_anchor &= ~anc;
4623 to->left_anchor &= add->left_anchor;
4624 to->right_anchor &= add->right_anchor;
4630 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4636 clear_mml(&ex->mmd);
4637 clear_opt_anc_info(&ex->anc);
4639 ex->ignore_case = -1;
4657 if (to->ignore_case < 0)
4658 to->ignore_case = add->ignore_case;
4659 else if (to->ignore_case != add->ignore_case)
4664 for (i = to->len; p < end; ) {
4665 len = enclen(enc, p, end);
4666 if (i +
len > OPT_EXACT_MAXLEN)
break;
4667 for (j = 0; j <
len && p < end; j++)
4672 to->reach_end = (p == end ? add->reach_end : 0);
4674 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4675 if (! to->reach_end) tanc.right_anchor = 0;
4676 copy_opt_anc_info(&to->anc, &tanc);
4680concat_opt_exact_info_str(
OptExactInfo* to, UChar* s, UChar* end,
4686 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4687 len = enclen(enc, p, end);
4688 if (i +
len > OPT_EXACT_MAXLEN)
break;
4689 for (j = 0; j <
len && p < end; j++)
4701 if (add->len == 0 || to->len == 0) {
4702 clear_opt_exact_info(to);
4706 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4707 clear_opt_exact_info(to);
4711 for (i = 0; i < to->len && i < add->len; ) {
4712 if (to->s[i] != add->s[i])
break;
4713 len = enclen(env->enc, to->s + i, to->s + to->len);
4715 for (j = 1; j <
len; j++) {
4716 if (to->s[i+j] != add->s[i+j])
break;
4722 if (! add->reach_end || i < add->
len || i < to->
len) {
4726 if (to->ignore_case < 0)
4727 to->ignore_case = add->ignore_case;
4728 else if (add->ignore_case >= 0)
4729 to->ignore_case |= add->ignore_case;
4731 alt_merge_opt_anc_info(&to->anc, &add->anc);
4732 if (! to->reach_end) to->anc.right_anchor = 0;
4747 copy_opt_exact_info(now, alt);
4750 else if (v1 <= 2 && v2 <= 2) {
4752 v2 = map_position_value(enc, now->s[0]);
4753 v1 = map_position_value(enc, alt->s[0]);
4755 if (now->len > 1) v1 += 5;
4756 if (alt->len > 1) v2 += 5;
4759 if (now->ignore_case <= 0) v1 *= 2;
4760 if (alt->ignore_case <= 0) v2 *= 2;
4762 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4763 copy_opt_exact_info(now, alt);
4772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4777 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4778 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4785 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4786 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4787 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4791 xmemcpy(map, &clean_info,
sizeof(
OptMapInfo));
4803 if (map->map[c] == 0) {
4805 map->value += map_position_value(enc, c);
4810add_char_amb_opt_map_info(
OptMapInfo* map, UChar* p, UChar* end,
4814 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4817 add_char_opt_map_info(map, p[0], enc);
4819 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4820 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4821 if (n < 0)
return n;
4823 for (i = 0; i < n; i++) {
4824 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4825 add_char_opt_map_info(map, buf[0], enc);
4834 const int z = 1<<15;
4838 if (alt->value == 0) return ;
4839 if (now->value == 0) {
4840 copy_opt_map_info(now, alt);
4844 v1 = z / now->value;
4845 v2 = z / alt->value;
4846 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4847 copy_opt_map_info(now, alt);
4853#define COMP_EM_BASE 20
4856 if (m->value <= 0)
return -1;
4858 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4859 vm = COMP_EM_BASE * 5 * 2 / m->value;
4860 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4869 if (to->value == 0) return ;
4870 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4871 clear_opt_map_info(to);
4875 alt_merge_mml(&to->mmd, &add->mmd);
4878 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4883 val += map_position_value(enc, i);
4887 alt_merge_opt_anc_info(&to->anc, &add->anc);
4893 copy_mml(&(opt->exb.mmd), mmd);
4894 copy_mml(&(opt->expr.mmd), mmd);
4895 copy_mml(&(opt->map.mmd), mmd);
4901 clear_mml(&opt->len);
4902 clear_opt_anc_info(&opt->anc);
4903 clear_opt_exact_info(&opt->exb);
4904 clear_opt_exact_info(&opt->exm);
4905 clear_opt_exact_info(&opt->expr);
4906 clear_opt_map_info(&opt->map);
4918 int exb_reach, exm_reach;
4921 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4922 copy_opt_anc_info(&to->anc, &tanc);
4924 if (add->exb.len > 0 && to->len.max == 0) {
4925 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4926 to->len.max, add->len.max);
4927 copy_opt_anc_info(&add->exb.anc, &tanc);
4930 if (add->map.value > 0 && to->len.max == 0) {
4931 if (add->map.mmd.max == 0)
4932 add->map.anc.left_anchor |= to->anc.left_anchor;
4935 exb_reach = to->exb.reach_end;
4936 exm_reach = to->exm.reach_end;
4938 if (add->len.max != 0)
4939 to->exb.reach_end = to->exm.reach_end = 0;
4941 if (add->exb.len > 0) {
4943 concat_opt_exact_info(&to->exb, &add->exb, enc);
4944 clear_opt_exact_info(&add->exb);
4946 else if (exm_reach) {
4947 concat_opt_exact_info(&to->exm, &add->exb, enc);
4948 clear_opt_exact_info(&add->exb);
4951 select_opt_exact_info(enc, &to->exm, &add->exb);
4952 select_opt_exact_info(enc, &to->exm, &add->exm);
4954 if (to->expr.len > 0) {
4955 if (add->len.max > 0) {
4956 if (to->expr.len > (
int )add->len.max)
4957 to->expr.len = (int )add->len.max;
4959 if (to->expr.mmd.max == 0)
4960 select_opt_exact_info(enc, &to->exb, &to->expr);
4962 select_opt_exact_info(enc, &to->exm, &to->expr);
4965 else if (add->expr.len > 0) {
4966 copy_opt_exact_info(&to->expr, &add->expr);
4969 select_opt_map_info(&to->map, &add->map);
4971 add_mml(&to->len, &add->len);
4977 alt_merge_opt_anc_info (&to->anc, &add->anc);
4978 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4979 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4980 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4981 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4983 alt_merge_mml(&to->len, &add->len);
4987#define MAX_NODE_OPT_INFO_REF_COUNT 5
4995 clear_node_opt_info(opt);
4996 set_bound_node_opt_info(opt, &env->mmd);
5006 copy_opt_env(&nenv, env);
5008 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
5010 add_mml(&nenv.mmd, &nopt.len);
5011 concat_left_node_opt_info(env->enc, opt, &nopt);
5013 }
while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
5023 r = optimize_node_left(NCAR(nd), &nopt, env);
5025 if (nd == node) copy_node_opt_info(opt, &nopt);
5026 else alt_merge_node_opt_info(opt, &nopt, env);
5028 }
while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
5035 OnigDistance slen = sn->end - sn->s;
5036 int is_raw = NSTRING_IS_RAW(node);
5038 if (! NSTRING_IS_AMBIG(node)) {
5039 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5041 opt->exb.ignore_case = 0;
5043 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5045 set_mml(&opt->len, slen, slen);
5050 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5051 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5052 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
5055 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5057 opt->exb.ignore_case = 1;
5060 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5061 env->enc, env->case_fold_flag);
5068 set_mml(&opt->len, slen, max);
5071 if ((OnigDistance )opt->exb.len == slen)
5072 opt->exb.reach_end = 1;
5083 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5084 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5085 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5087 set_mml(&opt->len, min, max);
5090 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5091 z = BITSET_AT(cc->bs, i);
5092 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5093 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5096 set_mml(&opt->len, 1, 1);
5106 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5111 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5112 switch (NCTYPE(node)->ctype) {
5113 case ONIGENC_CTYPE_WORD:
5114 if (NCTYPE(node)->not != 0) {
5115 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5116 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5117 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5122 for (i = 0; i < maxcode; i++) {
5123 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5124 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5132 min = ONIGENC_MBC_MINLEN(env->enc);
5134 set_mml(&opt->len, min, max);
5140 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5141 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5142 set_mml(&opt->len, min, max);
5147 switch (NANCHOR(node)->type) {
5148 case ANCHOR_BEGIN_BUF:
5149 case ANCHOR_BEGIN_POSITION:
5150 case ANCHOR_BEGIN_LINE:
5151 case ANCHOR_END_BUF:
5152 case ANCHOR_SEMI_END_BUF:
5153 case ANCHOR_END_LINE:
5154 case ANCHOR_LOOK_BEHIND:
5155 case ANCHOR_PREC_READ_NOT:
5156 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5159 case ANCHOR_PREC_READ:
5163 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5165 if (nopt.exb.len > 0)
5166 copy_opt_exact_info(&opt->expr, &nopt.exb);
5167 else if (nopt.exm.len > 0)
5168 copy_opt_exact_info(&opt->expr, &nopt.exm);
5170 opt->expr.reach_end = 0;
5172 if (nopt.map.value > 0)
5173 copy_opt_map_info(&opt->map, &nopt.map);
5178 case ANCHOR_LOOK_BEHIND_NOT:
5187 OnigDistance min, max, tmin, tmax;
5188 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5191 if (br->state & NST_RECURSION) {
5192 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5195 backs = BACKREFS_P(br);
5196 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5198 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5200 for (i = 1; i < br->back_num; i++) {
5201 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5203 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5205 if (min > tmin) min = tmin;
5206 if (max < tmax) max = tmax;
5208 if (r == 0) set_mml(&opt->len, min, max);
5212#ifdef USE_SUBEXP_CALL
5214 if (IS_CALL_RECURSION(NCALL(node)))
5215 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5217 OnigOptionType save = env->options;
5218 env->options = NENCLOSE(NCALL(node)->target)->option;
5219 r = optimize_node_left(NCALL(node)->target, opt, env);
5220 env->options = save;
5228 OnigDistance min, max;
5232 r = optimize_node_left(qn->target, &nopt, env);
5235 if ( IS_REPEAT_INFINITE(qn->upper)) {
5236 if (env->mmd.max == 0 &&
5237 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5238 if (IS_MULTILINE(env->options))
5240 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5242 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5246 if (qn->lower > 0) {
5247 copy_node_opt_info(opt, &nopt);
5248 if (nopt.exb.len > 0) {
5249 if (nopt.exb.reach_end) {
5250 for (i = 2; i <= qn->lower &&
5251 ! is_full_opt_exact_info(&opt->exb); i++) {
5252 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5254 if (i < qn->lower) {
5255 opt->exb.reach_end = 0;
5260 if (qn->lower != qn->upper) {
5261 opt->exb.reach_end = 0;
5262 opt->exm.reach_end = 0;
5265 opt->exm.reach_end = 0;
5269 min = distance_multiply(nopt.len.min, qn->lower);
5270 if (IS_REPEAT_INFINITE(qn->upper))
5271 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5273 max = distance_multiply(nopt.len.max, qn->upper);
5275 set_mml(&opt->len, min, max);
5284 case ENCLOSE_OPTION:
5286 OnigOptionType save = env->options;
5288 env->options = en->option;
5289 r = optimize_node_left(en->target, opt, env);
5290 env->options = save;
5294 case ENCLOSE_MEMORY:
5295#ifdef USE_SUBEXP_CALL
5297 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5298 OnigDistance min, max;
5301 max = ONIG_INFINITE_DISTANCE;
5302 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5303 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5304 set_mml(&opt->len, min, max);
5309 r = optimize_node_left(en->target, opt, env);
5311 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5312 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5313 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5318 case ENCLOSE_STOP_BACKTRACK:
5319 case ENCLOSE_CONDITION:
5320 r = optimize_node_left(en->target, opt, env);
5323 case ENCLOSE_ABSENT:
5324 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5332 fprintf(stderr,
"optimize_node_left: undefined node type %d\n",
5335 r = ONIGERR_TYPE_BUG;
5348 if (e->len == 0)
return 0;
5350 reg->exact = (UChar* )
xmalloc(e->len);
5351 CHECK_NULL_RETURN_MEMERR(reg->exact);
5352 xmemcpy(reg->exact, e->s, e->len);
5353 reg->exact_end = reg->exact + e->len;
5356 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5358 if (e->ignore_case > 0) {
5359 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5360 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5361 reg->map, &(reg->int_map), 1);
5363 reg->optimize = (allow_reverse != 0
5364 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5367 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5371 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5375 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5376 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5377 reg->map, &(reg->int_map), 0);
5379 reg->optimize = (allow_reverse != 0
5380 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5383 reg->optimize = ONIG_OPTIMIZE_EXACT;
5387 reg->optimize = ONIG_OPTIMIZE_EXACT;
5391 reg->dmin = e->mmd.min;
5392 reg->dmax = e->mmd.max;
5394 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5395 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5406 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5407 reg->map[i] = m->map[i];
5409 reg->optimize = ONIG_OPTIMIZE_MAP;
5410 reg->dmin = m->mmd.min;
5411 reg->dmax = m->mmd.max;
5413 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5414 reg->threshold_len = (int )(reg->dmin + 1);
5421 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5422 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5425#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5426static void print_optimize_info(
FILE* f,
regex_t* reg);
5438 env.options = reg->options;
5439 env.case_fold_flag = reg->case_fold_flag;
5440 env.scan_env = scan_env;
5441 clear_mml(&env.mmd);
5443 r = optimize_node_left(node, &opt, &env);
5446 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5447 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5448 ANCHOR_LOOK_BEHIND);
5450 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5451 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5453 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5454 ANCHOR_PREC_READ_NOT);
5456 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5457 reg->anchor_dmin = opt.len.min;
5458 reg->anchor_dmax = opt.len.max;
5461 if (opt.exb.len > 0 || opt.exm.len > 0) {
5462 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5463 if (opt.map.value > 0 &&
5464 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5468 r = set_optimize_exact_info(reg, &opt.exb);
5469 set_sub_anchor(reg, &opt.exb.anc);
5472 else if (opt.map.value > 0) {
5474 set_optimize_map_info(reg, &opt.map);
5475 set_sub_anchor(reg, &opt.map.anc);
5478 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5479 if (opt.len.max == 0)
5480 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5483#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5484 print_optimize_info(stderr, reg);
5490clear_optimize_info(
regex_t* reg)
5492 reg->optimize = ONIG_OPTIMIZE_NONE;
5494 reg->anchor_dmin = 0;
5495 reg->anchor_dmax = 0;
5496 reg->sub_anchor = 0;
5497 reg->exact_end = (UChar* )NULL;
5498 reg->threshold_len = 0;
5500 reg->exact = (UChar* )NULL;
5506 const UChar *s,
const UChar *end)
5508 fprintf(fp,
"\nPATTERN: /");
5510 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5516 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5518 fprintf(fp,
" 0x%04x ", (
int )code);
5521 fputc((
int )code, fp);
5524 p += enclen(enc, p, end);
5529 fputc((
int )*s, fp);
5534 fprintf(fp,
"/ (%s)\n", enc->name);
5538#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5540print_distance_range(
FILE* f, OnigDistance a, OnigDistance b)
5542 if (a == ONIG_INFINITE_DISTANCE)
5545 fprintf(f,
"(%"PRIuPTR
")", a);
5549 if (b == ONIG_INFINITE_DISTANCE)
5552 fprintf(f,
"(%"PRIuPTR
")", b);
5556print_anchor(
FILE* f,
int anchor)
5562 if (anchor & ANCHOR_BEGIN_BUF) {
5563 fprintf(f,
"begin-buf");
5566 if (anchor & ANCHOR_BEGIN_LINE) {
5567 if (q) fprintf(f,
", ");
5569 fprintf(f,
"begin-line");
5571 if (anchor & ANCHOR_BEGIN_POSITION) {
5572 if (q) fprintf(f,
", ");
5574 fprintf(f,
"begin-pos");
5576 if (anchor & ANCHOR_END_BUF) {
5577 if (q) fprintf(f,
", ");
5579 fprintf(f,
"end-buf");
5581 if (anchor & ANCHOR_SEMI_END_BUF) {
5582 if (q) fprintf(f,
", ");
5584 fprintf(f,
"semi-end-buf");
5586 if (anchor & ANCHOR_END_LINE) {
5587 if (q) fprintf(f,
", ");
5589 fprintf(f,
"end-line");
5591 if (anchor & ANCHOR_ANYCHAR_STAR) {
5592 if (q) fprintf(f,
", ");
5594 fprintf(f,
"anychar-star");
5596 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5597 if (q) fprintf(f,
", ");
5598 fprintf(f,
"anychar-star-ml");
5607 static const char* on[] = {
"NONE",
"EXACT",
"EXACT_BM",
"EXACT_BM_NOT_REV",
5609 "EXACT_BM_IC",
"EXACT_BM_NOT_REV_IC" };
5611 fprintf(f,
"optimize: %s\n", on[reg->optimize]);
5612 fprintf(f,
" anchor: "); print_anchor(f, reg->anchor);
5613 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5614 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5617 if (reg->optimize) {
5618 fprintf(f,
" sub anchor: "); print_anchor(f, reg->sub_anchor);
5625 fprintf(f,
"exact: [");
5626 for (p = reg->exact; p < reg->exact_end; p++) {
5629 fprintf(f,
"]: length: %"PRIdPTR
"\n", (reg->exact_end - reg->exact));
5631 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5634 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5635 if (reg->map[i]) n++;
5637 fprintf(f,
"map: n=%d\n", n);
5641 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5642 if (reg->map[i] != 0) {
5643 if (c > 0) fputs(
", ", f);
5645 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5646 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5649 fprintf(f,
"%d", i);
5662 if (IS_NOT_NULL(reg)) {
5665 xfree(reg->int_map);
5666 xfree(reg->int_map_backward);
5667 xfree(reg->repeat_range);
5668 onig_free(reg->chain);
5670#ifdef USE_NAMED_GROUP
5671 onig_names_free(reg);
5679 if (IS_NOT_NULL(reg)) {
5680 onig_free_body(reg);
5686dup_copy(
const void *ptr,
size_t size)
5689 if (IS_NOT_NULL(newptr)) {
5690 memcpy(newptr, ptr, size);
5698 if (IS_NOT_NULL(oreg)) {
5700 if (IS_NULL(reg))
return ONIGERR_MEMORY;
5704# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5706 if (IS_NOT_NULL(reg->exact)) {
5707 size_t exact_size = reg->exact_end - reg->exact;
5708 if (COPY_FAILED(exact, exact_size))
5710 (reg)->exact_end = (reg)->exact + exact_size;
5713 if (IS_NOT_NULL(reg->int_map)) {
5714 if (COPY_FAILED(int_map,
sizeof(
int) * ONIG_CHAR_TABLE_SIZE))
5717 if (IS_NOT_NULL(reg->int_map_backward)) {
5718 if (COPY_FAILED(int_map_backward,
sizeof(
int) * ONIG_CHAR_TABLE_SIZE))
5719 goto err_int_map_backward;
5721 if (IS_NOT_NULL(reg->p)) {
5722 if (COPY_FAILED(p, reg->alloc))
5725 if (IS_NOT_NULL(reg->repeat_range)) {
5726 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc *
sizeof(
OnigRepeatRange)))
5727 goto err_repeat_range;
5729 if (IS_NOT_NULL(reg->name_table)) {
5730 if (onig_names_copy(reg, oreg))
5731 goto err_name_table;
5733 if (IS_NOT_NULL(reg->chain)) {
5734 if (onig_reg_copy(®->chain, reg->chain))
5741 onig_names_free(reg);
5743 xfree(reg->repeat_range);
5747 xfree(reg->int_map_backward);
5748 err_int_map_backward:
5749 xfree(reg->int_map);
5754 return ONIGERR_MEMORY;
5761onig_memsize(
const regex_t *reg)
5763 size_t size =
sizeof(
regex_t);
5764 if (IS_NULL(reg))
return 0;
5765 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5766 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5767 if (IS_NOT_NULL(reg->int_map)) size +=
sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5768 if (IS_NOT_NULL(reg->int_map_backward)) size +=
sizeof(
int) * ONIG_CHAR_TABLE_SIZE;
5769 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc *
sizeof(
OnigRepeatRange);
5770 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5778 size_t size =
sizeof(*regs);
5779 if (IS_NULL(regs))
return 0;
5780 size += regs->allocated * (
sizeof(*regs->beg) +
sizeof(*regs->end));
5785#define REGEX_TRANSFER(to,from) do {\
5786 onig_free_body(to);\
5787 xmemcpy(to, from, sizeof(regex_t));\
5795 REGEX_TRANSFER(to, from);
5799#ifdef ONIG_DEBUG_COMPILE
5800static void print_compiled_byte_code_list(
FILE* f,
regex_t* reg);
5802#ifdef ONIG_DEBUG_PARSE_TREE
5803static void print_tree(
FILE* f,
Node* node);
5808onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5811 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5817onig_compile_ruby(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5818 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
5821onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5825#define COMPILE_INIT_SIZE 20
5828 OnigDistance init_size;
5831#ifdef USE_SUBEXP_CALL
5835 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5838 scan_env.sourcefile = sourcefile;
5839 scan_env.sourceline = sourceline;
5843 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5846 if (reg->alloc == 0) {
5847 init_size = (pattern_end - pattern) * 2;
5848 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5849 r = BBUF_INIT(reg, init_size);
5850 if (r != 0)
goto end;
5856 reg->num_repeat = 0;
5857 reg->num_null_check = 0;
5858 reg->repeat_range_alloc = 0;
5860#ifdef USE_COMBINATION_EXPLOSION_CHECK
5861 reg->num_comb_exp_check = 0;
5864 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5865 if (r != 0)
goto err;
5867#ifdef ONIG_DEBUG_PARSE_TREE
5869 fprintf(stderr,
"ORIGINAL PARSE TREE:\n");
5870 print_tree(stderr, root);
5874#ifdef USE_NAMED_GROUP
5876 if (scan_env.num_named > 0 &&
5877 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5878 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5879 if (scan_env.num_named != scan_env.num_mem)
5880 r = disable_noname_group_capture(&root, reg, &scan_env);
5882 r = numbered_ref_check(root);
5884 if (r != 0)
goto err;
5888#ifdef USE_SUBEXP_CALL
5889 if (scan_env.num_call > 0) {
5890 r = unset_addr_list_init(&uslist, scan_env.num_call);
5891 if (r != 0)
goto err;
5892 scan_env.unset_addr_list = &uslist;
5893 r = setup_subexp_call(root, &scan_env);
5894 if (r != 0)
goto err_unset;
5895 r = subexp_recursive_check_trav(root, &scan_env);
5896 if (r < 0)
goto err_unset;
5897 r = subexp_inf_recursive_check_trav(root, &scan_env);
5898 if (r != 0)
goto err_unset;
5900 reg->num_call = scan_env.num_call;
5906 r = setup_tree(root, reg, 0, &scan_env);
5907 if (r != 0)
goto err_unset;
5909#ifdef ONIG_DEBUG_PARSE_TREE
5910 print_tree(stderr, root);
5913 reg->capture_history = scan_env.capture_history;
5914 reg->bt_mem_start = scan_env.bt_mem_start;
5915 reg->bt_mem_start |= reg->capture_history;
5916 if (IS_FIND_CONDITION(reg->options))
5917 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5919 reg->bt_mem_end = scan_env.bt_mem_end;
5920 reg->bt_mem_end |= reg->capture_history;
5923#ifdef USE_COMBINATION_EXPLOSION_CHECK
5924 if (scan_env.backrefed_mem == 0
5925# ifdef USE_SUBEXP_CALL
5926 || scan_env.num_call == 0
5929 setup_comb_exp_check(root, 0, &scan_env);
5930# ifdef USE_SUBEXP_CALL
5931 if (scan_env.has_recursion != 0) {
5932 scan_env.num_comb_exp_check = 0;
5936 if (scan_env.comb_exp_max_regnum > 0) {
5938 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5939 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5940 scan_env.num_comb_exp_check = 0;
5947 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5950 clear_optimize_info(reg);
5951#ifndef ONIG_DONT_OPTIMIZE
5952 r = set_optimize_info_from_tree(root, reg, &scan_env);
5953 if (r != 0)
goto err_unset;
5956 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5957 xfree(scan_env.mem_nodes_dynamic);
5958 scan_env.mem_nodes_dynamic = (
Node** )NULL;
5961 r = compile_tree(root, reg);
5963 r = add_opcode(reg, OP_END);
5964#ifdef USE_SUBEXP_CALL
5965 if (scan_env.num_call > 0) {
5966 r = unset_addr_list_fix(&uslist, reg);
5967 unset_addr_list_end(&uslist);
5972 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5973 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5975 if (reg->bt_mem_start != 0)
5976 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5978 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5981#ifdef USE_SUBEXP_CALL
5982 else if (scan_env.num_call > 0) {
5983 unset_addr_list_end(&uslist);
5986 onig_node_free(root);
5988#ifdef ONIG_DEBUG_COMPILE
5989# ifdef USE_NAMED_GROUP
5990 onig_print_names(stderr, reg);
5992 print_compiled_byte_code_list(stderr, reg);
5996 onig_reg_resize(reg);
6000#ifdef USE_SUBEXP_CALL
6001 if (scan_env.num_call > 0) {
6002 unset_addr_list_end(&uslist);
6006 if (IS_NOT_NULL(scan_env.error)) {
6007 if (IS_NOT_NULL(einfo)) {
6008 einfo->enc = scan_env.enc;
6009 einfo->par = scan_env.error;
6010 einfo->par_end = scan_env.error_end;
6014 onig_node_free(root);
6015 xfree(scan_env.mem_nodes_dynamic);
6020static int onig_inited = 0;
6023onig_reg_init(
regex_t* reg, OnigOptionType option,
6024 OnigCaseFoldType case_fold_flag,
6031 return ONIGERR_INVALID_ARGUMENT;
6033 if (ONIGENC_IS_UNDEF(enc))
6034 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
6036 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6037 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6038 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6041 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6042 option |= syntax->options;
6043 option &= ~ONIG_OPTION_SINGLELINE;
6046 option |= syntax->options;
6049 (reg)->options = option;
6050 (reg)->syntax = syntax;
6051 (reg)->optimize = 0;
6052 (reg)->exact = (UChar* )NULL;
6053 (reg)->int_map = (
int* )NULL;
6054 (reg)->int_map_backward = (
int* )NULL;
6055 (reg)->chain = (
regex_t* )NULL;
6057 (reg)->p = (UChar* )NULL;
6060 (reg)->name_table = (
void* )NULL;
6062 (reg)->case_fold_flag = case_fold_flag;
6064 (reg)->timelimit = 0;
6070onig_new_without_alloc(
regex_t* reg,
const UChar* pattern,
6071 const UChar* pattern_end, OnigOptionType option,
OnigEncoding enc,
6076 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6079 r = onig_compile(reg, pattern, pattern_end, einfo);
6084onig_new(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
6089 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
6091 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
6101onig_initialize(
OnigEncoding encodings[] ARG_UNUSED,
int n ARG_UNUSED)
6109 if (onig_inited != 0)
6114#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6115 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6121#ifdef ONIG_DEBUG_STATISTICS
6122 onig_statistics_init();
6131extern void onig_add_end_call(
void (*func)(
void))
6136 if (item == 0) return ;
6138 item->next = EndCallTop;
6145exec_end_call_list(
void)
6150 while (EndCallTop != 0) {
6151 func = EndCallTop->func;
6155 EndCallTop = EndCallTop->next;
6163 exec_end_call_list();
6165#ifdef ONIG_DEBUG_STATISTICS
6166 onig_print_statistics(stderr);
6169#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6170 _CrtDumpMemoryLeaks();
6179onig_is_in_code_range(
const UChar* p, OnigCodePoint code)
6181 OnigCodePoint n, *data;
6182 OnigCodePoint low, high, x;
6184 GET_CODE_POINT(n, p);
6185 data = (OnigCodePoint* )p;
6188 for (low = 0, high = n; low < high; ) {
6189 x = (low + high) >> 1;
6190 if (code > data[x * 2 + 1])
6196 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6200onig_is_code_in_cc_len(
int elen, OnigCodePoint code,
CClassNode* cc)
6204 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6205 if (IS_NULL(cc->mbuf)) {
6209 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6213 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6216 if (IS_NCCLASS_NOT(cc))
6227 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6231 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6233 return onig_is_code_in_cc_len(
len, code, cc);
6240# define ARG_SPECIAL -1
6242# define ARG_RELADDR 1
6243# define ARG_ABSADDR 2
6244# define ARG_LENGTH 3
6245# define ARG_MEMNUM 4
6246# define ARG_OPTION 5
6247# define ARG_STATE_CHECK 6
6249OnigOpInfoType OnigOpInfo[] = {
6250 { OP_FINISH,
"finish", ARG_NON },
6251 { OP_END,
"end", ARG_NON },
6252 { OP_EXACT1,
"exact1", ARG_SPECIAL },
6253 { OP_EXACT2,
"exact2", ARG_SPECIAL },
6254 { OP_EXACT3,
"exact3", ARG_SPECIAL },
6255 { OP_EXACT4,
"exact4", ARG_SPECIAL },
6256 { OP_EXACT5,
"exact5", ARG_SPECIAL },
6257 { OP_EXACTN,
"exactn", ARG_SPECIAL },
6258 { OP_EXACTMB2N1,
"exactmb2-n1", ARG_SPECIAL },
6259 { OP_EXACTMB2N2,
"exactmb2-n2", ARG_SPECIAL },
6260 { OP_EXACTMB2N3,
"exactmb2-n3", ARG_SPECIAL },
6261 { OP_EXACTMB2N,
"exactmb2-n", ARG_SPECIAL },
6262 { OP_EXACTMB3N,
"exactmb3n" , ARG_SPECIAL },
6263 { OP_EXACTMBN,
"exactmbn", ARG_SPECIAL },
6264 { OP_EXACT1_IC,
"exact1-ic", ARG_SPECIAL },
6265 { OP_EXACTN_IC,
"exactn-ic", ARG_SPECIAL },
6266 { OP_CCLASS,
"cclass", ARG_SPECIAL },
6267 { OP_CCLASS_MB,
"cclass-mb", ARG_SPECIAL },
6268 { OP_CCLASS_MIX,
"cclass-mix", ARG_SPECIAL },
6269 { OP_CCLASS_NOT,
"cclass-not", ARG_SPECIAL },
6270 { OP_CCLASS_MB_NOT,
"cclass-mb-not", ARG_SPECIAL },
6271 { OP_CCLASS_MIX_NOT,
"cclass-mix-not", ARG_SPECIAL },
6272 { OP_ANYCHAR,
"anychar", ARG_NON },
6273 { OP_ANYCHAR_ML,
"anychar-ml", ARG_NON },
6274 { OP_ANYCHAR_STAR,
"anychar*", ARG_NON },
6275 { OP_ANYCHAR_ML_STAR,
"anychar-ml*", ARG_NON },
6276 { OP_ANYCHAR_STAR_PEEK_NEXT,
"anychar*-peek-next", ARG_SPECIAL },
6277 { OP_ANYCHAR_ML_STAR_PEEK_NEXT,
"anychar-ml*-peek-next", ARG_SPECIAL },
6278 { OP_WORD,
"word", ARG_NON },
6279 { OP_NOT_WORD,
"not-word", ARG_NON },
6280 { OP_WORD_BOUND,
"word-bound", ARG_NON },
6281 { OP_NOT_WORD_BOUND,
"not-word-bound", ARG_NON },
6282 { OP_WORD_BEGIN,
"word-begin", ARG_NON },
6283 { OP_WORD_END,
"word-end", ARG_NON },
6284 { OP_ASCII_WORD,
"ascii-word", ARG_NON },
6285 { OP_NOT_ASCII_WORD,
"not-ascii-word", ARG_NON },
6286 { OP_ASCII_WORD_BOUND,
"ascii-word-bound", ARG_NON },
6287 { OP_NOT_ASCII_WORD_BOUND,
"not-ascii-word-bound", ARG_NON },
6288 { OP_ASCII_WORD_BEGIN,
"ascii-word-begin", ARG_NON },
6289 { OP_ASCII_WORD_END,
"ascii-word-end", ARG_NON },
6290 { OP_BEGIN_BUF,
"begin-buf", ARG_NON },
6291 { OP_END_BUF,
"end-buf", ARG_NON },
6292 { OP_BEGIN_LINE,
"begin-line", ARG_NON },
6293 { OP_END_LINE,
"end-line", ARG_NON },
6294 { OP_SEMI_END_BUF,
"semi-end-buf", ARG_NON },
6295 { OP_BEGIN_POSITION,
"begin-position", ARG_NON },
6296 { OP_BACKREF1,
"backref1", ARG_NON },
6297 { OP_BACKREF2,
"backref2", ARG_NON },
6298 { OP_BACKREFN,
"backrefn", ARG_MEMNUM },
6299 { OP_BACKREFN_IC,
"backrefn-ic", ARG_SPECIAL },
6300 { OP_BACKREF_MULTI,
"backref_multi", ARG_SPECIAL },
6301 { OP_BACKREF_MULTI_IC,
"backref_multi-ic", ARG_SPECIAL },
6302 { OP_BACKREF_WITH_LEVEL,
"backref_at_level", ARG_SPECIAL },
6303 { OP_MEMORY_START_PUSH,
"mem-start-push", ARG_MEMNUM },
6304 { OP_MEMORY_START,
"mem-start", ARG_MEMNUM },
6305 { OP_MEMORY_END_PUSH,
"mem-end-push", ARG_MEMNUM },
6306 { OP_MEMORY_END_PUSH_REC,
"mem-end-push-rec", ARG_MEMNUM },
6307 { OP_MEMORY_END,
"mem-end", ARG_MEMNUM },
6308 { OP_MEMORY_END_REC,
"mem-end-rec", ARG_MEMNUM },
6309 { OP_SET_OPTION_PUSH,
"set-option-push", ARG_OPTION },
6310 { OP_SET_OPTION,
"set-option", ARG_OPTION },
6311 { OP_KEEP,
"keep", ARG_NON },
6312 { OP_FAIL,
"fail", ARG_NON },
6313 { OP_JUMP,
"jump", ARG_RELADDR },
6314 { OP_PUSH,
"push", ARG_RELADDR },
6315 { OP_POP,
"pop", ARG_NON },
6316 { OP_PUSH_OR_JUMP_EXACT1,
"push-or-jump-e1", ARG_SPECIAL },
6317 { OP_PUSH_IF_PEEK_NEXT,
"push-if-peek-next", ARG_SPECIAL },
6318 { OP_REPEAT,
"repeat", ARG_SPECIAL },
6319 { OP_REPEAT_NG,
"repeat-ng", ARG_SPECIAL },
6320 { OP_REPEAT_INC,
"repeat-inc", ARG_MEMNUM },
6321 { OP_REPEAT_INC_NG,
"repeat-inc-ng", ARG_MEMNUM },
6322 { OP_REPEAT_INC_SG,
"repeat-inc-sg", ARG_MEMNUM },
6323 { OP_REPEAT_INC_NG_SG,
"repeat-inc-ng-sg", ARG_MEMNUM },
6324 { OP_NULL_CHECK_START,
"null-check-start", ARG_MEMNUM },
6325 { OP_NULL_CHECK_END,
"null-check-end", ARG_MEMNUM },
6326 { OP_NULL_CHECK_END_MEMST,
"null-check-end-memst", ARG_MEMNUM },
6327 { OP_NULL_CHECK_END_MEMST_PUSH,
"null-check-end-memst-push", ARG_MEMNUM },
6328 { OP_PUSH_POS,
"push-pos", ARG_NON },
6329 { OP_POP_POS,
"pop-pos", ARG_NON },
6330 { OP_PUSH_POS_NOT,
"push-pos-not", ARG_RELADDR },
6331 { OP_FAIL_POS,
"fail-pos", ARG_NON },
6332 { OP_PUSH_STOP_BT,
"push-stop-bt", ARG_NON },
6333 { OP_POP_STOP_BT,
"pop-stop-bt", ARG_NON },
6334 { OP_LOOK_BEHIND,
"look-behind", ARG_SPECIAL },
6335 { OP_PUSH_LOOK_BEHIND_NOT,
"push-look-behind-not", ARG_SPECIAL },
6336 { OP_FAIL_LOOK_BEHIND_NOT,
"fail-look-behind-not", ARG_NON },
6337 { OP_PUSH_ABSENT_POS,
"push-absent-pos", ARG_NON },
6338 { OP_ABSENT,
"absent", ARG_RELADDR },
6339 { OP_ABSENT_END,
"absent-end", ARG_NON },
6340 { OP_CALL,
"call", ARG_ABSADDR },
6341 { OP_RETURN,
"return", ARG_NON },
6342 { OP_CONDITION,
"condition", ARG_SPECIAL },
6343 { OP_STATE_CHECK_PUSH,
"state-check-push", ARG_SPECIAL },
6344 { OP_STATE_CHECK_PUSH_OR_JUMP,
"state-check-push-or-jump", ARG_SPECIAL },
6345 { OP_STATE_CHECK,
"state-check", ARG_STATE_CHECK },
6346 { OP_STATE_CHECK_ANYCHAR_STAR,
"state-check-anychar*", ARG_STATE_CHECK },
6347 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6348 "state-check-anychar-ml*", ARG_STATE_CHECK },
6357 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6358 if (opcode == OnigOpInfo[i].opcode)
6359 return OnigOpInfo[i].name;
6365op2arg_type(
int opcode)
6369 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6370 if (opcode == OnigOpInfo[i].opcode)
6371 return OnigOpInfo[i].arg_type;
6376# ifdef ONIG_DEBUG_PARSE_TREE
6378Indent(
FILE* f,
int indent)
6381 for (i = 0; i < indent; i++) putc(
' ', f);
6386p_string(
FILE* f, ptrdiff_t
len, UChar* s)
6389 while (
len-- > 0) { fputc(*s++, f); }
6393p_len_string(
FILE* f, LengthType
len,
int mb_len, UChar* s)
6395 int x =
len * mb_len;
6397 fprintf(f,
":%d:",
len);
6398 while (x-- > 0) { fputc(*s++, f); }
6402onig_print_compiled_byte_code(
FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6409 StateCheckNumType scn;
6413 fprintf(f,
"[%s", op2name(*bp));
6414 arg_type = op2arg_type(*bp);
6415 if (arg_type != ARG_SPECIAL) {
6421 GET_RELADDR_INC(addr, bp);
6422 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6425 GET_ABSADDR_INC(addr, bp);
6426 fprintf(f,
":(%d)", addr);
6429 GET_LENGTH_INC(
len, bp);
6430 fprintf(f,
":%d",
len);
6433 mem = *((MemNumType* )bp);
6435 fprintf(f,
":%d", mem);
6439 OnigOptionType option = *((OnigOptionType* )bp);
6441 fprintf(f,
":%d", option);
6445 case ARG_STATE_CHECK:
6446 scn = *((StateCheckNumType* )bp);
6447 bp += SIZE_STATE_CHECK_NUM;
6448 fprintf(f,
":%d", scn);
6455 case OP_ANYCHAR_STAR_PEEK_NEXT:
6456 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6457 p_string(f, 1, bp++);
break;
6459 p_string(f, 2, bp); bp += 2;
break;
6461 p_string(f, 3, bp); bp += 3;
break;
6463 p_string(f, 4, bp); bp += 4;
break;
6465 p_string(f, 5, bp); bp += 5;
break;
6467 GET_LENGTH_INC(
len, bp);
6468 p_len_string(f,
len, 1, bp);
6473 p_string(f, 2, bp); bp += 2;
break;
6475 p_string(f, 4, bp); bp += 4;
break;
6477 p_string(f, 6, bp); bp += 6;
break;
6479 GET_LENGTH_INC(
len, bp);
6480 p_len_string(f,
len, 2, bp);
6484 GET_LENGTH_INC(
len, bp);
6485 p_len_string(f,
len, 3, bp);
6492 GET_LENGTH_INC(mb_len, bp);
6493 GET_LENGTH_INC(
len, bp);
6494 fprintf(f,
":%d:%d:", mb_len,
len);
6496 while (n-- > 0) { fputc(*bp++, f); }
6501 len = enclen(enc, bp, bpend);
6502 p_string(f,
len, bp);
6506 GET_LENGTH_INC(
len, bp);
6507 p_len_string(f,
len, 1, bp);
6512 n = bitset_on_num((BitSetRef )bp);
6514 fprintf(f,
":%d", n);
6518 n = bitset_on_num((BitSetRef )bp);
6520 fprintf(f,
":%d", n);
6524 case OP_CCLASS_MB_NOT:
6525 GET_LENGTH_INC(
len, bp);
6527# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6530 GET_CODE_POINT(code, q);
6532 fprintf(f,
":%d:%d", (
int )code,
len);
6536 case OP_CCLASS_MIX_NOT:
6537 n = bitset_on_num((BitSetRef )bp);
6539 GET_LENGTH_INC(
len, bp);
6541# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6544 GET_CODE_POINT(code, q);
6546 fprintf(f,
":%d:%d:%d", n, (
int )code,
len);
6549 case OP_BACKREFN_IC:
6550 mem = *((MemNumType* )bp);
6552 fprintf(f,
":%d", mem);
6555 case OP_BACKREF_MULTI_IC:
6556 case OP_BACKREF_MULTI:
6558 GET_LENGTH_INC(
len, bp);
6559 for (i = 0; i <
len; i++) {
6560 GET_MEMNUM_INC(mem, bp);
6561 if (i > 0) fputs(
", ", f);
6562 fprintf(f,
"%d", mem);
6566 case OP_BACKREF_WITH_LEVEL:
6568 OnigOptionType option;
6571 GET_OPTION_INC(option, bp);
6572 fprintf(f,
":%d", option);
6573 GET_LENGTH_INC(level, bp);
6574 fprintf(f,
":%d", level);
6577 GET_LENGTH_INC(
len, bp);
6578 for (i = 0; i <
len; i++) {
6579 GET_MEMNUM_INC(mem, bp);
6580 if (i > 0) fputs(
", ", f);
6581 fprintf(f,
"%d", mem);
6589 mem = *((MemNumType* )bp);
6591 addr = *((RelAddrType* )bp);
6593 fprintf(f,
":%d:%d", mem, addr);
6597 case OP_PUSH_OR_JUMP_EXACT1:
6598 case OP_PUSH_IF_PEEK_NEXT:
6599 addr = *((RelAddrType* )bp);
6601 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6606 case OP_LOOK_BEHIND:
6607 GET_LENGTH_INC(
len, bp);
6608 fprintf(f,
":%d",
len);
6611 case OP_PUSH_LOOK_BEHIND_NOT:
6612 GET_RELADDR_INC(addr, bp);
6613 GET_LENGTH_INC(
len, bp);
6614 fprintf(f,
":%d:(%s%d)",
len, (addr >= 0) ?
"+" :
"", addr);
6617 case OP_STATE_CHECK_PUSH:
6618 case OP_STATE_CHECK_PUSH_OR_JUMP:
6619 scn = *((StateCheckNumType* )bp);
6620 bp += SIZE_STATE_CHECK_NUM;
6621 addr = *((RelAddrType* )bp);
6623 fprintf(f,
":%d:(%s%d)", scn, (addr >= 0) ?
"+" :
"", addr);
6627 GET_MEMNUM_INC(mem, bp);
6628 GET_RELADDR_INC(addr, bp);
6629 fprintf(f,
":%d:(%s%d)", mem, (addr >= 0) ?
"+" :
"", addr);
6633 fprintf(stderr,
"onig_print_compiled_byte_code: undefined code %d\n",
6638 if (nextp) *nextp = bp;
6641# ifdef ONIG_DEBUG_COMPILE
6643print_compiled_byte_code_list(
FILE* f,
regex_t* reg)
6647 UChar* end = reg->p + reg->used;
6649 fprintf(f,
"code length: %d", reg->used);
6655 fprintf(f,
"\n%ld:", bp - reg->p);
6657 fprintf(f,
" %ld:", bp - reg->p);
6658 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6665# ifdef ONIG_DEBUG_PARSE_TREE
6667print_indent_tree(
FILE* f,
Node* node,
int indent)
6669 int i,
type, container_p = 0;
6674 if (IS_NULL(node)) {
6675 fprintf(f,
"ERROR: null node!!!\n");
6683 if (NTYPE(node) == NT_LIST)
6684 fprintf(f,
"<list:%"PRIxPTR
">\n", (intptr_t )node);
6686 fprintf(f,
"<alt:%"PRIxPTR
">\n", (intptr_t )node);
6688 print_indent_tree(f, NCAR(node), indent + add);
6689 while (IS_NOT_NULL(node = NCDR(node))) {
6690 if (NTYPE(node) != type) {
6691 fprintf(f,
"ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6694 print_indent_tree(f, NCAR(node), indent + add);
6699 fprintf(f,
"<string%s:%"PRIxPTR
">",
6700 (NSTRING_IS_RAW(node) ?
"-raw" :
""), (intptr_t )node);
6701 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6702 if (*p >= 0x20 && *p < 0x7f)
6705 fprintf(f,
" 0x%02x", *p);
6711 fprintf(f,
"<cclass:%"PRIxPTR
">", (intptr_t )node);
6712 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(
"not ", f);
6713 if (NCCLASS(node)->mbuf) {
6714 BBuf* bbuf = NCCLASS(node)->mbuf;
6715 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6716 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6717 fprintf(f,
"%d", *data++);
6718 for (; data < end; data+=2) {
6720 fprintf(f,
"%04x-%04x", data[0], data[1]);
6726 fprintf(f,
"<ctype:%"PRIxPTR
"> ", (intptr_t )node);
6727 switch (NCTYPE(node)->ctype) {
6728 case ONIGENC_CTYPE_WORD:
6729 if (NCTYPE(node)->not != 0)
6730 fputs(
"not word", f);
6736 fprintf(f,
"ERROR: undefined ctype.\n");
6742 fprintf(f,
"<anychar:%"PRIxPTR
">", (intptr_t )node);
6746 fprintf(f,
"<anchor:%"PRIxPTR
"> ", (intptr_t )node);
6747 switch (NANCHOR(node)->type) {
6748 case ANCHOR_BEGIN_BUF: fputs(
"begin buf", f);
break;
6749 case ANCHOR_END_BUF: fputs(
"end buf", f);
break;
6750 case ANCHOR_BEGIN_LINE: fputs(
"begin line", f);
break;
6751 case ANCHOR_END_LINE: fputs(
"end line", f);
break;
6752 case ANCHOR_SEMI_END_BUF: fputs(
"semi end buf", f);
break;
6753 case ANCHOR_BEGIN_POSITION: fputs(
"begin position", f);
break;
6755 case ANCHOR_WORD_BOUND: fputs(
"word bound", f);
break;
6756 case ANCHOR_NOT_WORD_BOUND: fputs(
"not word bound", f);
break;
6757# ifdef USE_WORD_BEGIN_END
6758 case ANCHOR_WORD_BEGIN: fputs(
"word begin", f);
break;
6759 case ANCHOR_WORD_END: fputs(
"word end", f);
break;
6761 case ANCHOR_PREC_READ: fputs(
"prec read", f); container_p = TRUE;
break;
6762 case ANCHOR_PREC_READ_NOT: fputs(
"prec read not", f); container_p = TRUE;
break;
6763 case ANCHOR_LOOK_BEHIND: fputs(
"look_behind", f); container_p = TRUE;
break;
6764 case ANCHOR_LOOK_BEHIND_NOT: fputs(
"look_behind_not",f); container_p = TRUE;
break;
6765 case ANCHOR_KEEP: fputs(
"keep",f);
break;
6768 fprintf(f,
"ERROR: undefined anchor type.\n");
6778 fprintf(f,
"<backref:%"PRIxPTR
">", (intptr_t )node);
6779 for (i = 0; i < br->back_num; i++) {
6780 if (i > 0) fputs(
", ", f);
6781 fprintf(f,
"%d", p[i]);
6786# ifdef USE_SUBEXP_CALL
6790 fprintf(f,
"<call:%"PRIxPTR
">", (intptr_t )node);
6791 p_string(f, cn->name_end - cn->name, cn->name);
6797 fprintf(f,
"<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (intptr_t )node,
6798 NQTFR(node)->lower, NQTFR(node)->upper,
6799 (NQTFR(node)->greedy ?
"" :
"?"));
6800 print_indent_tree(f, NQTFR(node)->target, indent + add);
6804 fprintf(f,
"<enclose:%"PRIxPTR
"> ", (intptr_t )node);
6805 switch (NENCLOSE(node)->type) {
6806 case ENCLOSE_OPTION:
6807 fprintf(f,
"option:%d", NENCLOSE(node)->option);
6809 case ENCLOSE_MEMORY:
6810 fprintf(f,
"memory:%d", NENCLOSE(node)->regnum);
6812 case ENCLOSE_STOP_BACKTRACK:
6813 fprintf(f,
"stop-bt");
6815 case ENCLOSE_CONDITION:
6816 fprintf(f,
"condition:%d", NENCLOSE(node)->regnum);
6818 case ENCLOSE_ABSENT:
6819 fprintf(f,
"absent");
6826 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6830 fprintf(f,
"print_indent_tree: undefined node type %d\n", NTYPE(node));
6834 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6838 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6846 print_indent_tree(f, node, 0);
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
int len
Length of the buffer.
VALUE type(ANYARGS)
ANYARGS-ed function type.