33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(
void)
38 return OnigDefaultCaseFoldFlag;
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 OnigDefaultCaseFoldFlag = case_fold_flag;
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
55str_dup(UChar* s, UChar* end)
57 ptrdiff_t
len = end - s;
74 c = *a; *a = *b; *b = c;
76 if (NTYPE(a) == NT_STR) {
79 size_t len = sn->end - sn->s;
81 sn->end = sn->s +
len;
85 if (NTYPE(b) == NT_STR) {
88 size_t len = sn->end - sn->s;
90 sn->end = sn->s +
len;
96distance_add(OnigDistance d1, OnigDistance d2)
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2)
return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
107distance_multiply(OnigDistance d,
int m)
109 if (m == 0)
return 0;
111 if (d < ONIG_INFINITE_DISTANCE / m)
114 return ONIG_INFINITE_DISTANCE;
118bitset_is_empty(BitSetRef bs)
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0)
return 0;
129bitset_on_num(BitSetRef bs)
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr =
xrealloc(reg->p, reg->used);
155 reg->alloc = reg->used;
159 }
while ((reg = reg->chain) != 0);
163onig_bbuf_init(
BBuf* buf, OnigDistance size)
170 buf->p = (UChar* )
xmalloc(size);
171 if (IS_NULL(buf->p))
return(ONIGERR_MEMORY);
174 buf->alloc = (
unsigned int )size;
180#ifdef USE_SUBEXP_CALL
188 CHECK_NULL_RETURN_MEMERR(p);
190 uslist->alloc = size;
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
224add_opcode(
regex_t* reg,
int opcode)
226 BBUF_ADD1(reg, opcode);
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
232add_state_check_num(
regex_t* reg,
int num)
234 StateCheckNumType n = (StateCheckNumType )num;
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
242add_rel_addr(
regex_t* reg,
int addr)
244 RelAddrType ra = (RelAddrType )addr;
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
251add_abs_addr(
regex_t* reg,
int addr)
253 AbsAddrType ra = (AbsAddrType )addr;
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
262 LengthType l = (LengthType )
len;
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
269add_mem_num(
regex_t* reg,
int num)
271 MemNumType n = (MemNumType )num;
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
279add_pointer(
regex_t* reg,
void* addr)
281 PointerType ptr = (PointerType )addr;
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
289add_option(
regex_t* reg, OnigOptionType option)
291 BBUF_ADD(reg, &option, SIZE_OPTION);
296add_opcode_rel_addr(
regex_t* reg,
int opcode,
int addr)
300 r = add_opcode(reg, opcode);
302 r = add_rel_addr(reg, addr);
307add_bytes(
regex_t* reg, UChar* bytes, OnigDistance
len)
309 BBUF_ADD(reg, bytes,
len);
314add_bitset(
regex_t* reg, BitSetRef bs)
316 BBUF_ADD(reg, bs, SIZE_BITSET);
321add_opcode_option(
regex_t* reg,
int opcode, OnigOptionType option)
325 r = add_opcode(reg, opcode);
327 r = add_option(reg, option);
331static int compile_length_tree(
Node* node,
regex_t* reg);
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
340select_str_opcode(
int mb_len, OnigDistance byte_len,
int ignore_case)
343 OnigDistance str_len = roomof(byte_len, mb_len);
347 case 1: op = OP_EXACT1_IC;
break;
348 default: op = OP_EXACTN_IC;
break;
355 case 1: op = OP_EXACT1;
break;
356 case 2: op = OP_EXACT2;
break;
357 case 3: op = OP_EXACT3;
break;
358 case 4: op = OP_EXACT4;
break;
359 case 5: op = OP_EXACT5;
break;
360 default: op = OP_EXACTN;
break;
366 case 1: op = OP_EXACTMB2N1;
break;
367 case 2: op = OP_EXACTMB2N2;
break;
368 case 3: op = OP_EXACTMB2N3;
break;
369 default: op = OP_EXACTMB2N;
break;
386compile_tree_empty_check(
Node* node,
regex_t* reg,
int empty_info)
389 int saved_num_null_check = reg->num_null_check;
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
394 r = add_mem_num(reg, reg->num_null_check);
396 reg->num_null_check++;
399 r = compile_tree(node, reg);
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
411 r = add_mem_num(reg, saved_num_null_check);
416#ifdef USE_SUBEXP_CALL
422 r = add_opcode(reg, OP_CALL);
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
427 r = add_abs_addr(reg, 0 );
433compile_tree_n_times(
Node* node,
int n,
regex_t* reg)
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
445add_compile_string_length(UChar* s ARG_UNUSED,
int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED,
int ignore_case)
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
453 if (op == OP_EXACTMBN)
len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
457 len += (int )byte_len;
462add_compile_string(UChar* s,
int mb_len, OnigDistance byte_len,
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
475 add_length(reg, byte_len / mb_len);
478 add_bytes(reg, s, byte_len);
484compile_length_string_node(
Node* node,
regex_t* reg)
486 int rlen, r,
len, prev_len, blen, ambig;
492 if (sn->end <= sn->s)
495 ambig = NSTRING_IS_AMBIG(node);
498 prev_len = enclen(enc, p, sn->end);
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (
len == prev_len || ambig) {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
525 if (sn->end <= sn->s)
528 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
534 int r,
len, prev_len, blen, ambig;
536 UChar *p, *prev, *end;
540 if (sn->end <= sn->s)
544 ambig = NSTRING_IS_AMBIG(node);
547 prev_len = enclen(enc, p, end);
552 len = enclen(enc, p, end);
553 if (
len == prev_len || ambig) {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
573 if (sn->end <= sn->s)
576 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
593 r = add_bytes(reg, mbuf->p, mbuf->used);
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
615 len = SIZE_OPCODE + SIZE_BITSET;
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
636 add_opcode(reg, OP_CCLASS);
638 r = add_bitset(reg, cc->bs);
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
645 add_opcode(reg, OP_CCLASS_MB);
647 r = add_multi_byte_cclass(cc->mbuf, reg);
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
653 add_opcode(reg, OP_CCLASS_MIX);
655 r = add_bitset(reg, cc->bs);
657 r = add_multi_byte_cclass(cc->mbuf, reg);
665entry_repeat_range(
regex_t* reg,
int id,
int lower,
int upper)
667#define REPEAT_RANGE_ALLOC 4
671 if (reg->repeat_range_alloc == 0) {
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
677 else if (reg->repeat_range_alloc <=
id) {
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
687 p = reg->repeat_range;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
696compile_range_repeat_node(
QtfrNode* qn,
int target_len,
int empty_info,
700 int num_repeat = reg->num_repeat;
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
704 r = add_mem_num(reg, num_repeat);
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
717#ifdef USE_SUBEXP_CALL
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
727 r = add_mem_num(reg, num_repeat);
732is_anychar_star_quantifier(
QtfrNode* qn)
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
741#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
742#define CKN_ON (ckn > 0)
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
749 int len, mod_tlen, cklen;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
755 if (tlen < 0)
return tlen;
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
776 if (infinite && qn->lower <= 1) {
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0)
796 len = SIZE_OP_JUMP + tlen;
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
806 len = SIZE_OP_PUSH + tlen;
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
820 len += SIZE_OP_STATE_CHECK;
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
835 if (tlen < 0)
return tlen;
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
849 r = add_state_check_num(reg, ckn);
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
868 r = add_state_check_num(reg, ckn);
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
879 if (infinite && qn->lower <= 1) {
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
890 r = add_state_check_num(reg, ckn);
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (
int )SIZE_OP_JUMP
902 + (
int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
914 r = add_state_check_num(reg, ckn);
916 r = add_rel_addr(reg,
917 -(mod_tlen + (
int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) {
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
927 r = compile_tree(qn->target, reg);
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937 r = add_state_check_num(reg, ckn);
939 r = add_rel_addr(reg, tlen);
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
947 r = compile_tree(qn->target, reg);
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
953 r = add_state_check_num(reg, ckn);
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
964 r = compile_tree(qn->target, reg);
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
970 r = add_opcode(reg, OP_STATE_CHECK);
972 r = add_state_check_num(reg, ckn);
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
988 if (tlen < 0)
return tlen;
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1011 len = tlen * qn->lower;
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1028 else if (qn->upper == 0 && qn->is_referred != 0) {
1029 len = SIZE_OP_JUMP + tlen;
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1056 if (tlen < 0)
return tlen;
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH));
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
1147 else if (qn->upper == 0 && qn->is_referred != 0) {
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1150 r = compile_tree(qn->target, reg);
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1164 r = compile_tree(qn->target, reg);
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1173 r = compile_tree(qn->target, reg);
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1186 OnigOptionType prev = reg->options;
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1192 if (tlen < 0)
return tlen;
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
1206 OnigOptionType prev = reg->options;
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1213 r = add_opcode(reg, OP_FAIL);
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0)
return tlen;
1244 switch (node->type) {
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1268 len = SIZE_OP_MEMORY_START;
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1275 case ENCLOSE_STOP_BACKTRACK:
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0)
return tlen;
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1301 tlen = compile_length_tree(NCAR(x), reg);
1302 if (tlen < 0)
return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1306 tlen = compile_length_tree(NCAR(x), reg);
1307 if (tlen < 0)
return tlen;
1309 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1312 return ONIGERR_PARSER_BUG;
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1321 return ONIGERR_TYPE_BUG;
1328static int get_char_length_tree(
Node* node,
regex_t* reg,
int*
len);
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (
int )node->call_addr);
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1357 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1364 r = add_opcode(reg, OP_MEMORY_START);
1366 r = add_mem_num(reg, node->regnum);
1368 r = compile_tree(node->target, reg);
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1380 r = add_mem_num(reg, node->regnum);
1382 r = add_opcode(reg, OP_RETURN);
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1390 r = add_mem_num(reg, node->regnum);
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1398 r = add_opcode(reg, OP_MEMORY_END);
1400 r = add_mem_num(reg, node->regnum);
1404 case ENCLOSE_STOP_BACKTRACK:
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1414 len = compile_length_tree(qn->target, reg);
1417 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_POP + SIZE_OP_JUMP);
1419 r = compile_tree(qn->target, reg);
1421 r = add_opcode(reg, OP_POP);
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((
int )SIZE_OP_PUSH +
len + (
int )SIZE_OP_POP + (
int )SIZE_OP_JUMP));
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1430 r = compile_tree(node->target, reg);
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1441 r = add_mem_num(reg, node->regnum);
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1448 len = compile_length_tree(NCAR(x), reg);
1450 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1452 len2 = compile_length_tree(NCAR(x), reg);
1453 if (len2 < 0)
return len2;
1454 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1457 r = add_rel_addr(reg,
len + SIZE_OP_JUMP);
1459 r = compile_tree(NCAR(x), reg);
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1464 r = compile_tree(NCAR(x), reg);
1467 return ONIGERR_PARSER_BUG;
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1477 r = add_opcode_rel_addr(reg, OP_ABSENT,
len + SIZE_OP_ABSENT_END);
1479 r = compile_tree(node->target, reg);
1481 r = add_opcode(reg, OP_ABSENT_END);
1485 return ONIGERR_TYPE_BUG;
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0)
return tlen;
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF);
break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF);
break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE);
break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE);
break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF);
break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION);
break;
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP);
break;
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1561 r = compile_tree(node->target, reg);
1563 r = add_opcode(reg, OP_POP_POS);
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT,
len + SIZE_OP_FAIL_POS);
1571 r = compile_tree(node->target, reg);
1573 r = add_opcode(reg, OP_FAIL_POS);
1576 case ANCHOR_LOOK_BEHIND:
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1587 r = add_length(reg, n);
1589 r = compile_tree(node->target, reg);
1593 case ANCHOR_LOOK_BEHIND_NOT:
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1606 r = add_length(reg, n);
1608 r = compile_tree(node->target, reg);
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1615 return ONIGERR_TYPE_BUG;
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0)
return r;
1635 }
while (IS_NOT_NULL(node = NCDR(node)));
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0)
return r;
1648 }
while (IS_NOT_NULL(node = NCDR(node)));
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1658 r = compile_length_string_node(node, reg);
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1691#ifdef USE_SUBEXP_CALL
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1710 return ONIGERR_TYPE_BUG;
1726 r = compile_tree(NCAR(node), reg);
1727 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1739 }
while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used +
len;
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_JUMP);
1748 r = compile_tree(NCAR(node), reg);
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1755 }
while (IS_NOT_NULL(node = NCDR(node)));
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1763 r = compile_string_node(node, reg);
1767 r = compile_cclass_node(NCCLASS(node), reg);
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1786 return ONIGERR_TYPE_BUG;
1789 r = add_opcode(reg, op);
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1797 r = add_opcode(reg, OP_ANYCHAR);
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1810 r = add_length(reg, br->nest_level);
1813 goto add_bacref_mems;
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1822 r = add_mem_num(reg, n);
1826 case 1: r = add_opcode(reg, OP_BACKREF1);
break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2);
break;
1829 r = add_opcode(reg, OP_BACKREFN);
1831 r = add_mem_num(reg, n);
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1848#ifdef USE_BACKREF_WITH_LEVEL
1851 r = add_length(reg, br->back_num);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1862#ifdef USE_SUBEXP_CALL
1864 r = compile_call(NCALL(node), reg);
1869 r = compile_quantifier_node(NQTFR(node), reg);
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1877 r = compile_anchor_node(NANCHOR(node), reg);
1882 fprintf(stderr,
"compile_tree: undefined node type %d\n", NTYPE(node));
1890#ifdef USE_NAMED_GROUP
1896 Node* node = *plink;
1898 switch (NTYPE(node)) {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1926 else if (en->regnum != 0) {
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1934 r = noname_disable_map(&(en->target), map, counter);
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1953 int i, pos, n, old_num;
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1964 backs = bn->back_dynamic;
1966 for (i = 0, pos = 0; i < old_num; i++) {
1967 if (backs[i] > num_mem)
return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1984 switch (NTYPE(node)) {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem)
return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2001 r = renumber_by_map(en->target, map, num_mem);
2006 r = renumber_node_backref(node, map, num_mem);
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2022numbered_ref_check(
Node* node)
2026 switch (NTYPE(node)) {
2030 r = numbered_ref_check(NCAR(node));
2031 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2034 r = numbered_ref_check(NQTFR(node)->target);
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2060 int r, i, pos, counter;
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0)
return r;
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0)
return r;
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2094 return onig_renumber_name_table(reg, map);
2098#ifdef USE_SUBEXP_CALL
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en))
return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2120quantifiers_memory_node_info(
Node* node)
2124 switch (NTYPE(node)) {
2130 v = quantifiers_memory_node_info(NCAR(node));
2132 }
while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2136# ifdef USE_SUBEXP_CALL
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC;
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2190get_min_match_length(
Node* node, OnigDistance *min,
ScanEnv* env)
2196 switch (NTYPE(node)) {
2201 Node** nodes = SCANENV_MEM_NODES(env);
2203 if (br->state & NST_RECURSION)
break;
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2213 if (*min > tmin) *min = tmin;
2218#ifdef USE_SUBEXP_CALL
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2243 r = get_min_match_length(x, &tmin, env);
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 }
while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2254 *min = sn->end - sn->s;
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2274 *min = distance_multiply(*min, qn->lower);
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2307 case ENCLOSE_ABSENT:
2322get_max_match_length(
Node* node, OnigDistance *max,
ScanEnv* env)
2328 switch (NTYPE(node)) {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2333 *max = distance_add(*max, tmax);
2334 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2347 *max = sn->end - sn->s;
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2364 Node** nodes = SCANENV_MEM_NODES(env);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2375 if (*max < tmax) *max = tmax;
2380#ifdef USE_SUBEXP_CALL
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2385 *max = ONIG_INFINITE_DISTANCE;
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2399 *max = ONIG_INFINITE_DISTANCE;
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2433 case ENCLOSE_ABSENT:
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2452get_char_length_tree1(
Node* node,
regex_t* reg,
int*
len,
int level)
2459 switch (NTYPE(node)) {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2464 *
len = (int )distance_add(*
len, tlen);
2465 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2486 r = GET_CHAR_LEN_VARLEN;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2511 *
len = (int )distance_multiply(tlen, qn->lower);
2514 r = GET_CHAR_LEN_VARLEN;
2518#ifdef USE_SUBEXP_CALL
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg,
len, level);
2523 r = GET_CHAR_LEN_VARLEN;
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *
len = en->char_len;
2545 r = get_char_length_tree1(en->target, reg,
len, level);
2547 en->char_len = *
len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg,
len, level);
2558 case ENCLOSE_ABSENT:
2569 r = GET_CHAR_LEN_VARLEN;
2579 return get_char_length_tree1(node, reg,
len, 0);
2599 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2600 NCTYPE(y)->not != NCTYPE(x)->not &&
2601 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2611 tmp = x; x = y; y = tmp;
2631 switch (NCTYPE(y)->ctype) {
2632 case ONIGENC_CTYPE_WORD:
2633 if (NCTYPE(y)->not == 0) {
2634 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2635 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2636 if (BITSET_AT(xc->bs, i)) {
2637 if (NCTYPE(y)->ascii_range) {
2638 if (IS_CODE_SB_WORD(reg->enc, i))
return 0;
2641 if (ONIGENC_IS_CODE_WORD(reg->enc, i))
return 0;
2650 if (IS_NOT_NULL(xc->mbuf))
return 0;
2651 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2653 if (NCTYPE(y)->ascii_range)
2654 is_word = IS_CODE_SB_WORD(reg->enc, i);
2656 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2658 if (!IS_NCCLASS_NOT(xc)) {
2659 if (BITSET_AT(xc->bs, i))
2663 if (! BITSET_AT(xc->bs, i))
2682 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2683 v = BITSET_AT(xc->bs, i);
2684 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2685 (v == 0 && IS_NCCLASS_NOT(xc))) {
2686 v = BITSET_AT(yc->bs, i);
2687 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2688 (v == 0 && IS_NCCLASS_NOT(yc)))
2692 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2693 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2712 if (NSTRING_LEN(x) == 0)
2717 switch (NCTYPE(y)->ctype) {
2718 case ONIGENC_CTYPE_WORD:
2719 if (NCTYPE(y)->ascii_range) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2721 return NCTYPE(y)->not;
2723 return !(NCTYPE(y)->not);
2726 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2727 return NCTYPE(y)->not;
2729 return !(NCTYPE(y)->not);
2741 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2742 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2743 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2751 len = NSTRING_LEN(x);
2752 if (
len > NSTRING_LEN(y))
len = NSTRING_LEN(y);
2753 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2758 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i <
len; i++, p++, q++) {
2759 if (*p != *q)
return 1;
2779get_head_value_node(
Node* node,
int exact,
regex_t* reg)
2781 Node* n = NULL_NODE;
2783 switch (NTYPE(node)) {
2787#ifdef USE_SUBEXP_CALL
2800 n = get_head_value_node(NCAR(node), exact, reg);
2806 if (sn->end <= sn->s)
2810 NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2819 if (qn->lower > 0) {
2820#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2821 if (IS_NOT_NULL(qn->head_exact))
2825 n = get_head_value_node(qn->target, exact, reg);
2834 case ENCLOSE_OPTION:
2836 OnigOptionType options = reg->options;
2838 reg->options = NENCLOSE(node)->option;
2839 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2840 reg->options = options;
2844 case ENCLOSE_MEMORY:
2845 case ENCLOSE_STOP_BACKTRACK:
2846 case ENCLOSE_CONDITION:
2847 n = get_head_value_node(en->target, exact, reg);
2850 case ENCLOSE_ABSENT:
2857 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2858 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2869check_type_tree(
Node* node,
int type_mask,
int enclose_mask,
int anchor_mask)
2874 if ((NTYPE2BIT(type) & type_mask) == 0)
2881 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2883 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2887 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2894 if ((en->type & enclose_mask) == 0)
2897 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2902 type = NANCHOR(node)->type;
2903 if ((type & anchor_mask) == 0)
2906 if (NANCHOR(node)->target)
2907 r = check_type_tree(NANCHOR(node)->target,
2908 type_mask, enclose_mask, anchor_mask);
2917#ifdef USE_SUBEXP_CALL
2919# define RECURSION_EXIST 1
2920# define RECURSION_INFINITE 2
2923subexp_inf_recursive_check(
Node* node,
ScanEnv* env,
int head)
2938 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2939 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2942 ret = get_min_match_length(NCAR(x), &min, env);
2943 if (ret != 0)
return ret;
2944 if (min != 0) head = 0;
2946 }
while (IS_NOT_NULL(x = NCDR(x)));
2953 r = RECURSION_EXIST;
2955 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2956 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2958 }
while (IS_NOT_NULL(node = NCDR(node)));
2963 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2964 if (r == RECURSION_EXIST) {
2965 if (NQTFR(node)->lower == 0) r = 0;
2973 case ANCHOR_PREC_READ:
2974 case ANCHOR_PREC_READ_NOT:
2975 case ANCHOR_LOOK_BEHIND:
2976 case ANCHOR_LOOK_BEHIND_NOT:
2977 r = subexp_inf_recursive_check(an->target, env, head);
2984 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2988 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2990 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2991 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2993 SET_ENCLOSE_STATUS(node, NST_MARK2);
2994 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2995 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3007subexp_inf_recursive_check_trav(
Node* node,
ScanEnv* env)
3017 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3018 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3022 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3029 case ANCHOR_PREC_READ:
3030 case ANCHOR_PREC_READ_NOT:
3031 case ANCHOR_LOOK_BEHIND:
3032 case ANCHOR_LOOK_BEHIND_NOT:
3033 r = subexp_inf_recursive_check_trav(an->target, env);
3043 if (IS_ENCLOSE_RECURSION(en)) {
3044 SET_ENCLOSE_STATUS(node, NST_MARK1);
3045 r = subexp_inf_recursive_check(en->target, env, 1);
3046 if (r > 0)
return ONIGERR_NEVER_ENDING_RECURSION;
3047 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3049 r = subexp_inf_recursive_check_trav(en->target, env);
3062subexp_recursive_check(
Node* node)
3066 switch (NTYPE(node)) {
3070 r |= subexp_recursive_check(NCAR(node));
3071 }
while (IS_NOT_NULL(node = NCDR(node)));
3075 r = subexp_recursive_check(NQTFR(node)->target);
3082 case ANCHOR_PREC_READ:
3083 case ANCHOR_PREC_READ_NOT:
3084 case ANCHOR_LOOK_BEHIND:
3085 case ANCHOR_LOOK_BEHIND_NOT:
3086 r = subexp_recursive_check(an->target);
3093 r = subexp_recursive_check(NCALL(node)->target);
3094 if (r != 0) SET_CALL_RECURSION(node);
3098 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3100 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3103 SET_ENCLOSE_STATUS(node, NST_MARK2);
3104 r = subexp_recursive_check(NENCLOSE(node)->target);
3105 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3118subexp_recursive_check_trav(
Node* node,
ScanEnv* env)
3120# define FOUND_CALLED_NODE 1
3132 ret = subexp_recursive_check_trav(NCAR(node), env);
3133 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3134 else if (ret < 0)
return ret;
3135 }
while (IS_NOT_NULL(node = NCDR(node)));
3140 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3141 if (NQTFR(node)->upper == 0) {
3142 if (r == FOUND_CALLED_NODE)
3143 NQTFR(node)->is_referred = 1;
3151 case ANCHOR_PREC_READ:
3152 case ANCHOR_PREC_READ_NOT:
3153 case ANCHOR_LOOK_BEHIND:
3154 case ANCHOR_LOOK_BEHIND_NOT:
3155 r = subexp_recursive_check_trav(an->target, env);
3165 if (! IS_ENCLOSE_RECURSION(en)) {
3166 if (IS_ENCLOSE_CALLED(en)) {
3167 SET_ENCLOSE_STATUS(node, NST_MARK1);
3168 r = subexp_recursive_check(en->target);
3169 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3170 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3173 r = subexp_recursive_check_trav(en->target, env);
3174 if (IS_ENCLOSE_CALLED(en))
3175 r |= FOUND_CALLED_NODE;
3196 r = setup_subexp_call(NCAR(node), env);
3197 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3202 r = setup_subexp_call(NCAR(node), env);
3203 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3207 r = setup_subexp_call(NQTFR(node)->target, env);
3210 r = setup_subexp_call(NENCLOSE(node)->target, env);
3216 Node** nodes = SCANENV_MEM_NODES(env);
3218 if (cn->group_num != 0) {
3219 int gnum = cn->group_num;
3221# ifdef USE_NAMED_GROUP
3222 if (env->num_named > 0 &&
3223 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3224 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3225 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3228 if (gnum > env->num_mem) {
3229 onig_scan_env_set_error_string(env,
3230 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3231 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3234# ifdef USE_NAMED_GROUP
3237 cn->target = nodes[cn->group_num];
3238 if (IS_NULL(cn->target)) {
3239 onig_scan_env_set_error_string(env,
3240 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3241 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3243 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3244 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3245 cn->unset_addr_list = env->unset_addr_list;
3247# ifdef USE_NAMED_GROUP
3248# ifdef USE_PERL_SUBEXP_CALL
3249 else if (cn->name == cn->name_end) {
3256 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3259 onig_scan_env_set_error_string(env,
3260 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3261 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3264 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3265 onig_scan_env_set_error_string(env,
3266 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3267 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3270 cn->group_num = refs[0];
3283 case ANCHOR_PREC_READ:
3284 case ANCHOR_PREC_READ_NOT:
3285 case ANCHOR_LOOK_BEHIND:
3286 case ANCHOR_LOOK_BEHIND_NOT:
3287 r = setup_subexp_call(an->target, env);
3301#define IN_ALT (1<<0)
3302#define IN_NOT (1<<1)
3303#define IN_REPEAT (1<<2)
3304#define IN_VAR_REPEAT (1<<3)
3305#define IN_CALL (1<<4)
3306#define IN_RECCALL (1<<5)
3307#define IN_LOOK_BEHIND (1<<6)
3314divide_look_behind_alternatives(
Node* node)
3316 Node *head, *np, *insert_node;
3318 int anc_type = an->type;
3322 swap_node(node, head);
3324 NANCHOR(head)->target = np;
3327 while ((np = NCDR(np)) != NULL_NODE) {
3328 insert_node = onig_node_new_anchor(anc_type);
3329 CHECK_NULL_RETURN_MEMERR(insert_node);
3330 NANCHOR(insert_node)->target = NCAR(np);
3331 NCAR(np) = insert_node;
3334 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3337 SET_NTYPE(np, NT_LIST);
3338 }
while ((np = NCDR(np)) != NULL_NODE);
3349 r = get_char_length_tree(an->target, reg, &
len);
3352 else if (r == GET_CHAR_LEN_VARLEN)
3353 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3354 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3355 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3356 r = divide_look_behind_alternatives(node);
3358 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3371 if (type == NT_QTFR) {
3373 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3374#ifdef USE_QTFR_PEEK_NEXT
3375 Node* n = get_head_value_node(next_node, 1, reg);
3377 if (IS_NOT_NULL(n) && NSTR(n)->s[0] !=
'\0') {
3378 qn->next_head_exact = n;
3382 if (qn->lower <= 1) {
3383 int ttype = NTYPE(qn->target);
3384 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3386 x = get_head_value_node(qn->target, 0, reg);
3387 if (IS_NOT_NULL(x)) {
3388 y = get_head_value_node(next_node, 0, reg);
3389 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3390 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3391 CHECK_NULL_RETURN_MEMERR(en);
3392 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3393 swap_node(node, en);
3394 NENCLOSE(node)->target = en;
3401 else if (type == NT_ENCLOSE) {
3403 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3413update_string_node_case_fold(
regex_t* reg,
Node *node)
3415 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3416 UChar *sbuf, *ebuf, *sp;
3418 OnigDistance sbuf_size;
3422 sbuf_size = (end - sn->s) * 2;
3423 sbuf = (UChar* )
xmalloc(sbuf_size);
3424 CHECK_NULL_RETURN_MEMERR(sbuf);
3425 ebuf = sbuf + sbuf_size;
3430 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3431 for (i = 0; i <
len; i++) {
3433 UChar* p = (UChar* )
xrealloc(sbuf, sbuf_size * 2);
3436 return ONIGERR_MEMORY;
3439 sp = sbuf + sbuf_size;
3441 ebuf = sbuf + sbuf_size;
3448 r = onig_node_str_set(node, sbuf, sp);
3455expand_case_fold_make_rem_string(
Node** rnode, UChar *s, UChar *end,
3461 node = onig_node_new_str(s, end);
3462 if (IS_NULL(node))
return ONIGERR_MEMORY;
3464 r = update_string_node_case_fold(reg, node);
3466 onig_node_free(node);
3470 NSTRING_SET_AMBIG(node);
3471 NSTRING_SET_DONT_GET_OPT_INFO(node);
3482 for (i = 0; i < item_num; i++) {
3483 if (items[i].byte_len != slen) {
3486 if (items[i].code_len != 1) {
3495 UChar *p,
int slen, UChar *end,
3498 int r, i, j,
len, varlen;
3499 Node *anode, *var_anode, *snode, *xnode, *an;
3500 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3502 *rnode = var_anode = NULL_NODE;
3505 for (i = 0; i < item_num; i++) {
3506 if (items[i].byte_len != slen) {
3513 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3514 if (IS_NULL(var_anode))
return ONIGERR_MEMORY;
3516 xnode = onig_node_new_list(NULL, NULL);
3517 if (IS_NULL(xnode))
goto mem_err;
3518 NCAR(var_anode) = xnode;
3520 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3521 if (IS_NULL(anode))
goto mem_err;
3522 NCAR(xnode) = anode;
3525 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3526 if (IS_NULL(anode))
return ONIGERR_MEMORY;
3529 snode = onig_node_new_str(p, p + slen);
3530 if (IS_NULL(snode))
goto mem_err;
3532 NCAR(anode) = snode;
3534 for (i = 0; i < item_num; i++) {
3535 snode = onig_node_new_str(NULL, NULL);
3536 if (IS_NULL(snode))
goto mem_err;
3538 for (j = 0; j < items[i].code_len; j++) {
3539 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3545 r = onig_node_str_cat(snode, buf, buf +
len);
3546 if (r != 0)
goto mem_err2;
3549 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3554 if (items[i].byte_len != slen) {
3556 UChar *q = p + items[i].byte_len;
3559 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3565 xnode = onig_node_list_add(NULL_NODE, snode);
3566 if (IS_NULL(xnode)) {
3568 onig_node_free(rem);
3571 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3573 onig_node_free(xnode);
3574 onig_node_free(rem);
3584 NCDR(var_anode) = an;
3597 onig_node_free(snode);
3600 onig_node_free(*rnode);
3602 return ONIGERR_MEMORY;
3605#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3608expand_case_fold_string(
Node* node,
regex_t* reg,
int state)
3610 int r, n,
len, alt_num;
3612 int is_in_look_behind;
3613 UChar *start, *end, *p;
3614 Node *top_root, *root, *snode, *prev_node;
3618 if (NSTRING_IS_AMBIG(node))
return 0;
3624 if (start >= end)
return 0;
3626 is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3629 top_root = root = prev_node = snode = NULL_NODE;
3633 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3640 len = enclen(reg->enc, p, end);
3642 varlen = is_case_fold_variable_len(n, items,
len);
3643 if (n == 0 || varlen == 0 || is_in_look_behind) {
3644 if (IS_NULL(snode)) {
3645 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3646 onig_node_free(top_root);
3647 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3648 if (IS_NULL(root)) {
3649 onig_node_free(prev_node);
3654 prev_node = snode = onig_node_new_str(NULL, NULL);
3655 if (IS_NULL(snode))
goto mem_err;
3656 if (IS_NOT_NULL(root)) {
3657 if (IS_NULL(onig_node_list_add(root, snode))) {
3658 onig_node_free(snode);
3664 r = onig_node_str_cat(snode, p, p +
len);
3665 if (r != 0)
goto err;
3669 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION)
break;
3671 if (IS_NOT_NULL(snode)) {
3672 r = update_string_node_case_fold(reg, snode);
3674 NSTRING_SET_AMBIG(snode);
3677 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3678 onig_node_free(top_root);
3679 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3680 if (IS_NULL(root)) {
3681 onig_node_free(prev_node);
3686 r = expand_case_fold_string_alt(n, items, p,
len, end, reg, &prev_node);
3687 if (r < 0)
goto mem_err;
3689 if (IS_NULL(root)) {
3690 top_root = prev_node;
3693 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3694 onig_node_free(prev_node);
3699 root = NCAR(prev_node);
3702 if (IS_NOT_NULL(root)) {
3703 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3704 onig_node_free(prev_node);
3715 if (IS_NOT_NULL(snode)) {
3716 r = update_string_node_case_fold(reg, snode);
3718 NSTRING_SET_AMBIG(snode);
3725 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3726 if (r != 0)
goto mem_err;
3728 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3729 onig_node_free(top_root);
3730 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3731 if (IS_NULL(root)) {
3732 onig_node_free(srem);
3733 onig_node_free(prev_node);
3738 if (IS_NULL(root)) {
3742 if (IS_NULL(onig_node_list_add(root, srem))) {
3743 onig_node_free(srem);
3750 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3751 swap_node(node, top_root);
3752 onig_node_free(top_root);
3759 onig_node_free(top_root);
3764#ifdef USE_COMBINATION_EXPLOSION_CHECK
3766# define CEC_THRES_NUM_BIG_REPEAT 512
3767# define CEC_INFINITE_NUM 0x7fffffff
3769# define CEC_IN_INFINITE_REPEAT (1<<0)
3770# define CEC_IN_FINITE_REPEAT (1<<1)
3771# define CEC_CONT_BIG_REPEAT (1<<2)
3774setup_comb_exp_check(
Node* node,
int state,
ScanEnv* env)
3784 r = setup_comb_exp_check(NCAR(node), r, env);
3785 }
while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3793 ret = setup_comb_exp_check(NCAR(node), state, env);
3795 }
while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3801 int child_state = state;
3804 Node* target = qn->target;
3807 if (! IS_REPEAT_INFINITE(qn->upper)) {
3808 if (qn->upper > 1) {
3810 child_state |= CEC_IN_FINITE_REPEAT;
3813 if (env->backrefed_mem == 0) {
3814 if (NTYPE(qn->target) == NT_ENCLOSE) {
3816 if (en->type == ENCLOSE_MEMORY) {
3817 if (NTYPE(en->target) == NT_QTFR) {
3819 if (IS_REPEAT_INFINITE(q->upper)
3820 && q->greedy == qn->greedy) {
3821 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3823 child_state = state;
3832 if (state & CEC_IN_FINITE_REPEAT) {
3833 qn->comb_exp_check_num = -1;
3836 if (IS_REPEAT_INFINITE(qn->upper)) {
3837 var_num = CEC_INFINITE_NUM;
3838 child_state |= CEC_IN_INFINITE_REPEAT;
3841 var_num = qn->upper - qn->lower;
3844 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3845 add_state |= CEC_CONT_BIG_REPEAT;
3847 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3848 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3849 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3850 if (qn->comb_exp_check_num == 0) {
3851 env->num_comb_exp_check++;
3852 qn->comb_exp_check_num = env->num_comb_exp_check;
3853 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3854 env->comb_exp_max_regnum = env->curr_max_regnum;
3859 r = setup_comb_exp_check(target, child_state, env);
3869 case ENCLOSE_MEMORY:
3871 if (env->curr_max_regnum < en->regnum)
3872 env->curr_max_regnum = en->regnum;
3874 r = setup_comb_exp_check(en->target, state, env);
3879 r = setup_comb_exp_check(en->target, state, env);
3885# ifdef USE_SUBEXP_CALL
3887 if (IS_CALL_RECURSION(NCALL(node)))
3888 env->has_recursion = 1;
3890 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3921 Node* prev = NULL_NODE;
3923 r = setup_tree(NCAR(node), reg, state, env);
3924 if (IS_NOT_NULL(prev) && r == 0) {
3925 r = next_setup(prev, NCAR(node), reg);
3928 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3934 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3935 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3942 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3943 r = expand_case_fold_string(node, reg, state);
3951#ifdef USE_SUBEXP_CALL
3960 Node** nodes = SCANENV_MEM_NODES(env);
3963 for (i = 0; i < br->back_num; i++) {
3964 if (p[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
3965 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3966 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3967#ifdef USE_BACKREF_WITH_LEVEL
3968 if (IS_BACKREF_NEST_LEVEL(br)) {
3969 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3972 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3981 Node* target = qn->target;
3983 if ((state & IN_REPEAT) != 0) {
3984 qn->state |= NST_IN_REPEAT;
3987 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3988 r = get_min_match_length(target, &d, env);
3991 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3992#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3993 r = quantifiers_memory_node_info(target);
3996 qn->target_empty_info = r;
4000 r = get_max_match_length(target, &d, env);
4001 if (r == 0 && d == 0) {
4004 if (qn->lower > 1) qn->lower = 1;
4005 if (NTYPE(target) == NT_STR) {
4006 qn->upper = qn->lower = 0;
4014 if (qn->lower != qn->upper)
4015 state |= IN_VAR_REPEAT;
4016 r = setup_tree(target, reg, state, env);
4020#define EXPAND_STRING_MAX_LENGTH 100
4021 if (NTYPE(target) == NT_STR) {
4022 if (qn->lower > 1) {
4023 int i, n = qn->lower;
4024 OnigDistance
len = NSTRING_LEN(target);
4028 np = onig_node_new_str(sn->s, sn->end);
4029 if (IS_NULL(np))
return ONIGERR_MEMORY;
4030 NSTR(np)->flag = sn->flag;
4032 for (i = 1; i < n && (i+1) *
len <= EXPAND_STRING_MAX_LENGTH; i++) {
4033 r = onig_node_str_cat(np, sn->s, sn->end);
4039 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4043 if (! IS_REPEAT_INFINITE(qn->upper))
4046 np1 = onig_node_new_list(np, NULL);
4049 return ONIGERR_MEMORY;
4051 swap_node(np1, node);
4052 np2 = onig_node_list_add(node, np1);
4054 onig_node_free(np1);
4055 return ONIGERR_MEMORY;
4059 swap_node(np, node);
4066#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4067 if (qn->greedy && (qn->target_empty_info != 0)) {
4068 if (NTYPE(target) == NT_QTFR) {
4070 if (IS_NOT_NULL(tqn->head_exact)) {
4071 qn->head_exact = tqn->head_exact;
4072 tqn->head_exact = NULL;
4076 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4088 case ENCLOSE_OPTION:
4090 OnigOptionType options = reg->options;
4091 reg->options = NENCLOSE(node)->option;
4092 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4093 reg->options = options;
4097 case ENCLOSE_MEMORY:
4098 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4099 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4102 if (IS_ENCLOSE_CALLED(en))
4104 if (IS_ENCLOSE_RECURSION(en))
4105 state |= IN_RECCALL;
4106 else if ((state & IN_RECCALL) != 0)
4107 SET_CALL_RECURSION(node);
4108 r = setup_tree(en->target, reg, state, env);
4111 case ENCLOSE_STOP_BACKTRACK:
4113 Node* target = en->target;
4114 r = setup_tree(target, reg, state, env);
4115 if (NTYPE(target) == NT_QTFR) {
4117 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4119 int qtype = NTYPE(tqn->target);
4120 if (IS_NODE_TYPE_SIMPLE(qtype))
4121 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4127 case ENCLOSE_CONDITION:
4128#ifdef USE_NAMED_GROUP
4129 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4130 env->num_named > 0 &&
4131 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4132 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4133 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4136 if (NENCLOSE(node)->regnum > env->num_mem)
4137 return ONIGERR_INVALID_BACKREF;
4138 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4141 case ENCLOSE_ABSENT:
4142 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4153 case ANCHOR_PREC_READ:
4154 r = setup_tree(an->target, reg, state, env);
4156 case ANCHOR_PREC_READ_NOT:
4157 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4161#define ALLOWED_TYPE_IN_LB \
4162 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4163 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4165#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4166#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4168#define ALLOWED_ANCHOR_IN_LB \
4169( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4170 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4171 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4172 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4173#define ALLOWED_ANCHOR_IN_LB_NOT \
4174( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4175 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4176 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4177 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4179 case ANCHOR_LOOK_BEHIND:
4181 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4182 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4183 if (r < 0)
return r;
4184 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4185 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4186 r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4187 if (r != 0)
return r;
4188 r = setup_look_behind(node, reg, env);
4192 case ANCHOR_LOOK_BEHIND_NOT:
4194 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4195 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4196 if (r < 0)
return r;
4197 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4198 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4199 r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4201 if (r != 0)
return r;
4202 r = setup_look_behind(node, reg, env);
4218set_bm_skip(UChar* s, UChar* end,
regex_t* reg,
4219 UChar skip[],
int** int_skip,
int ignore_case)
4221 OnigDistance i,
len;
4222 int clen, flen, n, j, k;
4223 UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4228 if (
len < ONIG_CHAR_TABLE_SIZE) {
4230 for (i = 0; i <
len; i += clen) {
4232 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4234 clen = enclen(enc, p, end);
4236 clen = (int )(end - p);
4238 for (j = 0; j < n; j++) {
4239 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4244 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4257 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4258 skip[i] = (UChar )(
len + 1);
4260 for (i = 0; i <
len; i += clen) {
4263 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4265 clen = enclen(enc, p, end);
4267 clen = (int )(end - p);
4269 for (j = 0; j < clen; j++) {
4270 skip[s[i + j]] = (UChar )(
len - i - j);
4271 for (k = 0; k < n; k++) {
4272 ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4273 skip[buf[j]] = (UChar )(
len - i - j);
4279# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4281 return ONIGERR_TYPE_BUG;
4283 if (IS_NULL(*int_skip)) {
4284 *int_skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4285 if (IS_NULL(*int_skip))
return ONIGERR_MEMORY;
4287 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(
len + 1);
4290 for (i = 0; i <
len; i += clen) {
4293 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4295 clen = enclen(enc, p, end);
4297 clen = (int )(end - p);
4299 for (j = 0; j < n; j++) {
4300 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4302 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4306 for (j = 0; j < clen; j++) {
4307 (*int_skip)[s[i + j]] = (int )(
len - i - j);
4308 for (k = 0; k < n; k++) {
4309 (*int_skip)[buf[k][j]] = (int )(
len - i - j);
4326 OnigOptionType options;
4327 OnigCaseFoldType case_fold_flag;
4343 UChar s[OPT_EXACT_MAXLEN];
4351 UChar map[ONIG_CHAR_TABLE_SIZE];
4369 static const short int ByteValTable[] = {
4370 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4371 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4372 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4373 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4374 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4375 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4376 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4377 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4380 if (i < numberof(ByteValTable)) {
4381 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4384 return (
int )ByteValTable[i];
4394 static const short int dist_vals[] = {
4395 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4396 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4397 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4398 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4399 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4400 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4401 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4402 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4403 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4404 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4409 if (mm->max == ONIG_INFINITE_DISTANCE)
return 0;
4411 d = mm->max - mm->min;
4412 if (d < numberof(dist_vals))
4414 return (
int )dist_vals[d];
4422 if (v2 <= 0)
return -1;
4423 if (v1 <= 0)
return 1;
4425 v1 *= distance_value(d1);
4426 v2 *= distance_value(d2);
4428 if (v2 > v1)
return 1;
4429 if (v2 < v1)
return -1;
4431 if (d2->min < d1->min)
return 1;
4432 if (d2->min > d1->min)
return -1;
4439 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4444set_mml(
MinMaxLen* mml, OnigDistance min, OnigDistance max)
4453 mml->min = mml->max = 0;
4459 to->min = from->min;
4460 to->max = from->max;
4466 to->min = distance_add(to->min, from->min);
4467 to->max = distance_add(to->max, from->max);
4474 to->min = distance_add(to->min,
len);
4475 to->max = distance_add(to->max,
len);
4482 if (to->min > from->min) to->min = from->min;
4483 if (to->max < from->max) to->max = from->max;
4495 anc->left_anchor = 0;
4496 anc->right_anchor = 0;
4507 OnigDistance left_len, OnigDistance right_len)
4509 clear_opt_anc_info(to);
4511 to->left_anchor = left->left_anchor;
4512 if (left_len == 0) {
4513 to->left_anchor |= right->left_anchor;
4516 to->right_anchor = right->right_anchor;
4517 if (right_len == 0) {
4518 to->right_anchor |= left->right_anchor;
4521 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4526is_left_anchor(
int anc)
4528 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4529 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4530 anc == ANCHOR_PREC_READ_NOT)
4539 if ((to->left_anchor & anc) != 0)
return 1;
4541 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4547 if (is_left_anchor(anc))
4548 to->left_anchor |= anc;
4550 to->right_anchor |= anc;
4556 if (is_left_anchor(anc))
4557 to->left_anchor &= ~anc;
4559 to->right_anchor &= ~anc;
4565 to->left_anchor &= add->left_anchor;
4566 to->right_anchor &= add->right_anchor;
4572 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4578 clear_mml(&ex->mmd);
4579 clear_opt_anc_info(&ex->anc);
4581 ex->ignore_case = -1;
4599 if (to->ignore_case < 0)
4600 to->ignore_case = add->ignore_case;
4601 else if (to->ignore_case != add->ignore_case)
4606 for (i = to->len; p < end; ) {
4607 len = enclen(enc, p, end);
4608 if (i +
len > OPT_EXACT_MAXLEN)
break;
4609 for (j = 0; j <
len && p < end; j++)
4614 to->reach_end = (p == end ? add->reach_end : 0);
4616 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4617 if (! to->reach_end) tanc.right_anchor = 0;
4618 copy_opt_anc_info(&to->anc, &tanc);
4622concat_opt_exact_info_str(
OptExactInfo* to, UChar* s, UChar* end,
4628 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4629 len = enclen(enc, p, end);
4630 if (i +
len > OPT_EXACT_MAXLEN)
break;
4631 for (j = 0; j <
len && p < end; j++)
4643 if (add->len == 0 || to->len == 0) {
4644 clear_opt_exact_info(to);
4648 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4649 clear_opt_exact_info(to);
4653 for (i = 0; i < to->len && i < add->len; ) {
4654 if (to->s[i] != add->s[i])
break;
4655 len = enclen(env->enc, to->s + i, to->s + to->len);
4657 for (j = 1; j <
len; j++) {
4658 if (to->s[i+j] != add->s[i+j])
break;
4664 if (! add->reach_end || i < add->
len || i < to->
len) {
4668 if (to->ignore_case < 0)
4669 to->ignore_case = add->ignore_case;
4670 else if (add->ignore_case >= 0)
4671 to->ignore_case |= add->ignore_case;
4673 alt_merge_opt_anc_info(&to->anc, &add->anc);
4674 if (! to->reach_end) to->anc.right_anchor = 0;
4689 copy_opt_exact_info(now, alt);
4692 else if (v1 <= 2 && v2 <= 2) {
4694 v2 = map_position_value(enc, now->s[0]);
4695 v1 = map_position_value(enc, alt->s[0]);
4697 if (now->len > 1) v1 += 5;
4698 if (alt->len > 1) v2 += 5;
4701 if (now->ignore_case <= 0) v1 *= 2;
4702 if (alt->ignore_case <= 0) v2 *= 2;
4704 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4705 copy_opt_exact_info(now, alt);
4714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4718 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4719 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4720 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4722 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4723 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4724 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4725 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4726 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4727 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4728 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4729 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4733 xmemcpy(map, &clean_info,
sizeof(
OptMapInfo));
4745 if (map->map[c] == 0) {
4747 map->value += map_position_value(enc, c);
4752add_char_amb_opt_map_info(
OptMapInfo* map, UChar* p, UChar* end,
4756 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4759 add_char_opt_map_info(map, p[0], enc);
4761 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4762 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4763 if (n < 0)
return n;
4765 for (i = 0; i < n; i++) {
4766 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4767 add_char_opt_map_info(map, buf[0], enc);
4776 const int z = 1<<15;
4780 if (alt->value == 0) return ;
4781 if (now->value == 0) {
4782 copy_opt_map_info(now, alt);
4786 v1 = z / now->value;
4787 v2 = z / alt->value;
4788 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4789 copy_opt_map_info(now, alt);
4795#define COMP_EM_BASE 20
4798 if (m->value <= 0)
return -1;
4800 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4801 vm = COMP_EM_BASE * 5 * 2 / m->value;
4802 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4811 if (to->value == 0) return ;
4812 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4813 clear_opt_map_info(to);
4817 alt_merge_mml(&to->mmd, &add->mmd);
4820 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4825 val += map_position_value(enc, i);
4829 alt_merge_opt_anc_info(&to->anc, &add->anc);
4835 copy_mml(&(opt->exb.mmd), mmd);
4836 copy_mml(&(opt->expr.mmd), mmd);
4837 copy_mml(&(opt->map.mmd), mmd);
4843 clear_mml(&opt->len);
4844 clear_opt_anc_info(&opt->anc);
4845 clear_opt_exact_info(&opt->exb);
4846 clear_opt_exact_info(&opt->exm);
4847 clear_opt_exact_info(&opt->expr);
4848 clear_opt_map_info(&opt->map);
4860 int exb_reach, exm_reach;
4863 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4864 copy_opt_anc_info(&to->anc, &tanc);
4866 if (add->exb.len > 0 && to->len.max == 0) {
4867 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4868 to->len.max, add->len.max);
4869 copy_opt_anc_info(&add->exb.anc, &tanc);
4872 if (add->map.value > 0 && to->len.max == 0) {
4873 if (add->map.mmd.max == 0)
4874 add->map.anc.left_anchor |= to->anc.left_anchor;
4877 exb_reach = to->exb.reach_end;
4878 exm_reach = to->exm.reach_end;
4880 if (add->len.max != 0)
4881 to->exb.reach_end = to->exm.reach_end = 0;
4883 if (add->exb.len > 0) {
4885 concat_opt_exact_info(&to->exb, &add->exb, enc);
4886 clear_opt_exact_info(&add->exb);
4888 else if (exm_reach) {
4889 concat_opt_exact_info(&to->exm, &add->exb, enc);
4890 clear_opt_exact_info(&add->exb);
4893 select_opt_exact_info(enc, &to->exm, &add->exb);
4894 select_opt_exact_info(enc, &to->exm, &add->exm);
4896 if (to->expr.len > 0) {
4897 if (add->len.max > 0) {
4898 if (to->expr.len > (
int )add->len.max)
4899 to->expr.len = (int )add->len.max;
4901 if (to->expr.mmd.max == 0)
4902 select_opt_exact_info(enc, &to->exb, &to->expr);
4904 select_opt_exact_info(enc, &to->exm, &to->expr);
4907 else if (add->expr.len > 0) {
4908 copy_opt_exact_info(&to->expr, &add->expr);
4911 select_opt_map_info(&to->map, &add->map);
4913 add_mml(&to->len, &add->len);
4919 alt_merge_opt_anc_info (&to->anc, &add->anc);
4920 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4921 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4922 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4923 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4925 alt_merge_mml(&to->len, &add->len);
4929#define MAX_NODE_OPT_INFO_REF_COUNT 5
4937 clear_node_opt_info(opt);
4938 set_bound_node_opt_info(opt, &env->mmd);
4948 copy_opt_env(&nenv, env);
4950 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4952 add_mml(&nenv.mmd, &nopt.len);
4953 concat_left_node_opt_info(env->enc, opt, &nopt);
4955 }
while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4965 r = optimize_node_left(NCAR(nd), &nopt, env);
4967 if (nd == node) copy_node_opt_info(opt, &nopt);
4968 else alt_merge_node_opt_info(opt, &nopt, env);
4970 }
while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4977 OnigDistance slen = sn->end - sn->s;
4978 int is_raw = NSTRING_IS_RAW(node);
4980 if (! NSTRING_IS_AMBIG(node)) {
4981 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4983 opt->exb.ignore_case = 0;
4985 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4987 set_mml(&opt->len, slen, slen);
4992 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4993 int n = onigenc_strlen(env->enc, sn->s, sn->end);
4994 max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
4997 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4999 opt->exb.ignore_case = 1;
5002 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5003 env->enc, env->case_fold_flag);
5010 set_mml(&opt->len, slen, max);
5013 if ((OnigDistance )opt->exb.len == slen)
5014 opt->exb.reach_end = 1;
5025 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5026 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5027 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5029 set_mml(&opt->len, min, max);
5032 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5033 z = BITSET_AT(cc->bs, i);
5034 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5035 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5038 set_mml(&opt->len, 1, 1);
5048 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5053 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5054 switch (NCTYPE(node)->ctype) {
5055 case ONIGENC_CTYPE_WORD:
5056 if (NCTYPE(node)->not != 0) {
5057 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5058 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5059 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5064 for (i = 0; i < maxcode; i++) {
5065 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5066 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5074 min = ONIGENC_MBC_MINLEN(env->enc);
5076 set_mml(&opt->len, min, max);
5082 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5083 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5084 set_mml(&opt->len, min, max);
5089 switch (NANCHOR(node)->type) {
5090 case ANCHOR_BEGIN_BUF:
5091 case ANCHOR_BEGIN_POSITION:
5092 case ANCHOR_BEGIN_LINE:
5093 case ANCHOR_END_BUF:
5094 case ANCHOR_SEMI_END_BUF:
5095 case ANCHOR_END_LINE:
5096 case ANCHOR_LOOK_BEHIND:
5097 case ANCHOR_PREC_READ_NOT:
5098 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5101 case ANCHOR_PREC_READ:
5105 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5107 if (nopt.exb.len > 0)
5108 copy_opt_exact_info(&opt->expr, &nopt.exb);
5109 else if (nopt.exm.len > 0)
5110 copy_opt_exact_info(&opt->expr, &nopt.exm);
5112 opt->expr.reach_end = 0;
5114 if (nopt.map.value > 0)
5115 copy_opt_map_info(&opt->map, &nopt.map);
5120 case ANCHOR_LOOK_BEHIND_NOT:
5129 OnigDistance min, max, tmin, tmax;
5130 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5133 if (br->state & NST_RECURSION) {
5134 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5137 backs = BACKREFS_P(br);
5138 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5140 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5142 for (i = 1; i < br->back_num; i++) {
5143 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5145 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5147 if (min > tmin) min = tmin;
5148 if (max < tmax) max = tmax;
5150 if (r == 0) set_mml(&opt->len, min, max);
5154#ifdef USE_SUBEXP_CALL
5156 if (IS_CALL_RECURSION(NCALL(node)))
5157 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5159 OnigOptionType save = env->options;
5160 env->options = NENCLOSE(NCALL(node)->target)->option;
5161 r = optimize_node_left(NCALL(node)->target, opt, env);
5162 env->options = save;
5170 OnigDistance min, max;
5174 r = optimize_node_left(qn->target, &nopt, env);
5177 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5178 if (env->mmd.max == 0 &&
5179 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5180 if (IS_MULTILINE(env->options))
5182 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5184 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5188 if (qn->lower > 0) {
5189 copy_node_opt_info(opt, &nopt);
5190 if (nopt.exb.len > 0) {
5191 if (nopt.exb.reach_end) {
5192 for (i = 2; i <= qn->lower &&
5193 ! is_full_opt_exact_info(&opt->exb); i++) {
5194 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5196 if (i < qn->lower) {
5197 opt->exb.reach_end = 0;
5202 if (qn->lower != qn->upper) {
5203 opt->exb.reach_end = 0;
5204 opt->exm.reach_end = 0;
5207 opt->exm.reach_end = 0;
5211 min = distance_multiply(nopt.len.min, qn->lower);
5212 if (IS_REPEAT_INFINITE(qn->upper))
5213 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5215 max = distance_multiply(nopt.len.max, qn->upper);
5217 set_mml(&opt->len, min, max);
5226 case ENCLOSE_OPTION:
5228 OnigOptionType save = env->options;
5230 env->options = en->option;
5231 r = optimize_node_left(en->target, opt, env);
5232 env->options = save;
5236 case ENCLOSE_MEMORY:
5237#ifdef USE_SUBEXP_CALL
5239 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5240 OnigDistance min, max;
5243 max = ONIG_INFINITE_DISTANCE;
5244 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5245 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5246 set_mml(&opt->len, min, max);
5251 r = optimize_node_left(en->target, opt, env);
5253 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5254 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5255 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5260 case ENCLOSE_STOP_BACKTRACK:
5261 case ENCLOSE_CONDITION:
5262 r = optimize_node_left(en->target, opt, env);
5265 case ENCLOSE_ABSENT:
5266 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5274 fprintf(stderr,
"optimize_node_left: undefined node type %d\n",
5277 r = ONIGERR_TYPE_BUG;
5289 if (e->len == 0)
return 0;
5291 reg->exact = (UChar* )
xmalloc(e->len);
5292 CHECK_NULL_RETURN_MEMERR(reg->exact);
5293 xmemcpy(reg->exact, e->s, e->len);
5294 reg->exact_end = reg->exact + e->len;
5297 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5299 if (e->ignore_case > 0) {
5300 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5301 e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5302 reg->map, &(reg->int_map), 1);
5303 reg->exact_end = reg->exact + e->len;
5305 reg->optimize = (allow_reverse != 0
5306 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5308 else if (e->len > 0) {
5309 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5315 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5319 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5320 set_bm_skip(reg->exact, reg->exact_end, reg,
5321 reg->map, &(reg->int_map), 0);
5322 reg->optimize = (allow_reverse != 0
5323 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5326 reg->optimize = ONIG_OPTIMIZE_EXACT;
5330 reg->dmin = e->mmd.min;
5331 reg->dmax = e->mmd.max;
5333 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5334 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5345 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5346 reg->map[i] = m->map[i];
5348 reg->optimize = ONIG_OPTIMIZE_MAP;
5349 reg->dmin = m->mmd.min;
5350 reg->dmax = m->mmd.max;
5352 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5353 reg->threshold_len = (int )(reg->dmin + 1);
5360 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5361 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5364#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5365static void print_optimize_info(
FILE* f,
regex_t* reg);
5377 env.options = reg->options;
5378 env.case_fold_flag = reg->case_fold_flag;
5379 env.scan_env = scan_env;
5380 clear_mml(&env.mmd);
5382 r = optimize_node_left(node, &opt, &env);
5385 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5386 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5387 ANCHOR_LOOK_BEHIND);
5389 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5390 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5392 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5393 ANCHOR_PREC_READ_NOT);
5395 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5396 reg->anchor_dmin = opt.len.min;
5397 reg->anchor_dmax = opt.len.max;
5400 if (opt.exb.len > 0 || opt.exm.len > 0) {
5401 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5402 if (opt.map.value > 0 &&
5403 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5407 r = set_optimize_exact_info(reg, &opt.exb);
5408 set_sub_anchor(reg, &opt.exb.anc);
5411 else if (opt.map.value > 0) {
5413 set_optimize_map_info(reg, &opt.map);
5414 set_sub_anchor(reg, &opt.map.anc);
5417 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5418 if (opt.len.max == 0)
5419 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5422#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5423 print_optimize_info(stderr, reg);
5429clear_optimize_info(
regex_t* reg)
5431 reg->optimize = ONIG_OPTIMIZE_NONE;
5433 reg->anchor_dmin = 0;
5434 reg->anchor_dmax = 0;
5435 reg->sub_anchor = 0;
5436 reg->exact_end = (UChar* )NULL;
5437 reg->threshold_len = 0;
5439 reg->exact = (UChar* )NULL;
5445 const UChar *s,
const UChar *end)
5447 fprintf(fp,
"\nPATTERN: /");
5449 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5455 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5457 fprintf(fp,
" 0x%04x ", (
int )code);
5460 fputc((
int )code, fp);
5463 p += enclen(enc, p, end);
5468 fputc((
int )*s, fp);
5473 fprintf(fp,
"/ (%s)\n", enc->name);
5477#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5479print_distance_range(
FILE* f, OnigDistance a, OnigDistance b)
5481 if (a == ONIG_INFINITE_DISTANCE)
5484 fprintf(f,
"(%"PRIuPTR
")", a);
5488 if (b == ONIG_INFINITE_DISTANCE)
5491 fprintf(f,
"(%"PRIuPTR
")", b);
5495print_anchor(
FILE* f,
int anchor)
5501 if (anchor & ANCHOR_BEGIN_BUF) {
5502 fprintf(f,
"begin-buf");
5505 if (anchor & ANCHOR_BEGIN_LINE) {
5506 if (q) fprintf(f,
", ");
5508 fprintf(f,
"begin-line");
5510 if (anchor & ANCHOR_BEGIN_POSITION) {
5511 if (q) fprintf(f,
", ");
5513 fprintf(f,
"begin-pos");
5515 if (anchor & ANCHOR_END_BUF) {
5516 if (q) fprintf(f,
", ");
5518 fprintf(f,
"end-buf");
5520 if (anchor & ANCHOR_SEMI_END_BUF) {
5521 if (q) fprintf(f,
", ");
5523 fprintf(f,
"semi-end-buf");
5525 if (anchor & ANCHOR_END_LINE) {
5526 if (q) fprintf(f,
", ");
5528 fprintf(f,
"end-line");
5530 if (anchor & ANCHOR_ANYCHAR_STAR) {
5531 if (q) fprintf(f,
", ");
5533 fprintf(f,
"anychar-star");
5535 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5536 if (q) fprintf(f,
", ");
5537 fprintf(f,
"anychar-star-ml");
5546 static const char* on[] = {
"NONE",
"EXACT",
"EXACT_BM",
"EXACT_BM_NOT_REV",
5548 "EXACT_BM_IC",
"EXACT_BM_NOT_REV_IC" };
5550 fprintf(f,
"optimize: %s\n", on[reg->optimize]);
5551 fprintf(f,
" anchor: "); print_anchor(f, reg->anchor);
5552 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5553 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5556 if (reg->optimize) {
5557 fprintf(f,
" sub anchor: "); print_anchor(f, reg->sub_anchor);
5564 fprintf(f,
"exact: [");
5565 for (p = reg->exact; p < reg->exact_end; p++) {
5568 fprintf(f,
"]: length: %"PRIdPTR
"\n", (reg->exact_end - reg->exact));
5570 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5573 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5574 if (reg->map[i]) n++;
5576 fprintf(f,
"map: n=%d\n", n);
5580 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5581 if (reg->map[i] != 0) {
5582 if (c > 0) fputs(
", ", f);
5584 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5585 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5588 fprintf(f,
"%d", i);
5601 if (IS_NOT_NULL(reg)) {
5604 xfree(reg->int_map);
5605 xfree(reg->int_map_backward);
5606 xfree(reg->repeat_range);
5607 onig_free(reg->chain);
5609#ifdef USE_NAMED_GROUP
5610 onig_names_free(reg);
5618 if (IS_NOT_NULL(reg)) {
5619 onig_free_body(reg);
5625dup_copy(
const void *ptr,
size_t size)
5628 if (IS_NOT_NULL(newptr)) {
5629 memcpy(newptr, ptr, size);
5637 if (IS_NOT_NULL(oreg)) {
5639 if (IS_NULL(reg))
return ONIGERR_MEMORY;
5643# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5645 if (IS_NOT_NULL(reg->exact)) {
5646 size_t exact_size = reg->exact_end - reg->exact;
5647 if (COPY_FAILED(exact, exact_size))
5649 (reg)->exact_end = (reg)->exact + exact_size;
5652 if (IS_NOT_NULL(reg->int_map)) {
5653 if (COPY_FAILED(int_map,
sizeof(
int) * ONIG_CHAR_TABLE_SIZE))
5656 if (IS_NOT_NULL(reg->int_map_backward)) {
5657 if (COPY_FAILED(int_map_backward,
sizeof(
int) * ONIG_CHAR_TABLE_SIZE))
5658 goto err_int_map_backward;
5660 if (IS_NOT_NULL(reg->p)) {
5661 if (COPY_FAILED(p, reg->alloc))
5664 if (IS_NOT_NULL(reg->repeat_range)) {
5665 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc *
sizeof(
OnigRepeatRange)))
5666 goto err_repeat_range;
5668 if (IS_NOT_NULL(reg->name_table)) {
5669 if (onig_names_copy(reg, oreg))
5670 goto err_name_table;
5672 if (IS_NOT_NULL(reg->chain)) {
5673 if (onig_reg_copy(®->chain, reg->chain))
5680 onig_names_free(reg);
5682 xfree(reg->repeat_range);
5686 xfree(reg->int_map_backward);
5687 err_int_map_backward:
5688 xfree(reg->int_map);
5693 return ONIGERR_MEMORY;
5700onig_memsize(
const regex_t *reg)
5702 size_t size =
sizeof(
regex_t);
5703 if (IS_NULL(reg))
return 0;
5704 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5705 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5706 if (IS_NOT_NULL(reg->int_map)) size +=
sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5707 if (IS_NOT_NULL(reg->int_map_backward)) size +=
sizeof(
int) * ONIG_CHAR_TABLE_SIZE;
5708 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc *
sizeof(
OnigRepeatRange);
5709 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5717 size_t size =
sizeof(*regs);
5718 if (IS_NULL(regs))
return 0;
5719 size += regs->allocated * (
sizeof(*regs->beg) +
sizeof(*regs->end));
5724#define REGEX_TRANSFER(to,from) do {\
5725 onig_free_body(to);\
5726 xmemcpy(to, from, sizeof(regex_t));\
5734 REGEX_TRANSFER(to, from);
5738#ifdef ONIG_DEBUG_COMPILE
5739static void print_compiled_byte_code_list(
FILE* f,
regex_t* reg);
5741#ifdef ONIG_DEBUG_PARSE_TREE
5742static void print_tree(
FILE* f,
Node* node);
5747onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5750 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5756onig_compile_ruby(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5757 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
5760onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5764#define COMPILE_INIT_SIZE 20
5767 OnigDistance init_size;
5770#ifdef USE_SUBEXP_CALL
5774 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5777 scan_env.sourcefile = sourcefile;
5778 scan_env.sourceline = sourceline;
5782 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5785 if (reg->alloc == 0) {
5786 init_size = (pattern_end - pattern) * 2;
5787 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5788 r = BBUF_INIT(reg, init_size);
5789 if (r != 0)
goto end;
5795 reg->num_repeat = 0;
5796 reg->num_null_check = 0;
5797 reg->repeat_range_alloc = 0;
5799#ifdef USE_COMBINATION_EXPLOSION_CHECK
5800 reg->num_comb_exp_check = 0;
5803 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5804 if (r != 0)
goto err;
5806#ifdef ONIG_DEBUG_PARSE_TREE
5808 fprintf(stderr,
"ORIGINAL PARSE TREE:\n");
5809 print_tree(stderr, root);
5813#ifdef USE_NAMED_GROUP
5815 if (scan_env.num_named > 0 &&
5816 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5817 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5818 if (scan_env.num_named != scan_env.num_mem)
5819 r = disable_noname_group_capture(&root, reg, &scan_env);
5821 r = numbered_ref_check(root);
5823 if (r != 0)
goto err;
5827#ifdef USE_SUBEXP_CALL
5828 if (scan_env.num_call > 0) {
5829 r = unset_addr_list_init(&uslist, scan_env.num_call);
5830 if (r != 0)
goto err;
5831 scan_env.unset_addr_list = &uslist;
5832 r = setup_subexp_call(root, &scan_env);
5833 if (r != 0)
goto err_unset;
5834 r = subexp_recursive_check_trav(root, &scan_env);
5835 if (r < 0)
goto err_unset;
5836 r = subexp_inf_recursive_check_trav(root, &scan_env);
5837 if (r != 0)
goto err_unset;
5839 reg->num_call = scan_env.num_call;
5845 r = setup_tree(root, reg, 0, &scan_env);
5846 if (r != 0)
goto err_unset;
5848#ifdef ONIG_DEBUG_PARSE_TREE
5849 print_tree(stderr, root);
5852 reg->capture_history = scan_env.capture_history;
5853 reg->bt_mem_start = scan_env.bt_mem_start;
5854 reg->bt_mem_start |= reg->capture_history;
5855 if (IS_FIND_CONDITION(reg->options))
5856 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5858 reg->bt_mem_end = scan_env.bt_mem_end;
5859 reg->bt_mem_end |= reg->capture_history;
5862#ifdef USE_COMBINATION_EXPLOSION_CHECK
5863 if (scan_env.backrefed_mem == 0
5864# ifdef USE_SUBEXP_CALL
5865 || scan_env.num_call == 0
5868 setup_comb_exp_check(root, 0, &scan_env);
5869# ifdef USE_SUBEXP_CALL
5870 if (scan_env.has_recursion != 0) {
5871 scan_env.num_comb_exp_check = 0;
5875 if (scan_env.comb_exp_max_regnum > 0) {
5877 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5878 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5879 scan_env.num_comb_exp_check = 0;
5886 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5889 clear_optimize_info(reg);
5890#ifndef ONIG_DONT_OPTIMIZE
5891 r = set_optimize_info_from_tree(root, reg, &scan_env);
5892 if (r != 0)
goto err_unset;
5895 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5896 xfree(scan_env.mem_nodes_dynamic);
5897 scan_env.mem_nodes_dynamic = (
Node** )NULL;
5900 r = compile_tree(root, reg);
5902 r = add_opcode(reg, OP_END);
5903#ifdef USE_SUBEXP_CALL
5904 if (scan_env.num_call > 0) {
5905 r = unset_addr_list_fix(&uslist, reg);
5906 unset_addr_list_end(&uslist);
5911 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5912 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5914 if (reg->bt_mem_start != 0)
5915 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5917 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5920#ifdef USE_SUBEXP_CALL
5921 else if (scan_env.num_call > 0) {
5922 unset_addr_list_end(&uslist);
5925 onig_node_free(root);
5927#ifdef ONIG_DEBUG_COMPILE
5928# ifdef USE_NAMED_GROUP
5929 onig_print_names(stderr, reg);
5931 print_compiled_byte_code_list(stderr, reg);
5935 onig_reg_resize(reg);
5939#ifdef USE_SUBEXP_CALL
5940 if (scan_env.num_call > 0) {
5941 unset_addr_list_end(&uslist);
5945 if (IS_NOT_NULL(scan_env.error)) {
5946 if (IS_NOT_NULL(einfo)) {
5947 einfo->enc = scan_env.enc;
5948 einfo->par = scan_env.error;
5949 einfo->par_end = scan_env.error_end;
5953 onig_node_free(root);
5954 xfree(scan_env.mem_nodes_dynamic);
5959static int onig_inited = 0;
5962onig_reg_init(
regex_t* reg, OnigOptionType option,
5963 OnigCaseFoldType case_fold_flag,
5970 return ONIGERR_INVALID_ARGUMENT;
5972 if (ONIGENC_IS_UNDEF(enc))
5973 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5975 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5976 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5977 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5980 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5981 option |= syntax->options;
5982 option &= ~ONIG_OPTION_SINGLELINE;
5985 option |= syntax->options;
5988 (reg)->options = option;
5989 (reg)->syntax = syntax;
5990 (reg)->optimize = 0;
5991 (reg)->exact = (UChar* )NULL;
5992 (reg)->int_map = (
int* )NULL;
5993 (reg)->int_map_backward = (
int* )NULL;
5994 (reg)->chain = (
regex_t* )NULL;
5996 (reg)->p = (UChar* )NULL;
5999 (reg)->name_table = (
void* )NULL;
6001 (reg)->case_fold_flag = case_fold_flag;
6003 (reg)->timelimit = 0;
6009onig_new_without_alloc(
regex_t* reg,
const UChar* pattern,
6010 const UChar* pattern_end, OnigOptionType option,
OnigEncoding enc,
6015 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6018 r = onig_compile(reg, pattern, pattern_end, einfo);
6023onig_new(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
6028 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
6030 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
6040onig_initialize(
OnigEncoding encodings[] ARG_UNUSED,
int n ARG_UNUSED)
6048 if (onig_inited != 0)
6053#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6054 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6060#ifdef ONIG_DEBUG_STATISTICS
6061 onig_statistics_init();
6070extern void onig_add_end_call(
void (*func)(
void))
6075 if (item == 0) return ;
6077 item->next = EndCallTop;
6084exec_end_call_list(
void)
6089 while (EndCallTop != 0) {
6090 func = EndCallTop->func;
6094 EndCallTop = EndCallTop->next;
6102 exec_end_call_list();
6104#ifdef ONIG_DEBUG_STATISTICS
6105 onig_print_statistics(stderr);
6108#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6109 _CrtDumpMemoryLeaks();
6118onig_is_in_code_range(
const UChar* p, OnigCodePoint code)
6120 OnigCodePoint n, *data;
6121 OnigCodePoint low, high, x;
6123 GET_CODE_POINT(n, p);
6124 data = (OnigCodePoint* )p;
6127 for (low = 0, high = n; low < high; ) {
6128 x = (low + high) >> 1;
6129 if (code > data[x * 2 + 1])
6135 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6139onig_is_code_in_cc_len(
int elen, OnigCodePoint code,
CClassNode* cc)
6143 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6144 if (IS_NULL(cc->mbuf)) {
6148 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6152 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6155 if (IS_NCCLASS_NOT(cc))
6166 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6170 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6172 return onig_is_code_in_cc_len(
len, code, cc);
6179# define ARG_SPECIAL -1
6181# define ARG_RELADDR 1
6182# define ARG_ABSADDR 2
6183# define ARG_LENGTH 3
6184# define ARG_MEMNUM 4
6185# define ARG_OPTION 5
6186# define ARG_STATE_CHECK 6
6188OnigOpInfoType OnigOpInfo[] = {
6189 { OP_FINISH,
"finish", ARG_NON },
6190 { OP_END,
"end", ARG_NON },
6191 { OP_EXACT1,
"exact1", ARG_SPECIAL },
6192 { OP_EXACT2,
"exact2", ARG_SPECIAL },
6193 { OP_EXACT3,
"exact3", ARG_SPECIAL },
6194 { OP_EXACT4,
"exact4", ARG_SPECIAL },
6195 { OP_EXACT5,
"exact5", ARG_SPECIAL },
6196 { OP_EXACTN,
"exactn", ARG_SPECIAL },
6197 { OP_EXACTMB2N1,
"exactmb2-n1", ARG_SPECIAL },
6198 { OP_EXACTMB2N2,
"exactmb2-n2", ARG_SPECIAL },
6199 { OP_EXACTMB2N3,
"exactmb2-n3", ARG_SPECIAL },
6200 { OP_EXACTMB2N,
"exactmb2-n", ARG_SPECIAL },
6201 { OP_EXACTMB3N,
"exactmb3n" , ARG_SPECIAL },
6202 { OP_EXACTMBN,
"exactmbn", ARG_SPECIAL },
6203 { OP_EXACT1_IC,
"exact1-ic", ARG_SPECIAL },
6204 { OP_EXACTN_IC,
"exactn-ic", ARG_SPECIAL },
6205 { OP_CCLASS,
"cclass", ARG_SPECIAL },
6206 { OP_CCLASS_MB,
"cclass-mb", ARG_SPECIAL },
6207 { OP_CCLASS_MIX,
"cclass-mix", ARG_SPECIAL },
6208 { OP_CCLASS_NOT,
"cclass-not", ARG_SPECIAL },
6209 { OP_CCLASS_MB_NOT,
"cclass-mb-not", ARG_SPECIAL },
6210 { OP_CCLASS_MIX_NOT,
"cclass-mix-not", ARG_SPECIAL },
6211 { OP_ANYCHAR,
"anychar", ARG_NON },
6212 { OP_ANYCHAR_ML,
"anychar-ml", ARG_NON },
6213 { OP_ANYCHAR_STAR,
"anychar*", ARG_NON },
6214 { OP_ANYCHAR_ML_STAR,
"anychar-ml*", ARG_NON },
6215 { OP_ANYCHAR_STAR_PEEK_NEXT,
"anychar*-peek-next", ARG_SPECIAL },
6216 { OP_ANYCHAR_ML_STAR_PEEK_NEXT,
"anychar-ml*-peek-next", ARG_SPECIAL },
6217 { OP_WORD,
"word", ARG_NON },
6218 { OP_NOT_WORD,
"not-word", ARG_NON },
6219 { OP_WORD_BOUND,
"word-bound", ARG_NON },
6220 { OP_NOT_WORD_BOUND,
"not-word-bound", ARG_NON },
6221 { OP_WORD_BEGIN,
"word-begin", ARG_NON },
6222 { OP_WORD_END,
"word-end", ARG_NON },
6223 { OP_ASCII_WORD,
"ascii-word", ARG_NON },
6224 { OP_NOT_ASCII_WORD,
"not-ascii-word", ARG_NON },
6225 { OP_ASCII_WORD_BOUND,
"ascii-word-bound", ARG_NON },
6226 { OP_NOT_ASCII_WORD_BOUND,
"not-ascii-word-bound", ARG_NON },
6227 { OP_ASCII_WORD_BEGIN,
"ascii-word-begin", ARG_NON },
6228 { OP_ASCII_WORD_END,
"ascii-word-end", ARG_NON },
6229 { OP_BEGIN_BUF,
"begin-buf", ARG_NON },
6230 { OP_END_BUF,
"end-buf", ARG_NON },
6231 { OP_BEGIN_LINE,
"begin-line", ARG_NON },
6232 { OP_END_LINE,
"end-line", ARG_NON },
6233 { OP_SEMI_END_BUF,
"semi-end-buf", ARG_NON },
6234 { OP_BEGIN_POSITION,
"begin-position", ARG_NON },
6235 { OP_BACKREF1,
"backref1", ARG_NON },
6236 { OP_BACKREF2,
"backref2", ARG_NON },
6237 { OP_BACKREFN,
"backrefn", ARG_MEMNUM },
6238 { OP_BACKREFN_IC,
"backrefn-ic", ARG_SPECIAL },
6239 { OP_BACKREF_MULTI,
"backref_multi", ARG_SPECIAL },
6240 { OP_BACKREF_MULTI_IC,
"backref_multi-ic", ARG_SPECIAL },
6241 { OP_BACKREF_WITH_LEVEL,
"backref_at_level", ARG_SPECIAL },
6242 { OP_MEMORY_START_PUSH,
"mem-start-push", ARG_MEMNUM },
6243 { OP_MEMORY_START,
"mem-start", ARG_MEMNUM },
6244 { OP_MEMORY_END_PUSH,
"mem-end-push", ARG_MEMNUM },
6245 { OP_MEMORY_END_PUSH_REC,
"mem-end-push-rec", ARG_MEMNUM },
6246 { OP_MEMORY_END,
"mem-end", ARG_MEMNUM },
6247 { OP_MEMORY_END_REC,
"mem-end-rec", ARG_MEMNUM },
6248 { OP_SET_OPTION_PUSH,
"set-option-push", ARG_OPTION },
6249 { OP_SET_OPTION,
"set-option", ARG_OPTION },
6250 { OP_KEEP,
"keep", ARG_NON },
6251 { OP_FAIL,
"fail", ARG_NON },
6252 { OP_JUMP,
"jump", ARG_RELADDR },
6253 { OP_PUSH,
"push", ARG_RELADDR },
6254 { OP_POP,
"pop", ARG_NON },
6255 { OP_PUSH_OR_JUMP_EXACT1,
"push-or-jump-e1", ARG_SPECIAL },
6256 { OP_PUSH_IF_PEEK_NEXT,
"push-if-peek-next", ARG_SPECIAL },
6257 { OP_REPEAT,
"repeat", ARG_SPECIAL },
6258 { OP_REPEAT_NG,
"repeat-ng", ARG_SPECIAL },
6259 { OP_REPEAT_INC,
"repeat-inc", ARG_MEMNUM },
6260 { OP_REPEAT_INC_NG,
"repeat-inc-ng", ARG_MEMNUM },
6261 { OP_REPEAT_INC_SG,
"repeat-inc-sg", ARG_MEMNUM },
6262 { OP_REPEAT_INC_NG_SG,
"repeat-inc-ng-sg", ARG_MEMNUM },
6263 { OP_NULL_CHECK_START,
"null-check-start", ARG_MEMNUM },
6264 { OP_NULL_CHECK_END,
"null-check-end", ARG_MEMNUM },
6265 { OP_NULL_CHECK_END_MEMST,
"null-check-end-memst", ARG_MEMNUM },
6266 { OP_NULL_CHECK_END_MEMST_PUSH,
"null-check-end-memst-push", ARG_MEMNUM },
6267 { OP_PUSH_POS,
"push-pos", ARG_NON },
6268 { OP_POP_POS,
"pop-pos", ARG_NON },
6269 { OP_PUSH_POS_NOT,
"push-pos-not", ARG_RELADDR },
6270 { OP_FAIL_POS,
"fail-pos", ARG_NON },
6271 { OP_PUSH_STOP_BT,
"push-stop-bt", ARG_NON },
6272 { OP_POP_STOP_BT,
"pop-stop-bt", ARG_NON },
6273 { OP_LOOK_BEHIND,
"look-behind", ARG_SPECIAL },
6274 { OP_PUSH_LOOK_BEHIND_NOT,
"push-look-behind-not", ARG_SPECIAL },
6275 { OP_FAIL_LOOK_BEHIND_NOT,
"fail-look-behind-not", ARG_NON },
6276 { OP_PUSH_ABSENT_POS,
"push-absent-pos", ARG_NON },
6277 { OP_ABSENT,
"absent", ARG_RELADDR },
6278 { OP_ABSENT_END,
"absent-end", ARG_NON },
6279 { OP_CALL,
"call", ARG_ABSADDR },
6280 { OP_RETURN,
"return", ARG_NON },
6281 { OP_CONDITION,
"condition", ARG_SPECIAL },
6282 { OP_STATE_CHECK_PUSH,
"state-check-push", ARG_SPECIAL },
6283 { OP_STATE_CHECK_PUSH_OR_JUMP,
"state-check-push-or-jump", ARG_SPECIAL },
6284 { OP_STATE_CHECK,
"state-check", ARG_STATE_CHECK },
6285 { OP_STATE_CHECK_ANYCHAR_STAR,
"state-check-anychar*", ARG_STATE_CHECK },
6286 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6287 "state-check-anychar-ml*", ARG_STATE_CHECK },
6296 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6297 if (opcode == OnigOpInfo[i].opcode)
6298 return OnigOpInfo[i].name;
6304op2arg_type(
int opcode)
6308 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6309 if (opcode == OnigOpInfo[i].opcode)
6310 return OnigOpInfo[i].arg_type;
6315# ifdef ONIG_DEBUG_PARSE_TREE
6317Indent(
FILE* f,
int indent)
6320 for (i = 0; i < indent; i++) putc(
' ', f);
6325p_string(
FILE* f, ptrdiff_t
len, UChar* s)
6328 while (
len-- > 0) { fputc(*s++, f); }
6332p_len_string(
FILE* f, LengthType
len,
int mb_len, UChar* s)
6334 int x =
len * mb_len;
6336 fprintf(f,
":%d:",
len);
6337 while (x-- > 0) { fputc(*s++, f); }
6341onig_print_compiled_byte_code(
FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6348 StateCheckNumType scn;
6352 fprintf(f,
"[%s", op2name(*bp));
6353 arg_type = op2arg_type(*bp);
6354 if (arg_type != ARG_SPECIAL) {
6360 GET_RELADDR_INC(addr, bp);
6361 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6364 GET_ABSADDR_INC(addr, bp);
6365 fprintf(f,
":(%d)", addr);
6368 GET_LENGTH_INC(
len, bp);
6369 fprintf(f,
":%d",
len);
6372 mem = *((MemNumType* )bp);
6374 fprintf(f,
":%d", mem);
6378 OnigOptionType option = *((OnigOptionType* )bp);
6380 fprintf(f,
":%d", option);
6384 case ARG_STATE_CHECK:
6385 scn = *((StateCheckNumType* )bp);
6386 bp += SIZE_STATE_CHECK_NUM;
6387 fprintf(f,
":%d", scn);
6394 case OP_ANYCHAR_STAR_PEEK_NEXT:
6395 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6396 p_string(f, 1, bp++);
break;
6398 p_string(f, 2, bp); bp += 2;
break;
6400 p_string(f, 3, bp); bp += 3;
break;
6402 p_string(f, 4, bp); bp += 4;
break;
6404 p_string(f, 5, bp); bp += 5;
break;
6406 GET_LENGTH_INC(
len, bp);
6407 p_len_string(f,
len, 1, bp);
6412 p_string(f, 2, bp); bp += 2;
break;
6414 p_string(f, 4, bp); bp += 4;
break;
6416 p_string(f, 6, bp); bp += 6;
break;
6418 GET_LENGTH_INC(
len, bp);
6419 p_len_string(f,
len, 2, bp);
6423 GET_LENGTH_INC(
len, bp);
6424 p_len_string(f,
len, 3, bp);
6431 GET_LENGTH_INC(mb_len, bp);
6432 GET_LENGTH_INC(
len, bp);
6433 fprintf(f,
":%d:%d:", mb_len,
len);
6435 while (n-- > 0) { fputc(*bp++, f); }
6440 len = enclen(enc, bp, bpend);
6441 p_string(f,
len, bp);
6445 GET_LENGTH_INC(
len, bp);
6446 p_len_string(f,
len, 1, bp);
6451 n = bitset_on_num((BitSetRef )bp);
6453 fprintf(f,
":%d", n);
6457 n = bitset_on_num((BitSetRef )bp);
6459 fprintf(f,
":%d", n);
6463 case OP_CCLASS_MB_NOT:
6464 GET_LENGTH_INC(
len, bp);
6466# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6469 GET_CODE_POINT(code, q);
6471 fprintf(f,
":%d:%d", (
int )code,
len);
6475 case OP_CCLASS_MIX_NOT:
6476 n = bitset_on_num((BitSetRef )bp);
6478 GET_LENGTH_INC(
len, bp);
6480# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6483 GET_CODE_POINT(code, q);
6485 fprintf(f,
":%d:%d:%d", n, (
int )code,
len);
6488 case OP_BACKREFN_IC:
6489 mem = *((MemNumType* )bp);
6491 fprintf(f,
":%d", mem);
6494 case OP_BACKREF_MULTI_IC:
6495 case OP_BACKREF_MULTI:
6497 GET_LENGTH_INC(
len, bp);
6498 for (i = 0; i <
len; i++) {
6499 GET_MEMNUM_INC(mem, bp);
6500 if (i > 0) fputs(
", ", f);
6501 fprintf(f,
"%d", mem);
6505 case OP_BACKREF_WITH_LEVEL:
6507 OnigOptionType option;
6510 GET_OPTION_INC(option, bp);
6511 fprintf(f,
":%d", option);
6512 GET_LENGTH_INC(level, bp);
6513 fprintf(f,
":%d", level);
6516 GET_LENGTH_INC(
len, bp);
6517 for (i = 0; i <
len; i++) {
6518 GET_MEMNUM_INC(mem, bp);
6519 if (i > 0) fputs(
", ", f);
6520 fprintf(f,
"%d", mem);
6528 mem = *((MemNumType* )bp);
6530 addr = *((RelAddrType* )bp);
6532 fprintf(f,
":%d:%d", mem, addr);
6536 case OP_PUSH_OR_JUMP_EXACT1:
6537 case OP_PUSH_IF_PEEK_NEXT:
6538 addr = *((RelAddrType* )bp);
6540 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6545 case OP_LOOK_BEHIND:
6546 GET_LENGTH_INC(
len, bp);
6547 fprintf(f,
":%d",
len);
6550 case OP_PUSH_LOOK_BEHIND_NOT:
6551 GET_RELADDR_INC(addr, bp);
6552 GET_LENGTH_INC(
len, bp);
6553 fprintf(f,
":%d:(%s%d)",
len, (addr >= 0) ?
"+" :
"", addr);
6556 case OP_STATE_CHECK_PUSH:
6557 case OP_STATE_CHECK_PUSH_OR_JUMP:
6558 scn = *((StateCheckNumType* )bp);
6559 bp += SIZE_STATE_CHECK_NUM;
6560 addr = *((RelAddrType* )bp);
6562 fprintf(f,
":%d:(%s%d)", scn, (addr >= 0) ?
"+" :
"", addr);
6566 GET_MEMNUM_INC(mem, bp);
6567 GET_RELADDR_INC(addr, bp);
6568 fprintf(f,
":%d:(%s%d)", mem, (addr >= 0) ?
"+" :
"", addr);
6572 fprintf(stderr,
"onig_print_compiled_byte_code: undefined code %d\n",
6577 if (nextp) *nextp = bp;
6580# ifdef ONIG_DEBUG_COMPILE
6582print_compiled_byte_code_list(
FILE* f,
regex_t* reg)
6586 UChar* end = reg->p + reg->used;
6588 fprintf(f,
"code length: %d", reg->used);
6594 fprintf(f,
"\n%ld:", bp - reg->p);
6596 fprintf(f,
" %ld:", bp - reg->p);
6597 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6604# ifdef ONIG_DEBUG_PARSE_TREE
6606print_indent_tree(
FILE* f,
Node* node,
int indent)
6608 int i,
type, container_p = 0;
6613 if (IS_NULL(node)) {
6614 fprintf(f,
"ERROR: null node!!!\n");
6622 if (NTYPE(node) == NT_LIST)
6623 fprintf(f,
"<list:%"PRIxPTR
">\n", (intptr_t )node);
6625 fprintf(f,
"<alt:%"PRIxPTR
">\n", (intptr_t )node);
6627 print_indent_tree(f, NCAR(node), indent + add);
6628 while (IS_NOT_NULL(node = NCDR(node))) {
6629 if (NTYPE(node) != type) {
6630 fprintf(f,
"ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6633 print_indent_tree(f, NCAR(node), indent + add);
6638 fprintf(f,
"<string%s:%"PRIxPTR
">",
6639 (NSTRING_IS_RAW(node) ?
"-raw" :
""), (intptr_t )node);
6640 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6641 if (*p >= 0x20 && *p < 0x7f)
6644 fprintf(f,
" 0x%02x", *p);
6650 fprintf(f,
"<cclass:%"PRIxPTR
">", (intptr_t )node);
6651 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(
"not ", f);
6652 if (NCCLASS(node)->mbuf) {
6653 BBuf* bbuf = NCCLASS(node)->mbuf;
6654 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6655 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6656 fprintf(f,
"%d", *data++);
6657 for (; data < end; data+=2) {
6659 fprintf(f,
"%04x-%04x", data[0], data[1]);
6665 fprintf(f,
"<ctype:%"PRIxPTR
"> ", (intptr_t )node);
6666 switch (NCTYPE(node)->ctype) {
6667 case ONIGENC_CTYPE_WORD:
6668 if (NCTYPE(node)->not != 0)
6669 fputs(
"not word", f);
6675 fprintf(f,
"ERROR: undefined ctype.\n");
6681 fprintf(f,
"<anychar:%"PRIxPTR
">", (intptr_t )node);
6685 fprintf(f,
"<anchor:%"PRIxPTR
"> ", (intptr_t )node);
6686 switch (NANCHOR(node)->type) {
6687 case ANCHOR_BEGIN_BUF: fputs(
"begin buf", f);
break;
6688 case ANCHOR_END_BUF: fputs(
"end buf", f);
break;
6689 case ANCHOR_BEGIN_LINE: fputs(
"begin line", f);
break;
6690 case ANCHOR_END_LINE: fputs(
"end line", f);
break;
6691 case ANCHOR_SEMI_END_BUF: fputs(
"semi end buf", f);
break;
6692 case ANCHOR_BEGIN_POSITION: fputs(
"begin position", f);
break;
6694 case ANCHOR_WORD_BOUND: fputs(
"word bound", f);
break;
6695 case ANCHOR_NOT_WORD_BOUND: fputs(
"not word bound", f);
break;
6696# ifdef USE_WORD_BEGIN_END
6697 case ANCHOR_WORD_BEGIN: fputs(
"word begin", f);
break;
6698 case ANCHOR_WORD_END: fputs(
"word end", f);
break;
6700 case ANCHOR_PREC_READ: fputs(
"prec read", f); container_p = TRUE;
break;
6701 case ANCHOR_PREC_READ_NOT: fputs(
"prec read not", f); container_p = TRUE;
break;
6702 case ANCHOR_LOOK_BEHIND: fputs(
"look_behind", f); container_p = TRUE;
break;
6703 case ANCHOR_LOOK_BEHIND_NOT: fputs(
"look_behind_not",f); container_p = TRUE;
break;
6704 case ANCHOR_KEEP: fputs(
"keep",f);
break;
6707 fprintf(f,
"ERROR: undefined anchor type.\n");
6717 fprintf(f,
"<backref:%"PRIxPTR
">", (intptr_t )node);
6718 for (i = 0; i < br->back_num; i++) {
6719 if (i > 0) fputs(
", ", f);
6720 fprintf(f,
"%d", p[i]);
6725# ifdef USE_SUBEXP_CALL
6729 fprintf(f,
"<call:%"PRIxPTR
">", (intptr_t )node);
6730 p_string(f, cn->name_end - cn->name, cn->name);
6736 fprintf(f,
"<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (intptr_t )node,
6737 NQTFR(node)->lower, NQTFR(node)->upper,
6738 (NQTFR(node)->greedy ?
"" :
"?"));
6739 print_indent_tree(f, NQTFR(node)->target, indent + add);
6743 fprintf(f,
"<enclose:%"PRIxPTR
"> ", (intptr_t )node);
6744 switch (NENCLOSE(node)->type) {
6745 case ENCLOSE_OPTION:
6746 fprintf(f,
"option:%d", NENCLOSE(node)->option);
6748 case ENCLOSE_MEMORY:
6749 fprintf(f,
"memory:%d", NENCLOSE(node)->regnum);
6751 case ENCLOSE_STOP_BACKTRACK:
6752 fprintf(f,
"stop-bt");
6754 case ENCLOSE_CONDITION:
6755 fprintf(f,
"condition:%d", NENCLOSE(node)->regnum);
6757 case ENCLOSE_ABSENT:
6758 fprintf(f,
"absent");
6765 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6769 fprintf(f,
"print_indent_tree: undefined node type %d\n", NTYPE(node));
6773 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6777 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6785 print_indent_tree(f, node, 0);
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
int len
Length of the buffer.
VALUE type(ANYARGS)
ANYARGS-ed function type.