33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(
void)
38 return OnigDefaultCaseFoldFlag;
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 OnigDefaultCaseFoldFlag = case_fold_flag;
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
55str_dup(UChar* s, UChar* end)
57 ptrdiff_t
len = end - s;
74 c = *a; *a = *b; *b = c;
76 if (NTYPE(a) == NT_STR) {
79 size_t len = sn->end - sn->s;
81 sn->end = sn->s +
len;
85 if (NTYPE(b) == NT_STR) {
88 size_t len = sn->end - sn->s;
90 sn->end = sn->s +
len;
96distance_add(OnigDistance d1, OnigDistance d2)
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2)
return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
107distance_multiply(OnigDistance d,
int m)
109 if (m == 0)
return 0;
111 if (d < ONIG_INFINITE_DISTANCE / m)
114 return ONIG_INFINITE_DISTANCE;
118bitset_is_empty(BitSetRef bs)
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0)
return 0;
129bitset_on_num(BitSetRef bs)
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr =
xrealloc(reg->p, reg->used);
155 reg->alloc = reg->used;
159 }
while ((reg = reg->chain) != 0);
163onig_bbuf_init(
BBuf* buf, OnigDistance size)
170 buf->p = (UChar* )
xmalloc(size);
171 if (IS_NULL(buf->p))
return(ONIGERR_MEMORY);
174 buf->alloc = (
unsigned int )size;
180#ifdef USE_SUBEXP_CALL
188 CHECK_NULL_RETURN_MEMERR(p);
190 uslist->alloc = size;
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
224add_opcode(
regex_t* reg,
int opcode)
226 BBUF_ADD1(reg, opcode);
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
232add_state_check_num(
regex_t* reg,
int num)
234 StateCheckNumType n = (StateCheckNumType )num;
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
242add_rel_addr(
regex_t* reg,
int addr)
244 RelAddrType ra = (RelAddrType )addr;
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
251add_abs_addr(
regex_t* reg,
int addr)
253 AbsAddrType ra = (AbsAddrType )addr;
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
262 LengthType l = (LengthType )
len;
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
269add_mem_num(
regex_t* reg,
int num)
271 MemNumType n = (MemNumType )num;
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
279add_pointer(
regex_t* reg,
void* addr)
281 PointerType ptr = (PointerType )addr;
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
289add_option(
regex_t* reg, OnigOptionType option)
291 BBUF_ADD(reg, &option, SIZE_OPTION);
296add_opcode_rel_addr(
regex_t* reg,
int opcode,
int addr)
300 r = add_opcode(reg, opcode);
302 r = add_rel_addr(reg, addr);
307add_bytes(
regex_t* reg, UChar* bytes, OnigDistance
len)
309 BBUF_ADD(reg, bytes,
len);
314add_bitset(
regex_t* reg, BitSetRef bs)
316 BBUF_ADD(reg, bs, SIZE_BITSET);
321add_opcode_option(
regex_t* reg,
int opcode, OnigOptionType option)
325 r = add_opcode(reg, opcode);
327 r = add_option(reg, option);
331static int compile_length_tree(
Node* node,
regex_t* reg);
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
340select_str_opcode(
int mb_len, OnigDistance byte_len,
int ignore_case)
343 OnigDistance str_len = roomof(byte_len, mb_len);
347 case 1: op = OP_EXACT1_IC;
break;
348 default: op = OP_EXACTN_IC;
break;
355 case 1: op = OP_EXACT1;
break;
356 case 2: op = OP_EXACT2;
break;
357 case 3: op = OP_EXACT3;
break;
358 case 4: op = OP_EXACT4;
break;
359 case 5: op = OP_EXACT5;
break;
360 default: op = OP_EXACTN;
break;
366 case 1: op = OP_EXACTMB2N1;
break;
367 case 2: op = OP_EXACTMB2N2;
break;
368 case 3: op = OP_EXACTMB2N3;
break;
369 default: op = OP_EXACTMB2N;
break;
386compile_tree_empty_check(
Node* node,
regex_t* reg,
int empty_info)
389 int saved_num_null_check = reg->num_null_check;
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
394 r = add_mem_num(reg, reg->num_null_check);
396 reg->num_null_check++;
399 r = compile_tree(node, reg);
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
411 r = add_mem_num(reg, saved_num_null_check);
416#ifdef USE_SUBEXP_CALL
422 r = add_opcode(reg, OP_CALL);
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
427 r = add_abs_addr(reg, 0 );
433compile_tree_n_times(
Node* node,
int n,
regex_t* reg)
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
445add_compile_string_length(UChar* s ARG_UNUSED,
int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED,
int ignore_case)
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
453 if (op == OP_EXACTMBN)
len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
457 len += (int )byte_len;
462add_compile_string(UChar* s,
int mb_len, OnigDistance byte_len,
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
475 add_length(reg, byte_len / mb_len);
478 add_bytes(reg, s, byte_len);
484compile_length_string_node(
Node* node,
regex_t* reg)
486 int rlen, r,
len, prev_len, blen, ambig;
492 if (sn->end <= sn->s)
495 ambig = NSTRING_IS_AMBIG(node);
498 prev_len = enclen(enc, p, sn->end);
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (
len == prev_len || ambig) {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
525 if (sn->end <= sn->s)
528 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
534 int r,
len, prev_len, blen, ambig;
536 UChar *p, *prev, *end;
540 if (sn->end <= sn->s)
544 ambig = NSTRING_IS_AMBIG(node);
547 prev_len = enclen(enc, p, end);
552 len = enclen(enc, p, end);
553 if (
len == prev_len || ambig) {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
573 if (sn->end <= sn->s)
576 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
593 r = add_bytes(reg, mbuf->p, mbuf->used);
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
615 len = SIZE_OPCODE + SIZE_BITSET;
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
636 add_opcode(reg, OP_CCLASS);
638 r = add_bitset(reg, cc->bs);
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
645 add_opcode(reg, OP_CCLASS_MB);
647 r = add_multi_byte_cclass(cc->mbuf, reg);
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
653 add_opcode(reg, OP_CCLASS_MIX);
655 r = add_bitset(reg, cc->bs);
657 r = add_multi_byte_cclass(cc->mbuf, reg);
665entry_repeat_range(
regex_t* reg,
int id,
int lower,
int upper)
667#define REPEAT_RANGE_ALLOC 4
671 if (reg->repeat_range_alloc == 0) {
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
677 else if (reg->repeat_range_alloc <=
id) {
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
687 p = reg->repeat_range;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
696compile_range_repeat_node(
QtfrNode* qn,
int target_len,
int empty_info,
700 int num_repeat = reg->num_repeat;
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
704 r = add_mem_num(reg, num_repeat);
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
717#ifdef USE_SUBEXP_CALL
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
727 r = add_mem_num(reg, num_repeat);
732is_anychar_star_quantifier(
QtfrNode* qn)
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
741#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
742#define CKN_ON (ckn > 0)
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
749 int len, mod_tlen, cklen;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
755 if (tlen < 0)
return tlen;
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
776 if (infinite && qn->lower <= 1) {
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0)
796 len = SIZE_OP_JUMP + tlen;
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
806 len = SIZE_OP_PUSH + tlen;
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
820 len += SIZE_OP_STATE_CHECK;
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
835 if (tlen < 0)
return tlen;
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
849 r = add_state_check_num(reg, ckn);
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
868 r = add_state_check_num(reg, ckn);
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
879 if (infinite && qn->lower <= 1) {
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
890 r = add_state_check_num(reg, ckn);
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (
int )SIZE_OP_JUMP
902 + (
int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
914 r = add_state_check_num(reg, ckn);
916 r = add_rel_addr(reg,
917 -(mod_tlen + (
int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) {
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
927 r = compile_tree(qn->target, reg);
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937 r = add_state_check_num(reg, ckn);
939 r = add_rel_addr(reg, tlen);
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
947 r = compile_tree(qn->target, reg);
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
953 r = add_state_check_num(reg, ckn);
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
964 r = compile_tree(qn->target, reg);
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
970 r = add_opcode(reg, OP_STATE_CHECK);
972 r = add_state_check_num(reg, ckn);
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
988 if (tlen < 0)
return tlen;
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1011 len = tlen * qn->lower;
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1028 else if (qn->upper == 0 && qn->is_referred != 0) {
1029 len = SIZE_OP_JUMP + tlen;
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1056 if (tlen < 0)
return tlen;
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH));
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
1147 else if (qn->upper == 0 && qn->is_referred != 0) {
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1150 r = compile_tree(qn->target, reg);
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1164 r = compile_tree(qn->target, reg);
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1173 r = compile_tree(qn->target, reg);
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1186 OnigOptionType prev = reg->options;
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1192 if (tlen < 0)
return tlen;
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
1206 OnigOptionType prev = reg->options;
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1213 r = add_opcode(reg, OP_FAIL);
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0)
return tlen;
1244 switch (node->type) {
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1268 len = SIZE_OP_MEMORY_START;
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1275 case ENCLOSE_STOP_BACKTRACK:
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0)
return tlen;
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1301 tlen = compile_length_tree(NCAR(x), reg);
1302 if (tlen < 0)
return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1306 tlen = compile_length_tree(NCAR(x), reg);
1307 if (tlen < 0)
return tlen;
1309 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1312 return ONIGERR_PARSER_BUG;
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1321 return ONIGERR_TYPE_BUG;
1328static int get_char_length_tree(
Node* node,
regex_t* reg,
int*
len);
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (
int )node->call_addr);
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1357 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1364 r = add_opcode(reg, OP_MEMORY_START);
1366 r = add_mem_num(reg, node->regnum);
1368 r = compile_tree(node->target, reg);
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1380 r = add_mem_num(reg, node->regnum);
1382 r = add_opcode(reg, OP_RETURN);
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1390 r = add_mem_num(reg, node->regnum);
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1398 r = add_opcode(reg, OP_MEMORY_END);
1400 r = add_mem_num(reg, node->regnum);
1404 case ENCLOSE_STOP_BACKTRACK:
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1414 len = compile_length_tree(qn->target, reg);
1417 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_POP + SIZE_OP_JUMP);
1419 r = compile_tree(qn->target, reg);
1421 r = add_opcode(reg, OP_POP);
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((
int )SIZE_OP_PUSH +
len + (
int )SIZE_OP_POP + (
int )SIZE_OP_JUMP));
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1430 r = compile_tree(node->target, reg);
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1441 r = add_mem_num(reg, node->regnum);
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1448 len = compile_length_tree(NCAR(x), reg);
1450 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1452 len2 = compile_length_tree(NCAR(x), reg);
1453 if (len2 < 0)
return len2;
1454 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1457 r = add_rel_addr(reg,
len + SIZE_OP_JUMP);
1459 r = compile_tree(NCAR(x), reg);
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1464 r = compile_tree(NCAR(x), reg);
1467 return ONIGERR_PARSER_BUG;
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1477 r = add_opcode_rel_addr(reg, OP_ABSENT,
len + SIZE_OP_ABSENT_END);
1479 r = compile_tree(node->target, reg);
1481 r = add_opcode(reg, OP_ABSENT_END);
1485 return ONIGERR_TYPE_BUG;
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0)
return tlen;
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF);
break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF);
break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE);
break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE);
break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF);
break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION);
break;
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP);
break;
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1561 r = compile_tree(node->target, reg);
1563 r = add_opcode(reg, OP_POP_POS);
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT,
len + SIZE_OP_FAIL_POS);
1571 r = compile_tree(node->target, reg);
1573 r = add_opcode(reg, OP_FAIL_POS);
1576 case ANCHOR_LOOK_BEHIND:
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1587 r = add_length(reg, n);
1589 r = compile_tree(node->target, reg);
1593 case ANCHOR_LOOK_BEHIND_NOT:
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1606 r = add_length(reg, n);
1608 r = compile_tree(node->target, reg);
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1615 return ONIGERR_TYPE_BUG;
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0)
return r;
1635 }
while (IS_NOT_NULL(node = NCDR(node)));
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0)
return r;
1648 }
while (IS_NOT_NULL(node = NCDR(node)));
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1658 r = compile_length_string_node(node, reg);
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1691#ifdef USE_SUBEXP_CALL
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1710 return ONIGERR_TYPE_BUG;
1726 r = compile_tree(NCAR(node), reg);
1727 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1739 }
while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used +
len;
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_JUMP);
1748 r = compile_tree(NCAR(node), reg);
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1755 }
while (IS_NOT_NULL(node = NCDR(node)));
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1763 r = compile_string_node(node, reg);
1767 r = compile_cclass_node(NCCLASS(node), reg);
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1786 return ONIGERR_TYPE_BUG;
1789 r = add_opcode(reg, op);
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1797 r = add_opcode(reg, OP_ANYCHAR);
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1810 r = add_length(reg, br->nest_level);
1813 goto add_bacref_mems;
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1822 r = add_mem_num(reg, n);
1826 case 1: r = add_opcode(reg, OP_BACKREF1);
break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2);
break;
1829 r = add_opcode(reg, OP_BACKREFN);
1831 r = add_mem_num(reg, n);
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1848#ifdef USE_BACKREF_WITH_LEVEL
1851 r = add_length(reg, br->back_num);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1862#ifdef USE_SUBEXP_CALL
1864 r = compile_call(NCALL(node), reg);
1869 r = compile_quantifier_node(NQTFR(node), reg);
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1877 r = compile_anchor_node(NANCHOR(node), reg);
1882 fprintf(stderr,
"compile_tree: undefined node type %d\n", NTYPE(node));
1890#ifdef USE_NAMED_GROUP
1896 Node* node = *plink;
1898 switch (NTYPE(node)) {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1926 else if (en->regnum != 0) {
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1934 r = noname_disable_map(&(en->target), map, counter);
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1953 int i, pos, n, old_num;
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1964 backs = bn->back_dynamic;
1966 for (i = 0, pos = 0; i < old_num; i++) {
1967 if (backs[i] > num_mem)
return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1984 switch (NTYPE(node)) {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem)
return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2001 r = renumber_by_map(en->target, map, num_mem);
2006 r = renumber_node_backref(node, map, num_mem);
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2022numbered_ref_check(
Node* node)
2026 switch (NTYPE(node)) {
2030 r = numbered_ref_check(NCAR(node));
2031 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2034 r = numbered_ref_check(NQTFR(node)->target);
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2060 int r, i, pos, counter;
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0)
return r;
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0)
return r;
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2094 return onig_renumber_name_table(reg, map);
2098#ifdef USE_SUBEXP_CALL
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en))
return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2120quantifiers_memory_node_info(
Node* node)
2124 switch (NTYPE(node)) {
2130 v = quantifiers_memory_node_info(NCAR(node));
2132 }
while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2136# ifdef USE_SUBEXP_CALL
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC;
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2190get_min_match_length(
Node* node, OnigDistance *min,
ScanEnv* env)
2196 switch (NTYPE(node)) {
2201 Node** nodes = SCANENV_MEM_NODES(env);
2203 if (br->state & NST_RECURSION)
break;
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2213 if (*min > tmin) *min = tmin;
2218#ifdef USE_SUBEXP_CALL
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2243 r = get_min_match_length(x, &tmin, env);
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 }
while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2254 *min = sn->end - sn->s;
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2274 *min = distance_multiply(*min, qn->lower);
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2307 case ENCLOSE_ABSENT:
2322get_max_match_length(
Node* node, OnigDistance *max,
ScanEnv* env)
2328 switch (NTYPE(node)) {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2333 *max = distance_add(*max, tmax);
2334 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2347 *max = sn->end - sn->s;
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2364 Node** nodes = SCANENV_MEM_NODES(env);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2375 if (*max < tmax) *max = tmax;
2380#ifdef USE_SUBEXP_CALL
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2385 *max = ONIG_INFINITE_DISTANCE;
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2399 *max = ONIG_INFINITE_DISTANCE;
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2433 case ENCLOSE_ABSENT:
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2452get_char_length_tree1(
Node* node,
regex_t* reg,
int*
len,
int level)
2459 switch (NTYPE(node)) {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2464 *
len = (int )distance_add(*
len, tlen);
2465 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2486 r = GET_CHAR_LEN_VARLEN;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2511 *
len = (int )distance_multiply(tlen, qn->lower);
2514 r = GET_CHAR_LEN_VARLEN;
2518#ifdef USE_SUBEXP_CALL
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg,
len, level);
2523 r = GET_CHAR_LEN_VARLEN;
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *
len = en->char_len;
2545 r = get_char_length_tree1(en->target, reg,
len, level);
2547 en->char_len = *
len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg,
len, level);
2558 case ENCLOSE_ABSENT:
2569 r = GET_CHAR_LEN_VARLEN;
2579 return get_char_length_tree1(node, reg,
len, 0);
2599 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2600 NCTYPE(y)->not != NCTYPE(x)->not &&
2601 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2611 tmp = x; x = y; y = tmp;
2631 switch (NCTYPE(y)->ctype) {
2632 case ONIGENC_CTYPE_WORD:
2633 if (NCTYPE(y)->not == 0) {
2634 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2635 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2636 if (BITSET_AT(xc->bs, i)) {
2637 if (NCTYPE(y)->ascii_range) {
2638 if (IS_CODE_SB_WORD(reg->enc, i))
return 0;
2641 if (ONIGENC_IS_CODE_WORD(reg->enc, i))
return 0;
2650 if (IS_NOT_NULL(xc->mbuf))
return 0;
2651 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2653 if (NCTYPE(y)->ascii_range)
2654 is_word = IS_CODE_SB_WORD(reg->enc, i);
2656 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2658 if (!IS_NCCLASS_NOT(xc)) {
2659 if (BITSET_AT(xc->bs, i))
2663 if (! BITSET_AT(xc->bs, i))
2682 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2683 v = BITSET_AT(xc->bs, i);
2684 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2685 (v == 0 && IS_NCCLASS_NOT(xc))) {
2686 v = BITSET_AT(yc->bs, i);
2687 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2688 (v == 0 && IS_NCCLASS_NOT(yc)))
2692 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2693 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2712 if (NSTRING_LEN(x) == 0)
2717 switch (NCTYPE(y)->ctype) {
2718 case ONIGENC_CTYPE_WORD:
2719 if (NCTYPE(y)->ascii_range) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2721 return NCTYPE(y)->not;
2723 return !(NCTYPE(y)->not);
2726 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2727 return NCTYPE(y)->not;
2729 return !(NCTYPE(y)->not);
2741 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2742 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2743 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2751 len = NSTRING_LEN(x);
2752 if (
len > NSTRING_LEN(y))
len = NSTRING_LEN(y);
2753 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2758 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i <
len; i++, p++, q++) {
2759 if (*p != *q)
return 1;
2779get_head_value_node(
Node* node,
int exact,
regex_t* reg)
2781 Node* n = NULL_NODE;
2783 switch (NTYPE(node)) {
2787#ifdef USE_SUBEXP_CALL
2800 n = get_head_value_node(NCAR(node), exact, reg);
2806 if (sn->end <= sn->s)
2810 NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2819 if (qn->lower > 0) {
2820#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2821 if (IS_NOT_NULL(qn->head_exact))
2825 n = get_head_value_node(qn->target, exact, reg);
2834 case ENCLOSE_OPTION:
2836 OnigOptionType options = reg->options;
2838 reg->options = NENCLOSE(node)->option;
2839 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2840 reg->options = options;
2844 case ENCLOSE_MEMORY:
2845 case ENCLOSE_STOP_BACKTRACK:
2846 case ENCLOSE_CONDITION:
2847 n = get_head_value_node(en->target, exact, reg);
2850 case ENCLOSE_ABSENT:
2857 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2858 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2869check_type_tree(
Node* node,
int type_mask,
int enclose_mask,
int anchor_mask)
2874 if ((NTYPE2BIT(type) & type_mask) == 0)
2881 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2883 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2887 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2894 if ((en->type & enclose_mask) == 0)
2897 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2902 type = NANCHOR(node)->type;
2903 if ((type & anchor_mask) == 0)
2906 if (NANCHOR(node)->target)
2907 r = check_type_tree(NANCHOR(node)->target,
2908 type_mask, enclose_mask, anchor_mask);
2917#ifdef USE_SUBEXP_CALL
2919# define RECURSION_EXIST 1
2920# define RECURSION_INFINITE 2
2923subexp_inf_recursive_check(
Node* node,
ScanEnv* env,
int head)
2938 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2939 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2942 ret = get_min_match_length(NCAR(x), &min, env);
2943 if (ret != 0)
return ret;
2944 if (min != 0) head = 0;
2946 }
while (IS_NOT_NULL(x = NCDR(x)));
2953 r = RECURSION_EXIST;
2955 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2956 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2958 }
while (IS_NOT_NULL(node = NCDR(node)));
2963 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2964 if (r == RECURSION_EXIST) {
2965 if (NQTFR(node)->lower == 0) r = 0;
2973 case ANCHOR_PREC_READ:
2974 case ANCHOR_PREC_READ_NOT:
2975 case ANCHOR_LOOK_BEHIND:
2976 case ANCHOR_LOOK_BEHIND_NOT:
2977 r = subexp_inf_recursive_check(an->target, env, head);
2984 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2988 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2990 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2991 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2993 SET_ENCLOSE_STATUS(node, NST_MARK2);
2994 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2995 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3007subexp_inf_recursive_check_trav(
Node* node,
ScanEnv* env)
3017 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3018 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3022 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3029 case ANCHOR_PREC_READ:
3030 case ANCHOR_PREC_READ_NOT:
3031 case ANCHOR_LOOK_BEHIND:
3032 case ANCHOR_LOOK_BEHIND_NOT:
3033 r = subexp_inf_recursive_check_trav(an->target, env);
3043 if (IS_ENCLOSE_RECURSION(en)) {
3044 SET_ENCLOSE_STATUS(node, NST_MARK1);
3045 r = subexp_inf_recursive_check(en->target, env, 1);
3046 if (r > 0)
return ONIGERR_NEVER_ENDING_RECURSION;
3047 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3049 r = subexp_inf_recursive_check_trav(en->target, env);
3062subexp_recursive_check(
Node* node)
3066 switch (NTYPE(node)) {
3070 r |= subexp_recursive_check(NCAR(node));
3071 }
while (IS_NOT_NULL(node = NCDR(node)));
3075 r = subexp_recursive_check(NQTFR(node)->target);
3082 case ANCHOR_PREC_READ:
3083 case ANCHOR_PREC_READ_NOT:
3084 case ANCHOR_LOOK_BEHIND:
3085 case ANCHOR_LOOK_BEHIND_NOT:
3086 r = subexp_recursive_check(an->target);
3093 r = subexp_recursive_check(NCALL(node)->target);
3094 if (r != 0) SET_CALL_RECURSION(node);
3098 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3100 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3103 SET_ENCLOSE_STATUS(node, NST_MARK2);
3104 r = subexp_recursive_check(NENCLOSE(node)->target);
3105 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3118subexp_recursive_check_trav(
Node* node,
ScanEnv* env)
3120# define FOUND_CALLED_NODE 1
3132 ret = subexp_recursive_check_trav(NCAR(node), env);
3133 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3134 else if (ret < 0)
return ret;
3135 }
while (IS_NOT_NULL(node = NCDR(node)));
3140 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3141 if (NQTFR(node)->upper == 0) {
3142 if (r == FOUND_CALLED_NODE)
3143 NQTFR(node)->is_referred = 1;
3151 case ANCHOR_PREC_READ:
3152 case ANCHOR_PREC_READ_NOT:
3153 case ANCHOR_LOOK_BEHIND:
3154 case ANCHOR_LOOK_BEHIND_NOT:
3155 r = subexp_recursive_check_trav(an->target, env);
3165 if (! IS_ENCLOSE_RECURSION(en)) {
3166 if (IS_ENCLOSE_CALLED(en)) {
3167 SET_ENCLOSE_STATUS(node, NST_MARK1);
3168 r = subexp_recursive_check(en->target);
3169 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3170 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3173 r = subexp_recursive_check_trav(en->target, env);
3174 if (IS_ENCLOSE_CALLED(en))
3175 r |= FOUND_CALLED_NODE;
3196 r = setup_subexp_call(NCAR(node), env);
3197 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3202 r = setup_subexp_call(NCAR(node), env);
3203 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3207 r = setup_subexp_call(NQTFR(node)->target, env);
3210 r = setup_subexp_call(NENCLOSE(node)->target, env);
3216 Node** nodes = SCANENV_MEM_NODES(env);
3218 if (cn->group_num != 0) {
3219 int gnum = cn->group_num;
3221# ifdef USE_NAMED_GROUP
3222 if (env->num_named > 0 &&
3223 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3224 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3225 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3228 if (gnum > env->num_mem) {
3229 onig_scan_env_set_error_string(env,
3230 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3231 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3234# ifdef USE_NAMED_GROUP
3237 cn->target = nodes[cn->group_num];
3238 if (IS_NULL(cn->target)) {
3239 onig_scan_env_set_error_string(env,
3240 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3241 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3243 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3244 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3245 cn->unset_addr_list = env->unset_addr_list;
3247# ifdef USE_NAMED_GROUP
3248# ifdef USE_PERL_SUBEXP_CALL
3249 else if (cn->name == cn->name_end) {
3256 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3259 onig_scan_env_set_error_string(env,
3260 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3261 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3264 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3265 onig_scan_env_set_error_string(env,
3266 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3267 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3270 cn->group_num = refs[0];
3283 case ANCHOR_PREC_READ:
3284 case ANCHOR_PREC_READ_NOT:
3285 case ANCHOR_LOOK_BEHIND:
3286 case ANCHOR_LOOK_BEHIND_NOT:
3287 r = setup_subexp_call(an->target, env);
3301#define IN_ALT (1<<0)
3302#define IN_NOT (1<<1)
3303#define IN_REPEAT (1<<2)
3304#define IN_VAR_REPEAT (1<<3)
3305#define IN_CALL (1<<4)
3306#define IN_RECCALL (1<<5)
3307#define IN_LOOK_BEHIND (1<<6)
3314divide_look_behind_alternatives(
Node* node)
3316 Node *head, *np, *insert_node;
3318 int anc_type = an->type;
3322 swap_node(node, head);
3324 NANCHOR(head)->target = np;
3327 while ((np = NCDR(np)) != NULL_NODE) {
3328 insert_node = onig_node_new_anchor(anc_type);
3329 CHECK_NULL_RETURN_MEMERR(insert_node);
3330 NANCHOR(insert_node)->target = NCAR(np);
3331 NCAR(np) = insert_node;
3334 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3337 SET_NTYPE(np, NT_LIST);
3338 }
while ((np = NCDR(np)) != NULL_NODE);
3349 r = get_char_length_tree(an->target, reg, &
len);
3352 else if (r == GET_CHAR_LEN_VARLEN)
3353 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3354 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3355 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3356 r = divide_look_behind_alternatives(node);
3358 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3371 if (type == NT_QTFR) {
3373 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3374#ifdef USE_QTFR_PEEK_NEXT
3375 Node* n = get_head_value_node(next_node, 1, reg);
3377 if (IS_NOT_NULL(n) && NSTR(n)->s[0] !=
'\0') {
3378 qn->next_head_exact = n;
3382 if (qn->lower <= 1) {
3383 int ttype = NTYPE(qn->target);
3384 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3386 x = get_head_value_node(qn->target, 0, reg);
3387 if (IS_NOT_NULL(x)) {
3388 y = get_head_value_node(next_node, 0, reg);
3389 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3390 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3391 CHECK_NULL_RETURN_MEMERR(en);
3392 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3393 swap_node(node, en);
3394 NENCLOSE(node)->target = en;
3401 else if (type == NT_ENCLOSE) {
3403 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3413update_string_node_case_fold(
regex_t* reg,
Node *node)
3415 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3416 UChar *sbuf, *ebuf, *sp;
3418 OnigDistance sbuf_size;
3422 sbuf_size = (end - sn->s) * 2;
3423 sbuf = (UChar* )
xmalloc(sbuf_size);
3424 CHECK_NULL_RETURN_MEMERR(sbuf);
3425 ebuf = sbuf + sbuf_size;
3430 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3431 for (i = 0; i <
len; i++) {
3433 UChar* p = (UChar* )
xrealloc(sbuf, sbuf_size * 2);
3436 return ONIGERR_MEMORY;
3439 sp = sbuf + sbuf_size;
3441 ebuf = sbuf + sbuf_size;
3448 r = onig_node_str_set(node, sbuf, sp);
3455expand_case_fold_make_rem_string(
Node** rnode, UChar *s, UChar *end,
3461 node = onig_node_new_str(s, end);
3462 if (IS_NULL(node))
return ONIGERR_MEMORY;
3464 r = update_string_node_case_fold(reg, node);
3466 onig_node_free(node);
3470 NSTRING_SET_AMBIG(node);
3471 NSTRING_SET_DONT_GET_OPT_INFO(node);
3482 for (i = 0; i < item_num; i++) {
3483 if (items[i].byte_len != slen) {
3486 if (items[i].code_len != 1) {
3495 UChar *p,
int slen, UChar *end,
3498 int r, i, j,
len, varlen;
3499 Node *anode, *var_anode, *snode, *xnode, *an;
3500 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3502 *rnode = var_anode = NULL_NODE;
3505 for (i = 0; i < item_num; i++) {
3506 if (items[i].byte_len != slen) {
3513 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3514 if (IS_NULL(var_anode))
return ONIGERR_MEMORY;
3516 xnode = onig_node_new_list(NULL, NULL);
3517 if (IS_NULL(xnode))
goto mem_err;
3518 NCAR(var_anode) = xnode;
3520 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3521 if (IS_NULL(anode))
goto mem_err;
3522 NCAR(xnode) = anode;
3525 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3526 if (IS_NULL(anode))
return ONIGERR_MEMORY;
3529 snode = onig_node_new_str(p, p + slen);
3530 if (IS_NULL(snode))
goto mem_err;
3532 NCAR(anode) = snode;
3534 for (i = 0; i < item_num; i++) {
3535 snode = onig_node_new_str(NULL, NULL);
3536 if (IS_NULL(snode))
goto mem_err;
3538 for (j = 0; j < items[i].code_len; j++) {
3539 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3545 r = onig_node_str_cat(snode, buf, buf +
len);
3546 if (r != 0)
goto mem_err2;
3549 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3554 if (items[i].byte_len != slen) {
3556 UChar *q = p + items[i].byte_len;
3559 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3565 xnode = onig_node_list_add(NULL_NODE, snode);
3566 if (IS_NULL(xnode)) {
3568 onig_node_free(rem);
3571 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3573 onig_node_free(xnode);
3574 onig_node_free(rem);
3584 NCDR(var_anode) = an;
3597 onig_node_free(snode);
3600 onig_node_free(*rnode);
3602 return ONIGERR_MEMORY;
3605#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3608expand_case_fold_string(
Node* node,
regex_t* reg,
int state)
3610 int r, n,
len, alt_num;
3612 int is_in_look_behind;
3613 UChar *start, *end, *p;
3614 Node *top_root, *root, *snode, *prev_node;
3618 if (NSTRING_IS_AMBIG(node))
return 0;
3624 if (start >= end)
return 0;
3626 is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3629 top_root = root = prev_node = snode = NULL_NODE;
3633 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3640 len = enclen(reg->enc, p, end);
3642 varlen = is_case_fold_variable_len(n, items,
len);
3643 if (n == 0 || varlen == 0 || is_in_look_behind) {
3644 if (IS_NULL(snode)) {
3645 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3646 onig_node_free(top_root);
3647 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3648 if (IS_NULL(root)) {
3649 onig_node_free(prev_node);
3654 prev_node = snode = onig_node_new_str(NULL, NULL);
3655 if (IS_NULL(snode))
goto mem_err;
3656 if (IS_NOT_NULL(root)) {
3657 if (IS_NULL(onig_node_list_add(root, snode))) {
3658 onig_node_free(snode);
3664 r = onig_node_str_cat(snode, p, p +
len);
3665 if (r != 0)
goto err;
3669 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION)
break;
3671 if (IS_NOT_NULL(snode)) {
3672 r = update_string_node_case_fold(reg, snode);
3674 NSTRING_SET_AMBIG(snode);
3677 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3678 onig_node_free(top_root);
3679 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3680 if (IS_NULL(root)) {
3681 onig_node_free(prev_node);
3686 r = expand_case_fold_string_alt(n, items, p,
len, end, reg, &prev_node);
3687 if (r < 0)
goto mem_err;
3689 if (IS_NULL(root)) {
3690 top_root = prev_node;
3693 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3694 onig_node_free(prev_node);
3699 root = NCAR(prev_node);
3702 if (IS_NOT_NULL(root)) {
3703 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3704 onig_node_free(prev_node);
3715 if (IS_NOT_NULL(snode)) {
3716 r = update_string_node_case_fold(reg, snode);
3718 NSTRING_SET_AMBIG(snode);
3725 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3726 if (r != 0)
goto mem_err;
3728 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3729 onig_node_free(top_root);
3730 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3731 if (IS_NULL(root)) {
3732 onig_node_free(srem);
3733 onig_node_free(prev_node);
3738 if (IS_NULL(root)) {
3742 if (IS_NULL(onig_node_list_add(root, srem))) {
3743 onig_node_free(srem);
3750 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3751 swap_node(node, top_root);
3752 onig_node_free(top_root);
3759 onig_node_free(top_root);
3764#ifdef USE_COMBINATION_EXPLOSION_CHECK
3766# define CEC_THRES_NUM_BIG_REPEAT 512
3767# define CEC_INFINITE_NUM 0x7fffffff
3769# define CEC_IN_INFINITE_REPEAT (1<<0)
3770# define CEC_IN_FINITE_REPEAT (1<<1)
3771# define CEC_CONT_BIG_REPEAT (1<<2)
3774setup_comb_exp_check(
Node* node,
int state,
ScanEnv* env)
3784 r = setup_comb_exp_check(NCAR(node), r, env);
3785 }
while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3793 ret = setup_comb_exp_check(NCAR(node), state, env);
3795 }
while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3801 int child_state = state;
3804 Node* target = qn->target;
3807 if (! IS_REPEAT_INFINITE(qn->upper)) {
3808 if (qn->upper > 1) {
3810 child_state |= CEC_IN_FINITE_REPEAT;
3813 if (env->backrefed_mem == 0) {
3814 if (NTYPE(qn->target) == NT_ENCLOSE) {
3816 if (en->type == ENCLOSE_MEMORY) {
3817 if (NTYPE(en->target) == NT_QTFR) {
3819 if (IS_REPEAT_INFINITE(q->upper)
3820 && q->greedy == qn->greedy) {
3821 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3823 child_state = state;
3832 if (state & CEC_IN_FINITE_REPEAT) {
3833 qn->comb_exp_check_num = -1;
3836 if (IS_REPEAT_INFINITE(qn->upper)) {
3837 var_num = CEC_INFINITE_NUM;
3838 child_state |= CEC_IN_INFINITE_REPEAT;
3841 var_num = qn->upper - qn->lower;
3844 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3845 add_state |= CEC_CONT_BIG_REPEAT;
3847 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3848 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3849 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3850 if (qn->comb_exp_check_num == 0) {
3851 env->num_comb_exp_check++;
3852 qn->comb_exp_check_num = env->num_comb_exp_check;
3853 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3854 env->comb_exp_max_regnum = env->curr_max_regnum;
3859 r = setup_comb_exp_check(target, child_state, env);
3869 case ENCLOSE_MEMORY:
3871 if (env->curr_max_regnum < en->regnum)
3872 env->curr_max_regnum = en->regnum;
3874 r = setup_comb_exp_check(en->target, state, env);
3879 r = setup_comb_exp_check(en->target, state, env);
3885# ifdef USE_SUBEXP_CALL
3887 if (IS_CALL_RECURSION(NCALL(node)))
3888 env->has_recursion = 1;
3890 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3921 Node* prev = NULL_NODE;
3923 r = setup_tree(NCAR(node), reg, state, env);
3924 if (IS_NOT_NULL(prev) && r == 0) {
3925 r = next_setup(prev, NCAR(node), reg);
3928 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3934 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3935 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3942 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3943 r = expand_case_fold_string(node, reg, state);
3951#ifdef USE_SUBEXP_CALL
3960 Node** nodes = SCANENV_MEM_NODES(env);
3963 for (i = 0; i < br->back_num; i++) {
3964 if (p[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
3965 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3966 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3967#ifdef USE_BACKREF_WITH_LEVEL
3968 if (IS_BACKREF_NEST_LEVEL(br)) {
3969 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3972 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3981 Node* target = qn->target;
3983 if ((state & IN_REPEAT) != 0) {
3984 qn->state |= NST_IN_REPEAT;
3987 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3988 r = get_min_match_length(target, &d, env);
3991 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3992#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3993 r = quantifiers_memory_node_info(target);
3996 qn->target_empty_info = r;
4000 r = get_max_match_length(target, &d, env);
4001 if (r == 0 && d == 0) {
4004 if (qn->lower > 1) qn->lower = 1;
4005 if (NTYPE(target) == NT_STR) {
4006 qn->upper = qn->lower = 0;
4014 if (qn->lower != qn->upper)
4015 state |= IN_VAR_REPEAT;
4016 r = setup_tree(target, reg, state, env);
4020#define EXPAND_STRING_MAX_LENGTH 100
4021 if (NTYPE(target) == NT_STR) {
4022 if (qn->lower > 1) {
4023 int i, n = qn->lower;
4024 OnigDistance
len = NSTRING_LEN(target);
4028 np = onig_node_new_str(sn->s, sn->end);
4029 if (IS_NULL(np))
return ONIGERR_MEMORY;
4030 NSTR(np)->flag = sn->flag;
4032 for (i = 1; i < n && (i+1) *
len <= EXPAND_STRING_MAX_LENGTH; i++) {
4033 r = onig_node_str_cat(np, sn->s, sn->end);
4039 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4043 if (! IS_REPEAT_INFINITE(qn->upper))
4046 np1 = onig_node_new_list(np, NULL);
4049 return ONIGERR_MEMORY;
4051 swap_node(np1, node);
4052 np2 = onig_node_list_add(node, np1);
4054 onig_node_free(np1);
4055 return ONIGERR_MEMORY;
4059 swap_node(np, node);
4066#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4067 if (qn->greedy && (qn->target_empty_info != 0)) {
4068 if (NTYPE(target) == NT_QTFR) {
4070 if (IS_NOT_NULL(tqn->head_exact)) {
4071 qn->head_exact = tqn->head_exact;
4072 tqn->head_exact = NULL;
4076 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4088 case ENCLOSE_OPTION:
4090 OnigOptionType options = reg->options;
4091 reg->options = NENCLOSE(node)->option;
4092 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4093 reg->options = options;
4097 case ENCLOSE_MEMORY:
4098 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4099 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4102 if (IS_ENCLOSE_CALLED(en))
4104 if (IS_ENCLOSE_RECURSION(en))
4105 state |= IN_RECCALL;
4106 else if ((state & IN_RECCALL) != 0)
4107 SET_CALL_RECURSION(node);
4108 r = setup_tree(en->target, reg, state, env);
4111 case ENCLOSE_STOP_BACKTRACK:
4113 Node* target = en->target;
4114 r = setup_tree(target, reg, state, env);
4115 if (NTYPE(target) == NT_QTFR) {
4117 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4119 int qtype = NTYPE(tqn->target);
4120 if (IS_NODE_TYPE_SIMPLE(qtype))
4121 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4127 case ENCLOSE_CONDITION:
4128#ifdef USE_NAMED_GROUP
4129 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4130 env->num_named > 0 &&
4131 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4132 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4133 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4136 if (NENCLOSE(node)->regnum > env->num_mem)
4137 return ONIGERR_INVALID_BACKREF;
4138 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4141 case ENCLOSE_ABSENT:
4142 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4153 case ANCHOR_PREC_READ:
4154 r = setup_tree(an->target, reg, state, env);
4156 case ANCHOR_PREC_READ_NOT:
4157 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4161#define ALLOWED_TYPE_IN_LB \
4162 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4163 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4165#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4166#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4168#define ALLOWED_ANCHOR_IN_LB \
4169( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4170 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4171 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4172 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4173#define ALLOWED_ANCHOR_IN_LB_NOT \
4174( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4175 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4176 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4177 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4179 case ANCHOR_LOOK_BEHIND:
4181 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4182 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4183 if (r < 0)
return r;
4184 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4185 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4186 r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4187 if (r != 0)
return r;
4188 r = setup_look_behind(node, reg, env);
4192 case ANCHOR_LOOK_BEHIND_NOT:
4194 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4195 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4196 if (r < 0)
return r;
4197 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4198 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4199 r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4201 if (r != 0)
return r;
4202 r = setup_look_behind(node, reg, env);
4218set_bm_skip(UChar* s, UChar* end,
regex_t* reg,
4219 UChar skip[],
int ignore_case)
4221 OnigDistance i,
len;
4222 int clen, flen, n, j, k;
4223 UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4228 if (
len >= ONIG_CHAR_TABLE_SIZE) {
4230 return ONIGERR_TYPE_BUG;
4234 for (i = 0; i <
len; i += clen) {
4236 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4238 clen = enclen(enc, p, end);
4240 clen = (int )(end - p);
4242 for (j = 0; j < n; j++) {
4243 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4248 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4260 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4261 skip[i] = (UChar )(
len + 1);
4263 for (i = 0; i <
len; i += clen) {
4266 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4268 clen = enclen(enc, p, end);
4270 clen = (int )(end - p);
4272 for (j = 0; j < clen; j++) {
4273 skip[s[i + j]] = (UChar )(
len - i - j);
4274 for (k = 0; k < n; k++) {
4275 ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4276 skip[buf[j]] = (UChar )(
len - i - j);
4292 OnigOptionType options;
4293 OnigCaseFoldType case_fold_flag;
4309 UChar s[OPT_EXACT_MAXLEN];
4317 UChar map[ONIG_CHAR_TABLE_SIZE];
4335 static const short int ByteValTable[] = {
4336 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4337 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4338 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4339 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4340 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4341 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4342 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4343 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4346 if (i < numberof(ByteValTable)) {
4347 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4350 return (
int )ByteValTable[i];
4360 static const short int dist_vals[] = {
4361 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4362 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4363 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4364 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4365 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4366 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4367 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4368 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4369 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4370 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4375 if (mm->max == ONIG_INFINITE_DISTANCE)
return 0;
4377 d = mm->max - mm->min;
4378 if (d < numberof(dist_vals))
4380 return (
int )dist_vals[d];
4388 if (v2 <= 0)
return -1;
4389 if (v1 <= 0)
return 1;
4391 v1 *= distance_value(d1);
4392 v2 *= distance_value(d2);
4394 if (v2 > v1)
return 1;
4395 if (v2 < v1)
return -1;
4397 if (d2->min < d1->min)
return 1;
4398 if (d2->min > d1->min)
return -1;
4405 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4410set_mml(
MinMaxLen* mml, OnigDistance min, OnigDistance max)
4419 mml->min = mml->max = 0;
4425 to->min = from->min;
4426 to->max = from->max;
4432 to->min = distance_add(to->min, from->min);
4433 to->max = distance_add(to->max, from->max);
4440 to->min = distance_add(to->min,
len);
4441 to->max = distance_add(to->max,
len);
4448 if (to->min > from->min) to->min = from->min;
4449 if (to->max < from->max) to->max = from->max;
4461 anc->left_anchor = 0;
4462 anc->right_anchor = 0;
4473 OnigDistance left_len, OnigDistance right_len)
4475 clear_opt_anc_info(to);
4477 to->left_anchor = left->left_anchor;
4478 if (left_len == 0) {
4479 to->left_anchor |= right->left_anchor;
4482 to->right_anchor = right->right_anchor;
4483 if (right_len == 0) {
4484 to->right_anchor |= left->right_anchor;
4487 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4492is_left_anchor(
int anc)
4494 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4495 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4496 anc == ANCHOR_PREC_READ_NOT)
4505 if ((to->left_anchor & anc) != 0)
return 1;
4507 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4513 if (is_left_anchor(anc))
4514 to->left_anchor |= anc;
4516 to->right_anchor |= anc;
4522 if (is_left_anchor(anc))
4523 to->left_anchor &= ~anc;
4525 to->right_anchor &= ~anc;
4531 to->left_anchor &= add->left_anchor;
4532 to->right_anchor &= add->right_anchor;
4538 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4544 clear_mml(&ex->mmd);
4545 clear_opt_anc_info(&ex->anc);
4547 ex->ignore_case = -1;
4565 if (to->ignore_case < 0)
4566 to->ignore_case = add->ignore_case;
4567 else if (to->ignore_case != add->ignore_case)
4572 for (i = to->len; p < end; ) {
4573 len = enclen(enc, p, end);
4574 if (i +
len > OPT_EXACT_MAXLEN)
break;
4575 for (j = 0; j <
len && p < end; j++)
4580 to->reach_end = (p == end ? add->reach_end : 0);
4582 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4583 if (! to->reach_end) tanc.right_anchor = 0;
4584 copy_opt_anc_info(&to->anc, &tanc);
4588concat_opt_exact_info_str(
OptExactInfo* to, UChar* s, UChar* end,
4594 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4595 len = enclen(enc, p, end);
4596 if (i +
len > OPT_EXACT_MAXLEN)
break;
4597 for (j = 0; j <
len && p < end; j++)
4609 if (add->len == 0 || to->len == 0) {
4610 clear_opt_exact_info(to);
4614 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4615 clear_opt_exact_info(to);
4619 for (i = 0; i < to->len && i < add->len; ) {
4620 if (to->s[i] != add->s[i])
break;
4621 len = enclen(env->enc, to->s + i, to->s + to->len);
4623 for (j = 1; j <
len; j++) {
4624 if (to->s[i+j] != add->s[i+j])
break;
4630 if (! add->reach_end || i < add->
len || i < to->
len) {
4634 if (to->ignore_case < 0)
4635 to->ignore_case = add->ignore_case;
4636 else if (add->ignore_case >= 0)
4637 to->ignore_case |= add->ignore_case;
4639 alt_merge_opt_anc_info(&to->anc, &add->anc);
4640 if (! to->reach_end) to->anc.right_anchor = 0;
4655 copy_opt_exact_info(now, alt);
4658 else if (v1 <= 2 && v2 <= 2) {
4660 v2 = map_position_value(enc, now->s[0]);
4661 v1 = map_position_value(enc, alt->s[0]);
4663 if (now->len > 1) v1 += 5;
4664 if (alt->len > 1) v2 += 5;
4667 if (now->ignore_case <= 0) v1 *= 2;
4668 if (alt->ignore_case <= 0) v2 *= 2;
4670 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4671 copy_opt_exact_info(now, alt);
4680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4681 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4699 xmemcpy(map, &clean_info,
sizeof(
OptMapInfo));
4711 if (map->map[c] == 0) {
4713 map->value += map_position_value(enc, c);
4718add_char_amb_opt_map_info(
OptMapInfo* map, UChar* p, UChar* end,
4722 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4725 add_char_opt_map_info(map, p[0], enc);
4727 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4728 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4729 if (n < 0)
return n;
4731 for (i = 0; i < n; i++) {
4732 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4733 add_char_opt_map_info(map, buf[0], enc);
4742 const int z = 1<<15;
4746 if (alt->value == 0) return ;
4747 if (now->value == 0) {
4748 copy_opt_map_info(now, alt);
4752 v1 = z / now->value;
4753 v2 = z / alt->value;
4754 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4755 copy_opt_map_info(now, alt);
4761#define COMP_EM_BASE 20
4764 if (m->value <= 0)
return -1;
4766 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4767 vm = COMP_EM_BASE * 5 * 2 / m->value;
4768 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4777 if (to->value == 0) return ;
4778 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4779 clear_opt_map_info(to);
4783 alt_merge_mml(&to->mmd, &add->mmd);
4786 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4791 val += map_position_value(enc, i);
4795 alt_merge_opt_anc_info(&to->anc, &add->anc);
4801 copy_mml(&(opt->exb.mmd), mmd);
4802 copy_mml(&(opt->expr.mmd), mmd);
4803 copy_mml(&(opt->map.mmd), mmd);
4809 clear_mml(&opt->len);
4810 clear_opt_anc_info(&opt->anc);
4811 clear_opt_exact_info(&opt->exb);
4812 clear_opt_exact_info(&opt->exm);
4813 clear_opt_exact_info(&opt->expr);
4814 clear_opt_map_info(&opt->map);
4826 int exb_reach, exm_reach;
4829 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4830 copy_opt_anc_info(&to->anc, &tanc);
4832 if (add->exb.len > 0 && to->len.max == 0) {
4833 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4834 to->len.max, add->len.max);
4835 copy_opt_anc_info(&add->exb.anc, &tanc);
4838 if (add->map.value > 0 && to->len.max == 0) {
4839 if (add->map.mmd.max == 0)
4840 add->map.anc.left_anchor |= to->anc.left_anchor;
4843 exb_reach = to->exb.reach_end;
4844 exm_reach = to->exm.reach_end;
4846 if (add->len.max != 0)
4847 to->exb.reach_end = to->exm.reach_end = 0;
4849 if (add->exb.len > 0) {
4851 concat_opt_exact_info(&to->exb, &add->exb, enc);
4852 clear_opt_exact_info(&add->exb);
4854 else if (exm_reach) {
4855 concat_opt_exact_info(&to->exm, &add->exb, enc);
4856 clear_opt_exact_info(&add->exb);
4859 select_opt_exact_info(enc, &to->exm, &add->exb);
4860 select_opt_exact_info(enc, &to->exm, &add->exm);
4862 if (to->expr.len > 0) {
4863 if (add->len.max > 0) {
4864 if (to->expr.len > (
int )add->len.max)
4865 to->expr.len = (int )add->len.max;
4867 if (to->expr.mmd.max == 0)
4868 select_opt_exact_info(enc, &to->exb, &to->expr);
4870 select_opt_exact_info(enc, &to->exm, &to->expr);
4873 else if (add->expr.len > 0) {
4874 copy_opt_exact_info(&to->expr, &add->expr);
4877 select_opt_map_info(&to->map, &add->map);
4879 add_mml(&to->len, &add->len);
4885 alt_merge_opt_anc_info (&to->anc, &add->anc);
4886 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4887 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4888 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4889 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4891 alt_merge_mml(&to->len, &add->len);
4895#define MAX_NODE_OPT_INFO_REF_COUNT 5
4903 clear_node_opt_info(opt);
4904 set_bound_node_opt_info(opt, &env->mmd);
4914 copy_opt_env(&nenv, env);
4916 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4918 add_mml(&nenv.mmd, &nopt.len);
4919 concat_left_node_opt_info(env->enc, opt, &nopt);
4921 }
while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4931 r = optimize_node_left(NCAR(nd), &nopt, env);
4933 if (nd == node) copy_node_opt_info(opt, &nopt);
4934 else alt_merge_node_opt_info(opt, &nopt, env);
4936 }
while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4943 OnigDistance slen = sn->end - sn->s;
4944 int is_raw = NSTRING_IS_RAW(node);
4946 if (! NSTRING_IS_AMBIG(node)) {
4947 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4949 opt->exb.ignore_case = 0;
4951 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4953 set_mml(&opt->len, slen, slen);
4958 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4959 int n = onigenc_strlen(env->enc, sn->s, sn->end);
4960 max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
4963 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4965 opt->exb.ignore_case = 1;
4968 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4969 env->enc, env->case_fold_flag);
4976 set_mml(&opt->len, slen, max);
4979 if ((OnigDistance )opt->exb.len == slen)
4980 opt->exb.reach_end = 1;
4991 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4992 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4993 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4995 set_mml(&opt->len, min, max);
4998 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4999 z = BITSET_AT(cc->bs, i);
5000 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5001 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5004 set_mml(&opt->len, 1, 1);
5014 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5019 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5020 switch (NCTYPE(node)->ctype) {
5021 case ONIGENC_CTYPE_WORD:
5022 if (NCTYPE(node)->not != 0) {
5023 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5024 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5025 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5030 for (i = 0; i < maxcode; i++) {
5031 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5032 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5040 min = ONIGENC_MBC_MINLEN(env->enc);
5042 set_mml(&opt->len, min, max);
5048 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5049 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5050 set_mml(&opt->len, min, max);
5055 switch (NANCHOR(node)->type) {
5056 case ANCHOR_BEGIN_BUF:
5057 case ANCHOR_BEGIN_POSITION:
5058 case ANCHOR_BEGIN_LINE:
5059 case ANCHOR_END_BUF:
5060 case ANCHOR_SEMI_END_BUF:
5061 case ANCHOR_END_LINE:
5062 case ANCHOR_LOOK_BEHIND:
5063 case ANCHOR_PREC_READ_NOT:
5064 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5067 case ANCHOR_PREC_READ:
5071 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5073 if (nopt.exb.len > 0)
5074 copy_opt_exact_info(&opt->expr, &nopt.exb);
5075 else if (nopt.exm.len > 0)
5076 copy_opt_exact_info(&opt->expr, &nopt.exm);
5078 opt->expr.reach_end = 0;
5080 if (nopt.map.value > 0)
5081 copy_opt_map_info(&opt->map, &nopt.map);
5086 case ANCHOR_LOOK_BEHIND_NOT:
5095 OnigDistance min, max, tmin, tmax;
5096 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5099 if (br->state & NST_RECURSION) {
5100 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5103 backs = BACKREFS_P(br);
5104 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5106 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5108 for (i = 1; i < br->back_num; i++) {
5109 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5111 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5113 if (min > tmin) min = tmin;
5114 if (max < tmax) max = tmax;
5116 if (r == 0) set_mml(&opt->len, min, max);
5120#ifdef USE_SUBEXP_CALL
5122 if (IS_CALL_RECURSION(NCALL(node)))
5123 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5125 OnigOptionType save = env->options;
5126 env->options = NENCLOSE(NCALL(node)->target)->option;
5127 r = optimize_node_left(NCALL(node)->target, opt, env);
5128 env->options = save;
5136 OnigDistance min, max;
5140 r = optimize_node_left(qn->target, &nopt, env);
5143 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5144 if (env->mmd.max == 0 &&
5145 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5146 if (IS_MULTILINE(env->options))
5148 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5150 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5154 if (qn->lower > 0) {
5155 copy_node_opt_info(opt, &nopt);
5156 if (nopt.exb.len > 0) {
5157 if (nopt.exb.reach_end) {
5158 for (i = 2; i <= qn->lower &&
5159 ! is_full_opt_exact_info(&opt->exb); i++) {
5160 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5162 if (i < qn->lower) {
5163 opt->exb.reach_end = 0;
5168 if (qn->lower != qn->upper) {
5169 opt->exb.reach_end = 0;
5170 opt->exm.reach_end = 0;
5173 opt->exm.reach_end = 0;
5177 min = distance_multiply(nopt.len.min, qn->lower);
5178 if (IS_REPEAT_INFINITE(qn->upper))
5179 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5181 max = distance_multiply(nopt.len.max, qn->upper);
5183 set_mml(&opt->len, min, max);
5192 case ENCLOSE_OPTION:
5194 OnigOptionType save = env->options;
5196 env->options = en->option;
5197 r = optimize_node_left(en->target, opt, env);
5198 env->options = save;
5202 case ENCLOSE_MEMORY:
5203#ifdef USE_SUBEXP_CALL
5205 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5206 OnigDistance min, max;
5209 max = ONIG_INFINITE_DISTANCE;
5210 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5211 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5212 set_mml(&opt->len, min, max);
5217 r = optimize_node_left(en->target, opt, env);
5219 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5220 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5221 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5226 case ENCLOSE_STOP_BACKTRACK:
5227 case ENCLOSE_CONDITION:
5228 r = optimize_node_left(en->target, opt, env);
5231 case ENCLOSE_ABSENT:
5232 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5240 fprintf(stderr,
"optimize_node_left: undefined node type %d\n",
5243 r = ONIGERR_TYPE_BUG;
5255 if (e->len == 0)
return 0;
5257 reg->exact = (UChar* )
xmalloc(e->len);
5258 CHECK_NULL_RETURN_MEMERR(reg->exact);
5259 xmemcpy(reg->exact, e->s, e->len);
5260 reg->exact_end = reg->exact + e->len;
5263 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5265 if (e->ignore_case > 0) {
5266 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5267 int orig_len = e->len;
5268 e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5271 reg->exact_end = reg->exact + e->len;
5272 reg->optimize = (allow_reverse != 0
5273 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5283 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5287 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5291 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5292 set_bm_skip(reg->exact, reg->exact_end, reg,
5294 reg->optimize = (allow_reverse != 0
5295 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5298 reg->optimize = ONIG_OPTIMIZE_EXACT;
5302 reg->dmin = e->mmd.min;
5303 reg->dmax = e->mmd.max;
5305 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5306 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5317 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5318 reg->map[i] = m->map[i];
5320 reg->optimize = ONIG_OPTIMIZE_MAP;
5321 reg->dmin = m->mmd.min;
5322 reg->dmax = m->mmd.max;
5324 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5325 reg->threshold_len = (int )(reg->dmin + 1);
5332 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5333 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5336#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5337static void print_optimize_info(
FILE* f,
regex_t* reg);
5349 env.options = reg->options;
5350 env.case_fold_flag = reg->case_fold_flag;
5351 env.scan_env = scan_env;
5352 clear_mml(&env.mmd);
5354 r = optimize_node_left(node, &opt, &env);
5357 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5358 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5359 ANCHOR_LOOK_BEHIND);
5361 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5362 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5364 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5365 ANCHOR_PREC_READ_NOT);
5367 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5368 reg->anchor_dmin = opt.len.min;
5369 reg->anchor_dmax = opt.len.max;
5372 if (opt.exb.len > 0 || opt.exm.len > 0) {
5373 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5374 if (opt.map.value > 0 &&
5375 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5379 r = set_optimize_exact_info(reg, &opt.exb);
5380 set_sub_anchor(reg, &opt.exb.anc);
5383 else if (opt.map.value > 0) {
5385 set_optimize_map_info(reg, &opt.map);
5386 set_sub_anchor(reg, &opt.map.anc);
5389 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5390 if (opt.len.max == 0)
5391 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5394#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5395 print_optimize_info(stderr, reg);
5401clear_optimize_info(
regex_t* reg)
5403 reg->optimize = ONIG_OPTIMIZE_NONE;
5405 reg->anchor_dmin = 0;
5406 reg->anchor_dmax = 0;
5407 reg->sub_anchor = 0;
5408 reg->exact_end = (UChar* )NULL;
5409 reg->threshold_len = 0;
5411 reg->exact = (UChar* )NULL;
5417 const UChar *s,
const UChar *end)
5419 fprintf(fp,
"\nPATTERN: /");
5421 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5427 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5429 fprintf(fp,
" 0x%04x ", (
int )code);
5432 fputc((
int )code, fp);
5435 p += enclen(enc, p, end);
5440 fputc((
int )*s, fp);
5445 fprintf(fp,
"/ (%s)\n", enc->name);
5449#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5451print_distance_range(
FILE* f, OnigDistance a, OnigDistance b)
5453 if (a == ONIG_INFINITE_DISTANCE)
5456 fprintf(f,
"(%"PRIuPTR
")", a);
5460 if (b == ONIG_INFINITE_DISTANCE)
5463 fprintf(f,
"(%"PRIuPTR
")", b);
5467print_anchor(
FILE* f,
int anchor)
5473 if (anchor & ANCHOR_BEGIN_BUF) {
5474 fprintf(f,
"begin-buf");
5477 if (anchor & ANCHOR_BEGIN_LINE) {
5478 if (q) fprintf(f,
", ");
5480 fprintf(f,
"begin-line");
5482 if (anchor & ANCHOR_BEGIN_POSITION) {
5483 if (q) fprintf(f,
", ");
5485 fprintf(f,
"begin-pos");
5487 if (anchor & ANCHOR_END_BUF) {
5488 if (q) fprintf(f,
", ");
5490 fprintf(f,
"end-buf");
5492 if (anchor & ANCHOR_SEMI_END_BUF) {
5493 if (q) fprintf(f,
", ");
5495 fprintf(f,
"semi-end-buf");
5497 if (anchor & ANCHOR_END_LINE) {
5498 if (q) fprintf(f,
", ");
5500 fprintf(f,
"end-line");
5502 if (anchor & ANCHOR_ANYCHAR_STAR) {
5503 if (q) fprintf(f,
", ");
5505 fprintf(f,
"anychar-star");
5507 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5508 if (q) fprintf(f,
", ");
5509 fprintf(f,
"anychar-star-ml");
5518 static const char* on[] = {
"NONE",
"EXACT",
"EXACT_BM",
"EXACT_BM_NOT_REV",
5520 "EXACT_BM_IC",
"EXACT_BM_NOT_REV_IC" };
5522 fprintf(f,
"optimize: %s\n", on[reg->optimize]);
5523 fprintf(f,
" anchor: "); print_anchor(f, reg->anchor);
5524 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5525 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5528 if (reg->optimize) {
5529 fprintf(f,
" sub anchor: "); print_anchor(f, reg->sub_anchor);
5536 fprintf(f,
"exact: [");
5537 for (p = reg->exact; p < reg->exact_end; p++) {
5540 fprintf(f,
"]: length: %"PRIdPTR
"\n", (reg->exact_end - reg->exact));
5542 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5545 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5546 if (reg->map[i]) n++;
5548 fprintf(f,
"map: n=%d\n", n);
5552 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5553 if (reg->map[i] != 0) {
5554 if (c > 0) fputs(
", ", f);
5556 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5557 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5560 fprintf(f,
"%d", i);
5573 if (IS_NOT_NULL(reg)) {
5576 xfree(reg->repeat_range);
5577 onig_free(reg->chain);
5579#ifdef USE_NAMED_GROUP
5580 onig_names_free(reg);
5588 if (IS_NOT_NULL(reg)) {
5589 onig_free_body(reg);
5595dup_copy(
const void *ptr,
size_t size)
5598 if (IS_NOT_NULL(newptr)) {
5599 memcpy(newptr, ptr, size);
5607 if (IS_NOT_NULL(oreg)) {
5609 if (IS_NULL(reg))
return ONIGERR_MEMORY;
5613# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5615 if (IS_NOT_NULL(reg->exact)) {
5616 size_t exact_size = reg->exact_end - reg->exact;
5617 if (COPY_FAILED(exact, exact_size))
5619 (reg)->exact_end = (reg)->exact + exact_size;
5622 if (IS_NOT_NULL(reg->p)) {
5623 if (COPY_FAILED(p, reg->alloc))
5626 if (IS_NOT_NULL(reg->repeat_range)) {
5627 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc *
sizeof(
OnigRepeatRange)))
5628 goto err_repeat_range;
5630 if (IS_NOT_NULL(reg->name_table)) {
5631 if (onig_names_copy(reg, oreg))
5632 goto err_name_table;
5634 if (IS_NOT_NULL(reg->chain)) {
5635 if (onig_reg_copy(®->chain, reg->chain))
5642 onig_names_free(reg);
5644 xfree(reg->repeat_range);
5651 return ONIGERR_MEMORY;
5658onig_memsize(
const regex_t *reg)
5660 size_t size =
sizeof(
regex_t);
5661 if (IS_NULL(reg))
return 0;
5662 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5663 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5664 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc *
sizeof(
OnigRepeatRange);
5665 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5673 size_t size =
sizeof(*regs);
5674 if (IS_NULL(regs))
return 0;
5675 size += regs->allocated * (
sizeof(*regs->beg) +
sizeof(*regs->end));
5680#define REGEX_TRANSFER(to,from) do {\
5681 onig_free_body(to);\
5682 xmemcpy(to, from, sizeof(regex_t));\
5690 REGEX_TRANSFER(to, from);
5694#ifdef ONIG_DEBUG_COMPILE
5695static void print_compiled_byte_code_list(
FILE* f,
regex_t* reg);
5697#ifdef ONIG_DEBUG_PARSE_TREE
5698static void print_tree(
FILE* f,
Node* node);
5703onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5706 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5712onig_compile_ruby(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5713 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
5716onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5720#define COMPILE_INIT_SIZE 20
5723 OnigDistance init_size;
5726#ifdef USE_SUBEXP_CALL
5730 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5733 scan_env.sourcefile = sourcefile;
5734 scan_env.sourceline = sourceline;
5738 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5741 if (reg->alloc == 0) {
5742 init_size = (pattern_end - pattern) * 2;
5743 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5744 r = BBUF_INIT(reg, init_size);
5745 if (r != 0)
goto end;
5751 reg->num_repeat = 0;
5752 reg->num_null_check = 0;
5753 reg->repeat_range_alloc = 0;
5755#ifdef USE_COMBINATION_EXPLOSION_CHECK
5756 reg->num_comb_exp_check = 0;
5759 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5760 if (r != 0)
goto err;
5762#ifdef ONIG_DEBUG_PARSE_TREE
5764 fprintf(stderr,
"ORIGINAL PARSE TREE:\n");
5765 print_tree(stderr, root);
5769#ifdef USE_NAMED_GROUP
5771 if (scan_env.num_named > 0 &&
5772 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5773 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5774 if (scan_env.num_named != scan_env.num_mem)
5775 r = disable_noname_group_capture(&root, reg, &scan_env);
5777 r = numbered_ref_check(root);
5779 if (r != 0)
goto err;
5783#ifdef USE_SUBEXP_CALL
5784 if (scan_env.num_call > 0) {
5785 r = unset_addr_list_init(&uslist, scan_env.num_call);
5786 if (r != 0)
goto err;
5787 scan_env.unset_addr_list = &uslist;
5788 r = setup_subexp_call(root, &scan_env);
5789 if (r != 0)
goto err_unset;
5790 r = subexp_recursive_check_trav(root, &scan_env);
5791 if (r < 0)
goto err_unset;
5792 r = subexp_inf_recursive_check_trav(root, &scan_env);
5793 if (r != 0)
goto err_unset;
5795 reg->num_call = scan_env.num_call;
5801 r = setup_tree(root, reg, 0, &scan_env);
5802 if (r != 0)
goto err_unset;
5804#ifdef ONIG_DEBUG_PARSE_TREE
5805 print_tree(stderr, root);
5808 reg->capture_history = scan_env.capture_history;
5809 reg->bt_mem_start = scan_env.bt_mem_start;
5810 reg->bt_mem_start |= reg->capture_history;
5811 if (IS_FIND_CONDITION(reg->options))
5812 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5814 reg->bt_mem_end = scan_env.bt_mem_end;
5815 reg->bt_mem_end |= reg->capture_history;
5818#ifdef USE_COMBINATION_EXPLOSION_CHECK
5819 if (scan_env.backrefed_mem == 0
5820# ifdef USE_SUBEXP_CALL
5821 || scan_env.num_call == 0
5824 setup_comb_exp_check(root, 0, &scan_env);
5825# ifdef USE_SUBEXP_CALL
5826 if (scan_env.has_recursion != 0) {
5827 scan_env.num_comb_exp_check = 0;
5831 if (scan_env.comb_exp_max_regnum > 0) {
5833 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5834 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5835 scan_env.num_comb_exp_check = 0;
5842 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5845 clear_optimize_info(reg);
5846#ifndef ONIG_DONT_OPTIMIZE
5847 r = set_optimize_info_from_tree(root, reg, &scan_env);
5848 if (r != 0)
goto err_unset;
5851 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5852 xfree(scan_env.mem_nodes_dynamic);
5853 scan_env.mem_nodes_dynamic = (
Node** )NULL;
5856 r = compile_tree(root, reg);
5858 r = add_opcode(reg, OP_END);
5859#ifdef USE_SUBEXP_CALL
5860 if (scan_env.num_call > 0) {
5861 r = unset_addr_list_fix(&uslist, reg);
5862 unset_addr_list_end(&uslist);
5867 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5868 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5870 if (reg->bt_mem_start != 0)
5871 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5873 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5876#ifdef USE_SUBEXP_CALL
5877 else if (scan_env.num_call > 0) {
5878 unset_addr_list_end(&uslist);
5881 onig_node_free(root);
5883#ifdef ONIG_DEBUG_COMPILE
5884# ifdef USE_NAMED_GROUP
5885 onig_print_names(stderr, reg);
5887 print_compiled_byte_code_list(stderr, reg);
5891 onig_reg_resize(reg);
5895#ifdef USE_SUBEXP_CALL
5896 if (scan_env.num_call > 0) {
5897 unset_addr_list_end(&uslist);
5901 if (IS_NOT_NULL(scan_env.error)) {
5902 if (IS_NOT_NULL(einfo)) {
5903 einfo->enc = scan_env.enc;
5904 einfo->par = scan_env.error;
5905 einfo->par_end = scan_env.error_end;
5909 onig_node_free(root);
5910 xfree(scan_env.mem_nodes_dynamic);
5915static int onig_inited = 0;
5918onig_reg_init(
regex_t* reg, OnigOptionType option,
5919 OnigCaseFoldType case_fold_flag,
5926 return ONIGERR_INVALID_ARGUMENT;
5928 (reg)->exact = (UChar* )NULL;
5929 (reg)->chain = (
regex_t* )NULL;
5930 (reg)->p = (UChar* )NULL;
5931 (reg)->name_table = (
void* )NULL;
5934 if (ONIGENC_IS_UNDEF(enc))
5935 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5937 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5938 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5939 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5942 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5943 option |= syntax->options;
5944 option &= ~ONIG_OPTION_SINGLELINE;
5947 option |= syntax->options;
5950 (reg)->options = option;
5951 (reg)->syntax = syntax;
5952 (reg)->optimize = 0;
5957 (reg)->case_fold_flag = case_fold_flag;
5959 (reg)->timelimit = 0;
5965onig_new_without_alloc(
regex_t* reg,
const UChar* pattern,
5966 const UChar* pattern_end, OnigOptionType option,
OnigEncoding enc,
5971 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5974 r = onig_compile(reg, pattern, pattern_end, einfo);
5979onig_new(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
5984 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
5986 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
5996onig_initialize(
OnigEncoding encodings[] ARG_UNUSED,
int n ARG_UNUSED)
6004 if (onig_inited != 0)
6009#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6010 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6016#ifdef ONIG_DEBUG_STATISTICS
6017 onig_statistics_init();
6026extern void onig_add_end_call(
void (*func)(
void))
6031 if (item == 0) return ;
6033 item->next = EndCallTop;
6040exec_end_call_list(
void)
6045 while (EndCallTop != 0) {
6046 func = EndCallTop->func;
6050 EndCallTop = EndCallTop->next;
6058 exec_end_call_list();
6060#ifdef ONIG_DEBUG_STATISTICS
6061 onig_print_statistics(stderr);
6064#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6065 _CrtDumpMemoryLeaks();
6074onig_is_in_code_range(
const UChar* p, OnigCodePoint code)
6076 OnigCodePoint n, *data;
6077 OnigCodePoint low, high, x;
6079 GET_CODE_POINT(n, p);
6080 data = (OnigCodePoint* )p;
6083 for (low = 0, high = n; low < high; ) {
6084 x = (low + high) >> 1;
6085 if (code > data[x * 2 + 1])
6091 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6095onig_is_code_in_cc_len(
int elen, OnigCodePoint code,
CClassNode* cc)
6099 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6100 if (IS_NULL(cc->mbuf)) {
6104 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6108 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6111 if (IS_NCCLASS_NOT(cc))
6122 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6126 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6128 return onig_is_code_in_cc_len(
len, code, cc);
6135# define ARG_SPECIAL -1
6137# define ARG_RELADDR 1
6138# define ARG_ABSADDR 2
6139# define ARG_LENGTH 3
6140# define ARG_MEMNUM 4
6141# define ARG_OPTION 5
6142# define ARG_STATE_CHECK 6
6144OnigOpInfoType OnigOpInfo[] = {
6145 { OP_FINISH,
"finish", ARG_NON },
6146 { OP_END,
"end", ARG_NON },
6147 { OP_EXACT1,
"exact1", ARG_SPECIAL },
6148 { OP_EXACT2,
"exact2", ARG_SPECIAL },
6149 { OP_EXACT3,
"exact3", ARG_SPECIAL },
6150 { OP_EXACT4,
"exact4", ARG_SPECIAL },
6151 { OP_EXACT5,
"exact5", ARG_SPECIAL },
6152 { OP_EXACTN,
"exactn", ARG_SPECIAL },
6153 { OP_EXACTMB2N1,
"exactmb2-n1", ARG_SPECIAL },
6154 { OP_EXACTMB2N2,
"exactmb2-n2", ARG_SPECIAL },
6155 { OP_EXACTMB2N3,
"exactmb2-n3", ARG_SPECIAL },
6156 { OP_EXACTMB2N,
"exactmb2-n", ARG_SPECIAL },
6157 { OP_EXACTMB3N,
"exactmb3n" , ARG_SPECIAL },
6158 { OP_EXACTMBN,
"exactmbn", ARG_SPECIAL },
6159 { OP_EXACT1_IC,
"exact1-ic", ARG_SPECIAL },
6160 { OP_EXACTN_IC,
"exactn-ic", ARG_SPECIAL },
6161 { OP_CCLASS,
"cclass", ARG_SPECIAL },
6162 { OP_CCLASS_MB,
"cclass-mb", ARG_SPECIAL },
6163 { OP_CCLASS_MIX,
"cclass-mix", ARG_SPECIAL },
6164 { OP_CCLASS_NOT,
"cclass-not", ARG_SPECIAL },
6165 { OP_CCLASS_MB_NOT,
"cclass-mb-not", ARG_SPECIAL },
6166 { OP_CCLASS_MIX_NOT,
"cclass-mix-not", ARG_SPECIAL },
6167 { OP_ANYCHAR,
"anychar", ARG_NON },
6168 { OP_ANYCHAR_ML,
"anychar-ml", ARG_NON },
6169 { OP_ANYCHAR_STAR,
"anychar*", ARG_NON },
6170 { OP_ANYCHAR_ML_STAR,
"anychar-ml*", ARG_NON },
6171 { OP_ANYCHAR_STAR_PEEK_NEXT,
"anychar*-peek-next", ARG_SPECIAL },
6172 { OP_ANYCHAR_ML_STAR_PEEK_NEXT,
"anychar-ml*-peek-next", ARG_SPECIAL },
6173 { OP_WORD,
"word", ARG_NON },
6174 { OP_NOT_WORD,
"not-word", ARG_NON },
6175 { OP_WORD_BOUND,
"word-bound", ARG_NON },
6176 { OP_NOT_WORD_BOUND,
"not-word-bound", ARG_NON },
6177 { OP_WORD_BEGIN,
"word-begin", ARG_NON },
6178 { OP_WORD_END,
"word-end", ARG_NON },
6179 { OP_ASCII_WORD,
"ascii-word", ARG_NON },
6180 { OP_NOT_ASCII_WORD,
"not-ascii-word", ARG_NON },
6181 { OP_ASCII_WORD_BOUND,
"ascii-word-bound", ARG_NON },
6182 { OP_NOT_ASCII_WORD_BOUND,
"not-ascii-word-bound", ARG_NON },
6183 { OP_ASCII_WORD_BEGIN,
"ascii-word-begin", ARG_NON },
6184 { OP_ASCII_WORD_END,
"ascii-word-end", ARG_NON },
6185 { OP_BEGIN_BUF,
"begin-buf", ARG_NON },
6186 { OP_END_BUF,
"end-buf", ARG_NON },
6187 { OP_BEGIN_LINE,
"begin-line", ARG_NON },
6188 { OP_END_LINE,
"end-line", ARG_NON },
6189 { OP_SEMI_END_BUF,
"semi-end-buf", ARG_NON },
6190 { OP_BEGIN_POSITION,
"begin-position", ARG_NON },
6191 { OP_BACKREF1,
"backref1", ARG_NON },
6192 { OP_BACKREF2,
"backref2", ARG_NON },
6193 { OP_BACKREFN,
"backrefn", ARG_MEMNUM },
6194 { OP_BACKREFN_IC,
"backrefn-ic", ARG_SPECIAL },
6195 { OP_BACKREF_MULTI,
"backref_multi", ARG_SPECIAL },
6196 { OP_BACKREF_MULTI_IC,
"backref_multi-ic", ARG_SPECIAL },
6197 { OP_BACKREF_WITH_LEVEL,
"backref_at_level", ARG_SPECIAL },
6198 { OP_MEMORY_START_PUSH,
"mem-start-push", ARG_MEMNUM },
6199 { OP_MEMORY_START,
"mem-start", ARG_MEMNUM },
6200 { OP_MEMORY_END_PUSH,
"mem-end-push", ARG_MEMNUM },
6201 { OP_MEMORY_END_PUSH_REC,
"mem-end-push-rec", ARG_MEMNUM },
6202 { OP_MEMORY_END,
"mem-end", ARG_MEMNUM },
6203 { OP_MEMORY_END_REC,
"mem-end-rec", ARG_MEMNUM },
6204 { OP_SET_OPTION_PUSH,
"set-option-push", ARG_OPTION },
6205 { OP_SET_OPTION,
"set-option", ARG_OPTION },
6206 { OP_KEEP,
"keep", ARG_NON },
6207 { OP_FAIL,
"fail", ARG_NON },
6208 { OP_JUMP,
"jump", ARG_RELADDR },
6209 { OP_PUSH,
"push", ARG_RELADDR },
6210 { OP_POP,
"pop", ARG_NON },
6211 { OP_PUSH_OR_JUMP_EXACT1,
"push-or-jump-e1", ARG_SPECIAL },
6212 { OP_PUSH_IF_PEEK_NEXT,
"push-if-peek-next", ARG_SPECIAL },
6213 { OP_REPEAT,
"repeat", ARG_SPECIAL },
6214 { OP_REPEAT_NG,
"repeat-ng", ARG_SPECIAL },
6215 { OP_REPEAT_INC,
"repeat-inc", ARG_MEMNUM },
6216 { OP_REPEAT_INC_NG,
"repeat-inc-ng", ARG_MEMNUM },
6217 { OP_REPEAT_INC_SG,
"repeat-inc-sg", ARG_MEMNUM },
6218 { OP_REPEAT_INC_NG_SG,
"repeat-inc-ng-sg", ARG_MEMNUM },
6219 { OP_NULL_CHECK_START,
"null-check-start", ARG_MEMNUM },
6220 { OP_NULL_CHECK_END,
"null-check-end", ARG_MEMNUM },
6221 { OP_NULL_CHECK_END_MEMST,
"null-check-end-memst", ARG_MEMNUM },
6222 { OP_NULL_CHECK_END_MEMST_PUSH,
"null-check-end-memst-push", ARG_MEMNUM },
6223 { OP_PUSH_POS,
"push-pos", ARG_NON },
6224 { OP_POP_POS,
"pop-pos", ARG_NON },
6225 { OP_PUSH_POS_NOT,
"push-pos-not", ARG_RELADDR },
6226 { OP_FAIL_POS,
"fail-pos", ARG_NON },
6227 { OP_PUSH_STOP_BT,
"push-stop-bt", ARG_NON },
6228 { OP_POP_STOP_BT,
"pop-stop-bt", ARG_NON },
6229 { OP_LOOK_BEHIND,
"look-behind", ARG_SPECIAL },
6230 { OP_PUSH_LOOK_BEHIND_NOT,
"push-look-behind-not", ARG_SPECIAL },
6231 { OP_FAIL_LOOK_BEHIND_NOT,
"fail-look-behind-not", ARG_NON },
6232 { OP_PUSH_ABSENT_POS,
"push-absent-pos", ARG_NON },
6233 { OP_ABSENT,
"absent", ARG_RELADDR },
6234 { OP_ABSENT_END,
"absent-end", ARG_NON },
6235 { OP_CALL,
"call", ARG_ABSADDR },
6236 { OP_RETURN,
"return", ARG_NON },
6237 { OP_CONDITION,
"condition", ARG_SPECIAL },
6238 { OP_STATE_CHECK_PUSH,
"state-check-push", ARG_SPECIAL },
6239 { OP_STATE_CHECK_PUSH_OR_JUMP,
"state-check-push-or-jump", ARG_SPECIAL },
6240 { OP_STATE_CHECK,
"state-check", ARG_STATE_CHECK },
6241 { OP_STATE_CHECK_ANYCHAR_STAR,
"state-check-anychar*", ARG_STATE_CHECK },
6242 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6243 "state-check-anychar-ml*", ARG_STATE_CHECK },
6252 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6253 if (opcode == OnigOpInfo[i].opcode)
6254 return OnigOpInfo[i].name;
6260op2arg_type(
int opcode)
6264 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6265 if (opcode == OnigOpInfo[i].opcode)
6266 return OnigOpInfo[i].arg_type;
6271# ifdef ONIG_DEBUG_PARSE_TREE
6273Indent(
FILE* f,
int indent)
6276 for (i = 0; i < indent; i++) putc(
' ', f);
6281p_string(
FILE* f, ptrdiff_t
len, UChar* s)
6284 while (
len-- > 0) { fputc(*s++, f); }
6288p_len_string(
FILE* f, LengthType
len,
int mb_len, UChar* s)
6290 int x =
len * mb_len;
6292 fprintf(f,
":%d:",
len);
6293 while (x-- > 0) { fputc(*s++, f); }
6297onig_print_compiled_byte_code(
FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6304 StateCheckNumType scn;
6308 fprintf(f,
"[%s", op2name(*bp));
6309 arg_type = op2arg_type(*bp);
6310 if (arg_type != ARG_SPECIAL) {
6316 GET_RELADDR_INC(addr, bp);
6317 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6320 GET_ABSADDR_INC(addr, bp);
6321 fprintf(f,
":(%d)", addr);
6324 GET_LENGTH_INC(
len, bp);
6325 fprintf(f,
":%d",
len);
6328 mem = *((MemNumType* )bp);
6330 fprintf(f,
":%d", mem);
6334 OnigOptionType option = *((OnigOptionType* )bp);
6336 fprintf(f,
":%d", option);
6340 case ARG_STATE_CHECK:
6341 scn = *((StateCheckNumType* )bp);
6342 bp += SIZE_STATE_CHECK_NUM;
6343 fprintf(f,
":%d", scn);
6350 case OP_ANYCHAR_STAR_PEEK_NEXT:
6351 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6352 p_string(f, 1, bp++);
break;
6354 p_string(f, 2, bp); bp += 2;
break;
6356 p_string(f, 3, bp); bp += 3;
break;
6358 p_string(f, 4, bp); bp += 4;
break;
6360 p_string(f, 5, bp); bp += 5;
break;
6362 GET_LENGTH_INC(
len, bp);
6363 p_len_string(f,
len, 1, bp);
6368 p_string(f, 2, bp); bp += 2;
break;
6370 p_string(f, 4, bp); bp += 4;
break;
6372 p_string(f, 6, bp); bp += 6;
break;
6374 GET_LENGTH_INC(
len, bp);
6375 p_len_string(f,
len, 2, bp);
6379 GET_LENGTH_INC(
len, bp);
6380 p_len_string(f,
len, 3, bp);
6387 GET_LENGTH_INC(mb_len, bp);
6388 GET_LENGTH_INC(
len, bp);
6389 fprintf(f,
":%d:%d:", mb_len,
len);
6391 while (n-- > 0) { fputc(*bp++, f); }
6396 len = enclen(enc, bp, bpend);
6397 p_string(f,
len, bp);
6401 GET_LENGTH_INC(
len, bp);
6402 p_len_string(f,
len, 1, bp);
6407 n = bitset_on_num((BitSetRef )bp);
6409 fprintf(f,
":%d", n);
6413 n = bitset_on_num((BitSetRef )bp);
6415 fprintf(f,
":%d", n);
6419 case OP_CCLASS_MB_NOT:
6420 GET_LENGTH_INC(
len, bp);
6422# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6425 GET_CODE_POINT(code, q);
6427 fprintf(f,
":%d:%d", (
int )code,
len);
6431 case OP_CCLASS_MIX_NOT:
6432 n = bitset_on_num((BitSetRef )bp);
6434 GET_LENGTH_INC(
len, bp);
6436# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6439 GET_CODE_POINT(code, q);
6441 fprintf(f,
":%d:%d:%d", n, (
int )code,
len);
6444 case OP_BACKREFN_IC:
6445 mem = *((MemNumType* )bp);
6447 fprintf(f,
":%d", mem);
6450 case OP_BACKREF_MULTI_IC:
6451 case OP_BACKREF_MULTI:
6453 GET_LENGTH_INC(
len, bp);
6454 for (i = 0; i <
len; i++) {
6455 GET_MEMNUM_INC(mem, bp);
6456 if (i > 0) fputs(
", ", f);
6457 fprintf(f,
"%d", mem);
6461 case OP_BACKREF_WITH_LEVEL:
6463 OnigOptionType option;
6466 GET_OPTION_INC(option, bp);
6467 fprintf(f,
":%d", option);
6468 GET_LENGTH_INC(level, bp);
6469 fprintf(f,
":%d", level);
6472 GET_LENGTH_INC(
len, bp);
6473 for (i = 0; i <
len; i++) {
6474 GET_MEMNUM_INC(mem, bp);
6475 if (i > 0) fputs(
", ", f);
6476 fprintf(f,
"%d", mem);
6484 mem = *((MemNumType* )bp);
6486 addr = *((RelAddrType* )bp);
6488 fprintf(f,
":%d:%d", mem, addr);
6492 case OP_PUSH_OR_JUMP_EXACT1:
6493 case OP_PUSH_IF_PEEK_NEXT:
6494 addr = *((RelAddrType* )bp);
6496 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6501 case OP_LOOK_BEHIND:
6502 GET_LENGTH_INC(
len, bp);
6503 fprintf(f,
":%d",
len);
6506 case OP_PUSH_LOOK_BEHIND_NOT:
6507 GET_RELADDR_INC(addr, bp);
6508 GET_LENGTH_INC(
len, bp);
6509 fprintf(f,
":%d:(%s%d)",
len, (addr >= 0) ?
"+" :
"", addr);
6512 case OP_STATE_CHECK_PUSH:
6513 case OP_STATE_CHECK_PUSH_OR_JUMP:
6514 scn = *((StateCheckNumType* )bp);
6515 bp += SIZE_STATE_CHECK_NUM;
6516 addr = *((RelAddrType* )bp);
6518 fprintf(f,
":%d:(%s%d)", scn, (addr >= 0) ?
"+" :
"", addr);
6522 GET_MEMNUM_INC(mem, bp);
6523 GET_RELADDR_INC(addr, bp);
6524 fprintf(f,
":%d:(%s%d)", mem, (addr >= 0) ?
"+" :
"", addr);
6528 fprintf(stderr,
"onig_print_compiled_byte_code: undefined code %d\n",
6533 if (nextp) *nextp = bp;
6536# ifdef ONIG_DEBUG_COMPILE
6538print_compiled_byte_code_list(
FILE* f,
regex_t* reg)
6542 UChar* end = reg->p + reg->used;
6544 fprintf(f,
"code length: %d", reg->used);
6550 fprintf(f,
"\n%ld:", bp - reg->p);
6552 fprintf(f,
" %ld:", bp - reg->p);
6553 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6560# ifdef ONIG_DEBUG_PARSE_TREE
6562print_indent_tree(
FILE* f,
Node* node,
int indent)
6564 int i,
type, container_p = 0;
6569 if (IS_NULL(node)) {
6570 fprintf(f,
"ERROR: null node!!!\n");
6578 if (NTYPE(node) == NT_LIST)
6579 fprintf(f,
"<list:%"PRIxPTR
">\n", (intptr_t )node);
6581 fprintf(f,
"<alt:%"PRIxPTR
">\n", (intptr_t )node);
6583 print_indent_tree(f, NCAR(node), indent + add);
6584 while (IS_NOT_NULL(node = NCDR(node))) {
6585 if (NTYPE(node) != type) {
6586 fprintf(f,
"ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6589 print_indent_tree(f, NCAR(node), indent + add);
6594 fprintf(f,
"<string%s:%"PRIxPTR
">",
6595 (NSTRING_IS_RAW(node) ?
"-raw" :
""), (intptr_t )node);
6596 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6597 if (*p >= 0x20 && *p < 0x7f)
6600 fprintf(f,
" 0x%02x", *p);
6606 fprintf(f,
"<cclass:%"PRIxPTR
">", (intptr_t )node);
6607 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(
"not ", f);
6608 if (NCCLASS(node)->mbuf) {
6609 BBuf* bbuf = NCCLASS(node)->mbuf;
6610 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6611 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6612 fprintf(f,
"%d", *data++);
6613 for (; data < end; data+=2) {
6615 fprintf(f,
"%04x-%04x", data[0], data[1]);
6621 fprintf(f,
"<ctype:%"PRIxPTR
"> ", (intptr_t )node);
6622 switch (NCTYPE(node)->ctype) {
6623 case ONIGENC_CTYPE_WORD:
6624 if (NCTYPE(node)->not != 0)
6625 fputs(
"not word", f);
6631 fprintf(f,
"ERROR: undefined ctype.\n");
6637 fprintf(f,
"<anychar:%"PRIxPTR
">", (intptr_t )node);
6641 fprintf(f,
"<anchor:%"PRIxPTR
"> ", (intptr_t )node);
6642 switch (NANCHOR(node)->type) {
6643 case ANCHOR_BEGIN_BUF: fputs(
"begin buf", f);
break;
6644 case ANCHOR_END_BUF: fputs(
"end buf", f);
break;
6645 case ANCHOR_BEGIN_LINE: fputs(
"begin line", f);
break;
6646 case ANCHOR_END_LINE: fputs(
"end line", f);
break;
6647 case ANCHOR_SEMI_END_BUF: fputs(
"semi end buf", f);
break;
6648 case ANCHOR_BEGIN_POSITION: fputs(
"begin position", f);
break;
6650 case ANCHOR_WORD_BOUND: fputs(
"word bound", f);
break;
6651 case ANCHOR_NOT_WORD_BOUND: fputs(
"not word bound", f);
break;
6652# ifdef USE_WORD_BEGIN_END
6653 case ANCHOR_WORD_BEGIN: fputs(
"word begin", f);
break;
6654 case ANCHOR_WORD_END: fputs(
"word end", f);
break;
6656 case ANCHOR_PREC_READ: fputs(
"prec read", f); container_p = TRUE;
break;
6657 case ANCHOR_PREC_READ_NOT: fputs(
"prec read not", f); container_p = TRUE;
break;
6658 case ANCHOR_LOOK_BEHIND: fputs(
"look_behind", f); container_p = TRUE;
break;
6659 case ANCHOR_LOOK_BEHIND_NOT: fputs(
"look_behind_not",f); container_p = TRUE;
break;
6660 case ANCHOR_KEEP: fputs(
"keep",f);
break;
6663 fprintf(f,
"ERROR: undefined anchor type.\n");
6673 fprintf(f,
"<backref:%"PRIxPTR
">", (intptr_t )node);
6674 for (i = 0; i < br->back_num; i++) {
6675 if (i > 0) fputs(
", ", f);
6676 fprintf(f,
"%d", p[i]);
6681# ifdef USE_SUBEXP_CALL
6685 fprintf(f,
"<call:%"PRIxPTR
">", (intptr_t )node);
6686 p_string(f, cn->name_end - cn->name, cn->name);
6692 fprintf(f,
"<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (intptr_t )node,
6693 NQTFR(node)->lower, NQTFR(node)->upper,
6694 (NQTFR(node)->greedy ?
"" :
"?"));
6695 print_indent_tree(f, NQTFR(node)->target, indent + add);
6699 fprintf(f,
"<enclose:%"PRIxPTR
"> ", (intptr_t )node);
6700 switch (NENCLOSE(node)->type) {
6701 case ENCLOSE_OPTION:
6702 fprintf(f,
"option:%d", NENCLOSE(node)->option);
6704 case ENCLOSE_MEMORY:
6705 fprintf(f,
"memory:%d", NENCLOSE(node)->regnum);
6707 case ENCLOSE_STOP_BACKTRACK:
6708 fprintf(f,
"stop-bt");
6710 case ENCLOSE_CONDITION:
6711 fprintf(f,
"condition:%d", NENCLOSE(node)->regnum);
6713 case ENCLOSE_ABSENT:
6714 fprintf(f,
"absent");
6721 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6725 fprintf(f,
"print_indent_tree: undefined node type %d\n", NTYPE(node));
6729 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6733 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6741 print_indent_tree(f, node, 0);
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
int len
Length of the buffer.
VALUE type(ANYARGS)
ANYARGS-ed function type.