Ruby 3.5.0dev (2025-01-10 revision 5fab31b15e32622c4b71d1d347a41937e9f9c212)
regcomp.c (5fab31b15e32622c4b71d1d347a41937e9f9c212)
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(void)
37{
38 return OnigDefaultCaseFoldFlag;
39}
40
41extern int
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
114 return ONIG_INFINITE_DISTANCE;
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141// Attempt to right size allocated buffers for a regex post compile
142static void
143onig_reg_resize(regex_t *reg)
144{
145 do {
146 if (!reg->used) {
147 xfree(reg->p);
148 reg->alloc = 0;
149 reg->p = 0;
150 }
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr = xrealloc(reg->p, reg->used);
153 // Skip the right size optimization if memory allocation fails
154 if (new_ptr) {
155 reg->alloc = reg->used;
156 reg->p = new_ptr;
157 }
158 }
159 } while ((reg = reg->chain) != 0);
160}
161
162extern int
163onig_bbuf_init(BBuf* buf, OnigDistance size)
164{
165 if (size <= 0) {
166 size = 0;
167 buf->p = NULL;
168 }
169 else {
170 buf->p = (UChar* )xmalloc(size);
171 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
172 }
173
174 buf->alloc = (unsigned int )size;
175 buf->used = 0;
176 return 0;
177}
178
179
180#ifdef USE_SUBEXP_CALL
181
182static int
183unset_addr_list_init(UnsetAddrList* uslist, int size)
184{
185 UnsetAddr* p;
186
187 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
188 CHECK_NULL_RETURN_MEMERR(p);
189 uslist->num = 0;
190 uslist->alloc = size;
191 uslist->us = p;
192 return 0;
193}
194
195static void
196unset_addr_list_end(UnsetAddrList* uslist)
197{
198 xfree(uslist->us);
199}
200
201static int
202unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
203{
204 UnsetAddr* p;
205 int size;
206
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
209 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
212 uslist->us = p;
213 }
214
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
217 uslist->num++;
218 return 0;
219}
220#endif /* USE_SUBEXP_CALL */
221
222
223static int
224add_opcode(regex_t* reg, int opcode)
225{
226 BBUF_ADD1(reg, opcode);
227 return 0;
228}
229
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
231static int
232add_state_check_num(regex_t* reg, int num)
233{
234 StateCheckNumType n = (StateCheckNumType )num;
235
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
237 return 0;
238}
239#endif
240
241static int
242add_rel_addr(regex_t* reg, int addr)
243{
244 RelAddrType ra = (RelAddrType )addr;
245
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
247 return 0;
248}
249
250static int
251add_abs_addr(regex_t* reg, int addr)
252{
253 AbsAddrType ra = (AbsAddrType )addr;
254
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
256 return 0;
257}
258
259static int
260add_length(regex_t* reg, OnigDistance len)
261{
262 LengthType l = (LengthType )len;
263
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
265 return 0;
266}
267
268static int
269add_mem_num(regex_t* reg, int num)
270{
271 MemNumType n = (MemNumType )num;
272
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
274 return 0;
275}
276
277#if 0
278static int
279add_pointer(regex_t* reg, void* addr)
280{
281 PointerType ptr = (PointerType )addr;
282
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
284 return 0;
285}
286#endif
287
288static int
289add_option(regex_t* reg, OnigOptionType option)
290{
291 BBUF_ADD(reg, &option, SIZE_OPTION);
292 return 0;
293}
294
295static int
296add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
297{
298 int r;
299
300 r = add_opcode(reg, opcode);
301 if (r) return r;
302 r = add_rel_addr(reg, addr);
303 return r;
304}
305
306static int
307add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
308{
309 BBUF_ADD(reg, bytes, len);
310 return 0;
311}
312
313static int
314add_bitset(regex_t* reg, BitSetRef bs)
315{
316 BBUF_ADD(reg, bs, SIZE_BITSET);
317 return 0;
318}
319
320static int
321add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
322{
323 int r;
324
325 r = add_opcode(reg, opcode);
326 if (r) return r;
327 r = add_option(reg, option);
328 return r;
329}
330
331static int compile_length_tree(Node* node, regex_t* reg);
332static int compile_tree(Node* node, regex_t* reg);
333
334
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
338
339static int
340select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
341{
342 int op;
343 OnigDistance str_len = roomof(byte_len, mb_len);
344
345 if (ignore_case) {
346 switch (str_len) {
347 case 1: op = OP_EXACT1_IC; break;
348 default: op = OP_EXACTN_IC; break;
349 }
350 }
351 else {
352 switch (mb_len) {
353 case 1:
354 switch (str_len) {
355 case 1: op = OP_EXACT1; break;
356 case 2: op = OP_EXACT2; break;
357 case 3: op = OP_EXACT3; break;
358 case 4: op = OP_EXACT4; break;
359 case 5: op = OP_EXACT5; break;
360 default: op = OP_EXACTN; break;
361 }
362 break;
363
364 case 2:
365 switch (str_len) {
366 case 1: op = OP_EXACTMB2N1; break;
367 case 2: op = OP_EXACTMB2N2; break;
368 case 3: op = OP_EXACTMB2N3; break;
369 default: op = OP_EXACTMB2N; break;
370 }
371 break;
372
373 case 3:
374 op = OP_EXACTMB3N;
375 break;
376
377 default:
378 op = OP_EXACTMBN;
379 break;
380 }
381 }
382 return op;
383}
384
385static int
386compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
387{
388 int r;
389 int saved_num_null_check = reg->num_null_check;
390
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
393 if (r) return r;
394 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
395 if (r) return r;
396 reg->num_null_check++;
397 }
398
399 r = compile_tree(node, reg);
400 if (r) return r;
401
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
409
410 if (r) return r;
411 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
412 }
413 return r;
414}
415
416#ifdef USE_SUBEXP_CALL
417static int
418compile_call(CallNode* node, regex_t* reg)
419{
420 int r;
421
422 r = add_opcode(reg, OP_CALL);
423 if (r) return r;
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
425 node->target);
426 if (r) return r;
427 r = add_abs_addr(reg, 0 /*dummy addr.*/);
428 return r;
429}
430#endif
431
432static int
433compile_tree_n_times(Node* node, int n, regex_t* reg)
434{
435 int i, r;
436
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
439 if (r) return r;
440 }
441 return 0;
442}
443
444static int
445add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED, int ignore_case)
447{
448 int len;
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
450
451 len = SIZE_OPCODE;
452
453 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
455 len += SIZE_LENGTH;
456
457 len += (int )byte_len;
458 return len;
459}
460
461static int
462add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
463 regex_t* reg, int ignore_case)
464{
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
466 add_opcode(reg, op);
467
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
470
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
474 else
475 add_length(reg, byte_len / mb_len);
476 }
477
478 add_bytes(reg, s, byte_len);
479 return 0;
480}
481
482
483static int
484compile_length_string_node(Node* node, regex_t* reg)
485{
486 int rlen, r, len, prev_len, blen, ambig;
487 OnigEncoding enc = reg->enc;
488 UChar *p, *prev;
489 StrNode* sn;
490
491 sn = NSTR(node);
492 if (sn->end <= sn->s)
493 return 0;
494
495 ambig = NSTRING_IS_AMBIG(node);
496
497 p = prev = sn->s;
498 prev_len = enclen(enc, p, sn->end);
499 p += prev_len;
500 blen = prev_len;
501 rlen = 0;
502
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (len == prev_len || ambig) {
506 blen += len;
507 }
508 else {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
510 rlen += r;
511 prev = p;
512 blen = len;
513 prev_len = len;
514 }
515 p += len;
516 }
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
518 rlen += r;
519 return rlen;
520}
521
522static int
523compile_length_string_raw_node(StrNode* sn, regex_t* reg)
524{
525 if (sn->end <= sn->s)
526 return 0;
527
528 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
529}
530
531static int
532compile_string_node(Node* node, regex_t* reg)
533{
534 int r, len, prev_len, blen, ambig;
535 OnigEncoding enc = reg->enc;
536 UChar *p, *prev, *end;
537 StrNode* sn;
538
539 sn = NSTR(node);
540 if (sn->end <= sn->s)
541 return 0;
542
543 end = sn->end;
544 ambig = NSTRING_IS_AMBIG(node);
545
546 p = prev = sn->s;
547 prev_len = enclen(enc, p, end);
548 p += prev_len;
549 blen = prev_len;
550
551 for (; p < end; ) {
552 len = enclen(enc, p, end);
553 if (len == prev_len || ambig) {
554 blen += len;
555 }
556 else {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
558 if (r) return r;
559
560 prev = p;
561 blen = len;
562 prev_len = len;
563 }
564
565 p += len;
566 }
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
568}
569
570static int
571compile_string_raw_node(StrNode* sn, regex_t* reg)
572{
573 if (sn->end <= sn->s)
574 return 0;
575
576 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
577}
578
579static int
580add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
581{
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
585#else
586 int r, pad_size;
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
588
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
592
593 r = add_bytes(reg, mbuf->p, mbuf->used);
594
595 /* padding for return value from compile_length_cclass_node() to be fix. */
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
598 return r;
599#endif
600}
601
602static int
603compile_length_cclass_node(CClassNode* cc, regex_t* reg)
604{
605 int len;
606
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
609 }
610 else {
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
612 len = SIZE_OPCODE;
613 }
614 else {
615 len = SIZE_OPCODE + SIZE_BITSET;
616 }
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
619#else
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
621#endif
622 }
623
624 return len;
625}
626
627static int
628compile_cclass_node(CClassNode* cc, regex_t* reg)
629{
630 int r;
631
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
635 else
636 add_opcode(reg, OP_CCLASS);
637
638 r = add_bitset(reg, cc->bs);
639 }
640 else {
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
644 else
645 add_opcode(reg, OP_CCLASS_MB);
646
647 r = add_multi_byte_cclass(cc->mbuf, reg);
648 }
649 else {
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
652 else
653 add_opcode(reg, OP_CCLASS_MIX);
654
655 r = add_bitset(reg, cc->bs);
656 if (r) return r;
657 r = add_multi_byte_cclass(cc->mbuf, reg);
658 }
659 }
660
661 return r;
662}
663
664static int
665entry_repeat_range(regex_t* reg, int id, int lower, int upper)
666{
667#define REPEAT_RANGE_ALLOC 4
668
670
671 if (reg->repeat_range_alloc == 0) {
672 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
676 }
677 else if (reg->repeat_range_alloc <= id) {
678 int n;
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
680 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
681 sizeof(OnigRepeatRange) * n);
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
685 }
686 else {
687 p = reg->repeat_range;
688 }
689
690 p[id].lower = lower;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
692 return 0;
693}
694
695static int
696compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
697 regex_t* reg)
698{
699 int r;
700 int num_repeat = reg->num_repeat;
701
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
703 if (r) return r;
704 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
705 reg->num_repeat++;
706 if (r) return r;
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
708 if (r) return r;
709
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
711 if (r) return r;
712
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
714 if (r) return r;
715
716 if (
717#ifdef USE_SUBEXP_CALL
718 reg->num_call > 0 ||
719#endif
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
722 }
723 else {
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
725 }
726 if (r) return r;
727 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
728 return r;
729}
730
731static int
732is_anychar_star_quantifier(QtfrNode* qn)
733{
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
736 return 1;
737 else
738 return 0;
739}
740
741#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
742#define CKN_ON (ckn > 0)
743
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
745
746static int
747compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
748{
749 int len, mod_tlen, cklen;
750 int ckn;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
754
755 if (tlen < 0) return tlen;
756
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
758
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
760
761 /* anychar repeat */
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
766 else
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
768 }
769 }
770
771 if (empty_info != 0)
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
773 else
774 mod_tlen = tlen;
775
776 if (infinite && qn->lower <= 1) {
777 if (qn->greedy) {
778 if (qn->lower == 1)
779 len = SIZE_OP_JUMP;
780 else
781 len = 0;
782
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
784 }
785 else {
786 if (qn->lower == 0)
787 len = SIZE_OP_JUMP;
788 else
789 len = 0;
790
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
792 }
793 }
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0) /* /(?<n>..){0}/ */
796 len = SIZE_OP_JUMP + tlen;
797 else
798 len = 0;
799 }
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
802 if (CKN_ON) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
804 }
805 else {
806 len = SIZE_OP_PUSH + tlen;
807 }
808 }
809 else {
810 len = tlen;
811 }
812 }
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
815 }
816 else {
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
819 if (CKN_ON)
820 len += SIZE_OP_STATE_CHECK;
821 }
822
823 return len;
824}
825
826static int
827compile_quantifier_node(QtfrNode* qn, regex_t* reg)
828{
829 int r, mod_tlen;
830 int ckn;
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
834
835 if (tlen < 0) return tlen;
836
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
838
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
841 if (r) return r;
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
845 else
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
847 if (r) return r;
848 if (CKN_ON) {
849 r = add_state_check_num(reg, ckn);
850 if (r) return r;
851 }
852
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
854 }
855 else {
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
860 }
861 else {
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
864 : OP_ANYCHAR_STAR));
865 }
866 if (r) return r;
867 if (CKN_ON)
868 r = add_state_check_num(reg, ckn);
869
870 return r;
871 }
872 }
873
874 if (empty_info != 0)
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
876 else
877 mod_tlen = tlen;
878
879 if (infinite && qn->lower <= 1) {
880 if (qn->greedy) {
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
884 if (r) return r;
885 }
886
887 if (CKN_ON) {
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
889 if (r) return r;
890 r = add_state_check_num(reg, ckn);
891 if (r) return r;
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
893 }
894 else {
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
896 }
897 if (r) return r;
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
899 if (r) return r;
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (int )SIZE_OP_JUMP
902 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
903 }
904 else {
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
907 if (r) return r;
908 }
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
910 if (r) return r;
911 if (CKN_ON) {
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
913 if (r) return r;
914 r = add_state_check_num(reg, ckn);
915 if (r) return r;
916 r = add_rel_addr(reg,
917 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
918 }
919 else
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
921 }
922 }
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
926 if (r) return r;
927 r = compile_tree(qn->target, reg);
928 }
929 else
930 r = 0;
931 }
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
934 if (CKN_ON) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
936 if (r) return r;
937 r = add_state_check_num(reg, ckn);
938 if (r) return r;
939 r = add_rel_addr(reg, tlen);
940 }
941 else {
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
943 }
944 if (r) return r;
945 }
946
947 r = compile_tree(qn->target, reg);
948 }
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
950 if (CKN_ON) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
952 if (r) return r;
953 r = add_state_check_num(reg, ckn);
954 if (r) return r;
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
956 }
957 else {
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
959 }
960
961 if (r) return r;
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
963 if (r) return r;
964 r = compile_tree(qn->target, reg);
965 }
966 else {
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
968 if (CKN_ON) {
969 if (r) return r;
970 r = add_opcode(reg, OP_STATE_CHECK);
971 if (r) return r;
972 r = add_state_check_num(reg, ckn);
973 }
974 }
975 return r;
976}
977
978#else /* USE_COMBINATION_EXPLOSION_CHECK */
979
980static int
981compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
982{
983 int len, mod_tlen;
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
987
988 if (tlen < 0) return tlen;
989
990 /* anychar repeat */
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
995 else
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
997 }
998 }
999
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1002 else
1003 mod_tlen = tlen;
1004
1005 if (infinite &&
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1008 len = SIZE_OP_JUMP;
1009 }
1010 else {
1011 len = tlen * qn->lower;
1012 }
1013
1014 if (qn->greedy) {
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1018 else
1019#endif
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1022 else
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1024 }
1025 else
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1027 }
1028 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1029 len = SIZE_OP_JUMP + tlen;
1030 }
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1036 }
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1039 }
1040 else {
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1043 }
1044
1045 return len;
1046}
1047
1048static int
1049compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1050{
1051 int i, r, mod_tlen;
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1055
1056 if (tlen < 0) return tlen;
1057
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1060 if (r) return r;
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1064 else
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1066 if (r) return r;
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1068 }
1069 else {
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1072 else
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
1074 }
1075 }
1076
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1079 else
1080 mod_tlen = tlen;
1081
1082 if (infinite &&
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1085 if (qn->greedy) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1089 else
1090#endif
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1093 else
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1095 }
1096 else {
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1098 }
1099 if (r) return r;
1100 }
1101 else {
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
1103 if (r) return r;
1104 }
1105
1106 if (qn->greedy) {
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1111 if (r) return r;
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1114 if (r) return r;
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1117 }
1118 else
1119#endif
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1123 if (r) return r;
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1126 if (r) return r;
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1129 }
1130 else {
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1132 if (r) return r;
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1134 if (r) return r;
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1137 }
1138 }
1139 else {
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1141 if (r) return r;
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1143 if (r) return r;
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1145 }
1146 }
1147 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1149 if (r) return r;
1150 r = compile_tree(qn->target, reg);
1151 }
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1156
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
1158 if (r) return r;
1159
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1163 if (r) return r;
1164 r = compile_tree(qn->target, reg);
1165 if (r) return r;
1166 }
1167 }
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1170 if (r) return r;
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1172 if (r) return r;
1173 r = compile_tree(qn->target, reg);
1174 }
1175 else {
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1177 }
1178 return r;
1179}
1180#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1181
1182static int
1183compile_length_option_node(EncloseNode* node, regex_t* reg)
1184{
1185 int tlen;
1186 OnigOptionType prev = reg->options;
1187
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1191
1192 if (tlen < 0) return tlen;
1193
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
1197 }
1198 else
1199 return tlen;
1200}
1201
1202static int
1203compile_option_node(EncloseNode* node, regex_t* reg)
1204{
1205 int r;
1206 OnigOptionType prev = reg->options;
1207
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1210 if (r) return r;
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1212 if (r) return r;
1213 r = add_opcode(reg, OP_FAIL);
1214 if (r) return r;
1215 }
1216
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1220
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1222 if (r) return r;
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1224 }
1225 return r;
1226}
1227
1228static int
1229compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1230{
1231 int len;
1232 int tlen;
1233
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1236
1237 if (node->target) {
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0) return tlen;
1240 }
1241 else
1242 tlen = 0;
1243
1244 switch (node->type) {
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1253 else
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1256 }
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1261 }
1262 else
1263#endif
1264 {
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1267 else
1268 len = SIZE_OP_MEMORY_START;
1269
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1272 }
1273 break;
1274
1275 case ENCLOSE_STOP_BACKTRACK:
1276 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1277 /* optimization because the match cache optimization pushes an extra item to */
1278 /* the stack and it breaks the assumption for this optimization. */
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0) return tlen;
1284
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1287 }
1288 else {
1289#endif
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
1292 }
1293#endif
1294 break;
1295
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1300
1301 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1302 if (tlen < 0) return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1305 x = NCDR(x);
1306 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1307 if (tlen < 0) return tlen;
1308 len += tlen;
1309 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1310 }
1311 else {
1312 return ONIGERR_PARSER_BUG;
1313 }
1314 break;
1315
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1318 break;
1319
1320 default:
1321 return ONIGERR_TYPE_BUG;
1322 break;
1323 }
1324
1325 return len;
1326}
1327
1328static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1329
1330static int
1331compile_enclose_node(EncloseNode* node, regex_t* reg)
1332{
1333 int r, len;
1334
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1337
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1343 if (r) return r;
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (int )node->call_addr);
1347 if (r) return r;
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1353 else
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1356
1357 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1358 if (r) return r;
1359 }
1360#endif
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1363 else
1364 r = add_opcode(reg, OP_MEMORY_START);
1365 if (r) return r;
1366 r = add_mem_num(reg, node->regnum);
1367 if (r) return r;
1368 r = compile_tree(node->target, reg);
1369 if (r) return r;
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1375 else
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1378
1379 if (r) return r;
1380 r = add_mem_num(reg, node->regnum);
1381 if (r) return r;
1382 r = add_opcode(reg, OP_RETURN);
1383 }
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1387 else
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1389 if (r) return r;
1390 r = add_mem_num(reg, node->regnum);
1391 }
1392 else
1393#endif
1394 {
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1397 else
1398 r = add_opcode(reg, OP_MEMORY_END);
1399 if (r) return r;
1400 r = add_mem_num(reg, node->regnum);
1401 }
1402 break;
1403
1404 case ENCLOSE_STOP_BACKTRACK:
1405 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1406 /* optimization because the match cache optimization pushes an extra item to */
1407 /* the stack and it breaks the assumption for this optimization. */
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1412 if (r) return r;
1413
1414 len = compile_length_tree(qn->target, reg);
1415 if (len < 0) return len;
1416
1417 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1418 if (r) return r;
1419 r = compile_tree(qn->target, reg);
1420 if (r) return r;
1421 r = add_opcode(reg, OP_POP);
1422 if (r) return r;
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1425 }
1426 else {
1427#endif
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1429 if (r) return r;
1430 r = compile_tree(node->target, reg);
1431 if (r) return r;
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
1434 }
1435#endif
1436 break;
1437
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1440 if (r) return r;
1441 r = add_mem_num(reg, node->regnum);
1442 if (r) return r;
1443
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1446 int len2;
1447
1448 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1449 if (len < 0) return len;
1450 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1451 x = NCDR(x);
1452 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1453 if (len2 < 0) return len2;
1454 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1455
1456 x = node->target;
1457 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1458 if (r) return r;
1459 r = compile_tree(NCAR(x), reg); /* yes-node */
1460 if (r) return r;
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1462 if (r) return r;
1463 x = NCDR(x);
1464 r = compile_tree(NCAR(x), reg); /* no-node */
1465 }
1466 else {
1467 return ONIGERR_PARSER_BUG;
1468 }
1469 break;
1470
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1473 if (len < 0) return len;
1474
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1476 if (r) return r;
1477 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1478 if (r) return r;
1479 r = compile_tree(node->target, reg);
1480 if (r) return r;
1481 r = add_opcode(reg, OP_ABSENT_END);
1482 break;
1483
1484 default:
1485 return ONIGERR_TYPE_BUG;
1486 break;
1487 }
1488
1489 return r;
1490}
1491
1492static int
1493compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1494{
1495 int len;
1496 int tlen = 0;
1497
1498 if (node->target) {
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0) return tlen;
1501 }
1502
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1506 break;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1509 break;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1512 break;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1515 break;
1516
1517 default:
1518 len = SIZE_OPCODE;
1519 break;
1520 }
1521
1522 return len;
1523}
1524
1525static int
1526compile_anchor_node(AnchorNode* node, regex_t* reg)
1527{
1528 int r, len;
1529
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1537
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1541 break;
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1545 break;
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1550 break;
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1554 break;
1555#endif
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1557
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1560 if (r) return r;
1561 r = compile_tree(node->target, reg);
1562 if (r) return r;
1563 r = add_opcode(reg, OP_POP_POS);
1564 break;
1565
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1568 if (len < 0) return len;
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1570 if (r) return r;
1571 r = compile_tree(node->target, reg);
1572 if (r) return r;
1573 r = add_opcode(reg, OP_FAIL_POS);
1574 break;
1575
1576 case ANCHOR_LOOK_BEHIND:
1577 {
1578 int n;
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1580 if (r) return r;
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1584 }
1585 else
1586 n = node->char_len;
1587 r = add_length(reg, n);
1588 if (r) return r;
1589 r = compile_tree(node->target, reg);
1590 }
1591 break;
1592
1593 case ANCHOR_LOOK_BEHIND_NOT:
1594 {
1595 int n;
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1599 if (r) return r;
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1603 }
1604 else
1605 n = node->char_len;
1606 r = add_length(reg, n);
1607 if (r) return r;
1608 r = compile_tree(node->target, reg);
1609 if (r) return r;
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1611 }
1612 break;
1613
1614 default:
1615 return ONIGERR_TYPE_BUG;
1616 break;
1617 }
1618
1619 return r;
1620}
1621
1622static int
1623compile_length_tree(Node* node, regex_t* reg)
1624{
1625 int len, type, r;
1626
1627 type = NTYPE(node);
1628 switch (type) {
1629 case NT_LIST:
1630 len = 0;
1631 do {
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0) return r;
1634 len += r;
1635 } while (IS_NOT_NULL(node = NCDR(node)));
1636 r = len;
1637 break;
1638
1639 case NT_ALT:
1640 {
1641 int n = 0;
1642 len = 0;
1643 do {
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0) return r;
1646 len += r;
1647 n++;
1648 } while (IS_NOT_NULL(node = NCDR(node)));
1649 r = len;
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1651 }
1652 break;
1653
1654 case NT_STR:
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1657 else
1658 r = compile_length_string_node(node, reg);
1659 break;
1660
1661 case NT_CCLASS:
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
1663 break;
1664
1665 case NT_CTYPE:
1666 case NT_CANY:
1667 r = SIZE_OPCODE;
1668 break;
1669
1670 case NT_BREF:
1671 {
1672 BRefNode* br = NBREF(node);
1673
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1678 }
1679 else
1680#endif
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1684 }
1685 else {
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1687 }
1688 }
1689 break;
1690
1691#ifdef USE_SUBEXP_CALL
1692 case NT_CALL:
1693 r = SIZE_OP_CALL;
1694 break;
1695#endif
1696
1697 case NT_QTFR:
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1699 break;
1700
1701 case NT_ENCLOSE:
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1703 break;
1704
1705 case NT_ANCHOR:
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1707 break;
1708
1709 default:
1710 return ONIGERR_TYPE_BUG;
1711 break;
1712 }
1713
1714 return r;
1715}
1716
1717static int
1718compile_tree(Node* node, regex_t* reg)
1719{
1720 int n, type, len, pos, r = 0;
1721
1722 type = NTYPE(node);
1723 switch (type) {
1724 case NT_LIST:
1725 do {
1726 r = compile_tree(NCAR(node), reg);
1727 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1728 break;
1729
1730 case NT_ALT:
1731 {
1732 Node* x = node;
1733 len = 0;
1734 do {
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1738 }
1739 } while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used + len; /* goal position */
1741
1742 do {
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1746 if (r) break;
1747 }
1748 r = compile_tree(NCAR(node), reg);
1749 if (r) break;
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1753 if (r) break;
1754 }
1755 } while (IS_NOT_NULL(node = NCDR(node)));
1756 }
1757 break;
1758
1759 case NT_STR:
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1762 else
1763 r = compile_string_node(node, reg);
1764 break;
1765
1766 case NT_CCLASS:
1767 r = compile_cclass_node(NCCLASS(node), reg);
1768 break;
1769
1770 case NT_CTYPE:
1771 {
1772 int op;
1773
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1779 }
1780 else {
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1782 else op = OP_WORD;
1783 }
1784 break;
1785 default:
1786 return ONIGERR_TYPE_BUG;
1787 break;
1788 }
1789 r = add_opcode(reg, op);
1790 }
1791 break;
1792
1793 case NT_CANY:
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1796 else
1797 r = add_opcode(reg, OP_ANYCHAR);
1798 break;
1799
1800 case NT_BREF:
1801 {
1802 BRefNode* br = NBREF(node);
1803
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1807 if (r) return r;
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1809 if (r) return r;
1810 r = add_length(reg, br->nest_level);
1811 if (r) return r;
1812
1813 goto add_bacref_mems;
1814 }
1815 else
1816#endif
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1821 if (r) return r;
1822 r = add_mem_num(reg, n);
1823 }
1824 else {
1825 switch (n) {
1826 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1828 default:
1829 r = add_opcode(reg, OP_BACKREFN);
1830 if (r) return r;
1831 r = add_mem_num(reg, n);
1832 break;
1833 }
1834 }
1835 }
1836 else {
1837 int i;
1838 int* p;
1839
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1842 }
1843 else {
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1845 }
1846 if (r) return r;
1847
1848#ifdef USE_BACKREF_WITH_LEVEL
1849 add_bacref_mems:
1850#endif
1851 r = add_length(reg, br->back_num);
1852 if (r) return r;
1853 p = BACKREFS_P(br);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1856 if (r) return r;
1857 }
1858 }
1859 }
1860 break;
1861
1862#ifdef USE_SUBEXP_CALL
1863 case NT_CALL:
1864 r = compile_call(NCALL(node), reg);
1865 break;
1866#endif
1867
1868 case NT_QTFR:
1869 r = compile_quantifier_node(NQTFR(node), reg);
1870 break;
1871
1872 case NT_ENCLOSE:
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1874 break;
1875
1876 case NT_ANCHOR:
1877 r = compile_anchor_node(NANCHOR(node), reg);
1878 break;
1879
1880 default:
1881#ifdef ONIG_DEBUG
1882 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1883#endif
1884 break;
1885 }
1886
1887 return r;
1888}
1889
1890#ifdef USE_NAMED_GROUP
1891
1892static int
1893noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1894{
1895 int r = 0;
1896 Node* node = *plink;
1897
1898 switch (NTYPE(node)) {
1899 case NT_LIST:
1900 case NT_ALT:
1901 do {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1904 break;
1905
1906 case NT_QTFR:
1907 {
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1913 }
1914 }
1915 break;
1916
1917 case NT_ENCLOSE:
1918 {
1919 EncloseNode* en = NENCLOSE(node);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1922 (*counter)++;
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1925 }
1926 else if (en->regnum != 0) {
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1931 break;
1932 }
1933 }
1934 r = noname_disable_map(&(en->target), map, counter);
1935 }
1936 break;
1937
1938 case NT_ANCHOR:
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1941 break;
1942
1943 default:
1944 break;
1945 }
1946
1947 return r;
1948}
1949
1950static int
1951renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1952{
1953 int i, pos, n, old_num;
1954 int *backs;
1955 BRefNode* bn = NBREF(node);
1956
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1959
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1963 else
1964 backs = bn->back_dynamic;
1965
1966 for (i = 0, pos = 0; i < old_num; i++) {
1967 if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1969 if (n > 0) {
1970 backs[pos] = n;
1971 pos++;
1972 }
1973 }
1974
1975 bn->back_num = pos;
1976 return 0;
1977}
1978
1979static int
1980renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1981{
1982 int r = 0;
1983
1984 switch (NTYPE(node)) {
1985 case NT_LIST:
1986 case NT_ALT:
1987 do {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1990 break;
1991 case NT_QTFR:
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1993 break;
1994 case NT_ENCLOSE:
1995 {
1996 EncloseNode* en = NENCLOSE(node);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2000 }
2001 r = renumber_by_map(en->target, map, num_mem);
2002 }
2003 break;
2004
2005 case NT_BREF:
2006 r = renumber_node_backref(node, map, num_mem);
2007 break;
2008
2009 case NT_ANCHOR:
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2012 break;
2013
2014 default:
2015 break;
2016 }
2017
2018 return r;
2019}
2020
2021static int
2022numbered_ref_check(Node* node)
2023{
2024 int r = 0;
2025
2026 switch (NTYPE(node)) {
2027 case NT_LIST:
2028 case NT_ALT:
2029 do {
2030 r = numbered_ref_check(NCAR(node));
2031 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2032 break;
2033 case NT_QTFR:
2034 r = numbered_ref_check(NQTFR(node)->target);
2035 break;
2036 case NT_ENCLOSE:
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2038 break;
2039
2040 case NT_BREF:
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2043 break;
2044
2045 case NT_ANCHOR:
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2048 break;
2049
2050 default:
2051 break;
2052 }
2053
2054 return r;
2055}
2056
2057static int
2058disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2059{
2060 int r, i, pos, counter;
2061 BitStatusType loc;
2062 GroupNumRemap* map;
2063
2064 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2067 map[i].new_val = 0;
2068 }
2069 counter = 0;
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0) return r;
2072
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0) return r;
2075
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2079 pos++;
2080 }
2081 }
2082
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2088 }
2089 }
2090
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2093
2094 return onig_renumber_name_table(reg, map);
2095}
2096#endif /* USE_NAMED_GROUP */
2097
2098#ifdef USE_SUBEXP_CALL
2099static int
2100unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2101{
2102 int i, offset;
2103 EncloseNode* en;
2104 AbsAddrType addr;
2105
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2111
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2113 }
2114 return 0;
2115}
2116#endif
2117
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2119static int
2120quantifiers_memory_node_info(Node* node)
2121{
2122 int r = 0;
2123
2124 switch (NTYPE(node)) {
2125 case NT_LIST:
2126 case NT_ALT:
2127 {
2128 int v;
2129 do {
2130 v = quantifiers_memory_node_info(NCAR(node));
2131 if (v > r) r = v;
2132 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2133 }
2134 break;
2135
2136# ifdef USE_SUBEXP_CALL
2137 case NT_CALL:
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2140 }
2141 else
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2143 break;
2144# endif
2145
2146 case NT_QTFR:
2147 {
2148 QtfrNode* qn = NQTFR(node);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2151 }
2152 }
2153 break;
2154
2155 case NT_ENCLOSE:
2156 {
2157 EncloseNode* en = NENCLOSE(node);
2158 switch (en->type) {
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2161 break;
2162
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2168 break;
2169 default:
2170 break;
2171 }
2172 }
2173 break;
2174
2175 case NT_BREF:
2176 case NT_STR:
2177 case NT_CTYPE:
2178 case NT_CCLASS:
2179 case NT_CANY:
2180 case NT_ANCHOR:
2181 default:
2182 break;
2183 }
2184
2185 return r;
2186}
2187#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2188
2189static int
2190get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2191{
2192 OnigDistance tmin;
2193 int r = 0;
2194
2195 *min = 0;
2196 switch (NTYPE(node)) {
2197 case NT_BREF:
2198 {
2199 int i;
2200 int* backs;
2201 Node** nodes = SCANENV_MEM_NODES(env);
2202 BRefNode* br = NBREF(node);
2203 if (br->state & NST_RECURSION) break;
2204
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2208 if (r != 0) break;
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2212 if (r != 0) break;
2213 if (*min > tmin) *min = tmin;
2214 }
2215 }
2216 break;
2217
2218#ifdef USE_SUBEXP_CALL
2219 case NT_CALL:
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2221 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2223 *min = en->min_len;
2224 }
2225 else
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2227 break;
2228#endif
2229
2230 case NT_LIST:
2231 do {
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2235 break;
2236
2237 case NT_ALT:
2238 {
2239 Node *x, *y;
2240 y = node;
2241 do {
2242 x = NCAR(y);
2243 r = get_min_match_length(x, &tmin, env);
2244 if (r != 0) break;
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2248 }
2249 break;
2250
2251 case NT_STR:
2252 {
2253 StrNode* sn = NSTR(node);
2254 *min = sn->end - sn->s;
2255 }
2256 break;
2257
2258 case NT_CTYPE:
2259 *min = 1;
2260 break;
2261
2262 case NT_CCLASS:
2263 case NT_CANY:
2264 *min = 1;
2265 break;
2266
2267 case NT_QTFR:
2268 {
2269 QtfrNode* qn = NQTFR(node);
2270
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2273 if (r == 0)
2274 *min = distance_multiply(*min, qn->lower);
2275 }
2276 }
2277 break;
2278
2279 case NT_ENCLOSE:
2280 {
2281 EncloseNode* en = NENCLOSE(node);
2282 switch (en->type) {
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2285 *min = en->min_len;
2286 else {
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2288 *min = 0; /* recursive */
2289 else {
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2293 if (r == 0) {
2294 en->min_len = *min;
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2296 }
2297 }
2298 }
2299 break;
2300
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2305 break;
2306
2307 case ENCLOSE_ABSENT:
2308 break;
2309 }
2310 }
2311 break;
2312
2313 case NT_ANCHOR:
2314 default:
2315 break;
2316 }
2317
2318 return r;
2319}
2320
2321static int
2322get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2323{
2324 OnigDistance tmax;
2325 int r = 0;
2326
2327 *max = 0;
2328 switch (NTYPE(node)) {
2329 case NT_LIST:
2330 do {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2332 if (r == 0)
2333 *max = distance_add(*max, tmax);
2334 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2335 break;
2336
2337 case NT_ALT:
2338 do {
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2342 break;
2343
2344 case NT_STR:
2345 {
2346 StrNode* sn = NSTR(node);
2347 *max = sn->end - sn->s;
2348 }
2349 break;
2350
2351 case NT_CTYPE:
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2353 break;
2354
2355 case NT_CCLASS:
2356 case NT_CANY:
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2358 break;
2359
2360 case NT_BREF:
2361 {
2362 int i;
2363 int* backs;
2364 Node** nodes = SCANENV_MEM_NODES(env);
2365 BRefNode* br = NBREF(node);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2368 break;
2369 }
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2374 if (r != 0) break;
2375 if (*max < tmax) *max = tmax;
2376 }
2377 }
2378 break;
2379
2380#ifdef USE_SUBEXP_CALL
2381 case NT_CALL:
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2384 else
2385 *max = ONIG_INFINITE_DISTANCE;
2386 break;
2387#endif
2388
2389 case NT_QTFR:
2390 {
2391 QtfrNode* qn = NQTFR(node);
2392
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2398 else
2399 *max = ONIG_INFINITE_DISTANCE;
2400 }
2401 }
2402 }
2403 break;
2404
2405 case NT_ENCLOSE:
2406 {
2407 EncloseNode* en = NENCLOSE(node);
2408 switch (en->type) {
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2411 *max = en->max_len;
2412 else {
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2415 else {
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2419 if (r == 0) {
2420 en->max_len = *max;
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2422 }
2423 }
2424 }
2425 break;
2426
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2431 break;
2432
2433 case ENCLOSE_ABSENT:
2434 break;
2435 }
2436 }
2437 break;
2438
2439 case NT_ANCHOR:
2440 default:
2441 break;
2442 }
2443
2444 return r;
2445}
2446
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2449
2450/* fixed size pattern node only */
2451static int
2452get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2453{
2454 int tlen;
2455 int r = 0;
2456
2457 level++;
2458 *len = 0;
2459 switch (NTYPE(node)) {
2460 case NT_LIST:
2461 do {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2463 if (r == 0)
2464 *len = (int )distance_add(*len, tlen);
2465 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2466 break;
2467
2468 case NT_ALT:
2469 {
2470 int tlen2;
2471 int varlen = 0;
2472
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2476 if (r == 0) {
2477 if (tlen != tlen2)
2478 varlen = 1;
2479 }
2480 }
2481 if (r == 0) {
2482 if (varlen != 0) {
2483 if (level == 1)
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2485 else
2486 r = GET_CHAR_LEN_VARLEN;
2487 }
2488 else
2489 *len = tlen;
2490 }
2491 }
2492 break;
2493
2494 case NT_STR:
2495 {
2496 StrNode* sn = NSTR(node);
2497 UChar *s = sn->s;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2500 (*len)++;
2501 }
2502 }
2503 break;
2504
2505 case NT_QTFR:
2506 {
2507 QtfrNode* qn = NQTFR(node);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2510 if (r == 0)
2511 *len = (int )distance_multiply(tlen, qn->lower);
2512 }
2513 else
2514 r = GET_CHAR_LEN_VARLEN;
2515 }
2516 break;
2517
2518#ifdef USE_SUBEXP_CALL
2519 case NT_CALL:
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2522 else
2523 r = GET_CHAR_LEN_VARLEN;
2524 break;
2525#endif
2526
2527 case NT_CTYPE:
2528 *len = 1;
2529 break;
2530
2531 case NT_CCLASS:
2532 case NT_CANY:
2533 *len = 1;
2534 break;
2535
2536 case NT_ENCLOSE:
2537 {
2538 EncloseNode* en = NENCLOSE(node);
2539 switch (en->type) {
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *len = en->char_len;
2544 else {
2545 r = get_char_length_tree1(en->target, reg, len, level);
2546 if (r == 0) {
2547 en->char_len = *len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2549 }
2550 }
2551 break;
2552#endif
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg, len, level);
2557 break;
2558 case ENCLOSE_ABSENT:
2559 default:
2560 break;
2561 }
2562 }
2563 break;
2564
2565 case NT_ANCHOR:
2566 break;
2567
2568 default:
2569 r = GET_CHAR_LEN_VARLEN;
2570 break;
2571 }
2572
2573 return r;
2574}
2575
2576static int
2577get_char_length_tree(Node* node, regex_t* reg, int* len)
2578{
2579 return get_char_length_tree1(node, reg, len, 0);
2580}
2581
2582/* x is not included y ==> 1 : 0 */
2583static int
2584is_not_included(Node* x, Node* y, regex_t* reg)
2585{
2586 int i;
2587 OnigDistance len;
2588 OnigCodePoint code;
2589 UChar *p;
2590 int ytype;
2591
2592 retry:
2593 ytype = NTYPE(y);
2594 switch (NTYPE(x)) {
2595 case NT_CTYPE:
2596 {
2597 switch (ytype) {
2598 case NT_CTYPE:
2599 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2600 NCTYPE(y)->not != NCTYPE(x)->not &&
2601 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2602 return 1;
2603 else
2604 return 0;
2605 break;
2606
2607 case NT_CCLASS:
2608 swap:
2609 {
2610 Node* tmp;
2611 tmp = x; x = y; y = tmp;
2612 goto retry;
2613 }
2614 break;
2615
2616 case NT_STR:
2617 goto swap;
2618 break;
2619
2620 default:
2621 break;
2622 }
2623 }
2624 break;
2625
2626 case NT_CCLASS:
2627 {
2628 CClassNode* xc = NCCLASS(x);
2629 switch (ytype) {
2630 case NT_CTYPE:
2631 switch (NCTYPE(y)->ctype) {
2632 case ONIGENC_CTYPE_WORD:
2633 if (NCTYPE(y)->not == 0) {
2634 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2635 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2636 if (BITSET_AT(xc->bs, i)) {
2637 if (NCTYPE(y)->ascii_range) {
2638 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2639 }
2640 else {
2641 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2642 }
2643 }
2644 }
2645 return 1;
2646 }
2647 return 0;
2648 }
2649 else {
2650 if (IS_NOT_NULL(xc->mbuf)) return 0;
2651 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2652 int is_word;
2653 if (NCTYPE(y)->ascii_range)
2654 is_word = IS_CODE_SB_WORD(reg->enc, i);
2655 else
2656 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2657 if (! is_word) {
2658 if (!IS_NCCLASS_NOT(xc)) {
2659 if (BITSET_AT(xc->bs, i))
2660 return 0;
2661 }
2662 else {
2663 if (! BITSET_AT(xc->bs, i))
2664 return 0;
2665 }
2666 }
2667 }
2668 return 1;
2669 }
2670 break;
2671
2672 default:
2673 break;
2674 }
2675 break;
2676
2677 case NT_CCLASS:
2678 {
2679 int v;
2680 CClassNode* yc = NCCLASS(y);
2681
2682 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2683 v = BITSET_AT(xc->bs, i);
2684 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2685 (v == 0 && IS_NCCLASS_NOT(xc))) {
2686 v = BITSET_AT(yc->bs, i);
2687 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2688 (v == 0 && IS_NCCLASS_NOT(yc)))
2689 return 0;
2690 }
2691 }
2692 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2693 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2694 return 1;
2695 return 0;
2696 }
2697 break;
2698
2699 case NT_STR:
2700 goto swap;
2701 break;
2702
2703 default:
2704 break;
2705 }
2706 }
2707 break;
2708
2709 case NT_STR:
2710 {
2711 StrNode* xs = NSTR(x);
2712 if (NSTRING_LEN(x) == 0)
2713 break;
2714
2715 switch (ytype) {
2716 case NT_CTYPE:
2717 switch (NCTYPE(y)->ctype) {
2718 case ONIGENC_CTYPE_WORD:
2719 if (NCTYPE(y)->ascii_range) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2721 return NCTYPE(y)->not;
2722 else
2723 return !(NCTYPE(y)->not);
2724 }
2725 else {
2726 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2727 return NCTYPE(y)->not;
2728 else
2729 return !(NCTYPE(y)->not);
2730 }
2731 break;
2732 default:
2733 break;
2734 }
2735 break;
2736
2737 case NT_CCLASS:
2738 {
2739 CClassNode* cc = NCCLASS(y);
2740
2741 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2742 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2743 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2744 }
2745 break;
2746
2747 case NT_STR:
2748 {
2749 UChar *q;
2750 StrNode* ys = NSTR(y);
2751 len = NSTRING_LEN(x);
2752 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2753 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2754 /* tiny version */
2755 return 0;
2756 }
2757 else {
2758 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2759 if (*p != *q) return 1;
2760 }
2761 }
2762 }
2763 break;
2764
2765 default:
2766 break;
2767 }
2768 }
2769 break;
2770
2771 default:
2772 break;
2773 }
2774
2775 return 0;
2776}
2777
2778static Node*
2779get_head_value_node(Node* node, int exact, regex_t* reg)
2780{
2781 Node* n = NULL_NODE;
2782
2783 switch (NTYPE(node)) {
2784 case NT_BREF:
2785 case NT_ALT:
2786 case NT_CANY:
2787#ifdef USE_SUBEXP_CALL
2788 case NT_CALL:
2789#endif
2790 break;
2791
2792 case NT_CTYPE:
2793 case NT_CCLASS:
2794 if (exact == 0) {
2795 n = node;
2796 }
2797 break;
2798
2799 case NT_LIST:
2800 n = get_head_value_node(NCAR(node), exact, reg);
2801 break;
2802
2803 case NT_STR:
2804 {
2805 StrNode* sn = NSTR(node);
2806
2807 if (sn->end <= sn->s)
2808 break;
2809
2810 if (exact != 0 &&
2811 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2812 }
2813 else {
2814 n = node;
2815 }
2816 }
2817 break;
2818
2819 case NT_QTFR:
2820 {
2821 QtfrNode* qn = NQTFR(node);
2822 if (qn->lower > 0) {
2823#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2824 if (IS_NOT_NULL(qn->head_exact))
2825 n = qn->head_exact;
2826 else
2827#endif
2828 n = get_head_value_node(qn->target, exact, reg);
2829 }
2830 }
2831 break;
2832
2833 case NT_ENCLOSE:
2834 {
2835 EncloseNode* en = NENCLOSE(node);
2836 switch (en->type) {
2837 case ENCLOSE_OPTION:
2838 {
2839 OnigOptionType options = reg->options;
2840
2841 reg->options = NENCLOSE(node)->option;
2842 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2843 reg->options = options;
2844 }
2845 break;
2846
2847 case ENCLOSE_MEMORY:
2848 case ENCLOSE_STOP_BACKTRACK:
2849 case ENCLOSE_CONDITION:
2850 n = get_head_value_node(en->target, exact, reg);
2851 break;
2852
2853 case ENCLOSE_ABSENT:
2854 break;
2855 }
2856 }
2857 break;
2858
2859 case NT_ANCHOR:
2860 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2861 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2862 break;
2863
2864 default:
2865 break;
2866 }
2867
2868 return n;
2869}
2870
2871static int
2872check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2873{
2874 int type, r = 0;
2875
2876 type = NTYPE(node);
2877 if ((NTYPE2BIT(type) & type_mask) == 0)
2878 return 1;
2879
2880 switch (type) {
2881 case NT_LIST:
2882 case NT_ALT:
2883 do {
2884 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2885 anchor_mask);
2886 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2887 break;
2888
2889 case NT_QTFR:
2890 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2891 anchor_mask);
2892 break;
2893
2894 case NT_ENCLOSE:
2895 {
2896 EncloseNode* en = NENCLOSE(node);
2897 if ((en->type & enclose_mask) == 0)
2898 return 1;
2899
2900 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2901 }
2902 break;
2903
2904 case NT_ANCHOR:
2905 type = NANCHOR(node)->type;
2906 if ((type & anchor_mask) == 0)
2907 return 1;
2908
2909 if (NANCHOR(node)->target)
2910 r = check_type_tree(NANCHOR(node)->target,
2911 type_mask, enclose_mask, anchor_mask);
2912 break;
2913
2914 default:
2915 break;
2916 }
2917 return r;
2918}
2919
2920#ifdef USE_SUBEXP_CALL
2921
2922# define RECURSION_EXIST 1
2923# define RECURSION_INFINITE 2
2924
2925static int
2926subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2927{
2928 int type;
2929 int r = 0;
2930
2931 type = NTYPE(node);
2932 switch (type) {
2933 case NT_LIST:
2934 {
2935 Node *x;
2936 OnigDistance min;
2937 int ret;
2938
2939 x = node;
2940 do {
2941 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2942 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2943 r |= ret;
2944 if (head) {
2945 ret = get_min_match_length(NCAR(x), &min, env);
2946 if (ret != 0) return ret;
2947 if (min != 0) head = 0;
2948 }
2949 } while (IS_NOT_NULL(x = NCDR(x)));
2950 }
2951 break;
2952
2953 case NT_ALT:
2954 {
2955 int ret;
2956 r = RECURSION_EXIST;
2957 do {
2958 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2959 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2960 r &= ret;
2961 } while (IS_NOT_NULL(node = NCDR(node)));
2962 }
2963 break;
2964
2965 case NT_QTFR:
2966 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2967 if (r == RECURSION_EXIST) {
2968 if (NQTFR(node)->lower == 0) r = 0;
2969 }
2970 break;
2971
2972 case NT_ANCHOR:
2973 {
2974 AnchorNode* an = NANCHOR(node);
2975 switch (an->type) {
2976 case ANCHOR_PREC_READ:
2977 case ANCHOR_PREC_READ_NOT:
2978 case ANCHOR_LOOK_BEHIND:
2979 case ANCHOR_LOOK_BEHIND_NOT:
2980 r = subexp_inf_recursive_check(an->target, env, head);
2981 break;
2982 }
2983 }
2984 break;
2985
2986 case NT_CALL:
2987 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2988 break;
2989
2990 case NT_ENCLOSE:
2991 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2992 return 0;
2993 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2994 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2995 else {
2996 SET_ENCLOSE_STATUS(node, NST_MARK2);
2997 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2998 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2999 }
3000 break;
3001
3002 default:
3003 break;
3004 }
3005
3006 return r;
3007}
3008
3009static int
3010subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
3011{
3012 int type;
3013 int r = 0;
3014
3015 type = NTYPE(node);
3016 switch (type) {
3017 case NT_LIST:
3018 case NT_ALT:
3019 do {
3020 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3021 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3022 break;
3023
3024 case NT_QTFR:
3025 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3026 break;
3027
3028 case NT_ANCHOR:
3029 {
3030 AnchorNode* an = NANCHOR(node);
3031 switch (an->type) {
3032 case ANCHOR_PREC_READ:
3033 case ANCHOR_PREC_READ_NOT:
3034 case ANCHOR_LOOK_BEHIND:
3035 case ANCHOR_LOOK_BEHIND_NOT:
3036 r = subexp_inf_recursive_check_trav(an->target, env);
3037 break;
3038 }
3039 }
3040 break;
3041
3042 case NT_ENCLOSE:
3043 {
3044 EncloseNode* en = NENCLOSE(node);
3045
3046 if (IS_ENCLOSE_RECURSION(en)) {
3047 SET_ENCLOSE_STATUS(node, NST_MARK1);
3048 r = subexp_inf_recursive_check(en->target, env, 1);
3049 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3050 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3051 }
3052 r = subexp_inf_recursive_check_trav(en->target, env);
3053 }
3054
3055 break;
3056
3057 default:
3058 break;
3059 }
3060
3061 return r;
3062}
3063
3064static int
3065subexp_recursive_check(Node* node)
3066{
3067 int r = 0;
3068
3069 switch (NTYPE(node)) {
3070 case NT_LIST:
3071 case NT_ALT:
3072 do {
3073 r |= subexp_recursive_check(NCAR(node));
3074 } while (IS_NOT_NULL(node = NCDR(node)));
3075 break;
3076
3077 case NT_QTFR:
3078 r = subexp_recursive_check(NQTFR(node)->target);
3079 break;
3080
3081 case NT_ANCHOR:
3082 {
3083 AnchorNode* an = NANCHOR(node);
3084 switch (an->type) {
3085 case ANCHOR_PREC_READ:
3086 case ANCHOR_PREC_READ_NOT:
3087 case ANCHOR_LOOK_BEHIND:
3088 case ANCHOR_LOOK_BEHIND_NOT:
3089 r = subexp_recursive_check(an->target);
3090 break;
3091 }
3092 }
3093 break;
3094
3095 case NT_CALL:
3096 r = subexp_recursive_check(NCALL(node)->target);
3097 if (r != 0) SET_CALL_RECURSION(node);
3098 break;
3099
3100 case NT_ENCLOSE:
3101 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3102 return 0;
3103 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3104 return 1; /* recursion */
3105 else {
3106 SET_ENCLOSE_STATUS(node, NST_MARK2);
3107 r = subexp_recursive_check(NENCLOSE(node)->target);
3108 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3109 }
3110 break;
3111
3112 default:
3113 break;
3114 }
3115
3116 return r;
3117}
3118
3119
3120static int
3121subexp_recursive_check_trav(Node* node, ScanEnv* env)
3122{
3123# define FOUND_CALLED_NODE 1
3124
3125 int type;
3126 int r = 0;
3127
3128 type = NTYPE(node);
3129 switch (type) {
3130 case NT_LIST:
3131 case NT_ALT:
3132 {
3133 int ret;
3134 do {
3135 ret = subexp_recursive_check_trav(NCAR(node), env);
3136 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3137 else if (ret < 0) return ret;
3138 } while (IS_NOT_NULL(node = NCDR(node)));
3139 }
3140 break;
3141
3142 case NT_QTFR:
3143 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3144 if (NQTFR(node)->upper == 0) {
3145 if (r == FOUND_CALLED_NODE)
3146 NQTFR(node)->is_referred = 1;
3147 }
3148 break;
3149
3150 case NT_ANCHOR:
3151 {
3152 AnchorNode* an = NANCHOR(node);
3153 switch (an->type) {
3154 case ANCHOR_PREC_READ:
3155 case ANCHOR_PREC_READ_NOT:
3156 case ANCHOR_LOOK_BEHIND:
3157 case ANCHOR_LOOK_BEHIND_NOT:
3158 r = subexp_recursive_check_trav(an->target, env);
3159 break;
3160 }
3161 }
3162 break;
3163
3164 case NT_ENCLOSE:
3165 {
3166 EncloseNode* en = NENCLOSE(node);
3167
3168 if (! IS_ENCLOSE_RECURSION(en)) {
3169 if (IS_ENCLOSE_CALLED(en)) {
3170 SET_ENCLOSE_STATUS(node, NST_MARK1);
3171 r = subexp_recursive_check(en->target);
3172 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3173 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3174 }
3175 }
3176 r = subexp_recursive_check_trav(en->target, env);
3177 if (IS_ENCLOSE_CALLED(en))
3178 r |= FOUND_CALLED_NODE;
3179 }
3180 break;
3181
3182 default:
3183 break;
3184 }
3185
3186 return r;
3187}
3188
3189static int
3190setup_subexp_call(Node* node, ScanEnv* env)
3191{
3192 int type;
3193 int r = 0;
3194
3195 type = NTYPE(node);
3196 switch (type) {
3197 case NT_LIST:
3198 do {
3199 r = setup_subexp_call(NCAR(node), env);
3200 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3201 break;
3202
3203 case NT_ALT:
3204 do {
3205 r = setup_subexp_call(NCAR(node), env);
3206 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3207 break;
3208
3209 case NT_QTFR:
3210 r = setup_subexp_call(NQTFR(node)->target, env);
3211 break;
3212 case NT_ENCLOSE:
3213 r = setup_subexp_call(NENCLOSE(node)->target, env);
3214 break;
3215
3216 case NT_CALL:
3217 {
3218 CallNode* cn = NCALL(node);
3219 Node** nodes = SCANENV_MEM_NODES(env);
3220
3221 if (cn->group_num != 0) {
3222 int gnum = cn->group_num;
3223
3224# ifdef USE_NAMED_GROUP
3225 if (env->num_named > 0 &&
3226 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3227 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3228 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3229 }
3230# endif
3231 if (gnum > env->num_mem) {
3232 onig_scan_env_set_error_string(env,
3233 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3234 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3235 }
3236
3237# ifdef USE_NAMED_GROUP
3238 set_call_attr:
3239# endif
3240 cn->target = nodes[cn->group_num];
3241 if (IS_NULL(cn->target)) {
3242 onig_scan_env_set_error_string(env,
3243 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3244 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3245 }
3246 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3247 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3248 cn->unset_addr_list = env->unset_addr_list;
3249 }
3250# ifdef USE_NAMED_GROUP
3251# ifdef USE_PERL_SUBEXP_CALL
3252 else if (cn->name == cn->name_end) {
3253 goto set_call_attr;
3254 }
3255# endif
3256 else {
3257 int *refs;
3258
3259 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3260 &refs);
3261 if (n <= 0) {
3262 onig_scan_env_set_error_string(env,
3263 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3264 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3265 }
3266 else if (n > 1 &&
3267 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3268 onig_scan_env_set_error_string(env,
3269 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3270 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3271 }
3272 else {
3273 cn->group_num = refs[0];
3274 goto set_call_attr;
3275 }
3276 }
3277# endif
3278 }
3279 break;
3280
3281 case NT_ANCHOR:
3282 {
3283 AnchorNode* an = NANCHOR(node);
3284
3285 switch (an->type) {
3286 case ANCHOR_PREC_READ:
3287 case ANCHOR_PREC_READ_NOT:
3288 case ANCHOR_LOOK_BEHIND:
3289 case ANCHOR_LOOK_BEHIND_NOT:
3290 r = setup_subexp_call(an->target, env);
3291 break;
3292 }
3293 }
3294 break;
3295
3296 default:
3297 break;
3298 }
3299
3300 return r;
3301}
3302#endif
3303
3304/* divide different length alternatives in look-behind.
3305 (?<=A|B) ==> (?<=A)|(?<=B)
3306 (?<!A|B) ==> (?<!A)(?<!B)
3307*/
3308static int
3309divide_look_behind_alternatives(Node* node)
3310{
3311 Node *head, *np, *insert_node;
3312 AnchorNode* an = NANCHOR(node);
3313 int anc_type = an->type;
3314
3315 head = an->target;
3316 np = NCAR(head);
3317 swap_node(node, head);
3318 NCAR(node) = head;
3319 NANCHOR(head)->target = np;
3320
3321 np = node;
3322 while ((np = NCDR(np)) != NULL_NODE) {
3323 insert_node = onig_node_new_anchor(anc_type);
3324 CHECK_NULL_RETURN_MEMERR(insert_node);
3325 NANCHOR(insert_node)->target = NCAR(np);
3326 NCAR(np) = insert_node;
3327 }
3328
3329 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3330 np = node;
3331 do {
3332 SET_NTYPE(np, NT_LIST); /* alt -> list */
3333 } while ((np = NCDR(np)) != NULL_NODE);
3334 }
3335 return 0;
3336}
3337
3338static int
3339setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3340{
3341 int r, len;
3342 AnchorNode* an = NANCHOR(node);
3343
3344 r = get_char_length_tree(an->target, reg, &len);
3345 if (r == 0)
3346 an->char_len = len;
3347 else if (r == GET_CHAR_LEN_VARLEN)
3348 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3349 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3350 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3351 r = divide_look_behind_alternatives(node);
3352 else
3353 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3354 }
3355
3356 return r;
3357}
3358
3359static int
3360next_setup(Node* node, Node* next_node, regex_t* reg)
3361{
3362 int type;
3363
3364 retry:
3365 type = NTYPE(node);
3366 if (type == NT_QTFR) {
3367 QtfrNode* qn = NQTFR(node);
3368 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3369#ifdef USE_QTFR_PEEK_NEXT
3370 Node* n = get_head_value_node(next_node, 1, reg);
3371 /* '\0': for UTF-16BE etc... */
3372 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3373 qn->next_head_exact = n;
3374 }
3375#endif
3376 /* automatic possessification a*b ==> (?>a*)b */
3377 if (qn->lower <= 1) {
3378 int ttype = NTYPE(qn->target);
3379 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3380 Node *x, *y;
3381 x = get_head_value_node(qn->target, 0, reg);
3382 if (IS_NOT_NULL(x)) {
3383 y = get_head_value_node(next_node, 0, reg);
3384 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3385 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3386 CHECK_NULL_RETURN_MEMERR(en);
3387 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3388 swap_node(node, en);
3389 NENCLOSE(node)->target = en;
3390 }
3391 }
3392 }
3393 }
3394 }
3395 }
3396 else if (type == NT_ENCLOSE) {
3397 EncloseNode* en = NENCLOSE(node);
3398 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3399 node = en->target;
3400 goto retry;
3401 }
3402 }
3403 return 0;
3404}
3405
3406
3407static int
3408update_string_node_case_fold(regex_t* reg, Node *node)
3409{
3410 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3411 UChar *sbuf, *ebuf, *sp;
3412 int r, i, len;
3413 OnigDistance sbuf_size;
3414 StrNode* sn = NSTR(node);
3415
3416 end = sn->end;
3417 sbuf_size = (end - sn->s) * 2;
3418 sbuf = (UChar* )xmalloc(sbuf_size);
3419 CHECK_NULL_RETURN_MEMERR(sbuf);
3420 ebuf = sbuf + sbuf_size;
3421
3422 sp = sbuf;
3423 p = sn->s;
3424 while (p < end) {
3425 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3426 for (i = 0; i < len; i++) {
3427 if (sp >= ebuf) {
3428 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3429 if (IS_NULL(p)) {
3430 xfree(sbuf);
3431 return ONIGERR_MEMORY;
3432 }
3433 sbuf = p;
3434 sp = sbuf + sbuf_size;
3435 sbuf_size *= 2;
3436 ebuf = sbuf + sbuf_size;
3437 }
3438
3439 *sp++ = buf[i];
3440 }
3441 }
3442
3443 r = onig_node_str_set(node, sbuf, sp);
3444
3445 xfree(sbuf);
3446 return r;
3447}
3448
3449static int
3450expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3451 regex_t* reg)
3452{
3453 int r;
3454 Node *node;
3455
3456 node = onig_node_new_str(s, end);
3457 if (IS_NULL(node)) return ONIGERR_MEMORY;
3458
3459 r = update_string_node_case_fold(reg, node);
3460 if (r != 0) {
3461 onig_node_free(node);
3462 return r;
3463 }
3464
3465 NSTRING_SET_AMBIG(node);
3466 NSTRING_SET_DONT_GET_OPT_INFO(node);
3467 *rnode = node;
3468 return 0;
3469}
3470
3471static int
3472is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3473 int slen)
3474{
3475 int i;
3476
3477 for (i = 0; i < item_num; i++) {
3478 if (items[i].byte_len != slen) {
3479 return 1;
3480 }
3481 if (items[i].code_len != 1) {
3482 return 1;
3483 }
3484 }
3485 return 0;
3486}
3487
3488static int
3489expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3490 UChar *p, int slen, UChar *end,
3491 regex_t* reg, Node **rnode)
3492{
3493 int r, i, j, len, varlen;
3494 Node *anode, *var_anode, *snode, *xnode, *an;
3495 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3496
3497 *rnode = var_anode = NULL_NODE;
3498
3499 varlen = 0;
3500 for (i = 0; i < item_num; i++) {
3501 if (items[i].byte_len != slen) {
3502 varlen = 1;
3503 break;
3504 }
3505 }
3506
3507 if (varlen != 0) {
3508 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3509 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3510
3511 xnode = onig_node_new_list(NULL, NULL);
3512 if (IS_NULL(xnode)) goto mem_err;
3513 NCAR(var_anode) = xnode;
3514
3515 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3516 if (IS_NULL(anode)) goto mem_err;
3517 NCAR(xnode) = anode;
3518 }
3519 else {
3520 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3521 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3522 }
3523
3524 snode = onig_node_new_str(p, p + slen);
3525 if (IS_NULL(snode)) goto mem_err;
3526
3527 NCAR(anode) = snode;
3528
3529 for (i = 0; i < item_num; i++) {
3530 snode = onig_node_new_str(NULL, NULL);
3531 if (IS_NULL(snode)) goto mem_err;
3532
3533 for (j = 0; j < items[i].code_len; j++) {
3534 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3535 if (len < 0) {
3536 r = len;
3537 goto mem_err2;
3538 }
3539
3540 r = onig_node_str_cat(snode, buf, buf + len);
3541 if (r != 0) goto mem_err2;
3542 }
3543
3544 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3545 if (IS_NULL(an)) {
3546 goto mem_err2;
3547 }
3548
3549 if (items[i].byte_len != slen) {
3550 Node *rem;
3551 UChar *q = p + items[i].byte_len;
3552
3553 if (q < end) {
3554 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3555 if (r != 0) {
3556 onig_node_free(an);
3557 goto mem_err2;
3558 }
3559
3560 xnode = onig_node_list_add(NULL_NODE, snode);
3561 if (IS_NULL(xnode)) {
3562 onig_node_free(an);
3563 onig_node_free(rem);
3564 goto mem_err2;
3565 }
3566 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3567 onig_node_free(an);
3568 onig_node_free(xnode);
3569 onig_node_free(rem);
3570 goto mem_err;
3571 }
3572
3573 NCAR(an) = xnode;
3574 }
3575 else {
3576 NCAR(an) = snode;
3577 }
3578
3579 NCDR(var_anode) = an;
3580 var_anode = an;
3581 }
3582 else {
3583 NCAR(an) = snode;
3584 NCDR(anode) = an;
3585 anode = an;
3586 }
3587 }
3588
3589 return varlen;
3590
3591 mem_err2:
3592 onig_node_free(snode);
3593
3594 mem_err:
3595 onig_node_free(*rnode);
3596
3597 return ONIGERR_MEMORY;
3598}
3599
3600static int
3601expand_case_fold_string(Node* node, regex_t* reg)
3602{
3603#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3604
3605 int r, n, len, alt_num;
3606 int varlen = 0;
3607 UChar *start, *end, *p;
3608 Node *top_root, *root, *snode, *prev_node;
3609 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3610 StrNode* sn = NSTR(node);
3611
3612 if (NSTRING_IS_AMBIG(node)) return 0;
3613
3614 start = sn->s;
3615 end = sn->end;
3616 if (start >= end) return 0;
3617
3618 r = 0;
3619 top_root = root = prev_node = snode = NULL_NODE;
3620 alt_num = 1;
3621 p = start;
3622 while (p < end) {
3623 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3624 p, end, items);
3625 if (n < 0) {
3626 r = n;
3627 goto err;
3628 }
3629
3630 len = enclen(reg->enc, p, end);
3631
3632 varlen = is_case_fold_variable_len(n, items, len);
3633 if (n == 0 || varlen == 0) {
3634 if (IS_NULL(snode)) {
3635 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3636 onig_node_free(top_root);
3637 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3638 if (IS_NULL(root)) {
3639 onig_node_free(prev_node);
3640 goto mem_err;
3641 }
3642 }
3643
3644 prev_node = snode = onig_node_new_str(NULL, NULL);
3645 if (IS_NULL(snode)) goto mem_err;
3646 if (IS_NOT_NULL(root)) {
3647 if (IS_NULL(onig_node_list_add(root, snode))) {
3648 onig_node_free(snode);
3649 goto mem_err;
3650 }
3651 }
3652 }
3653
3654 r = onig_node_str_cat(snode, p, p + len);
3655 if (r != 0) goto err;
3656 }
3657 else {
3658 alt_num *= (n + 1);
3659 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3660
3661 if (IS_NOT_NULL(snode)) {
3662 r = update_string_node_case_fold(reg, snode);
3663 if (r == 0) {
3664 NSTRING_SET_AMBIG(snode);
3665 }
3666 }
3667 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3668 onig_node_free(top_root);
3669 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3670 if (IS_NULL(root)) {
3671 onig_node_free(prev_node);
3672 goto mem_err;
3673 }
3674 }
3675
3676 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3677 if (r < 0) goto mem_err;
3678 if (r == 1) {
3679 if (IS_NULL(root)) {
3680 top_root = prev_node;
3681 }
3682 else {
3683 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3684 onig_node_free(prev_node);
3685 goto mem_err;
3686 }
3687 }
3688
3689 root = NCAR(prev_node);
3690 }
3691 else { /* r == 0 */
3692 if (IS_NOT_NULL(root)) {
3693 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3694 onig_node_free(prev_node);
3695 goto mem_err;
3696 }
3697 }
3698 }
3699
3700 snode = NULL_NODE;
3701 }
3702
3703 p += len;
3704 }
3705 if (IS_NOT_NULL(snode)) {
3706 r = update_string_node_case_fold(reg, snode);
3707 if (r == 0) {
3708 NSTRING_SET_AMBIG(snode);
3709 }
3710 }
3711
3712 if (p < end) {
3713 Node *srem;
3714
3715 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3716 if (r != 0) goto mem_err;
3717
3718 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3719 onig_node_free(top_root);
3720 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3721 if (IS_NULL(root)) {
3722 onig_node_free(srem);
3723 onig_node_free(prev_node);
3724 goto mem_err;
3725 }
3726 }
3727
3728 if (IS_NULL(root)) {
3729 prev_node = srem;
3730 }
3731 else {
3732 if (IS_NULL(onig_node_list_add(root, srem))) {
3733 onig_node_free(srem);
3734 goto mem_err;
3735 }
3736 }
3737 }
3738
3739 /* ending */
3740 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3741 swap_node(node, top_root);
3742 onig_node_free(top_root);
3743 return 0;
3744
3745 mem_err:
3746 r = ONIGERR_MEMORY;
3747
3748 err:
3749 onig_node_free(top_root);
3750 return r;
3751}
3752
3753
3754#ifdef USE_COMBINATION_EXPLOSION_CHECK
3755
3756# define CEC_THRES_NUM_BIG_REPEAT 512
3757# define CEC_INFINITE_NUM 0x7fffffff
3758
3759# define CEC_IN_INFINITE_REPEAT (1<<0)
3760# define CEC_IN_FINITE_REPEAT (1<<1)
3761# define CEC_CONT_BIG_REPEAT (1<<2)
3762
3763static int
3764setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3765{
3766 int type;
3767 int r = state;
3768
3769 type = NTYPE(node);
3770 switch (type) {
3771 case NT_LIST:
3772 {
3773 do {
3774 r = setup_comb_exp_check(NCAR(node), r, env);
3775 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3776 }
3777 break;
3778
3779 case NT_ALT:
3780 {
3781 int ret;
3782 do {
3783 ret = setup_comb_exp_check(NCAR(node), state, env);
3784 r |= ret;
3785 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3786 }
3787 break;
3788
3789 case NT_QTFR:
3790 {
3791 int child_state = state;
3792 int add_state = 0;
3793 QtfrNode* qn = NQTFR(node);
3794 Node* target = qn->target;
3795 int var_num;
3796
3797 if (! IS_REPEAT_INFINITE(qn->upper)) {
3798 if (qn->upper > 1) {
3799 /* {0,1}, {1,1} are allowed */
3800 child_state |= CEC_IN_FINITE_REPEAT;
3801
3802 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3803 if (env->backrefed_mem == 0) {
3804 if (NTYPE(qn->target) == NT_ENCLOSE) {
3805 EncloseNode* en = NENCLOSE(qn->target);
3806 if (en->type == ENCLOSE_MEMORY) {
3807 if (NTYPE(en->target) == NT_QTFR) {
3808 QtfrNode* q = NQTFR(en->target);
3809 if (IS_REPEAT_INFINITE(q->upper)
3810 && q->greedy == qn->greedy) {
3811 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3812 if (qn->upper == 1)
3813 child_state = state;
3814 }
3815 }
3816 }
3817 }
3818 }
3819 }
3820 }
3821
3822 if (state & CEC_IN_FINITE_REPEAT) {
3823 qn->comb_exp_check_num = -1;
3824 }
3825 else {
3826 if (IS_REPEAT_INFINITE(qn->upper)) {
3827 var_num = CEC_INFINITE_NUM;
3828 child_state |= CEC_IN_INFINITE_REPEAT;
3829 }
3830 else {
3831 var_num = qn->upper - qn->lower;
3832 }
3833
3834 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3835 add_state |= CEC_CONT_BIG_REPEAT;
3836
3837 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3838 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3839 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3840 if (qn->comb_exp_check_num == 0) {
3841 env->num_comb_exp_check++;
3842 qn->comb_exp_check_num = env->num_comb_exp_check;
3843 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3844 env->comb_exp_max_regnum = env->curr_max_regnum;
3845 }
3846 }
3847 }
3848
3849 r = setup_comb_exp_check(target, child_state, env);
3850 r |= add_state;
3851 }
3852 break;
3853
3854 case NT_ENCLOSE:
3855 {
3856 EncloseNode* en = NENCLOSE(node);
3857
3858 switch (en->type) {
3859 case ENCLOSE_MEMORY:
3860 {
3861 if (env->curr_max_regnum < en->regnum)
3862 env->curr_max_regnum = en->regnum;
3863
3864 r = setup_comb_exp_check(en->target, state, env);
3865 }
3866 break;
3867
3868 default:
3869 r = setup_comb_exp_check(en->target, state, env);
3870 break;
3871 }
3872 }
3873 break;
3874
3875# ifdef USE_SUBEXP_CALL
3876 case NT_CALL:
3877 if (IS_CALL_RECURSION(NCALL(node)))
3878 env->has_recursion = 1;
3879 else
3880 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3881 break;
3882# endif
3883
3884 default:
3885 break;
3886 }
3887
3888 return r;
3889}
3890#endif
3891
3892#define IN_ALT (1<<0)
3893#define IN_NOT (1<<1)
3894#define IN_REPEAT (1<<2)
3895#define IN_VAR_REPEAT (1<<3)
3896#define IN_CALL (1<<4)
3897#define IN_RECCALL (1<<5)
3898
3899/* setup_tree does the following work.
3900 1. check empty loop. (set qn->target_empty_info)
3901 2. expand ignore-case in char class.
3902 3. set memory status bit flags. (reg->mem_stats)
3903 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3904 5. find invalid patterns in look-behind.
3905 6. expand repeated string.
3906 */
3907static int
3908setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3909{
3910 int type;
3911 int r = 0;
3912
3913restart:
3914 type = NTYPE(node);
3915 switch (type) {
3916 case NT_LIST:
3917 {
3918 Node* prev = NULL_NODE;
3919 do {
3920 r = setup_tree(NCAR(node), reg, state, env);
3921 if (IS_NOT_NULL(prev) && r == 0) {
3922 r = next_setup(prev, NCAR(node), reg);
3923 }
3924 prev = NCAR(node);
3925 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3926 }
3927 break;
3928
3929 case NT_ALT:
3930 do {
3931 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3932 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3933 break;
3934
3935 case NT_CCLASS:
3936 break;
3937
3938 case NT_STR:
3939 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3940 r = expand_case_fold_string(node, reg);
3941 }
3942 break;
3943
3944 case NT_CTYPE:
3945 case NT_CANY:
3946 break;
3947
3948#ifdef USE_SUBEXP_CALL
3949 case NT_CALL:
3950 break;
3951#endif
3952
3953 case NT_BREF:
3954 {
3955 int i;
3956 int* p;
3957 Node** nodes = SCANENV_MEM_NODES(env);
3958 BRefNode* br = NBREF(node);
3959 p = BACKREFS_P(br);
3960 for (i = 0; i < br->back_num; i++) {
3961 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3962 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3963 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3964#ifdef USE_BACKREF_WITH_LEVEL
3965 if (IS_BACKREF_NEST_LEVEL(br)) {
3966 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3967 }
3968#endif
3969 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3970 }
3971 }
3972 break;
3973
3974 case NT_QTFR:
3975 {
3976 OnigDistance d;
3977 QtfrNode* qn = NQTFR(node);
3978 Node* target = qn->target;
3979
3980 if ((state & IN_REPEAT) != 0) {
3981 qn->state |= NST_IN_REPEAT;
3982 }
3983
3984 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3985 r = get_min_match_length(target, &d, env);
3986 if (r) break;
3987 if (d == 0) {
3988 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3989#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3990 r = quantifiers_memory_node_info(target);
3991 if (r < 0) break;
3992 if (r > 0) {
3993 qn->target_empty_info = r;
3994 }
3995#endif
3996#if 0
3997 r = get_max_match_length(target, &d, env);
3998 if (r == 0 && d == 0) {
3999 /* ()* ==> ()?, ()+ ==> () */
4000 qn->upper = 1;
4001 if (qn->lower > 1) qn->lower = 1;
4002 if (NTYPE(target) == NT_STR) {
4003 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
4004 }
4005 }
4006#endif
4007 }
4008 }
4009
4010 state |= IN_REPEAT;
4011 if (qn->lower != qn->upper)
4012 state |= IN_VAR_REPEAT;
4013 r = setup_tree(target, reg, state, env);
4014 if (r) break;
4015
4016 /* expand string */
4017#define EXPAND_STRING_MAX_LENGTH 100
4018 if (NTYPE(target) == NT_STR) {
4019 if (qn->lower > 1) {
4020 int i, n = qn->lower;
4021 OnigDistance len = NSTRING_LEN(target);
4022 StrNode* sn = NSTR(target);
4023 Node* np;
4024
4025 np = onig_node_new_str(sn->s, sn->end);
4026 if (IS_NULL(np)) return ONIGERR_MEMORY;
4027 NSTR(np)->flag = sn->flag;
4028
4029 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4030 r = onig_node_str_cat(np, sn->s, sn->end);
4031 if (r) {
4032 onig_node_free(np);
4033 return r;
4034 }
4035 }
4036 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4037 Node *np1, *np2;
4038
4039 qn->lower -= i;
4040 if (! IS_REPEAT_INFINITE(qn->upper))
4041 qn->upper -= i;
4042
4043 np1 = onig_node_new_list(np, NULL);
4044 if (IS_NULL(np1)) {
4045 onig_node_free(np);
4046 return ONIGERR_MEMORY;
4047 }
4048 swap_node(np1, node);
4049 np2 = onig_node_list_add(node, np1);
4050 if (IS_NULL(np2)) {
4051 onig_node_free(np1);
4052 return ONIGERR_MEMORY;
4053 }
4054 }
4055 else {
4056 swap_node(np, node);
4057 onig_node_free(np);
4058 }
4059 break; /* break case NT_QTFR: */
4060 }
4061 }
4062
4063#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4064 if (qn->greedy && (qn->target_empty_info != 0)) {
4065 if (NTYPE(target) == NT_QTFR) {
4066 QtfrNode* tqn = NQTFR(target);
4067 if (IS_NOT_NULL(tqn->head_exact)) {
4068 qn->head_exact = tqn->head_exact;
4069 tqn->head_exact = NULL;
4070 }
4071 }
4072 else {
4073 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4074 }
4075 }
4076#endif
4077 }
4078 break;
4079
4080 case NT_ENCLOSE:
4081 {
4082 EncloseNode* en = NENCLOSE(node);
4083
4084 switch (en->type) {
4085 case ENCLOSE_OPTION:
4086 {
4087 OnigOptionType options = reg->options;
4088 reg->options = NENCLOSE(node)->option;
4089 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4090 reg->options = options;
4091 }
4092 break;
4093
4094 case ENCLOSE_MEMORY:
4095 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4096 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4097 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4098 }
4099 if (IS_ENCLOSE_CALLED(en))
4100 state |= IN_CALL;
4101 if (IS_ENCLOSE_RECURSION(en))
4102 state |= IN_RECCALL;
4103 else if ((state & IN_RECCALL) != 0)
4104 SET_CALL_RECURSION(node);
4105 r = setup_tree(en->target, reg, state, env);
4106 break;
4107
4108 case ENCLOSE_STOP_BACKTRACK:
4109 {
4110 Node* target = en->target;
4111 r = setup_tree(target, reg, state, env);
4112 if (NTYPE(target) == NT_QTFR) {
4113 QtfrNode* tqn = NQTFR(target);
4114 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4115 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4116 int qtype = NTYPE(tqn->target);
4117 if (IS_NODE_TYPE_SIMPLE(qtype))
4118 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4119 }
4120 }
4121 }
4122 break;
4123
4124 case ENCLOSE_CONDITION:
4125#ifdef USE_NAMED_GROUP
4126 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4127 env->num_named > 0 &&
4128 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4129 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4130 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4131 }
4132#endif
4133 if (NENCLOSE(node)->regnum > env->num_mem)
4134 return ONIGERR_INVALID_BACKREF;
4135 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4136 break;
4137
4138 case ENCLOSE_ABSENT:
4139 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4140 break;
4141 }
4142 }
4143 break;
4144
4145 case NT_ANCHOR:
4146 {
4147 AnchorNode* an = NANCHOR(node);
4148
4149 switch (an->type) {
4150 case ANCHOR_PREC_READ:
4151 r = setup_tree(an->target, reg, state, env);
4152 break;
4153 case ANCHOR_PREC_READ_NOT:
4154 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4155 break;
4156
4157/* allowed node types in look-behind */
4158#define ALLOWED_TYPE_IN_LB \
4159 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4160 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4161
4162#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4163#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4164
4165#define ALLOWED_ANCHOR_IN_LB \
4166( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4167 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4168 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4169 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4170#define ALLOWED_ANCHOR_IN_LB_NOT \
4171( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4172 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4173 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4174 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4175
4176 case ANCHOR_LOOK_BEHIND:
4177 {
4178 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4179 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4180 if (r < 0) return r;
4181 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4182 if (NTYPE(node) != NT_ANCHOR) goto restart;
4183 r = setup_tree(an->target, reg, state, env);
4184 if (r != 0) return r;
4185 r = setup_look_behind(node, reg, env);
4186 }
4187 break;
4188
4189 case ANCHOR_LOOK_BEHIND_NOT:
4190 {
4191 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4192 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4193 if (r < 0) return r;
4194 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4195 if (NTYPE(node) != NT_ANCHOR) goto restart;
4196 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4197 if (r != 0) return r;
4198 r = setup_look_behind(node, reg, env);
4199 }
4200 break;
4201 }
4202 }
4203 break;
4204
4205 default:
4206 break;
4207 }
4208
4209 return r;
4210}
4211
4212#ifndef USE_SUNDAY_QUICK_SEARCH
4213/* set skip map for Boyer-Moore search */
4214static int
4215set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4216 UChar skip[], int** int_skip, int ignore_case)
4217{
4218 OnigDistance i, len;
4219 int clen, flen, n, j, k;
4220 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4221 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4222 OnigEncoding enc = reg->enc;
4223
4224 len = end - s;
4225 if (len < ONIG_CHAR_TABLE_SIZE) {
4226 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4227
4228 n = 0;
4229 for (i = 0; i < len - 1; i += clen) {
4230 p = s + i;
4231 if (ignore_case)
4232 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4233 p, end, items);
4234 clen = enclen(enc, p, end);
4235 if (p + clen > end)
4236 clen = (int )(end - p);
4237
4238 for (j = 0; j < n; j++) {
4239 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4240 return 1; /* different length isn't supported. */
4241 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4242 if (flen != clen)
4243 return 1; /* different length isn't supported. */
4244 }
4245 for (j = 0; j < clen; j++) {
4246 skip[s[i + j]] = (UChar )(len - 1 - i - j);
4247 for (k = 0; k < n; k++) {
4248 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4249 }
4250 }
4251 }
4252 }
4253 else {
4254# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4255 /* This should not happen. */
4256 return ONIGERR_TYPE_BUG;
4257# else
4258 if (IS_NULL(*int_skip)) {
4259 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4260 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4261 }
4262 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4263
4264 n = 0;
4265 for (i = 0; i < len - 1; i += clen) {
4266 p = s + i;
4267 if (ignore_case)
4268 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4269 p, end, items);
4270 clen = enclen(enc, p, end);
4271 if (p + clen > end)
4272 clen = (int )(end - p);
4273
4274 for (j = 0; j < n; j++) {
4275 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4276 return 1; /* different length isn't supported. */
4277 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4278 if (flen != clen)
4279 return 1; /* different length isn't supported. */
4280 }
4281 for (j = 0; j < clen; j++) {
4282 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4283 for (k = 0; k < n; k++) {
4284 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4285 }
4286 }
4287 }
4288# endif
4289 }
4290 return 0;
4291}
4292
4293#else /* USE_SUNDAY_QUICK_SEARCH */
4294
4295/* set skip map for Sunday's quick search */
4296static int
4297set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4298 UChar skip[], int** int_skip, int ignore_case)
4299{
4300 OnigDistance i, len;
4301 int clen, flen, n, j, k;
4302 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4303 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4304 OnigEncoding enc = reg->enc;
4305
4306 len = end - s;
4307 if (len < ONIG_CHAR_TABLE_SIZE) {
4308 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4309
4310 n = 0;
4311 for (i = 0; i < len; i += clen) {
4312 p = s + i;
4313 if (ignore_case)
4314 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4315 p, end, items);
4316 clen = enclen(enc, p, end);
4317 if (p + clen > end)
4318 clen = (int )(end - p);
4319
4320 for (j = 0; j < n; j++) {
4321 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4322 return 1; /* different length isn't supported. */
4323 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4324 if (flen != clen)
4325 return 1; /* different length isn't supported. */
4326 }
4327 for (j = 0; j < clen; j++) {
4328 skip[s[i + j]] = (UChar )(len - i - j);
4329 for (k = 0; k < n; k++) {
4330 skip[buf[k][j]] = (UChar )(len - i - j);
4331 }
4332 }
4333 }
4334 }
4335 else {
4336# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4337 /* This should not happen. */
4338 return ONIGERR_TYPE_BUG;
4339# else
4340 if (IS_NULL(*int_skip)) {
4341 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4342 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4343 }
4344 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4345
4346 n = 0;
4347 for (i = 0; i < len; i += clen) {
4348 p = s + i;
4349 if (ignore_case)
4350 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4351 p, end, items);
4352 clen = enclen(enc, p, end);
4353 if (p + clen > end)
4354 clen = (int )(end - p);
4355
4356 for (j = 0; j < n; j++) {
4357 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4358 return 1; /* different length isn't supported. */
4359 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4360 if (flen != clen)
4361 return 1; /* different length isn't supported. */
4362 }
4363 for (j = 0; j < clen; j++) {
4364 (*int_skip)[s[i + j]] = (int )(len - i - j);
4365 for (k = 0; k < n; k++) {
4366 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4367 }
4368 }
4369 }
4370# endif
4371 }
4372 return 0;
4373}
4374#endif /* USE_SUNDAY_QUICK_SEARCH */
4375
4376typedef struct {
4377 OnigDistance min; /* min byte length */
4378 OnigDistance max; /* max byte length */
4379} MinMaxLen;
4380
4381typedef struct {
4382 MinMaxLen mmd;
4383 OnigEncoding enc;
4384 OnigOptionType options;
4385 OnigCaseFoldType case_fold_flag;
4386 ScanEnv* scan_env;
4387} OptEnv;
4388
4389typedef struct {
4390 int left_anchor;
4391 int right_anchor;
4392} OptAncInfo;
4393
4394typedef struct {
4395 MinMaxLen mmd; /* info position */
4396 OptAncInfo anc;
4397
4398 int reach_end;
4399 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4400 int len;
4401 UChar s[OPT_EXACT_MAXLEN];
4402} OptExactInfo;
4403
4404typedef struct {
4405 MinMaxLen mmd; /* info position */
4406 OptAncInfo anc;
4407
4408 int value; /* weighted value */
4409 UChar map[ONIG_CHAR_TABLE_SIZE];
4410} OptMapInfo;
4411
4412typedef struct {
4413 MinMaxLen len;
4414
4415 OptAncInfo anc;
4416 OptExactInfo exb; /* boundary */
4417 OptExactInfo exm; /* middle */
4418 OptExactInfo expr; /* prec read (?=...) */
4419
4420 OptMapInfo map; /* boundary */
4421} NodeOptInfo;
4422
4423
4424static int
4425map_position_value(OnigEncoding enc, int i)
4426{
4427 static const short int ByteValTable[] = {
4428 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4429 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4430 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4431 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4432 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4433 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4434 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4435 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4436 };
4437
4438 if (i < numberof(ByteValTable)) {
4439 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4440 return 20;
4441 else
4442 return (int )ByteValTable[i];
4443 }
4444 else
4445 return 4; /* Take it easy. */
4446}
4447
4448static int
4449distance_value(MinMaxLen* mm)
4450{
4451 /* 1000 / (min-max-dist + 1) */
4452 static const short int dist_vals[] = {
4453 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4454 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4455 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4456 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4457 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4458 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4459 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4460 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4461 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4462 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4463 };
4464
4465 OnigDistance d;
4466
4467 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4468
4469 d = mm->max - mm->min;
4470 if (d < numberof(dist_vals))
4471 /* return dist_vals[d] * 16 / (mm->min + 12); */
4472 return (int )dist_vals[d];
4473 else
4474 return 1;
4475}
4476
4477static int
4478comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4479{
4480 if (v2 <= 0) return -1;
4481 if (v1 <= 0) return 1;
4482
4483 v1 *= distance_value(d1);
4484 v2 *= distance_value(d2);
4485
4486 if (v2 > v1) return 1;
4487 if (v2 < v1) return -1;
4488
4489 if (d2->min < d1->min) return 1;
4490 if (d2->min > d1->min) return -1;
4491 return 0;
4492}
4493
4494static int
4495is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4496{
4497 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4498}
4499
4500
4501static void
4502set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4503{
4504 mml->min = min;
4505 mml->max = max;
4506}
4507
4508static void
4509clear_mml(MinMaxLen* mml)
4510{
4511 mml->min = mml->max = 0;
4512}
4513
4514static void
4515copy_mml(MinMaxLen* to, MinMaxLen* from)
4516{
4517 to->min = from->min;
4518 to->max = from->max;
4519}
4520
4521static void
4522add_mml(MinMaxLen* to, MinMaxLen* from)
4523{
4524 to->min = distance_add(to->min, from->min);
4525 to->max = distance_add(to->max, from->max);
4526}
4527
4528#if 0
4529static void
4530add_len_mml(MinMaxLen* to, OnigDistance len)
4531{
4532 to->min = distance_add(to->min, len);
4533 to->max = distance_add(to->max, len);
4534}
4535#endif
4536
4537static void
4538alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4539{
4540 if (to->min > from->min) to->min = from->min;
4541 if (to->max < from->max) to->max = from->max;
4542}
4543
4544static void
4545copy_opt_env(OptEnv* to, OptEnv* from)
4546{
4547 *to = *from;
4548}
4549
4550static void
4551clear_opt_anc_info(OptAncInfo* anc)
4552{
4553 anc->left_anchor = 0;
4554 anc->right_anchor = 0;
4555}
4556
4557static void
4558copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4559{
4560 *to = *from;
4561}
4562
4563static void
4564concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4565 OnigDistance left_len, OnigDistance right_len)
4566{
4567 clear_opt_anc_info(to);
4568
4569 to->left_anchor = left->left_anchor;
4570 if (left_len == 0) {
4571 to->left_anchor |= right->left_anchor;
4572 }
4573
4574 to->right_anchor = right->right_anchor;
4575 if (right_len == 0) {
4576 to->right_anchor |= left->right_anchor;
4577 }
4578 else {
4579 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4580 }
4581}
4582
4583static int
4584is_left_anchor(int anc)
4585{
4586 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4587 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4588 anc == ANCHOR_PREC_READ_NOT)
4589 return 0;
4590
4591 return 1;
4592}
4593
4594static int
4595is_set_opt_anc_info(OptAncInfo* to, int anc)
4596{
4597 if ((to->left_anchor & anc) != 0) return 1;
4598
4599 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4600}
4601
4602static void
4603add_opt_anc_info(OptAncInfo* to, int anc)
4604{
4605 if (is_left_anchor(anc))
4606 to->left_anchor |= anc;
4607 else
4608 to->right_anchor |= anc;
4609}
4610
4611static void
4612remove_opt_anc_info(OptAncInfo* to, int anc)
4613{
4614 if (is_left_anchor(anc))
4615 to->left_anchor &= ~anc;
4616 else
4617 to->right_anchor &= ~anc;
4618}
4619
4620static void
4621alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4622{
4623 to->left_anchor &= add->left_anchor;
4624 to->right_anchor &= add->right_anchor;
4625}
4626
4627static int
4628is_full_opt_exact_info(OptExactInfo* ex)
4629{
4630 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4631}
4632
4633static void
4634clear_opt_exact_info(OptExactInfo* ex)
4635{
4636 clear_mml(&ex->mmd);
4637 clear_opt_anc_info(&ex->anc);
4638 ex->reach_end = 0;
4639 ex->ignore_case = -1; /* unset */
4640 ex->len = 0;
4641 ex->s[0] = '\0';
4642}
4643
4644static void
4645copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4646{
4647 *to = *from;
4648}
4649
4650static void
4651concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4652{
4653 int i, j, len;
4654 UChar *p, *end;
4655 OptAncInfo tanc;
4656
4657 if (to->ignore_case < 0)
4658 to->ignore_case = add->ignore_case;
4659 else if (to->ignore_case != add->ignore_case)
4660 return ; /* avoid */
4661
4662 p = add->s;
4663 end = p + add->len;
4664 for (i = to->len; p < end; ) {
4665 len = enclen(enc, p, end);
4666 if (i + len > OPT_EXACT_MAXLEN) break;
4667 for (j = 0; j < len && p < end; j++)
4668 to->s[i++] = *p++;
4669 }
4670
4671 to->len = i;
4672 to->reach_end = (p == end ? add->reach_end : 0);
4673
4674 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4675 if (! to->reach_end) tanc.right_anchor = 0;
4676 copy_opt_anc_info(&to->anc, &tanc);
4677}
4678
4679static void
4680concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4681 int raw ARG_UNUSED, OnigEncoding enc)
4682{
4683 int i, j, len;
4684 UChar *p;
4685
4686 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4687 len = enclen(enc, p, end);
4688 if (i + len > OPT_EXACT_MAXLEN) break;
4689 for (j = 0; j < len && p < end; j++)
4690 to->s[i++] = *p++;
4691 }
4692
4693 to->len = i;
4694}
4695
4696static void
4697alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4698{
4699 int i, j, len;
4700
4701 if (add->len == 0 || to->len == 0) {
4702 clear_opt_exact_info(to);
4703 return ;
4704 }
4705
4706 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4707 clear_opt_exact_info(to);
4708 return ;
4709 }
4710
4711 for (i = 0; i < to->len && i < add->len; ) {
4712 if (to->s[i] != add->s[i]) break;
4713 len = enclen(env->enc, to->s + i, to->s + to->len);
4714
4715 for (j = 1; j < len; j++) {
4716 if (to->s[i+j] != add->s[i+j]) break;
4717 }
4718 if (j < len) break;
4719 i += len;
4720 }
4721
4722 if (! add->reach_end || i < add->len || i < to->len) {
4723 to->reach_end = 0;
4724 }
4725 to->len = i;
4726 if (to->ignore_case < 0)
4727 to->ignore_case = add->ignore_case;
4728 else if (add->ignore_case >= 0)
4729 to->ignore_case |= add->ignore_case;
4730
4731 alt_merge_opt_anc_info(&to->anc, &add->anc);
4732 if (! to->reach_end) to->anc.right_anchor = 0;
4733}
4734
4735static void
4736select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4737{
4738 int v1, v2;
4739
4740 v1 = now->len;
4741 v2 = alt->len;
4742
4743 if (v2 == 0) {
4744 return ;
4745 }
4746 else if (v1 == 0) {
4747 copy_opt_exact_info(now, alt);
4748 return ;
4749 }
4750 else if (v1 <= 2 && v2 <= 2) {
4751 /* ByteValTable[x] is big value --> low price */
4752 v2 = map_position_value(enc, now->s[0]);
4753 v1 = map_position_value(enc, alt->s[0]);
4754
4755 if (now->len > 1) v1 += 5;
4756 if (alt->len > 1) v2 += 5;
4757 }
4758
4759 if (now->ignore_case <= 0) v1 *= 2;
4760 if (alt->ignore_case <= 0) v2 *= 2;
4761
4762 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4763 copy_opt_exact_info(now, alt);
4764}
4765
4766static void
4767clear_opt_map_info(OptMapInfo* map)
4768{
4769 static const OptMapInfo clean_info = {
4770 {0, 0}, {0, 0}, 0,
4771 {
4772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4777 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4778 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4785 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4786 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4787 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4788 }
4789 };
4790
4791 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4792}
4793
4794static void
4795copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4796{
4797 *to = *from;
4798}
4799
4800static void
4801add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4802{
4803 if (map->map[c] == 0) {
4804 map->map[c] = 1;
4805 map->value += map_position_value(enc, c);
4806 }
4807}
4808
4809static int
4810add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4811 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4812{
4813 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4814 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4815 int i, n;
4816
4817 add_char_opt_map_info(map, p[0], enc);
4818
4819 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4820 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4821 if (n < 0) return n;
4822
4823 for (i = 0; i < n; i++) {
4824 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4825 add_char_opt_map_info(map, buf[0], enc);
4826 }
4827
4828 return 0;
4829}
4830
4831static void
4832select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4833{
4834 const int z = 1<<15; /* 32768: something big value */
4835
4836 int v1, v2;
4837
4838 if (alt->value == 0) return ;
4839 if (now->value == 0) {
4840 copy_opt_map_info(now, alt);
4841 return ;
4842 }
4843
4844 v1 = z / now->value;
4845 v2 = z / alt->value;
4846 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4847 copy_opt_map_info(now, alt);
4848}
4849
4850static int
4851comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4852{
4853#define COMP_EM_BASE 20
4854 int ve, vm;
4855
4856 if (m->value <= 0) return -1;
4857
4858 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4859 vm = COMP_EM_BASE * 5 * 2 / m->value;
4860 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4861}
4862
4863static void
4864alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4865{
4866 int i, val;
4867
4868 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4869 if (to->value == 0) return ;
4870 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4871 clear_opt_map_info(to);
4872 return ;
4873 }
4874
4875 alt_merge_mml(&to->mmd, &add->mmd);
4876
4877 val = 0;
4878 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4879 if (add->map[i])
4880 to->map[i] = 1;
4881
4882 if (to->map[i])
4883 val += map_position_value(enc, i);
4884 }
4885 to->value = val;
4886
4887 alt_merge_opt_anc_info(&to->anc, &add->anc);
4888}
4889
4890static void
4891set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4892{
4893 copy_mml(&(opt->exb.mmd), mmd);
4894 copy_mml(&(opt->expr.mmd), mmd);
4895 copy_mml(&(opt->map.mmd), mmd);
4896}
4897
4898static void
4899clear_node_opt_info(NodeOptInfo* opt)
4900{
4901 clear_mml(&opt->len);
4902 clear_opt_anc_info(&opt->anc);
4903 clear_opt_exact_info(&opt->exb);
4904 clear_opt_exact_info(&opt->exm);
4905 clear_opt_exact_info(&opt->expr);
4906 clear_opt_map_info(&opt->map);
4907}
4908
4909static void
4910copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4911{
4912 *to = *from;
4913}
4914
4915static void
4916concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4917{
4918 int exb_reach, exm_reach;
4919 OptAncInfo tanc;
4920
4921 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4922 copy_opt_anc_info(&to->anc, &tanc);
4923
4924 if (add->exb.len > 0 && to->len.max == 0) {
4925 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4926 to->len.max, add->len.max);
4927 copy_opt_anc_info(&add->exb.anc, &tanc);
4928 }
4929
4930 if (add->map.value > 0 && to->len.max == 0) {
4931 if (add->map.mmd.max == 0)
4932 add->map.anc.left_anchor |= to->anc.left_anchor;
4933 }
4934
4935 exb_reach = to->exb.reach_end;
4936 exm_reach = to->exm.reach_end;
4937
4938 if (add->len.max != 0)
4939 to->exb.reach_end = to->exm.reach_end = 0;
4940
4941 if (add->exb.len > 0) {
4942 if (exb_reach) {
4943 concat_opt_exact_info(&to->exb, &add->exb, enc);
4944 clear_opt_exact_info(&add->exb);
4945 }
4946 else if (exm_reach) {
4947 concat_opt_exact_info(&to->exm, &add->exb, enc);
4948 clear_opt_exact_info(&add->exb);
4949 }
4950 }
4951 select_opt_exact_info(enc, &to->exm, &add->exb);
4952 select_opt_exact_info(enc, &to->exm, &add->exm);
4953
4954 if (to->expr.len > 0) {
4955 if (add->len.max > 0) {
4956 if (to->expr.len > (int )add->len.max)
4957 to->expr.len = (int )add->len.max;
4958
4959 if (to->expr.mmd.max == 0)
4960 select_opt_exact_info(enc, &to->exb, &to->expr);
4961 else
4962 select_opt_exact_info(enc, &to->exm, &to->expr);
4963 }
4964 }
4965 else if (add->expr.len > 0) {
4966 copy_opt_exact_info(&to->expr, &add->expr);
4967 }
4968
4969 select_opt_map_info(&to->map, &add->map);
4970
4971 add_mml(&to->len, &add->len);
4972}
4973
4974static void
4975alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4976{
4977 alt_merge_opt_anc_info (&to->anc, &add->anc);
4978 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4979 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4980 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4981 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4982
4983 alt_merge_mml(&to->len, &add->len);
4984}
4985
4986
4987#define MAX_NODE_OPT_INFO_REF_COUNT 5
4988
4989static int
4990optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4991{
4992 int type;
4993 int r = 0;
4994
4995 clear_node_opt_info(opt);
4996 set_bound_node_opt_info(opt, &env->mmd);
4997
4998 type = NTYPE(node);
4999 switch (type) {
5000 case NT_LIST:
5001 {
5002 OptEnv nenv;
5003 NodeOptInfo nopt;
5004 Node* nd = node;
5005
5006 copy_opt_env(&nenv, env);
5007 do {
5008 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
5009 if (r == 0) {
5010 add_mml(&nenv.mmd, &nopt.len);
5011 concat_left_node_opt_info(env->enc, opt, &nopt);
5012 }
5013 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
5014 }
5015 break;
5016
5017 case NT_ALT:
5018 {
5019 NodeOptInfo nopt;
5020 Node* nd = node;
5021
5022 do {
5023 r = optimize_node_left(NCAR(nd), &nopt, env);
5024 if (r == 0) {
5025 if (nd == node) copy_node_opt_info(opt, &nopt);
5026 else alt_merge_node_opt_info(opt, &nopt, env);
5027 }
5028 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
5029 }
5030 break;
5031
5032 case NT_STR:
5033 {
5034 StrNode* sn = NSTR(node);
5035 OnigDistance slen = sn->end - sn->s;
5036 int is_raw = NSTRING_IS_RAW(node);
5037
5038 if (! NSTRING_IS_AMBIG(node)) {
5039 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5040 is_raw, env->enc);
5041 opt->exb.ignore_case = 0;
5042 if (slen > 0) {
5043 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5044 }
5045 set_mml(&opt->len, slen, slen);
5046 }
5047 else {
5048 OnigDistance max;
5049
5050 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5051 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5052 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
5053 }
5054 else {
5055 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5056 is_raw, env->enc);
5057 opt->exb.ignore_case = 1;
5058
5059 if (slen > 0) {
5060 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5061 env->enc, env->case_fold_flag);
5062 if (r != 0) break;
5063 }
5064
5065 max = slen;
5066 }
5067
5068 set_mml(&opt->len, slen, max);
5069 }
5070
5071 if ((OnigDistance )opt->exb.len == slen)
5072 opt->exb.reach_end = 1;
5073 }
5074 break;
5075
5076 case NT_CCLASS:
5077 {
5078 int i, z;
5079 CClassNode* cc = NCCLASS(node);
5080
5081 /* no need to check ignore case. (set in setup_tree()) */
5082
5083 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5084 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5085 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5086
5087 set_mml(&opt->len, min, max);
5088 }
5089 else {
5090 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5091 z = BITSET_AT(cc->bs, i);
5092 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5093 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5094 }
5095 }
5096 set_mml(&opt->len, 1, 1);
5097 }
5098 }
5099 break;
5100
5101 case NT_CTYPE:
5102 {
5103 int i, min, max;
5104 int maxcode;
5105
5106 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5107
5108 if (max == 1) {
5109 min = 1;
5110
5111 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5112 switch (NCTYPE(node)->ctype) {
5113 case ONIGENC_CTYPE_WORD:
5114 if (NCTYPE(node)->not != 0) {
5115 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5116 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5117 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5118 }
5119 }
5120 }
5121 else {
5122 for (i = 0; i < maxcode; i++) {
5123 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5124 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5125 }
5126 }
5127 }
5128 break;
5129 }
5130 }
5131 else {
5132 min = ONIGENC_MBC_MINLEN(env->enc);
5133 }
5134 set_mml(&opt->len, min, max);
5135 }
5136 break;
5137
5138 case NT_CANY:
5139 {
5140 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5141 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5142 set_mml(&opt->len, min, max);
5143 }
5144 break;
5145
5146 case NT_ANCHOR:
5147 switch (NANCHOR(node)->type) {
5148 case ANCHOR_BEGIN_BUF:
5149 case ANCHOR_BEGIN_POSITION:
5150 case ANCHOR_BEGIN_LINE:
5151 case ANCHOR_END_BUF:
5152 case ANCHOR_SEMI_END_BUF:
5153 case ANCHOR_END_LINE:
5154 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5155 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5156 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5157 break;
5158
5159 case ANCHOR_PREC_READ:
5160 {
5161 NodeOptInfo nopt;
5162
5163 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5164 if (r == 0) {
5165 if (nopt.exb.len > 0)
5166 copy_opt_exact_info(&opt->expr, &nopt.exb);
5167 else if (nopt.exm.len > 0)
5168 copy_opt_exact_info(&opt->expr, &nopt.exm);
5169
5170 opt->expr.reach_end = 0;
5171
5172 if (nopt.map.value > 0)
5173 copy_opt_map_info(&opt->map, &nopt.map);
5174 }
5175 }
5176 break;
5177
5178 case ANCHOR_LOOK_BEHIND_NOT:
5179 break;
5180 }
5181 break;
5182
5183 case NT_BREF:
5184 {
5185 int i;
5186 int* backs;
5187 OnigDistance min, max, tmin, tmax;
5188 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5189 BRefNode* br = NBREF(node);
5190
5191 if (br->state & NST_RECURSION) {
5192 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5193 break;
5194 }
5195 backs = BACKREFS_P(br);
5196 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5197 if (r != 0) break;
5198 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5199 if (r != 0) break;
5200 for (i = 1; i < br->back_num; i++) {
5201 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5202 if (r != 0) break;
5203 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5204 if (r != 0) break;
5205 if (min > tmin) min = tmin;
5206 if (max < tmax) max = tmax;
5207 }
5208 if (r == 0) set_mml(&opt->len, min, max);
5209 }
5210 break;
5211
5212#ifdef USE_SUBEXP_CALL
5213 case NT_CALL:
5214 if (IS_CALL_RECURSION(NCALL(node)))
5215 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5216 else {
5217 OnigOptionType save = env->options;
5218 env->options = NENCLOSE(NCALL(node)->target)->option;
5219 r = optimize_node_left(NCALL(node)->target, opt, env);
5220 env->options = save;
5221 }
5222 break;
5223#endif
5224
5225 case NT_QTFR:
5226 {
5227 int i;
5228 OnigDistance min, max;
5229 NodeOptInfo nopt;
5230 QtfrNode* qn = NQTFR(node);
5231
5232 r = optimize_node_left(qn->target, &nopt, env);
5233 if (r) break;
5234
5235 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5236 if (env->mmd.max == 0 &&
5237 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5238 if (IS_MULTILINE(env->options))
5239 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5240 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5241 else
5242 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5243 }
5244 }
5245 else {
5246 if (qn->lower > 0) {
5247 copy_node_opt_info(opt, &nopt);
5248 if (nopt.exb.len > 0) {
5249 if (nopt.exb.reach_end) {
5250 for (i = 2; i <= qn->lower &&
5251 ! is_full_opt_exact_info(&opt->exb); i++) {
5252 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5253 }
5254 if (i < qn->lower) {
5255 opt->exb.reach_end = 0;
5256 }
5257 }
5258 }
5259
5260 if (qn->lower != qn->upper) {
5261 opt->exb.reach_end = 0;
5262 opt->exm.reach_end = 0;
5263 }
5264 if (qn->lower > 1)
5265 opt->exm.reach_end = 0;
5266 }
5267 }
5268
5269 min = distance_multiply(nopt.len.min, qn->lower);
5270 if (IS_REPEAT_INFINITE(qn->upper))
5271 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5272 else
5273 max = distance_multiply(nopt.len.max, qn->upper);
5274
5275 set_mml(&opt->len, min, max);
5276 }
5277 break;
5278
5279 case NT_ENCLOSE:
5280 {
5281 EncloseNode* en = NENCLOSE(node);
5282
5283 switch (en->type) {
5284 case ENCLOSE_OPTION:
5285 {
5286 OnigOptionType save = env->options;
5287
5288 env->options = en->option;
5289 r = optimize_node_left(en->target, opt, env);
5290 env->options = save;
5291 }
5292 break;
5293
5294 case ENCLOSE_MEMORY:
5295#ifdef USE_SUBEXP_CALL
5296 en->opt_count++;
5297 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5298 OnigDistance min, max;
5299
5300 min = 0;
5301 max = ONIG_INFINITE_DISTANCE;
5302 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5303 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5304 set_mml(&opt->len, min, max);
5305 }
5306 else
5307#endif
5308 {
5309 r = optimize_node_left(en->target, opt, env);
5310
5311 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5312 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5313 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5314 }
5315 }
5316 break;
5317
5318 case ENCLOSE_STOP_BACKTRACK:
5319 case ENCLOSE_CONDITION:
5320 r = optimize_node_left(en->target, opt, env);
5321 break;
5322
5323 case ENCLOSE_ABSENT:
5324 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5325 break;
5326 }
5327 }
5328 break;
5329
5330 default:
5331#ifdef ONIG_DEBUG
5332 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5333 NTYPE(node));
5334#endif
5335 r = ONIGERR_TYPE_BUG;
5336 break;
5337 }
5338
5339 return r;
5340}
5341
5342static int
5343set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5344{
5345 int r;
5346 int allow_reverse;
5347
5348 if (e->len == 0) return 0;
5349
5350 reg->exact = (UChar* )xmalloc(e->len);
5351 CHECK_NULL_RETURN_MEMERR(reg->exact);
5352 xmemcpy(reg->exact, e->s, e->len);
5353 reg->exact_end = reg->exact + e->len;
5354
5355 allow_reverse =
5356 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5357
5358 if (e->ignore_case > 0) {
5359 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5360 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5361 reg->map, &(reg->int_map), 1);
5362 if (r == 0) {
5363 reg->optimize = (allow_reverse != 0
5364 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5365 }
5366 else {
5367 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5368 }
5369 }
5370 else {
5371 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5372 }
5373 }
5374 else {
5375 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5376 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5377 reg->map, &(reg->int_map), 0);
5378 if (r == 0) {
5379 reg->optimize = (allow_reverse != 0
5380 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5381 }
5382 else {
5383 reg->optimize = ONIG_OPTIMIZE_EXACT;
5384 }
5385 }
5386 else {
5387 reg->optimize = ONIG_OPTIMIZE_EXACT;
5388 }
5389 }
5390
5391 reg->dmin = e->mmd.min;
5392 reg->dmax = e->mmd.max;
5393
5394 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5395 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5396 }
5397
5398 return 0;
5399}
5400
5401static void
5402set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5403{
5404 int i;
5405
5406 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5407 reg->map[i] = m->map[i];
5408
5409 reg->optimize = ONIG_OPTIMIZE_MAP;
5410 reg->dmin = m->mmd.min;
5411 reg->dmax = m->mmd.max;
5412
5413 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5414 reg->threshold_len = (int )(reg->dmin + 1);
5415 }
5416}
5417
5418static void
5419set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5420{
5421 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5422 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5423}
5424
5425#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5426static void print_optimize_info(FILE* f, regex_t* reg);
5427#endif
5428
5429static int
5430set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5431{
5432
5433 int r;
5434 NodeOptInfo opt;
5435 OptEnv env;
5436
5437 env.enc = reg->enc;
5438 env.options = reg->options;
5439 env.case_fold_flag = reg->case_fold_flag;
5440 env.scan_env = scan_env;
5441 clear_mml(&env.mmd);
5442
5443 r = optimize_node_left(node, &opt, &env);
5444 if (r) return r;
5445
5446 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5447 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5448 ANCHOR_LOOK_BEHIND);
5449
5450 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5451 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5452
5453 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5454 ANCHOR_PREC_READ_NOT);
5455
5456 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5457 reg->anchor_dmin = opt.len.min;
5458 reg->anchor_dmax = opt.len.max;
5459 }
5460
5461 if (opt.exb.len > 0 || opt.exm.len > 0) {
5462 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5463 if (opt.map.value > 0 &&
5464 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5465 goto set_map;
5466 }
5467 else {
5468 r = set_optimize_exact_info(reg, &opt.exb);
5469 set_sub_anchor(reg, &opt.exb.anc);
5470 }
5471 }
5472 else if (opt.map.value > 0) {
5473 set_map:
5474 set_optimize_map_info(reg, &opt.map);
5475 set_sub_anchor(reg, &opt.map.anc);
5476 }
5477 else {
5478 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5479 if (opt.len.max == 0)
5480 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5481 }
5482
5483#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5484 print_optimize_info(stderr, reg);
5485#endif
5486 return r;
5487}
5488
5489static void
5490clear_optimize_info(regex_t* reg)
5491{
5492 reg->optimize = ONIG_OPTIMIZE_NONE;
5493 reg->anchor = 0;
5494 reg->anchor_dmin = 0;
5495 reg->anchor_dmax = 0;
5496 reg->sub_anchor = 0;
5497 reg->exact_end = (UChar* )NULL;
5498 reg->threshold_len = 0;
5499 xfree(reg->exact);
5500 reg->exact = (UChar* )NULL;
5501}
5502
5503#ifdef ONIG_DEBUG
5504
5505static void print_enc_string(FILE* fp, OnigEncoding enc,
5506 const UChar *s, const UChar *end)
5507{
5508 fprintf(fp, "\nPATTERN: /");
5509
5510 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5511 const UChar *p;
5512 OnigCodePoint code;
5513
5514 p = s;
5515 while (p < end) {
5516 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5517 if (code >= 0x80) {
5518 fprintf(fp, " 0x%04x ", (int )code);
5519 }
5520 else {
5521 fputc((int )code, fp);
5522 }
5523
5524 p += enclen(enc, p, end);
5525 }
5526 }
5527 else {
5528 while (s < end) {
5529 fputc((int )*s, fp);
5530 s++;
5531 }
5532 }
5533
5534 fprintf(fp, "/ (%s)\n", enc->name);
5535}
5536#endif /* ONIG_DEBUG */
5537
5538#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5539static void
5540print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5541{
5542 if (a == ONIG_INFINITE_DISTANCE)
5543 fputs("inf", f);
5544 else
5545 fprintf(f, "(%"PRIuPTR")", a);
5546
5547 fputs("-", f);
5548
5549 if (b == ONIG_INFINITE_DISTANCE)
5550 fputs("inf", f);
5551 else
5552 fprintf(f, "(%"PRIuPTR")", b);
5553}
5554
5555static void
5556print_anchor(FILE* f, int anchor)
5557{
5558 int q = 0;
5559
5560 fprintf(f, "[");
5561
5562 if (anchor & ANCHOR_BEGIN_BUF) {
5563 fprintf(f, "begin-buf");
5564 q = 1;
5565 }
5566 if (anchor & ANCHOR_BEGIN_LINE) {
5567 if (q) fprintf(f, ", ");
5568 q = 1;
5569 fprintf(f, "begin-line");
5570 }
5571 if (anchor & ANCHOR_BEGIN_POSITION) {
5572 if (q) fprintf(f, ", ");
5573 q = 1;
5574 fprintf(f, "begin-pos");
5575 }
5576 if (anchor & ANCHOR_END_BUF) {
5577 if (q) fprintf(f, ", ");
5578 q = 1;
5579 fprintf(f, "end-buf");
5580 }
5581 if (anchor & ANCHOR_SEMI_END_BUF) {
5582 if (q) fprintf(f, ", ");
5583 q = 1;
5584 fprintf(f, "semi-end-buf");
5585 }
5586 if (anchor & ANCHOR_END_LINE) {
5587 if (q) fprintf(f, ", ");
5588 q = 1;
5589 fprintf(f, "end-line");
5590 }
5591 if (anchor & ANCHOR_ANYCHAR_STAR) {
5592 if (q) fprintf(f, ", ");
5593 q = 1;
5594 fprintf(f, "anychar-star");
5595 }
5596 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5597 if (q) fprintf(f, ", ");
5598 fprintf(f, "anychar-star-ml");
5599 }
5600
5601 fprintf(f, "]");
5602}
5603
5604static void
5605print_optimize_info(FILE* f, regex_t* reg)
5606{
5607 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5608 "EXACT_IC", "MAP",
5609 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5610
5611 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5612 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5613 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5614 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5615 fprintf(f, "\n");
5616
5617 if (reg->optimize) {
5618 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5619 fprintf(f, "\n");
5620 }
5621 fprintf(f, "\n");
5622
5623 if (reg->exact) {
5624 UChar *p;
5625 fprintf(f, "exact: [");
5626 for (p = reg->exact; p < reg->exact_end; p++) {
5627 fputc(*p, f);
5628 }
5629 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5630 }
5631 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5632 int c, i, n = 0;
5633
5634 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5635 if (reg->map[i]) n++;
5636
5637 fprintf(f, "map: n=%d\n", n);
5638 if (n > 0) {
5639 c = 0;
5640 fputc('[', f);
5641 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5642 if (reg->map[i] != 0) {
5643 if (c > 0) fputs(", ", f);
5644 c++;
5645 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5646 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5647 fputc(i, f);
5648 else
5649 fprintf(f, "%d", i);
5650 }
5651 }
5652 fprintf(f, "]\n");
5653 }
5654 }
5655}
5656#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5657
5658
5659extern void
5660onig_free_body(regex_t* reg)
5661{
5662 if (IS_NOT_NULL(reg)) {
5663 xfree(reg->p);
5664 xfree(reg->exact);
5665 xfree(reg->int_map);
5666 xfree(reg->int_map_backward);
5667 xfree(reg->repeat_range);
5668 onig_free(reg->chain);
5669
5670#ifdef USE_NAMED_GROUP
5671 onig_names_free(reg);
5672#endif
5673 }
5674}
5675
5676extern void
5677onig_free(regex_t* reg)
5678{
5679 if (IS_NOT_NULL(reg)) {
5680 onig_free_body(reg);
5681 xfree(reg);
5682 }
5683}
5684
5685static void*
5686dup_copy(const void *ptr, size_t size)
5687{
5688 void *newptr = xmalloc(size);
5689 if (IS_NOT_NULL(newptr)) {
5690 memcpy(newptr, ptr, size);
5691 }
5692 return newptr;
5693}
5694
5695extern int
5696onig_reg_copy(regex_t** nreg, regex_t* oreg)
5697{
5698 if (IS_NOT_NULL(oreg)) {
5699 regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
5700 if (IS_NULL(reg)) return ONIGERR_MEMORY;
5701
5702 *reg = *oreg;
5703
5704# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5705
5706 if (IS_NOT_NULL(reg->exact)) {
5707 size_t exact_size = reg->exact_end - reg->exact;
5708 if (COPY_FAILED(exact, exact_size))
5709 goto err;
5710 (reg)->exact_end = (reg)->exact + exact_size;
5711 }
5712
5713 if (IS_NOT_NULL(reg->int_map)) {
5714 if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5715 goto err_int_map;
5716 }
5717 if (IS_NOT_NULL(reg->int_map_backward)) {
5718 if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5719 goto err_int_map_backward;
5720 }
5721 if (IS_NOT_NULL(reg->p)) {
5722 if (COPY_FAILED(p, reg->alloc))
5723 goto err_p;
5724 }
5725 if (IS_NOT_NULL(reg->repeat_range)) {
5726 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
5727 goto err_repeat_range;
5728 }
5729 if (IS_NOT_NULL(reg->name_table)) {
5730 if (onig_names_copy(reg, oreg))
5731 goto err_name_table;
5732 }
5733 if (IS_NOT_NULL(reg->chain)) {
5734 if (onig_reg_copy(&reg->chain, reg->chain))
5735 goto err_chain;
5736 }
5737 return 0;
5738# undef COPY_FAILED
5739
5740 err_chain:
5741 onig_names_free(reg);
5742 err_name_table:
5743 xfree(reg->repeat_range);
5744 err_repeat_range:
5745 xfree(reg->p);
5746 err_p:
5747 xfree(reg->int_map_backward);
5748 err_int_map_backward:
5749 xfree(reg->int_map);
5750 err_int_map:
5751 xfree(reg->exact);
5752 err:
5753 xfree(reg);
5754 return ONIGERR_MEMORY;
5755 }
5756 return 0;
5757}
5758
5759#ifdef RUBY
5760size_t
5761onig_memsize(const regex_t *reg)
5762{
5763 size_t size = sizeof(regex_t);
5764 if (IS_NULL(reg)) return 0;
5765 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5766 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5767 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5768 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5769 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5770 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5771
5772 return size;
5773}
5774
5775size_t
5776onig_region_memsize(const OnigRegion *regs)
5777{
5778 size_t size = sizeof(*regs);
5779 if (IS_NULL(regs)) return 0;
5780 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5781 return size;
5782}
5783#endif
5784
5785#define REGEX_TRANSFER(to,from) do {\
5786 onig_free_body(to);\
5787 xmemcpy(to, from, sizeof(regex_t));\
5788 xfree(from);\
5789} while (0)
5790
5791#if 0
5792extern void
5793onig_transfer(regex_t* to, regex_t* from)
5794{
5795 REGEX_TRANSFER(to, from);
5796}
5797#endif
5798
5799#ifdef ONIG_DEBUG_COMPILE
5800static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5801#endif
5802#ifdef ONIG_DEBUG_PARSE_TREE
5803static void print_tree(FILE* f, Node* node);
5804#endif
5805
5806#ifdef RUBY
5807extern int
5808onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5809 OnigErrorInfo* einfo)
5810{
5811 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5812}
5813#endif
5814
5815#ifdef RUBY
5816extern int
5817onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5818 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5819#else
5820extern int
5821onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5822 OnigErrorInfo* einfo)
5823#endif
5824{
5825#define COMPILE_INIT_SIZE 20
5826
5827 int r;
5828 OnigDistance init_size;
5829 Node* root;
5830 ScanEnv scan_env = {0};
5831#ifdef USE_SUBEXP_CALL
5832 UnsetAddrList uslist;
5833#endif
5834
5835 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5836
5837#ifdef RUBY
5838 scan_env.sourcefile = sourcefile;
5839 scan_env.sourceline = sourceline;
5840#endif
5841
5842#ifdef ONIG_DEBUG
5843 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5844#endif
5845
5846 if (reg->alloc == 0) {
5847 init_size = (pattern_end - pattern) * 2;
5848 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5849 r = BBUF_INIT(reg, init_size);
5850 if (r != 0) goto end;
5851 }
5852 else
5853 reg->used = 0;
5854
5855 reg->num_mem = 0;
5856 reg->num_repeat = 0;
5857 reg->num_null_check = 0;
5858 reg->repeat_range_alloc = 0;
5859 reg->repeat_range = (OnigRepeatRange* )NULL;
5860#ifdef USE_COMBINATION_EXPLOSION_CHECK
5861 reg->num_comb_exp_check = 0;
5862#endif
5863
5864 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5865 if (r != 0) goto err;
5866
5867#ifdef ONIG_DEBUG_PARSE_TREE
5868# if 0
5869 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5870 print_tree(stderr, root);
5871# endif
5872#endif
5873
5874#ifdef USE_NAMED_GROUP
5875 /* mixed use named group and no-named group */
5876 if (scan_env.num_named > 0 &&
5877 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5878 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5879 if (scan_env.num_named != scan_env.num_mem)
5880 r = disable_noname_group_capture(&root, reg, &scan_env);
5881 else
5882 r = numbered_ref_check(root);
5883
5884 if (r != 0) goto err;
5885 }
5886#endif
5887
5888#ifdef USE_SUBEXP_CALL
5889 if (scan_env.num_call > 0) {
5890 r = unset_addr_list_init(&uslist, scan_env.num_call);
5891 if (r != 0) goto err;
5892 scan_env.unset_addr_list = &uslist;
5893 r = setup_subexp_call(root, &scan_env);
5894 if (r != 0) goto err_unset;
5895 r = subexp_recursive_check_trav(root, &scan_env);
5896 if (r < 0) goto err_unset;
5897 r = subexp_inf_recursive_check_trav(root, &scan_env);
5898 if (r != 0) goto err_unset;
5899
5900 reg->num_call = scan_env.num_call;
5901 }
5902 else
5903 reg->num_call = 0;
5904#endif
5905
5906 r = setup_tree(root, reg, 0, &scan_env);
5907 if (r != 0) goto err_unset;
5908
5909#ifdef ONIG_DEBUG_PARSE_TREE
5910 print_tree(stderr, root);
5911#endif
5912
5913 reg->capture_history = scan_env.capture_history;
5914 reg->bt_mem_start = scan_env.bt_mem_start;
5915 reg->bt_mem_start |= reg->capture_history;
5916 if (IS_FIND_CONDITION(reg->options))
5917 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5918 else {
5919 reg->bt_mem_end = scan_env.bt_mem_end;
5920 reg->bt_mem_end |= reg->capture_history;
5921 }
5922
5923#ifdef USE_COMBINATION_EXPLOSION_CHECK
5924 if (scan_env.backrefed_mem == 0
5925# ifdef USE_SUBEXP_CALL
5926 || scan_env.num_call == 0
5927# endif
5928 ) {
5929 setup_comb_exp_check(root, 0, &scan_env);
5930# ifdef USE_SUBEXP_CALL
5931 if (scan_env.has_recursion != 0) {
5932 scan_env.num_comb_exp_check = 0;
5933 }
5934 else
5935# endif
5936 if (scan_env.comb_exp_max_regnum > 0) {
5937 int i;
5938 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5939 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5940 scan_env.num_comb_exp_check = 0;
5941 break;
5942 }
5943 }
5944 }
5945 }
5946
5947 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5948#endif
5949
5950 clear_optimize_info(reg);
5951#ifndef ONIG_DONT_OPTIMIZE
5952 r = set_optimize_info_from_tree(root, reg, &scan_env);
5953 if (r != 0) goto err_unset;
5954#endif
5955
5956 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5957 xfree(scan_env.mem_nodes_dynamic);
5958 scan_env.mem_nodes_dynamic = (Node** )NULL;
5959 }
5960
5961 r = compile_tree(root, reg);
5962 if (r == 0) {
5963 r = add_opcode(reg, OP_END);
5964#ifdef USE_SUBEXP_CALL
5965 if (scan_env.num_call > 0) {
5966 r = unset_addr_list_fix(&uslist, reg);
5967 unset_addr_list_end(&uslist);
5968 if (r) goto err;
5969 }
5970#endif
5971
5972 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5973 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5974 else {
5975 if (reg->bt_mem_start != 0)
5976 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5977 else
5978 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5979 }
5980 }
5981#ifdef USE_SUBEXP_CALL
5982 else if (scan_env.num_call > 0) {
5983 unset_addr_list_end(&uslist);
5984 }
5985#endif
5986 onig_node_free(root);
5987
5988#ifdef ONIG_DEBUG_COMPILE
5989# ifdef USE_NAMED_GROUP
5990 onig_print_names(stderr, reg);
5991# endif
5992 print_compiled_byte_code_list(stderr, reg);
5993#endif
5994
5995 end:
5996 onig_reg_resize(reg);
5997 return r;
5998
5999 err_unset:
6000#ifdef USE_SUBEXP_CALL
6001 if (scan_env.num_call > 0) {
6002 unset_addr_list_end(&uslist);
6003 }
6004#endif
6005 err:
6006 if (IS_NOT_NULL(scan_env.error)) {
6007 if (IS_NOT_NULL(einfo)) {
6008 einfo->enc = scan_env.enc;
6009 einfo->par = scan_env.error;
6010 einfo->par_end = scan_env.error_end;
6011 }
6012 }
6013
6014 onig_node_free(root);
6015 xfree(scan_env.mem_nodes_dynamic);
6016
6017 return r;
6018}
6019
6020static int onig_inited = 0;
6021
6022extern int
6023onig_reg_init(regex_t* reg, OnigOptionType option,
6024 OnigCaseFoldType case_fold_flag,
6025 OnigEncoding enc, const OnigSyntaxType* syntax)
6026{
6027 if (! onig_inited)
6028 onig_init();
6029
6030 if (IS_NULL(reg))
6031 return ONIGERR_INVALID_ARGUMENT;
6032
6033 if (ONIGENC_IS_UNDEF(enc))
6034 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
6035
6036 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6037 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6038 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6039 }
6040
6041 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6042 option |= syntax->options;
6043 option &= ~ONIG_OPTION_SINGLELINE;
6044 }
6045 else
6046 option |= syntax->options;
6047
6048 (reg)->enc = enc;
6049 (reg)->options = option;
6050 (reg)->syntax = syntax;
6051 (reg)->optimize = 0;
6052 (reg)->exact = (UChar* )NULL;
6053 (reg)->int_map = (int* )NULL;
6054 (reg)->int_map_backward = (int* )NULL;
6055 (reg)->chain = (regex_t* )NULL;
6056
6057 (reg)->p = (UChar* )NULL;
6058 (reg)->alloc = 0;
6059 (reg)->used = 0;
6060 (reg)->name_table = (void* )NULL;
6061
6062 (reg)->case_fold_flag = case_fold_flag;
6063
6064 (reg)->timelimit = 0;
6065
6066 return 0;
6067}
6068
6069extern int
6070onig_new_without_alloc(regex_t* reg, const UChar* pattern,
6071 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
6072 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6073{
6074 int r;
6075
6076 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6077 if (r) return r;
6078
6079 r = onig_compile(reg, pattern, pattern_end, einfo);
6080 return r;
6081}
6082
6083extern int
6084onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6085 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
6086 OnigErrorInfo* einfo)
6087{
6088 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6089 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6090
6091 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
6092 if (r) {
6093 onig_free(*reg);
6094 *reg = NULL;
6095 }
6096
6097 return r;
6098}
6099
6100extern int
6101onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
6102{
6103 return onig_init();
6104}
6105
6106extern int
6107onig_init(void)
6108{
6109 if (onig_inited != 0)
6110 return 0;
6111
6112 onig_inited = 1;
6113
6114#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6115 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6116#endif
6117
6118 onigenc_init();
6119 /* onigenc_set_default_caseconv_table((UChar* )0); */
6120
6121#ifdef ONIG_DEBUG_STATISTICS
6122 onig_statistics_init();
6123#endif
6124
6125 return 0;
6126}
6127
6128
6129static OnigEndCallListItemType* EndCallTop;
6130
6131extern void onig_add_end_call(void (*func)(void))
6132{
6134
6135 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6136 if (item == 0) return ;
6137
6138 item->next = EndCallTop;
6139 item->func = func;
6140
6141 EndCallTop = item;
6142}
6143
6144static void
6145exec_end_call_list(void)
6146{
6148 void (*func)(void);
6149
6150 while (EndCallTop != 0) {
6151 func = EndCallTop->func;
6152 (*func)();
6153
6154 prev = EndCallTop;
6155 EndCallTop = EndCallTop->next;
6156 xfree(prev);
6157 }
6158}
6159
6160extern int
6161onig_end(void)
6162{
6163 exec_end_call_list();
6164
6165#ifdef ONIG_DEBUG_STATISTICS
6166 onig_print_statistics(stderr);
6167#endif
6168
6169#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6170 _CrtDumpMemoryLeaks();
6171#endif
6172
6173 onig_inited = 0;
6174
6175 return 0;
6176}
6177
6178extern int
6179onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6180{
6181 OnigCodePoint n, *data;
6182 OnigCodePoint low, high, x;
6183
6184 GET_CODE_POINT(n, p);
6185 data = (OnigCodePoint* )p;
6186 data++;
6187
6188 for (low = 0, high = n; low < high; ) {
6189 x = (low + high) >> 1;
6190 if (code > data[x * 2 + 1])
6191 low = x + 1;
6192 else
6193 high = x;
6194 }
6195
6196 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6197}
6198
6199extern int
6200onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6201{
6202 int found;
6203
6204 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6205 if (IS_NULL(cc->mbuf)) {
6206 found = 0;
6207 }
6208 else {
6209 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6210 }
6211 }
6212 else {
6213 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6214 }
6215
6216 if (IS_NCCLASS_NOT(cc))
6217 return !found;
6218 else
6219 return found;
6220}
6221
6222extern int
6223onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6224{
6225 int len;
6226
6227 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6228 len = 2;
6229 }
6230 else {
6231 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6232 }
6233 return onig_is_code_in_cc_len(len, code, cc);
6234}
6235
6236
6237#ifdef ONIG_DEBUG
6238
6239/* arguments type */
6240# define ARG_SPECIAL -1
6241# define ARG_NON 0
6242# define ARG_RELADDR 1
6243# define ARG_ABSADDR 2
6244# define ARG_LENGTH 3
6245# define ARG_MEMNUM 4
6246# define ARG_OPTION 5
6247# define ARG_STATE_CHECK 6
6248
6249OnigOpInfoType OnigOpInfo[] = {
6250 { OP_FINISH, "finish", ARG_NON },
6251 { OP_END, "end", ARG_NON },
6252 { OP_EXACT1, "exact1", ARG_SPECIAL },
6253 { OP_EXACT2, "exact2", ARG_SPECIAL },
6254 { OP_EXACT3, "exact3", ARG_SPECIAL },
6255 { OP_EXACT4, "exact4", ARG_SPECIAL },
6256 { OP_EXACT5, "exact5", ARG_SPECIAL },
6257 { OP_EXACTN, "exactn", ARG_SPECIAL },
6258 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6259 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6260 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6261 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6262 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6263 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6264 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6265 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6266 { OP_CCLASS, "cclass", ARG_SPECIAL },
6267 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6268 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6269 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6270 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6271 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6272 { OP_ANYCHAR, "anychar", ARG_NON },
6273 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6274 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6275 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6276 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6277 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6278 { OP_WORD, "word", ARG_NON },
6279 { OP_NOT_WORD, "not-word", ARG_NON },
6280 { OP_WORD_BOUND, "word-bound", ARG_NON },
6281 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6282 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6283 { OP_WORD_END, "word-end", ARG_NON },
6284 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6285 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6286 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6287 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6288 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6289 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6290 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6291 { OP_END_BUF, "end-buf", ARG_NON },
6292 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6293 { OP_END_LINE, "end-line", ARG_NON },
6294 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6295 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6296 { OP_BACKREF1, "backref1", ARG_NON },
6297 { OP_BACKREF2, "backref2", ARG_NON },
6298 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6299 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6300 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6301 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6302 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6303 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6304 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6305 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6306 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6307 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6308 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6309 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6310 { OP_SET_OPTION, "set-option", ARG_OPTION },
6311 { OP_KEEP, "keep", ARG_NON },
6312 { OP_FAIL, "fail", ARG_NON },
6313 { OP_JUMP, "jump", ARG_RELADDR },
6314 { OP_PUSH, "push", ARG_RELADDR },
6315 { OP_POP, "pop", ARG_NON },
6316 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6317 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6318 { OP_REPEAT, "repeat", ARG_SPECIAL },
6319 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6320 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6321 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6322 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6323 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6324 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6325 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6326 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6327 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6328 { OP_PUSH_POS, "push-pos", ARG_NON },
6329 { OP_POP_POS, "pop-pos", ARG_NON },
6330 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6331 { OP_FAIL_POS, "fail-pos", ARG_NON },
6332 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6333 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6334 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6335 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6336 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6337 { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6338 { OP_ABSENT, "absent", ARG_RELADDR },
6339 { OP_ABSENT_END, "absent-end", ARG_NON },
6340 { OP_CALL, "call", ARG_ABSADDR },
6341 { OP_RETURN, "return", ARG_NON },
6342 { OP_CONDITION, "condition", ARG_SPECIAL },
6343 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6344 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6345 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6346 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6347 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6348 "state-check-anychar-ml*", ARG_STATE_CHECK },
6349 { -1, "", ARG_NON }
6350};
6351
6352static const char*
6353op2name(int opcode)
6354{
6355 int i;
6356
6357 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6358 if (opcode == OnigOpInfo[i].opcode)
6359 return OnigOpInfo[i].name;
6360 }
6361 return "";
6362}
6363
6364static int
6365op2arg_type(int opcode)
6366{
6367 int i;
6368
6369 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6370 if (opcode == OnigOpInfo[i].opcode)
6371 return OnigOpInfo[i].arg_type;
6372 }
6373 return ARG_SPECIAL;
6374}
6375
6376# ifdef ONIG_DEBUG_PARSE_TREE
6377static void
6378Indent(FILE* f, int indent)
6379{
6380 int i;
6381 for (i = 0; i < indent; i++) putc(' ', f);
6382}
6383# endif /* ONIG_DEBUG_PARSE_TREE */
6384
6385static void
6386p_string(FILE* f, ptrdiff_t len, UChar* s)
6387{
6388 fputs(":", f);
6389 while (len-- > 0) { fputc(*s++, f); }
6390}
6391
6392static void
6393p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6394{
6395 int x = len * mb_len;
6396
6397 fprintf(f, ":%d:", len);
6398 while (x-- > 0) { fputc(*s++, f); }
6399}
6400
6401extern void
6402onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6403 OnigEncoding enc)
6404{
6405 int i, n, arg_type;
6406 RelAddrType addr;
6407 LengthType len;
6408 MemNumType mem;
6409 StateCheckNumType scn;
6410 OnigCodePoint code;
6411 UChar *q;
6412
6413 fprintf(f, "[%s", op2name(*bp));
6414 arg_type = op2arg_type(*bp);
6415 if (arg_type != ARG_SPECIAL) {
6416 bp++;
6417 switch (arg_type) {
6418 case ARG_NON:
6419 break;
6420 case ARG_RELADDR:
6421 GET_RELADDR_INC(addr, bp);
6422 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6423 break;
6424 case ARG_ABSADDR:
6425 GET_ABSADDR_INC(addr, bp);
6426 fprintf(f, ":(%d)", addr);
6427 break;
6428 case ARG_LENGTH:
6429 GET_LENGTH_INC(len, bp);
6430 fprintf(f, ":%d", len);
6431 break;
6432 case ARG_MEMNUM:
6433 mem = *((MemNumType* )bp);
6434 bp += SIZE_MEMNUM;
6435 fprintf(f, ":%d", mem);
6436 break;
6437 case ARG_OPTION:
6438 {
6439 OnigOptionType option = *((OnigOptionType* )bp);
6440 bp += SIZE_OPTION;
6441 fprintf(f, ":%d", option);
6442 }
6443 break;
6444
6445 case ARG_STATE_CHECK:
6446 scn = *((StateCheckNumType* )bp);
6447 bp += SIZE_STATE_CHECK_NUM;
6448 fprintf(f, ":%d", scn);
6449 break;
6450 }
6451 }
6452 else {
6453 switch (*bp++) {
6454 case OP_EXACT1:
6455 case OP_ANYCHAR_STAR_PEEK_NEXT:
6456 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6457 p_string(f, 1, bp++); break;
6458 case OP_EXACT2:
6459 p_string(f, 2, bp); bp += 2; break;
6460 case OP_EXACT3:
6461 p_string(f, 3, bp); bp += 3; break;
6462 case OP_EXACT4:
6463 p_string(f, 4, bp); bp += 4; break;
6464 case OP_EXACT5:
6465 p_string(f, 5, bp); bp += 5; break;
6466 case OP_EXACTN:
6467 GET_LENGTH_INC(len, bp);
6468 p_len_string(f, len, 1, bp);
6469 bp += len;
6470 break;
6471
6472 case OP_EXACTMB2N1:
6473 p_string(f, 2, bp); bp += 2; break;
6474 case OP_EXACTMB2N2:
6475 p_string(f, 4, bp); bp += 4; break;
6476 case OP_EXACTMB2N3:
6477 p_string(f, 6, bp); bp += 6; break;
6478 case OP_EXACTMB2N:
6479 GET_LENGTH_INC(len, bp);
6480 p_len_string(f, len, 2, bp);
6481 bp += len * 2;
6482 break;
6483 case OP_EXACTMB3N:
6484 GET_LENGTH_INC(len, bp);
6485 p_len_string(f, len, 3, bp);
6486 bp += len * 3;
6487 break;
6488 case OP_EXACTMBN:
6489 {
6490 int mb_len;
6491
6492 GET_LENGTH_INC(mb_len, bp);
6493 GET_LENGTH_INC(len, bp);
6494 fprintf(f, ":%d:%d:", mb_len, len);
6495 n = len * mb_len;
6496 while (n-- > 0) { fputc(*bp++, f); }
6497 }
6498 break;
6499
6500 case OP_EXACT1_IC:
6501 len = enclen(enc, bp, bpend);
6502 p_string(f, len, bp);
6503 bp += len;
6504 break;
6505 case OP_EXACTN_IC:
6506 GET_LENGTH_INC(len, bp);
6507 p_len_string(f, len, 1, bp);
6508 bp += len;
6509 break;
6510
6511 case OP_CCLASS:
6512 n = bitset_on_num((BitSetRef )bp);
6513 bp += SIZE_BITSET;
6514 fprintf(f, ":%d", n);
6515 break;
6516
6517 case OP_CCLASS_NOT:
6518 n = bitset_on_num((BitSetRef )bp);
6519 bp += SIZE_BITSET;
6520 fprintf(f, ":%d", n);
6521 break;
6522
6523 case OP_CCLASS_MB:
6524 case OP_CCLASS_MB_NOT:
6525 GET_LENGTH_INC(len, bp);
6526 q = bp;
6527# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6528 ALIGNMENT_RIGHT(q);
6529# endif
6530 GET_CODE_POINT(code, q);
6531 bp += len;
6532 fprintf(f, ":%d:%d", (int )code, len);
6533 break;
6534
6535 case OP_CCLASS_MIX:
6536 case OP_CCLASS_MIX_NOT:
6537 n = bitset_on_num((BitSetRef )bp);
6538 bp += SIZE_BITSET;
6539 GET_LENGTH_INC(len, bp);
6540 q = bp;
6541# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6542 ALIGNMENT_RIGHT(q);
6543# endif
6544 GET_CODE_POINT(code, q);
6545 bp += len;
6546 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6547 break;
6548
6549 case OP_BACKREFN_IC:
6550 mem = *((MemNumType* )bp);
6551 bp += SIZE_MEMNUM;
6552 fprintf(f, ":%d", mem);
6553 break;
6554
6555 case OP_BACKREF_MULTI_IC:
6556 case OP_BACKREF_MULTI:
6557 fputs(" ", f);
6558 GET_LENGTH_INC(len, bp);
6559 for (i = 0; i < len; i++) {
6560 GET_MEMNUM_INC(mem, bp);
6561 if (i > 0) fputs(", ", f);
6562 fprintf(f, "%d", mem);
6563 }
6564 break;
6565
6566 case OP_BACKREF_WITH_LEVEL:
6567 {
6568 OnigOptionType option;
6569 LengthType level;
6570
6571 GET_OPTION_INC(option, bp);
6572 fprintf(f, ":%d", option);
6573 GET_LENGTH_INC(level, bp);
6574 fprintf(f, ":%d", level);
6575
6576 fputs(" ", f);
6577 GET_LENGTH_INC(len, bp);
6578 for (i = 0; i < len; i++) {
6579 GET_MEMNUM_INC(mem, bp);
6580 if (i > 0) fputs(", ", f);
6581 fprintf(f, "%d", mem);
6582 }
6583 }
6584 break;
6585
6586 case OP_REPEAT:
6587 case OP_REPEAT_NG:
6588 {
6589 mem = *((MemNumType* )bp);
6590 bp += SIZE_MEMNUM;
6591 addr = *((RelAddrType* )bp);
6592 bp += SIZE_RELADDR;
6593 fprintf(f, ":%d:%d", mem, addr);
6594 }
6595 break;
6596
6597 case OP_PUSH_OR_JUMP_EXACT1:
6598 case OP_PUSH_IF_PEEK_NEXT:
6599 addr = *((RelAddrType* )bp);
6600 bp += SIZE_RELADDR;
6601 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6602 p_string(f, 1, bp);
6603 bp += 1;
6604 break;
6605
6606 case OP_LOOK_BEHIND:
6607 GET_LENGTH_INC(len, bp);
6608 fprintf(f, ":%d", len);
6609 break;
6610
6611 case OP_PUSH_LOOK_BEHIND_NOT:
6612 GET_RELADDR_INC(addr, bp);
6613 GET_LENGTH_INC(len, bp);
6614 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6615 break;
6616
6617 case OP_STATE_CHECK_PUSH:
6618 case OP_STATE_CHECK_PUSH_OR_JUMP:
6619 scn = *((StateCheckNumType* )bp);
6620 bp += SIZE_STATE_CHECK_NUM;
6621 addr = *((RelAddrType* )bp);
6622 bp += SIZE_RELADDR;
6623 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6624 break;
6625
6626 case OP_CONDITION:
6627 GET_MEMNUM_INC(mem, bp);
6628 GET_RELADDR_INC(addr, bp);
6629 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6630 break;
6631
6632 default:
6633 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6634 bp[-1]);
6635 }
6636 }
6637 fputs("]", f);
6638 if (nextp) *nextp = bp;
6639}
6640
6641# ifdef ONIG_DEBUG_COMPILE
6642static void
6643print_compiled_byte_code_list(FILE* f, regex_t* reg)
6644{
6645 int ncode;
6646 UChar* bp = reg->p;
6647 UChar* end = reg->p + reg->used;
6648
6649 fprintf(f, "code length: %d", reg->used);
6650
6651 ncode = -1;
6652 while (bp < end) {
6653 ncode++;
6654 if (ncode % 5 == 0)
6655 fprintf(f, "\n%ld:", bp - reg->p);
6656 else
6657 fprintf(f, " %ld:", bp - reg->p);
6658 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6659 }
6660
6661 fprintf(f, "\n");
6662}
6663# endif /* ONIG_DEBUG_COMPILE */
6664
6665# ifdef ONIG_DEBUG_PARSE_TREE
6666static void
6667print_indent_tree(FILE* f, Node* node, int indent)
6668{
6669 int i, type, container_p = 0;
6670 int add = 3;
6671 UChar* p;
6672
6673 Indent(f, indent);
6674 if (IS_NULL(node)) {
6675 fprintf(f, "ERROR: null node!!!\n");
6676 exit (0);
6677 }
6678
6679 type = NTYPE(node);
6680 switch (type) {
6681 case NT_LIST:
6682 case NT_ALT:
6683 if (NTYPE(node) == NT_LIST)
6684 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6685 else
6686 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6687
6688 print_indent_tree(f, NCAR(node), indent + add);
6689 while (IS_NOT_NULL(node = NCDR(node))) {
6690 if (NTYPE(node) != type) {
6691 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6692 exit(0);
6693 }
6694 print_indent_tree(f, NCAR(node), indent + add);
6695 }
6696 break;
6697
6698 case NT_STR:
6699 fprintf(f, "<string%s:%"PRIxPTR">",
6700 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6701 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6702 if (*p >= 0x20 && *p < 0x7f)
6703 fputc(*p, f);
6704 else {
6705 fprintf(f, " 0x%02x", *p);
6706 }
6707 }
6708 break;
6709
6710 case NT_CCLASS:
6711 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6712 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6713 if (NCCLASS(node)->mbuf) {
6714 BBuf* bbuf = NCCLASS(node)->mbuf;
6715 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6716 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6717 fprintf(f, "%d", *data++);
6718 for (; data < end; data+=2) {
6719 fprintf(f, ",");
6720 fprintf(f, "%04x-%04x", data[0], data[1]);
6721 }
6722 }
6723 break;
6724
6725 case NT_CTYPE:
6726 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6727 switch (NCTYPE(node)->ctype) {
6728 case ONIGENC_CTYPE_WORD:
6729 if (NCTYPE(node)->not != 0)
6730 fputs("not word", f);
6731 else
6732 fputs("word", f);
6733 break;
6734
6735 default:
6736 fprintf(f, "ERROR: undefined ctype.\n");
6737 exit(0);
6738 }
6739 break;
6740
6741 case NT_CANY:
6742 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6743 break;
6744
6745 case NT_ANCHOR:
6746 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6747 switch (NANCHOR(node)->type) {
6748 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6749 case ANCHOR_END_BUF: fputs("end buf", f); break;
6750 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6751 case ANCHOR_END_LINE: fputs("end line", f); break;
6752 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6753 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6754
6755 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6756 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6757# ifdef USE_WORD_BEGIN_END
6758 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6759 case ANCHOR_WORD_END: fputs("word end", f); break;
6760# endif
6761 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6762 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6763 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6764 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6765 case ANCHOR_KEEP: fputs("keep",f); break;
6766
6767 default:
6768 fprintf(f, "ERROR: undefined anchor type.\n");
6769 break;
6770 }
6771 break;
6772
6773 case NT_BREF:
6774 {
6775 int* p;
6776 BRefNode* br = NBREF(node);
6777 p = BACKREFS_P(br);
6778 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6779 for (i = 0; i < br->back_num; i++) {
6780 if (i > 0) fputs(", ", f);
6781 fprintf(f, "%d", p[i]);
6782 }
6783 }
6784 break;
6785
6786# ifdef USE_SUBEXP_CALL
6787 case NT_CALL:
6788 {
6789 CallNode* cn = NCALL(node);
6790 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6791 p_string(f, cn->name_end - cn->name, cn->name);
6792 }
6793 break;
6794# endif
6795
6796 case NT_QTFR:
6797 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6798 NQTFR(node)->lower, NQTFR(node)->upper,
6799 (NQTFR(node)->greedy ? "" : "?"));
6800 print_indent_tree(f, NQTFR(node)->target, indent + add);
6801 break;
6802
6803 case NT_ENCLOSE:
6804 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6805 switch (NENCLOSE(node)->type) {
6806 case ENCLOSE_OPTION:
6807 fprintf(f, "option:%d", NENCLOSE(node)->option);
6808 break;
6809 case ENCLOSE_MEMORY:
6810 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6811 break;
6812 case ENCLOSE_STOP_BACKTRACK:
6813 fprintf(f, "stop-bt");
6814 break;
6815 case ENCLOSE_CONDITION:
6816 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6817 break;
6818 case ENCLOSE_ABSENT:
6819 fprintf(f, "absent");
6820 break;
6821
6822 default:
6823 break;
6824 }
6825 fprintf(f, "\n");
6826 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6827 break;
6828
6829 default:
6830 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6831 break;
6832 }
6833
6834 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6835 type != NT_ENCLOSE)
6836 fprintf(f, "\n");
6837
6838 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6839
6840 fflush(f);
6841}
6842
6843static void
6844print_tree(FILE* f, Node* node)
6845{
6846 print_indent_tree(f, node, 0);
6847}
6848# endif /* ONIG_DEBUG_PARSE_TREE */
6849#endif /* ONIG_DEBUG */
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
int len
Length of the buffer.
Definition io.h:8
VALUE type(ANYARGS)
ANYARGS-ed function type.