Ruby 3.5.0dev (2025-11-03 revision 4a3d8346a6d0e068508631541f6bc43e8b154ea1)
regcomp.c (4a3d8346a6d0e068508631541f6bc43e8b154ea1)
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(void)
37{
38 return OnigDefaultCaseFoldFlag;
39}
40
41extern int
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
114 return ONIG_INFINITE_DISTANCE;
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141// Attempt to right size allocated buffers for a regex post compile
142static void
143onig_reg_resize(regex_t *reg)
144{
145 do {
146 if (!reg->used) {
147 xfree(reg->p);
148 reg->alloc = 0;
149 reg->p = 0;
150 }
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr = xrealloc(reg->p, reg->used);
153 // Skip the right size optimization if memory allocation fails
154 if (new_ptr) {
155 reg->alloc = reg->used;
156 reg->p = new_ptr;
157 }
158 }
159 } while ((reg = reg->chain) != 0);
160}
161
162extern int
163onig_bbuf_init(BBuf* buf, OnigDistance size)
164{
165 if (size <= 0) {
166 size = 0;
167 buf->p = NULL;
168 }
169 else {
170 buf->p = (UChar* )xmalloc(size);
171 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
172 }
173
174 buf->alloc = (unsigned int )size;
175 buf->used = 0;
176 return 0;
177}
178
179
180#ifdef USE_SUBEXP_CALL
181
182static int
183unset_addr_list_init(UnsetAddrList* uslist, int size)
184{
185 UnsetAddr* p;
186
187 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
188 CHECK_NULL_RETURN_MEMERR(p);
189 uslist->num = 0;
190 uslist->alloc = size;
191 uslist->us = p;
192 return 0;
193}
194
195static void
196unset_addr_list_end(UnsetAddrList* uslist)
197{
198 xfree(uslist->us);
199}
200
201static int
202unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
203{
204 UnsetAddr* p;
205 int size;
206
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
209 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
212 uslist->us = p;
213 }
214
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
217 uslist->num++;
218 return 0;
219}
220#endif /* USE_SUBEXP_CALL */
221
222
223static int
224add_opcode(regex_t* reg, int opcode)
225{
226 BBUF_ADD1(reg, opcode);
227 return 0;
228}
229
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
231static int
232add_state_check_num(regex_t* reg, int num)
233{
234 StateCheckNumType n = (StateCheckNumType )num;
235
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
237 return 0;
238}
239#endif
240
241static int
242add_rel_addr(regex_t* reg, int addr)
243{
244 RelAddrType ra = (RelAddrType )addr;
245
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
247 return 0;
248}
249
250static int
251add_abs_addr(regex_t* reg, int addr)
252{
253 AbsAddrType ra = (AbsAddrType )addr;
254
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
256 return 0;
257}
258
259static int
260add_length(regex_t* reg, OnigDistance len)
261{
262 LengthType l = (LengthType )len;
263
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
265 return 0;
266}
267
268static int
269add_mem_num(regex_t* reg, int num)
270{
271 MemNumType n = (MemNumType )num;
272
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
274 return 0;
275}
276
277#if 0
278static int
279add_pointer(regex_t* reg, void* addr)
280{
281 PointerType ptr = (PointerType )addr;
282
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
284 return 0;
285}
286#endif
287
288static int
289add_option(regex_t* reg, OnigOptionType option)
290{
291 BBUF_ADD(reg, &option, SIZE_OPTION);
292 return 0;
293}
294
295static int
296add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
297{
298 int r;
299
300 r = add_opcode(reg, opcode);
301 if (r) return r;
302 r = add_rel_addr(reg, addr);
303 return r;
304}
305
306static int
307add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
308{
309 BBUF_ADD(reg, bytes, len);
310 return 0;
311}
312
313static int
314add_bitset(regex_t* reg, BitSetRef bs)
315{
316 BBUF_ADD(reg, bs, SIZE_BITSET);
317 return 0;
318}
319
320static int
321add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
322{
323 int r;
324
325 r = add_opcode(reg, opcode);
326 if (r) return r;
327 r = add_option(reg, option);
328 return r;
329}
330
331static int compile_length_tree(Node* node, regex_t* reg);
332static int compile_tree(Node* node, regex_t* reg);
333
334
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
338
339static int
340select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
341{
342 int op;
343 OnigDistance str_len = roomof(byte_len, mb_len);
344
345 if (ignore_case) {
346 switch (str_len) {
347 case 1: op = OP_EXACT1_IC; break;
348 default: op = OP_EXACTN_IC; break;
349 }
350 }
351 else {
352 switch (mb_len) {
353 case 1:
354 switch (str_len) {
355 case 1: op = OP_EXACT1; break;
356 case 2: op = OP_EXACT2; break;
357 case 3: op = OP_EXACT3; break;
358 case 4: op = OP_EXACT4; break;
359 case 5: op = OP_EXACT5; break;
360 default: op = OP_EXACTN; break;
361 }
362 break;
363
364 case 2:
365 switch (str_len) {
366 case 1: op = OP_EXACTMB2N1; break;
367 case 2: op = OP_EXACTMB2N2; break;
368 case 3: op = OP_EXACTMB2N3; break;
369 default: op = OP_EXACTMB2N; break;
370 }
371 break;
372
373 case 3:
374 op = OP_EXACTMB3N;
375 break;
376
377 default:
378 op = OP_EXACTMBN;
379 break;
380 }
381 }
382 return op;
383}
384
385static int
386compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
387{
388 int r;
389 int saved_num_null_check = reg->num_null_check;
390
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
393 if (r) return r;
394 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
395 if (r) return r;
396 reg->num_null_check++;
397 }
398
399 r = compile_tree(node, reg);
400 if (r) return r;
401
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
409
410 if (r) return r;
411 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
412 }
413 return r;
414}
415
416#ifdef USE_SUBEXP_CALL
417static int
418compile_call(CallNode* node, regex_t* reg)
419{
420 int r;
421
422 r = add_opcode(reg, OP_CALL);
423 if (r) return r;
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
425 node->target);
426 if (r) return r;
427 r = add_abs_addr(reg, 0 /*dummy addr.*/);
428 return r;
429}
430#endif
431
432static int
433compile_tree_n_times(Node* node, int n, regex_t* reg)
434{
435 int i, r;
436
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
439 if (r) return r;
440 }
441 return 0;
442}
443
444static int
445add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED, int ignore_case)
447{
448 int len;
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
450
451 len = SIZE_OPCODE;
452
453 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
455 len += SIZE_LENGTH;
456
457 len += (int )byte_len;
458 return len;
459}
460
461static int
462add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
463 regex_t* reg, int ignore_case)
464{
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
466 add_opcode(reg, op);
467
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
470
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
474 else
475 add_length(reg, byte_len / mb_len);
476 }
477
478 add_bytes(reg, s, byte_len);
479 return 0;
480}
481
482
483static int
484compile_length_string_node(Node* node, regex_t* reg)
485{
486 int rlen, r, len, prev_len, blen, ambig;
487 OnigEncoding enc = reg->enc;
488 UChar *p, *prev;
489 StrNode* sn;
490
491 sn = NSTR(node);
492 if (sn->end <= sn->s)
493 return 0;
494
495 ambig = NSTRING_IS_AMBIG(node);
496
497 p = prev = sn->s;
498 prev_len = enclen(enc, p, sn->end);
499 p += prev_len;
500 blen = prev_len;
501 rlen = 0;
502
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (len == prev_len || ambig) {
506 blen += len;
507 }
508 else {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
510 rlen += r;
511 prev = p;
512 blen = len;
513 prev_len = len;
514 }
515 p += len;
516 }
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
518 rlen += r;
519 return rlen;
520}
521
522static int
523compile_length_string_raw_node(StrNode* sn, regex_t* reg)
524{
525 if (sn->end <= sn->s)
526 return 0;
527
528 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
529}
530
531static int
532compile_string_node(Node* node, regex_t* reg)
533{
534 int r, len, prev_len, blen, ambig;
535 OnigEncoding enc = reg->enc;
536 UChar *p, *prev, *end;
537 StrNode* sn;
538
539 sn = NSTR(node);
540 if (sn->end <= sn->s)
541 return 0;
542
543 end = sn->end;
544 ambig = NSTRING_IS_AMBIG(node);
545
546 p = prev = sn->s;
547 prev_len = enclen(enc, p, end);
548 p += prev_len;
549 blen = prev_len;
550
551 for (; p < end; ) {
552 len = enclen(enc, p, end);
553 if (len == prev_len || ambig) {
554 blen += len;
555 }
556 else {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
558 if (r) return r;
559
560 prev = p;
561 blen = len;
562 prev_len = len;
563 }
564
565 p += len;
566 }
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
568}
569
570static int
571compile_string_raw_node(StrNode* sn, regex_t* reg)
572{
573 if (sn->end <= sn->s)
574 return 0;
575
576 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
577}
578
579static int
580add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
581{
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
585#else
586 int r, pad_size;
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
588
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
592
593 r = add_bytes(reg, mbuf->p, mbuf->used);
594
595 /* padding for return value from compile_length_cclass_node() to be fix. */
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
598 return r;
599#endif
600}
601
602static int
603compile_length_cclass_node(CClassNode* cc, regex_t* reg)
604{
605 int len;
606
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
609 }
610 else {
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
612 len = SIZE_OPCODE;
613 }
614 else {
615 len = SIZE_OPCODE + SIZE_BITSET;
616 }
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
619#else
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
621#endif
622 }
623
624 return len;
625}
626
627static int
628compile_cclass_node(CClassNode* cc, regex_t* reg)
629{
630 int r;
631
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
635 else
636 add_opcode(reg, OP_CCLASS);
637
638 r = add_bitset(reg, cc->bs);
639 }
640 else {
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
644 else
645 add_opcode(reg, OP_CCLASS_MB);
646
647 r = add_multi_byte_cclass(cc->mbuf, reg);
648 }
649 else {
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
652 else
653 add_opcode(reg, OP_CCLASS_MIX);
654
655 r = add_bitset(reg, cc->bs);
656 if (r) return r;
657 r = add_multi_byte_cclass(cc->mbuf, reg);
658 }
659 }
660
661 return r;
662}
663
664static int
665entry_repeat_range(regex_t* reg, int id, int lower, int upper)
666{
667#define REPEAT_RANGE_ALLOC 4
668
670
671 if (reg->repeat_range_alloc == 0) {
672 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
676 }
677 else if (reg->repeat_range_alloc <= id) {
678 int n;
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
680 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
681 sizeof(OnigRepeatRange) * n);
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
685 }
686 else {
687 p = reg->repeat_range;
688 }
689
690 p[id].lower = lower;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
692 return 0;
693}
694
695static int
696compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
697 regex_t* reg)
698{
699 int r;
700 int num_repeat = reg->num_repeat;
701
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
703 if (r) return r;
704 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
705 reg->num_repeat++;
706 if (r) return r;
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
708 if (r) return r;
709
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
711 if (r) return r;
712
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
714 if (r) return r;
715
716 if (
717#ifdef USE_SUBEXP_CALL
718 reg->num_call > 0 ||
719#endif
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
722 }
723 else {
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
725 }
726 if (r) return r;
727 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
728 return r;
729}
730
731static int
732is_anychar_star_quantifier(QtfrNode* qn)
733{
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
736 return 1;
737 else
738 return 0;
739}
740
741#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
742#define CKN_ON (ckn > 0)
743
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
745
746static int
747compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
748{
749 int len, mod_tlen, cklen;
750 int ckn;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
754
755 if (tlen < 0) return tlen;
756
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
758
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
760
761 /* anychar repeat */
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
766 else
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
768 }
769 }
770
771 if (empty_info != 0)
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
773 else
774 mod_tlen = tlen;
775
776 if (infinite && qn->lower <= 1) {
777 if (qn->greedy) {
778 if (qn->lower == 1)
779 len = SIZE_OP_JUMP;
780 else
781 len = 0;
782
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
784 }
785 else {
786 if (qn->lower == 0)
787 len = SIZE_OP_JUMP;
788 else
789 len = 0;
790
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
792 }
793 }
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0) /* /(?<n>..){0}/ */
796 len = SIZE_OP_JUMP + tlen;
797 else
798 len = 0;
799 }
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
802 if (CKN_ON) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
804 }
805 else {
806 len = SIZE_OP_PUSH + tlen;
807 }
808 }
809 else {
810 len = tlen;
811 }
812 }
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
815 }
816 else {
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
819 if (CKN_ON)
820 len += SIZE_OP_STATE_CHECK;
821 }
822
823 return len;
824}
825
826static int
827compile_quantifier_node(QtfrNode* qn, regex_t* reg)
828{
829 int r, mod_tlen;
830 int ckn;
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
834
835 if (tlen < 0) return tlen;
836
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
838
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
841 if (r) return r;
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
845 else
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
847 if (r) return r;
848 if (CKN_ON) {
849 r = add_state_check_num(reg, ckn);
850 if (r) return r;
851 }
852
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
854 }
855 else {
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
860 }
861 else {
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
864 : OP_ANYCHAR_STAR));
865 }
866 if (r) return r;
867 if (CKN_ON)
868 r = add_state_check_num(reg, ckn);
869
870 return r;
871 }
872 }
873
874 if (empty_info != 0)
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
876 else
877 mod_tlen = tlen;
878
879 if (infinite && qn->lower <= 1) {
880 if (qn->greedy) {
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
884 if (r) return r;
885 }
886
887 if (CKN_ON) {
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
889 if (r) return r;
890 r = add_state_check_num(reg, ckn);
891 if (r) return r;
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
893 }
894 else {
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
896 }
897 if (r) return r;
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
899 if (r) return r;
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (int )SIZE_OP_JUMP
902 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
903 }
904 else {
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
907 if (r) return r;
908 }
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
910 if (r) return r;
911 if (CKN_ON) {
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
913 if (r) return r;
914 r = add_state_check_num(reg, ckn);
915 if (r) return r;
916 r = add_rel_addr(reg,
917 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
918 }
919 else
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
921 }
922 }
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
926 if (r) return r;
927 r = compile_tree(qn->target, reg);
928 }
929 else
930 r = 0;
931 }
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
934 if (CKN_ON) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
936 if (r) return r;
937 r = add_state_check_num(reg, ckn);
938 if (r) return r;
939 r = add_rel_addr(reg, tlen);
940 }
941 else {
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
943 }
944 if (r) return r;
945 }
946
947 r = compile_tree(qn->target, reg);
948 }
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
950 if (CKN_ON) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
952 if (r) return r;
953 r = add_state_check_num(reg, ckn);
954 if (r) return r;
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
956 }
957 else {
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
959 }
960
961 if (r) return r;
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
963 if (r) return r;
964 r = compile_tree(qn->target, reg);
965 }
966 else {
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
968 if (CKN_ON) {
969 if (r) return r;
970 r = add_opcode(reg, OP_STATE_CHECK);
971 if (r) return r;
972 r = add_state_check_num(reg, ckn);
973 }
974 }
975 return r;
976}
977
978#else /* USE_COMBINATION_EXPLOSION_CHECK */
979
980static int
981compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
982{
983 int len, mod_tlen;
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
987
988 if (tlen < 0) return tlen;
989
990 /* anychar repeat */
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
995 else
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
997 }
998 }
999
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1002 else
1003 mod_tlen = tlen;
1004
1005 if (infinite &&
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1008 len = SIZE_OP_JUMP;
1009 }
1010 else {
1011 len = tlen * qn->lower;
1012 }
1013
1014 if (qn->greedy) {
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1018 else
1019#endif
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1022 else
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1024 }
1025 else
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1027 }
1028 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1029 len = SIZE_OP_JUMP + tlen;
1030 }
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1036 }
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1039 }
1040 else {
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1043 }
1044
1045 return len;
1046}
1047
1048static int
1049compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1050{
1051 int i, r, mod_tlen;
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1055
1056 if (tlen < 0) return tlen;
1057
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1060 if (r) return r;
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1064 else
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1066 if (r) return r;
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1068 }
1069 else {
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1072 else
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
1074 }
1075 }
1076
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1079 else
1080 mod_tlen = tlen;
1081
1082 if (infinite &&
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1085 if (qn->greedy) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1089 else
1090#endif
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1093 else
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1095 }
1096 else {
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1098 }
1099 if (r) return r;
1100 }
1101 else {
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
1103 if (r) return r;
1104 }
1105
1106 if (qn->greedy) {
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1111 if (r) return r;
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1114 if (r) return r;
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1117 }
1118 else
1119#endif
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1123 if (r) return r;
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1126 if (r) return r;
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1129 }
1130 else {
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1132 if (r) return r;
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1134 if (r) return r;
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1137 }
1138 }
1139 else {
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1141 if (r) return r;
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1143 if (r) return r;
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1145 }
1146 }
1147 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1149 if (r) return r;
1150 r = compile_tree(qn->target, reg);
1151 }
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1156
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
1158 if (r) return r;
1159
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1163 if (r) return r;
1164 r = compile_tree(qn->target, reg);
1165 if (r) return r;
1166 }
1167 }
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1170 if (r) return r;
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1172 if (r) return r;
1173 r = compile_tree(qn->target, reg);
1174 }
1175 else {
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1177 }
1178 return r;
1179}
1180#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1181
1182static int
1183compile_length_option_node(EncloseNode* node, regex_t* reg)
1184{
1185 int tlen;
1186 OnigOptionType prev = reg->options;
1187
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1191
1192 if (tlen < 0) return tlen;
1193
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
1197 }
1198 else
1199 return tlen;
1200}
1201
1202static int
1203compile_option_node(EncloseNode* node, regex_t* reg)
1204{
1205 int r;
1206 OnigOptionType prev = reg->options;
1207
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1210 if (r) return r;
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1212 if (r) return r;
1213 r = add_opcode(reg, OP_FAIL);
1214 if (r) return r;
1215 }
1216
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1220
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1222 if (r) return r;
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1224 }
1225 return r;
1226}
1227
1228static int
1229compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1230{
1231 int len;
1232 int tlen;
1233
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1236
1237 if (node->target) {
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0) return tlen;
1240 }
1241 else
1242 tlen = 0;
1243
1244 switch (node->type) {
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1253 else
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1256 }
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1261 }
1262 else
1263#endif
1264 {
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1267 else
1268 len = SIZE_OP_MEMORY_START;
1269
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1272 }
1273 break;
1274
1275 case ENCLOSE_STOP_BACKTRACK:
1276 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1277 /* optimization because the match cache optimization pushes an extra item to */
1278 /* the stack and it breaks the assumption for this optimization. */
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0) return tlen;
1284
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1287 }
1288 else {
1289#endif
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
1292 }
1293#endif
1294 break;
1295
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1300
1301 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1302 if (tlen < 0) return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1305 x = NCDR(x);
1306 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1307 if (tlen < 0) return tlen;
1308 len += tlen;
1309 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1310 }
1311 else {
1312 return ONIGERR_PARSER_BUG;
1313 }
1314 break;
1315
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1318 break;
1319
1320 default:
1321 return ONIGERR_TYPE_BUG;
1322 break;
1323 }
1324
1325 return len;
1326}
1327
1328static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1329
1330static int
1331compile_enclose_node(EncloseNode* node, regex_t* reg)
1332{
1333 int r, len;
1334
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1337
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1343 if (r) return r;
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (int )node->call_addr);
1347 if (r) return r;
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1353 else
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1356
1357 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1358 if (r) return r;
1359 }
1360#endif
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1363 else
1364 r = add_opcode(reg, OP_MEMORY_START);
1365 if (r) return r;
1366 r = add_mem_num(reg, node->regnum);
1367 if (r) return r;
1368 r = compile_tree(node->target, reg);
1369 if (r) return r;
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1375 else
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1378
1379 if (r) return r;
1380 r = add_mem_num(reg, node->regnum);
1381 if (r) return r;
1382 r = add_opcode(reg, OP_RETURN);
1383 }
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1387 else
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1389 if (r) return r;
1390 r = add_mem_num(reg, node->regnum);
1391 }
1392 else
1393#endif
1394 {
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1397 else
1398 r = add_opcode(reg, OP_MEMORY_END);
1399 if (r) return r;
1400 r = add_mem_num(reg, node->regnum);
1401 }
1402 break;
1403
1404 case ENCLOSE_STOP_BACKTRACK:
1405 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1406 /* optimization because the match cache optimization pushes an extra item to */
1407 /* the stack and it breaks the assumption for this optimization. */
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1412 if (r) return r;
1413
1414 len = compile_length_tree(qn->target, reg);
1415 if (len < 0) return len;
1416
1417 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1418 if (r) return r;
1419 r = compile_tree(qn->target, reg);
1420 if (r) return r;
1421 r = add_opcode(reg, OP_POP);
1422 if (r) return r;
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1425 }
1426 else {
1427#endif
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1429 if (r) return r;
1430 r = compile_tree(node->target, reg);
1431 if (r) return r;
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
1434 }
1435#endif
1436 break;
1437
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1440 if (r) return r;
1441 r = add_mem_num(reg, node->regnum);
1442 if (r) return r;
1443
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1446 int len2;
1447
1448 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1449 if (len < 0) return len;
1450 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1451 x = NCDR(x);
1452 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1453 if (len2 < 0) return len2;
1454 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1455
1456 x = node->target;
1457 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1458 if (r) return r;
1459 r = compile_tree(NCAR(x), reg); /* yes-node */
1460 if (r) return r;
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1462 if (r) return r;
1463 x = NCDR(x);
1464 r = compile_tree(NCAR(x), reg); /* no-node */
1465 }
1466 else {
1467 return ONIGERR_PARSER_BUG;
1468 }
1469 break;
1470
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1473 if (len < 0) return len;
1474
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1476 if (r) return r;
1477 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1478 if (r) return r;
1479 r = compile_tree(node->target, reg);
1480 if (r) return r;
1481 r = add_opcode(reg, OP_ABSENT_END);
1482 break;
1483
1484 default:
1485 return ONIGERR_TYPE_BUG;
1486 break;
1487 }
1488
1489 return r;
1490}
1491
1492static int
1493compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1494{
1495 int len;
1496 int tlen = 0;
1497
1498 if (node->target) {
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0) return tlen;
1501 }
1502
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1506 break;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1509 break;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1512 break;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1515 break;
1516
1517 default:
1518 len = SIZE_OPCODE;
1519 break;
1520 }
1521
1522 return len;
1523}
1524
1525static int
1526compile_anchor_node(AnchorNode* node, regex_t* reg)
1527{
1528 int r, len;
1529
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1537
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1541 break;
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1545 break;
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1550 break;
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1554 break;
1555#endif
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1557
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1560 if (r) return r;
1561 r = compile_tree(node->target, reg);
1562 if (r) return r;
1563 r = add_opcode(reg, OP_POP_POS);
1564 break;
1565
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1568 if (len < 0) return len;
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1570 if (r) return r;
1571 r = compile_tree(node->target, reg);
1572 if (r) return r;
1573 r = add_opcode(reg, OP_FAIL_POS);
1574 break;
1575
1576 case ANCHOR_LOOK_BEHIND:
1577 {
1578 int n;
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1580 if (r) return r;
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1584 }
1585 else
1586 n = node->char_len;
1587 r = add_length(reg, n);
1588 if (r) return r;
1589 r = compile_tree(node->target, reg);
1590 }
1591 break;
1592
1593 case ANCHOR_LOOK_BEHIND_NOT:
1594 {
1595 int n;
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1599 if (r) return r;
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1603 }
1604 else
1605 n = node->char_len;
1606 r = add_length(reg, n);
1607 if (r) return r;
1608 r = compile_tree(node->target, reg);
1609 if (r) return r;
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1611 }
1612 break;
1613
1614 default:
1615 return ONIGERR_TYPE_BUG;
1616 break;
1617 }
1618
1619 return r;
1620}
1621
1622static int
1623compile_length_tree(Node* node, regex_t* reg)
1624{
1625 int len, type, r;
1626
1627 type = NTYPE(node);
1628 switch (type) {
1629 case NT_LIST:
1630 len = 0;
1631 do {
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0) return r;
1634 len += r;
1635 } while (IS_NOT_NULL(node = NCDR(node)));
1636 r = len;
1637 break;
1638
1639 case NT_ALT:
1640 {
1641 int n = 0;
1642 len = 0;
1643 do {
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0) return r;
1646 len += r;
1647 n++;
1648 } while (IS_NOT_NULL(node = NCDR(node)));
1649 r = len;
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1651 }
1652 break;
1653
1654 case NT_STR:
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1657 else
1658 r = compile_length_string_node(node, reg);
1659 break;
1660
1661 case NT_CCLASS:
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
1663 break;
1664
1665 case NT_CTYPE:
1666 case NT_CANY:
1667 r = SIZE_OPCODE;
1668 break;
1669
1670 case NT_BREF:
1671 {
1672 BRefNode* br = NBREF(node);
1673
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1678 }
1679 else
1680#endif
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1684 }
1685 else {
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1687 }
1688 }
1689 break;
1690
1691#ifdef USE_SUBEXP_CALL
1692 case NT_CALL:
1693 r = SIZE_OP_CALL;
1694 break;
1695#endif
1696
1697 case NT_QTFR:
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1699 break;
1700
1701 case NT_ENCLOSE:
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1703 break;
1704
1705 case NT_ANCHOR:
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1707 break;
1708
1709 default:
1710 return ONIGERR_TYPE_BUG;
1711 break;
1712 }
1713
1714 return r;
1715}
1716
1717static int
1718compile_tree(Node* node, regex_t* reg)
1719{
1720 int n, type, len, pos, r = 0;
1721
1722 type = NTYPE(node);
1723 switch (type) {
1724 case NT_LIST:
1725 do {
1726 r = compile_tree(NCAR(node), reg);
1727 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1728 break;
1729
1730 case NT_ALT:
1731 {
1732 Node* x = node;
1733 len = 0;
1734 do {
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1738 }
1739 } while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used + len; /* goal position */
1741
1742 do {
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1746 if (r) break;
1747 }
1748 r = compile_tree(NCAR(node), reg);
1749 if (r) break;
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1753 if (r) break;
1754 }
1755 } while (IS_NOT_NULL(node = NCDR(node)));
1756 }
1757 break;
1758
1759 case NT_STR:
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1762 else
1763 r = compile_string_node(node, reg);
1764 break;
1765
1766 case NT_CCLASS:
1767 r = compile_cclass_node(NCCLASS(node), reg);
1768 break;
1769
1770 case NT_CTYPE:
1771 {
1772 int op;
1773
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1779 }
1780 else {
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1782 else op = OP_WORD;
1783 }
1784 break;
1785 default:
1786 return ONIGERR_TYPE_BUG;
1787 break;
1788 }
1789 r = add_opcode(reg, op);
1790 }
1791 break;
1792
1793 case NT_CANY:
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1796 else
1797 r = add_opcode(reg, OP_ANYCHAR);
1798 break;
1799
1800 case NT_BREF:
1801 {
1802 BRefNode* br = NBREF(node);
1803
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1807 if (r) return r;
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1809 if (r) return r;
1810 r = add_length(reg, br->nest_level);
1811 if (r) return r;
1812
1813 goto add_bacref_mems;
1814 }
1815 else
1816#endif
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1821 if (r) return r;
1822 r = add_mem_num(reg, n);
1823 }
1824 else {
1825 switch (n) {
1826 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1828 default:
1829 r = add_opcode(reg, OP_BACKREFN);
1830 if (r) return r;
1831 r = add_mem_num(reg, n);
1832 break;
1833 }
1834 }
1835 }
1836 else {
1837 int i;
1838 int* p;
1839
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1842 }
1843 else {
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1845 }
1846 if (r) return r;
1847
1848#ifdef USE_BACKREF_WITH_LEVEL
1849 add_bacref_mems:
1850#endif
1851 r = add_length(reg, br->back_num);
1852 if (r) return r;
1853 p = BACKREFS_P(br);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1856 if (r) return r;
1857 }
1858 }
1859 }
1860 break;
1861
1862#ifdef USE_SUBEXP_CALL
1863 case NT_CALL:
1864 r = compile_call(NCALL(node), reg);
1865 break;
1866#endif
1867
1868 case NT_QTFR:
1869 r = compile_quantifier_node(NQTFR(node), reg);
1870 break;
1871
1872 case NT_ENCLOSE:
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1874 break;
1875
1876 case NT_ANCHOR:
1877 r = compile_anchor_node(NANCHOR(node), reg);
1878 break;
1879
1880 default:
1881#ifdef ONIG_DEBUG
1882 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1883#endif
1884 break;
1885 }
1886
1887 return r;
1888}
1889
1890#ifdef USE_NAMED_GROUP
1891
1892static int
1893noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1894{
1895 int r = 0;
1896 Node* node = *plink;
1897
1898 switch (NTYPE(node)) {
1899 case NT_LIST:
1900 case NT_ALT:
1901 do {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1904 break;
1905
1906 case NT_QTFR:
1907 {
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1913 }
1914 }
1915 break;
1916
1917 case NT_ENCLOSE:
1918 {
1919 EncloseNode* en = NENCLOSE(node);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1922 (*counter)++;
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1925 }
1926 else if (en->regnum != 0) {
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1931 break;
1932 }
1933 }
1934 r = noname_disable_map(&(en->target), map, counter);
1935 }
1936 break;
1937
1938 case NT_ANCHOR:
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1941 break;
1942
1943 default:
1944 break;
1945 }
1946
1947 return r;
1948}
1949
1950static int
1951renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1952{
1953 int i, pos, n, old_num;
1954 int *backs;
1955 BRefNode* bn = NBREF(node);
1956
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1959
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1963 else
1964 backs = bn->back_dynamic;
1965
1966 for (i = 0, pos = 0; i < old_num; i++) {
1967 if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1969 if (n > 0) {
1970 backs[pos] = n;
1971 pos++;
1972 }
1973 }
1974
1975 bn->back_num = pos;
1976 return 0;
1977}
1978
1979static int
1980renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1981{
1982 int r = 0;
1983
1984 switch (NTYPE(node)) {
1985 case NT_LIST:
1986 case NT_ALT:
1987 do {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1990 break;
1991 case NT_QTFR:
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1993 break;
1994 case NT_ENCLOSE:
1995 {
1996 EncloseNode* en = NENCLOSE(node);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2000 }
2001 r = renumber_by_map(en->target, map, num_mem);
2002 }
2003 break;
2004
2005 case NT_BREF:
2006 r = renumber_node_backref(node, map, num_mem);
2007 break;
2008
2009 case NT_ANCHOR:
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2012 break;
2013
2014 default:
2015 break;
2016 }
2017
2018 return r;
2019}
2020
2021static int
2022numbered_ref_check(Node* node)
2023{
2024 int r = 0;
2025
2026 switch (NTYPE(node)) {
2027 case NT_LIST:
2028 case NT_ALT:
2029 do {
2030 r = numbered_ref_check(NCAR(node));
2031 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2032 break;
2033 case NT_QTFR:
2034 r = numbered_ref_check(NQTFR(node)->target);
2035 break;
2036 case NT_ENCLOSE:
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2038 break;
2039
2040 case NT_BREF:
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2043 break;
2044
2045 case NT_ANCHOR:
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2048 break;
2049
2050 default:
2051 break;
2052 }
2053
2054 return r;
2055}
2056
2057static int
2058disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2059{
2060 int r, i, pos, counter;
2061 BitStatusType loc;
2062 GroupNumRemap* map;
2063
2064 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2067 map[i].new_val = 0;
2068 }
2069 counter = 0;
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0) return r;
2072
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0) return r;
2075
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2079 pos++;
2080 }
2081 }
2082
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2088 }
2089 }
2090
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2093
2094 return onig_renumber_name_table(reg, map);
2095}
2096#endif /* USE_NAMED_GROUP */
2097
2098#ifdef USE_SUBEXP_CALL
2099static int
2100unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2101{
2102 int i, offset;
2103 EncloseNode* en;
2104 AbsAddrType addr;
2105
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2111
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2113 }
2114 return 0;
2115}
2116#endif
2117
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2119static int
2120quantifiers_memory_node_info(Node* node)
2121{
2122 int r = 0;
2123
2124 switch (NTYPE(node)) {
2125 case NT_LIST:
2126 case NT_ALT:
2127 {
2128 int v;
2129 do {
2130 v = quantifiers_memory_node_info(NCAR(node));
2131 if (v > r) r = v;
2132 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2133 }
2134 break;
2135
2136# ifdef USE_SUBEXP_CALL
2137 case NT_CALL:
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2140 }
2141 else
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2143 break;
2144# endif
2145
2146 case NT_QTFR:
2147 {
2148 QtfrNode* qn = NQTFR(node);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2151 }
2152 }
2153 break;
2154
2155 case NT_ENCLOSE:
2156 {
2157 EncloseNode* en = NENCLOSE(node);
2158 switch (en->type) {
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2161 break;
2162
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2168 break;
2169 default:
2170 break;
2171 }
2172 }
2173 break;
2174
2175 case NT_BREF:
2176 case NT_STR:
2177 case NT_CTYPE:
2178 case NT_CCLASS:
2179 case NT_CANY:
2180 case NT_ANCHOR:
2181 default:
2182 break;
2183 }
2184
2185 return r;
2186}
2187#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2188
2189static int
2190get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2191{
2192 OnigDistance tmin;
2193 int r = 0;
2194
2195 *min = 0;
2196 switch (NTYPE(node)) {
2197 case NT_BREF:
2198 {
2199 int i;
2200 int* backs;
2201 Node** nodes = SCANENV_MEM_NODES(env);
2202 BRefNode* br = NBREF(node);
2203 if (br->state & NST_RECURSION) break;
2204
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2208 if (r != 0) break;
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2212 if (r != 0) break;
2213 if (*min > tmin) *min = tmin;
2214 }
2215 }
2216 break;
2217
2218#ifdef USE_SUBEXP_CALL
2219 case NT_CALL:
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2221 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2223 *min = en->min_len;
2224 }
2225 else
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2227 break;
2228#endif
2229
2230 case NT_LIST:
2231 do {
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2235 break;
2236
2237 case NT_ALT:
2238 {
2239 Node *x, *y;
2240 y = node;
2241 do {
2242 x = NCAR(y);
2243 r = get_min_match_length(x, &tmin, env);
2244 if (r != 0) break;
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2248 }
2249 break;
2250
2251 case NT_STR:
2252 {
2253 StrNode* sn = NSTR(node);
2254 *min = sn->end - sn->s;
2255 }
2256 break;
2257
2258 case NT_CTYPE:
2259 *min = 1;
2260 break;
2261
2262 case NT_CCLASS:
2263 case NT_CANY:
2264 *min = 1;
2265 break;
2266
2267 case NT_QTFR:
2268 {
2269 QtfrNode* qn = NQTFR(node);
2270
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2273 if (r == 0)
2274 *min = distance_multiply(*min, qn->lower);
2275 }
2276 }
2277 break;
2278
2279 case NT_ENCLOSE:
2280 {
2281 EncloseNode* en = NENCLOSE(node);
2282 switch (en->type) {
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2285 *min = en->min_len;
2286 else {
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2288 *min = 0; /* recursive */
2289 else {
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2293 if (r == 0) {
2294 en->min_len = *min;
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2296 }
2297 }
2298 }
2299 break;
2300
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2305 break;
2306
2307 case ENCLOSE_ABSENT:
2308 break;
2309 }
2310 }
2311 break;
2312
2313 case NT_ANCHOR:
2314 default:
2315 break;
2316 }
2317
2318 return r;
2319}
2320
2321static int
2322get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2323{
2324 OnigDistance tmax;
2325 int r = 0;
2326
2327 *max = 0;
2328 switch (NTYPE(node)) {
2329 case NT_LIST:
2330 do {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2332 if (r == 0)
2333 *max = distance_add(*max, tmax);
2334 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2335 break;
2336
2337 case NT_ALT:
2338 do {
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2342 break;
2343
2344 case NT_STR:
2345 {
2346 StrNode* sn = NSTR(node);
2347 *max = sn->end - sn->s;
2348 }
2349 break;
2350
2351 case NT_CTYPE:
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2353 break;
2354
2355 case NT_CCLASS:
2356 case NT_CANY:
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2358 break;
2359
2360 case NT_BREF:
2361 {
2362 int i;
2363 int* backs;
2364 Node** nodes = SCANENV_MEM_NODES(env);
2365 BRefNode* br = NBREF(node);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2368 break;
2369 }
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2374 if (r != 0) break;
2375 if (*max < tmax) *max = tmax;
2376 }
2377 }
2378 break;
2379
2380#ifdef USE_SUBEXP_CALL
2381 case NT_CALL:
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2384 else
2385 *max = ONIG_INFINITE_DISTANCE;
2386 break;
2387#endif
2388
2389 case NT_QTFR:
2390 {
2391 QtfrNode* qn = NQTFR(node);
2392
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2398 else
2399 *max = ONIG_INFINITE_DISTANCE;
2400 }
2401 }
2402 }
2403 break;
2404
2405 case NT_ENCLOSE:
2406 {
2407 EncloseNode* en = NENCLOSE(node);
2408 switch (en->type) {
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2411 *max = en->max_len;
2412 else {
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2415 else {
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2419 if (r == 0) {
2420 en->max_len = *max;
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2422 }
2423 }
2424 }
2425 break;
2426
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2431 break;
2432
2433 case ENCLOSE_ABSENT:
2434 break;
2435 }
2436 }
2437 break;
2438
2439 case NT_ANCHOR:
2440 default:
2441 break;
2442 }
2443
2444 return r;
2445}
2446
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2449
2450/* fixed size pattern node only */
2451static int
2452get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2453{
2454 int tlen;
2455 int r = 0;
2456
2457 level++;
2458 *len = 0;
2459 switch (NTYPE(node)) {
2460 case NT_LIST:
2461 do {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2463 if (r == 0)
2464 *len = (int )distance_add(*len, tlen);
2465 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2466 break;
2467
2468 case NT_ALT:
2469 {
2470 int tlen2;
2471 int varlen = 0;
2472
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2476 if (r == 0) {
2477 if (tlen != tlen2)
2478 varlen = 1;
2479 }
2480 }
2481 if (r == 0) {
2482 if (varlen != 0) {
2483 if (level == 1)
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2485 else
2486 r = GET_CHAR_LEN_VARLEN;
2487 }
2488 else
2489 *len = tlen;
2490 }
2491 }
2492 break;
2493
2494 case NT_STR:
2495 {
2496 StrNode* sn = NSTR(node);
2497 UChar *s = sn->s;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2500 (*len)++;
2501 }
2502 }
2503 break;
2504
2505 case NT_QTFR:
2506 {
2507 QtfrNode* qn = NQTFR(node);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2510 if (r == 0)
2511 *len = (int )distance_multiply(tlen, qn->lower);
2512 }
2513 else
2514 r = GET_CHAR_LEN_VARLEN;
2515 }
2516 break;
2517
2518#ifdef USE_SUBEXP_CALL
2519 case NT_CALL:
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2522 else
2523 r = GET_CHAR_LEN_VARLEN;
2524 break;
2525#endif
2526
2527 case NT_CTYPE:
2528 *len = 1;
2529 break;
2530
2531 case NT_CCLASS:
2532 case NT_CANY:
2533 *len = 1;
2534 break;
2535
2536 case NT_ENCLOSE:
2537 {
2538 EncloseNode* en = NENCLOSE(node);
2539 switch (en->type) {
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *len = en->char_len;
2544 else {
2545 r = get_char_length_tree1(en->target, reg, len, level);
2546 if (r == 0) {
2547 en->char_len = *len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2549 }
2550 }
2551 break;
2552#endif
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg, len, level);
2557 break;
2558 case ENCLOSE_ABSENT:
2559 default:
2560 break;
2561 }
2562 }
2563 break;
2564
2565 case NT_ANCHOR:
2566 break;
2567
2568 default:
2569 r = GET_CHAR_LEN_VARLEN;
2570 break;
2571 }
2572
2573 return r;
2574}
2575
2576static int
2577get_char_length_tree(Node* node, regex_t* reg, int* len)
2578{
2579 return get_char_length_tree1(node, reg, len, 0);
2580}
2581
2582/* x is not included y ==> 1 : 0 */
2583static int
2584is_not_included(Node* x, Node* y, regex_t* reg)
2585{
2586 int i;
2587 OnigDistance len;
2588 OnigCodePoint code;
2589 UChar *p;
2590 int ytype;
2591
2592 retry:
2593 ytype = NTYPE(y);
2594 switch (NTYPE(x)) {
2595 case NT_CTYPE:
2596 {
2597 switch (ytype) {
2598 case NT_CTYPE:
2599 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2600 NCTYPE(y)->not != NCTYPE(x)->not &&
2601 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2602 return 1;
2603 else
2604 return 0;
2605 break;
2606
2607 case NT_CCLASS:
2608 swap:
2609 {
2610 Node* tmp;
2611 tmp = x; x = y; y = tmp;
2612 goto retry;
2613 }
2614 break;
2615
2616 case NT_STR:
2617 goto swap;
2618 break;
2619
2620 default:
2621 break;
2622 }
2623 }
2624 break;
2625
2626 case NT_CCLASS:
2627 {
2628 CClassNode* xc = NCCLASS(x);
2629 switch (ytype) {
2630 case NT_CTYPE:
2631 switch (NCTYPE(y)->ctype) {
2632 case ONIGENC_CTYPE_WORD:
2633 if (NCTYPE(y)->not == 0) {
2634 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2635 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2636 if (BITSET_AT(xc->bs, i)) {
2637 if (NCTYPE(y)->ascii_range) {
2638 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2639 }
2640 else {
2641 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2642 }
2643 }
2644 }
2645 return 1;
2646 }
2647 return 0;
2648 }
2649 else {
2650 if (IS_NOT_NULL(xc->mbuf)) return 0;
2651 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2652 int is_word;
2653 if (NCTYPE(y)->ascii_range)
2654 is_word = IS_CODE_SB_WORD(reg->enc, i);
2655 else
2656 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2657 if (! is_word) {
2658 if (!IS_NCCLASS_NOT(xc)) {
2659 if (BITSET_AT(xc->bs, i))
2660 return 0;
2661 }
2662 else {
2663 if (! BITSET_AT(xc->bs, i))
2664 return 0;
2665 }
2666 }
2667 }
2668 return 1;
2669 }
2670 break;
2671
2672 default:
2673 break;
2674 }
2675 break;
2676
2677 case NT_CCLASS:
2678 {
2679 int v;
2680 CClassNode* yc = NCCLASS(y);
2681
2682 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2683 v = BITSET_AT(xc->bs, i);
2684 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2685 (v == 0 && IS_NCCLASS_NOT(xc))) {
2686 v = BITSET_AT(yc->bs, i);
2687 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2688 (v == 0 && IS_NCCLASS_NOT(yc)))
2689 return 0;
2690 }
2691 }
2692 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2693 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2694 return 1;
2695 return 0;
2696 }
2697 break;
2698
2699 case NT_STR:
2700 goto swap;
2701 break;
2702
2703 default:
2704 break;
2705 }
2706 }
2707 break;
2708
2709 case NT_STR:
2710 {
2711 StrNode* xs = NSTR(x);
2712 if (NSTRING_LEN(x) == 0)
2713 break;
2714
2715 switch (ytype) {
2716 case NT_CTYPE:
2717 switch (NCTYPE(y)->ctype) {
2718 case ONIGENC_CTYPE_WORD:
2719 if (NCTYPE(y)->ascii_range) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2721 return NCTYPE(y)->not;
2722 else
2723 return !(NCTYPE(y)->not);
2724 }
2725 else {
2726 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2727 return NCTYPE(y)->not;
2728 else
2729 return !(NCTYPE(y)->not);
2730 }
2731 break;
2732 default:
2733 break;
2734 }
2735 break;
2736
2737 case NT_CCLASS:
2738 {
2739 CClassNode* cc = NCCLASS(y);
2740
2741 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2742 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2743 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2744 }
2745 break;
2746
2747 case NT_STR:
2748 {
2749 UChar *q;
2750 StrNode* ys = NSTR(y);
2751 len = NSTRING_LEN(x);
2752 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2753 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2754 /* tiny version */
2755 return 0;
2756 }
2757 else {
2758 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2759 if (*p != *q) return 1;
2760 }
2761 }
2762 }
2763 break;
2764
2765 default:
2766 break;
2767 }
2768 }
2769 break;
2770
2771 default:
2772 break;
2773 }
2774
2775 return 0;
2776}
2777
2778static Node*
2779get_head_value_node(Node* node, int exact, regex_t* reg)
2780{
2781 Node* n = NULL_NODE;
2782
2783 switch (NTYPE(node)) {
2784 case NT_BREF:
2785 case NT_ALT:
2786 case NT_CANY:
2787#ifdef USE_SUBEXP_CALL
2788 case NT_CALL:
2789#endif
2790 break;
2791
2792 case NT_CTYPE:
2793 case NT_CCLASS:
2794 if (exact == 0) {
2795 n = node;
2796 }
2797 break;
2798
2799 case NT_LIST:
2800 n = get_head_value_node(NCAR(node), exact, reg);
2801 break;
2802
2803 case NT_STR:
2804 {
2805 StrNode* sn = NSTR(node);
2806 if (sn->end <= sn->s)
2807 break;
2808
2809 if (exact == 0 ||
2810 NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2811 n = node;
2812 }
2813 }
2814 break;
2815
2816 case NT_QTFR:
2817 {
2818 QtfrNode* qn = NQTFR(node);
2819 if (qn->lower > 0) {
2820#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2821 if (IS_NOT_NULL(qn->head_exact))
2822 n = qn->head_exact;
2823 else
2824#endif
2825 n = get_head_value_node(qn->target, exact, reg);
2826 }
2827 }
2828 break;
2829
2830 case NT_ENCLOSE:
2831 {
2832 EncloseNode* en = NENCLOSE(node);
2833 switch (en->type) {
2834 case ENCLOSE_OPTION:
2835 {
2836 OnigOptionType options = reg->options;
2837
2838 reg->options = NENCLOSE(node)->option;
2839 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2840 reg->options = options;
2841 }
2842 break;
2843
2844 case ENCLOSE_MEMORY:
2845 case ENCLOSE_STOP_BACKTRACK:
2846 case ENCLOSE_CONDITION:
2847 n = get_head_value_node(en->target, exact, reg);
2848 break;
2849
2850 case ENCLOSE_ABSENT:
2851 break;
2852 }
2853 }
2854 break;
2855
2856 case NT_ANCHOR:
2857 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2858 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2859 break;
2860
2861 default:
2862 break;
2863 }
2864
2865 return n;
2866}
2867
2868static int
2869check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2870{
2871 int type, r = 0;
2872
2873 type = NTYPE(node);
2874 if ((NTYPE2BIT(type) & type_mask) == 0)
2875 return 1;
2876
2877 switch (type) {
2878 case NT_LIST:
2879 case NT_ALT:
2880 do {
2881 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2882 anchor_mask);
2883 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2884 break;
2885
2886 case NT_QTFR:
2887 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2888 anchor_mask);
2889 break;
2890
2891 case NT_ENCLOSE:
2892 {
2893 EncloseNode* en = NENCLOSE(node);
2894 if ((en->type & enclose_mask) == 0)
2895 return 1;
2896
2897 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2898 }
2899 break;
2900
2901 case NT_ANCHOR:
2902 type = NANCHOR(node)->type;
2903 if ((type & anchor_mask) == 0)
2904 return 1;
2905
2906 if (NANCHOR(node)->target)
2907 r = check_type_tree(NANCHOR(node)->target,
2908 type_mask, enclose_mask, anchor_mask);
2909 break;
2910
2911 default:
2912 break;
2913 }
2914 return r;
2915}
2916
2917#ifdef USE_SUBEXP_CALL
2918
2919# define RECURSION_EXIST 1
2920# define RECURSION_INFINITE 2
2921
2922static int
2923subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2924{
2925 int type;
2926 int r = 0;
2927
2928 type = NTYPE(node);
2929 switch (type) {
2930 case NT_LIST:
2931 {
2932 Node *x;
2933 OnigDistance min;
2934 int ret;
2935
2936 x = node;
2937 do {
2938 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2939 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2940 r |= ret;
2941 if (head) {
2942 ret = get_min_match_length(NCAR(x), &min, env);
2943 if (ret != 0) return ret;
2944 if (min != 0) head = 0;
2945 }
2946 } while (IS_NOT_NULL(x = NCDR(x)));
2947 }
2948 break;
2949
2950 case NT_ALT:
2951 {
2952 int ret;
2953 r = RECURSION_EXIST;
2954 do {
2955 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2956 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2957 r &= ret;
2958 } while (IS_NOT_NULL(node = NCDR(node)));
2959 }
2960 break;
2961
2962 case NT_QTFR:
2963 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2964 if (r == RECURSION_EXIST) {
2965 if (NQTFR(node)->lower == 0) r = 0;
2966 }
2967 break;
2968
2969 case NT_ANCHOR:
2970 {
2971 AnchorNode* an = NANCHOR(node);
2972 switch (an->type) {
2973 case ANCHOR_PREC_READ:
2974 case ANCHOR_PREC_READ_NOT:
2975 case ANCHOR_LOOK_BEHIND:
2976 case ANCHOR_LOOK_BEHIND_NOT:
2977 r = subexp_inf_recursive_check(an->target, env, head);
2978 break;
2979 }
2980 }
2981 break;
2982
2983 case NT_CALL:
2984 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2985 break;
2986
2987 case NT_ENCLOSE:
2988 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2989 return 0;
2990 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2991 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2992 else {
2993 SET_ENCLOSE_STATUS(node, NST_MARK2);
2994 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2995 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2996 }
2997 break;
2998
2999 default:
3000 break;
3001 }
3002
3003 return r;
3004}
3005
3006static int
3007subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
3008{
3009 int type;
3010 int r = 0;
3011
3012 type = NTYPE(node);
3013 switch (type) {
3014 case NT_LIST:
3015 case NT_ALT:
3016 do {
3017 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3018 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3019 break;
3020
3021 case NT_QTFR:
3022 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3023 break;
3024
3025 case NT_ANCHOR:
3026 {
3027 AnchorNode* an = NANCHOR(node);
3028 switch (an->type) {
3029 case ANCHOR_PREC_READ:
3030 case ANCHOR_PREC_READ_NOT:
3031 case ANCHOR_LOOK_BEHIND:
3032 case ANCHOR_LOOK_BEHIND_NOT:
3033 r = subexp_inf_recursive_check_trav(an->target, env);
3034 break;
3035 }
3036 }
3037 break;
3038
3039 case NT_ENCLOSE:
3040 {
3041 EncloseNode* en = NENCLOSE(node);
3042
3043 if (IS_ENCLOSE_RECURSION(en)) {
3044 SET_ENCLOSE_STATUS(node, NST_MARK1);
3045 r = subexp_inf_recursive_check(en->target, env, 1);
3046 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3047 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3048 }
3049 r = subexp_inf_recursive_check_trav(en->target, env);
3050 }
3051
3052 break;
3053
3054 default:
3055 break;
3056 }
3057
3058 return r;
3059}
3060
3061static int
3062subexp_recursive_check(Node* node)
3063{
3064 int r = 0;
3065
3066 switch (NTYPE(node)) {
3067 case NT_LIST:
3068 case NT_ALT:
3069 do {
3070 r |= subexp_recursive_check(NCAR(node));
3071 } while (IS_NOT_NULL(node = NCDR(node)));
3072 break;
3073
3074 case NT_QTFR:
3075 r = subexp_recursive_check(NQTFR(node)->target);
3076 break;
3077
3078 case NT_ANCHOR:
3079 {
3080 AnchorNode* an = NANCHOR(node);
3081 switch (an->type) {
3082 case ANCHOR_PREC_READ:
3083 case ANCHOR_PREC_READ_NOT:
3084 case ANCHOR_LOOK_BEHIND:
3085 case ANCHOR_LOOK_BEHIND_NOT:
3086 r = subexp_recursive_check(an->target);
3087 break;
3088 }
3089 }
3090 break;
3091
3092 case NT_CALL:
3093 r = subexp_recursive_check(NCALL(node)->target);
3094 if (r != 0) SET_CALL_RECURSION(node);
3095 break;
3096
3097 case NT_ENCLOSE:
3098 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3099 return 0;
3100 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3101 return 1; /* recursion */
3102 else {
3103 SET_ENCLOSE_STATUS(node, NST_MARK2);
3104 r = subexp_recursive_check(NENCLOSE(node)->target);
3105 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3106 }
3107 break;
3108
3109 default:
3110 break;
3111 }
3112
3113 return r;
3114}
3115
3116
3117static int
3118subexp_recursive_check_trav(Node* node, ScanEnv* env)
3119{
3120# define FOUND_CALLED_NODE 1
3121
3122 int type;
3123 int r = 0;
3124
3125 type = NTYPE(node);
3126 switch (type) {
3127 case NT_LIST:
3128 case NT_ALT:
3129 {
3130 int ret;
3131 do {
3132 ret = subexp_recursive_check_trav(NCAR(node), env);
3133 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3134 else if (ret < 0) return ret;
3135 } while (IS_NOT_NULL(node = NCDR(node)));
3136 }
3137 break;
3138
3139 case NT_QTFR:
3140 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3141 if (NQTFR(node)->upper == 0) {
3142 if (r == FOUND_CALLED_NODE)
3143 NQTFR(node)->is_referred = 1;
3144 }
3145 break;
3146
3147 case NT_ANCHOR:
3148 {
3149 AnchorNode* an = NANCHOR(node);
3150 switch (an->type) {
3151 case ANCHOR_PREC_READ:
3152 case ANCHOR_PREC_READ_NOT:
3153 case ANCHOR_LOOK_BEHIND:
3154 case ANCHOR_LOOK_BEHIND_NOT:
3155 r = subexp_recursive_check_trav(an->target, env);
3156 break;
3157 }
3158 }
3159 break;
3160
3161 case NT_ENCLOSE:
3162 {
3163 EncloseNode* en = NENCLOSE(node);
3164
3165 if (! IS_ENCLOSE_RECURSION(en)) {
3166 if (IS_ENCLOSE_CALLED(en)) {
3167 SET_ENCLOSE_STATUS(node, NST_MARK1);
3168 r = subexp_recursive_check(en->target);
3169 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3170 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3171 }
3172 }
3173 r = subexp_recursive_check_trav(en->target, env);
3174 if (IS_ENCLOSE_CALLED(en))
3175 r |= FOUND_CALLED_NODE;
3176 }
3177 break;
3178
3179 default:
3180 break;
3181 }
3182
3183 return r;
3184}
3185
3186static int
3187setup_subexp_call(Node* node, ScanEnv* env)
3188{
3189 int type;
3190 int r = 0;
3191
3192 type = NTYPE(node);
3193 switch (type) {
3194 case NT_LIST:
3195 do {
3196 r = setup_subexp_call(NCAR(node), env);
3197 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3198 break;
3199
3200 case NT_ALT:
3201 do {
3202 r = setup_subexp_call(NCAR(node), env);
3203 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3204 break;
3205
3206 case NT_QTFR:
3207 r = setup_subexp_call(NQTFR(node)->target, env);
3208 break;
3209 case NT_ENCLOSE:
3210 r = setup_subexp_call(NENCLOSE(node)->target, env);
3211 break;
3212
3213 case NT_CALL:
3214 {
3215 CallNode* cn = NCALL(node);
3216 Node** nodes = SCANENV_MEM_NODES(env);
3217
3218 if (cn->group_num != 0) {
3219 int gnum = cn->group_num;
3220
3221# ifdef USE_NAMED_GROUP
3222 if (env->num_named > 0 &&
3223 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3224 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3225 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3226 }
3227# endif
3228 if (gnum > env->num_mem) {
3229 onig_scan_env_set_error_string(env,
3230 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3231 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3232 }
3233
3234# ifdef USE_NAMED_GROUP
3235 set_call_attr:
3236# endif
3237 cn->target = nodes[cn->group_num];
3238 if (IS_NULL(cn->target)) {
3239 onig_scan_env_set_error_string(env,
3240 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3241 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3242 }
3243 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3244 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3245 cn->unset_addr_list = env->unset_addr_list;
3246 }
3247# ifdef USE_NAMED_GROUP
3248# ifdef USE_PERL_SUBEXP_CALL
3249 else if (cn->name == cn->name_end) {
3250 goto set_call_attr;
3251 }
3252# endif
3253 else {
3254 int *refs;
3255
3256 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3257 &refs);
3258 if (n <= 0) {
3259 onig_scan_env_set_error_string(env,
3260 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3261 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3262 }
3263 else if (n > 1 &&
3264 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3265 onig_scan_env_set_error_string(env,
3266 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3267 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3268 }
3269 else {
3270 cn->group_num = refs[0];
3271 goto set_call_attr;
3272 }
3273 }
3274# endif
3275 }
3276 break;
3277
3278 case NT_ANCHOR:
3279 {
3280 AnchorNode* an = NANCHOR(node);
3281
3282 switch (an->type) {
3283 case ANCHOR_PREC_READ:
3284 case ANCHOR_PREC_READ_NOT:
3285 case ANCHOR_LOOK_BEHIND:
3286 case ANCHOR_LOOK_BEHIND_NOT:
3287 r = setup_subexp_call(an->target, env);
3288 break;
3289 }
3290 }
3291 break;
3292
3293 default:
3294 break;
3295 }
3296
3297 return r;
3298}
3299#endif
3300
3301#define IN_ALT (1<<0)
3302#define IN_NOT (1<<1)
3303#define IN_REPEAT (1<<2)
3304#define IN_VAR_REPEAT (1<<3)
3305#define IN_CALL (1<<4)
3306#define IN_RECCALL (1<<5)
3307#define IN_LOOK_BEHIND (1<<6)
3308
3309/* divide different length alternatives in look-behind.
3310 (?<=A|B) ==> (?<=A)|(?<=B)
3311 (?<!A|B) ==> (?<!A)(?<!B)
3312*/
3313static int
3314divide_look_behind_alternatives(Node* node)
3315{
3316 Node *head, *np, *insert_node;
3317 AnchorNode* an = NANCHOR(node);
3318 int anc_type = an->type;
3319
3320 head = an->target;
3321 np = NCAR(head);
3322 swap_node(node, head);
3323 NCAR(node) = head;
3324 NANCHOR(head)->target = np;
3325
3326 np = node;
3327 while ((np = NCDR(np)) != NULL_NODE) {
3328 insert_node = onig_node_new_anchor(anc_type);
3329 CHECK_NULL_RETURN_MEMERR(insert_node);
3330 NANCHOR(insert_node)->target = NCAR(np);
3331 NCAR(np) = insert_node;
3332 }
3333
3334 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3335 np = node;
3336 do {
3337 SET_NTYPE(np, NT_LIST); /* alt -> list */
3338 } while ((np = NCDR(np)) != NULL_NODE);
3339 }
3340 return 0;
3341}
3342
3343static int
3344setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3345{
3346 int r, len;
3347 AnchorNode* an = NANCHOR(node);
3348
3349 r = get_char_length_tree(an->target, reg, &len);
3350 if (r == 0)
3351 an->char_len = len;
3352 else if (r == GET_CHAR_LEN_VARLEN)
3353 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3354 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3355 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3356 r = divide_look_behind_alternatives(node);
3357 else
3358 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3359 }
3360
3361 return r;
3362}
3363
3364static int
3365next_setup(Node* node, Node* next_node, regex_t* reg)
3366{
3367 int type;
3368
3369 retry:
3370 type = NTYPE(node);
3371 if (type == NT_QTFR) {
3372 QtfrNode* qn = NQTFR(node);
3373 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3374#ifdef USE_QTFR_PEEK_NEXT
3375 Node* n = get_head_value_node(next_node, 1, reg);
3376 /* '\0': for UTF-16BE etc... */
3377 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3378 qn->next_head_exact = n;
3379 }
3380#endif
3381 /* automatic possessification a*b ==> (?>a*)b */
3382 if (qn->lower <= 1) {
3383 int ttype = NTYPE(qn->target);
3384 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3385 Node *x, *y;
3386 x = get_head_value_node(qn->target, 0, reg);
3387 if (IS_NOT_NULL(x)) {
3388 y = get_head_value_node(next_node, 0, reg);
3389 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3390 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3391 CHECK_NULL_RETURN_MEMERR(en);
3392 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3393 swap_node(node, en);
3394 NENCLOSE(node)->target = en;
3395 }
3396 }
3397 }
3398 }
3399 }
3400 }
3401 else if (type == NT_ENCLOSE) {
3402 EncloseNode* en = NENCLOSE(node);
3403 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3404 node = en->target;
3405 goto retry;
3406 }
3407 }
3408 return 0;
3409}
3410
3411
3412static int
3413update_string_node_case_fold(regex_t* reg, Node *node)
3414{
3415 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3416 UChar *sbuf, *ebuf, *sp;
3417 int r, i, len;
3418 OnigDistance sbuf_size;
3419 StrNode* sn = NSTR(node);
3420
3421 end = sn->end;
3422 sbuf_size = (end - sn->s) * 2;
3423 sbuf = (UChar* )xmalloc(sbuf_size);
3424 CHECK_NULL_RETURN_MEMERR(sbuf);
3425 ebuf = sbuf + sbuf_size;
3426
3427 sp = sbuf;
3428 p = sn->s;
3429 while (p < end) {
3430 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3431 for (i = 0; i < len; i++) {
3432 if (sp >= ebuf) {
3433 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3434 if (IS_NULL(p)) {
3435 xfree(sbuf);
3436 return ONIGERR_MEMORY;
3437 }
3438 sbuf = p;
3439 sp = sbuf + sbuf_size;
3440 sbuf_size *= 2;
3441 ebuf = sbuf + sbuf_size;
3442 }
3443
3444 *sp++ = buf[i];
3445 }
3446 }
3447
3448 r = onig_node_str_set(node, sbuf, sp);
3449
3450 xfree(sbuf);
3451 return r;
3452}
3453
3454static int
3455expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3456 regex_t* reg)
3457{
3458 int r;
3459 Node *node;
3460
3461 node = onig_node_new_str(s, end);
3462 if (IS_NULL(node)) return ONIGERR_MEMORY;
3463
3464 r = update_string_node_case_fold(reg, node);
3465 if (r != 0) {
3466 onig_node_free(node);
3467 return r;
3468 }
3469
3470 NSTRING_SET_AMBIG(node);
3471 NSTRING_SET_DONT_GET_OPT_INFO(node);
3472 *rnode = node;
3473 return 0;
3474}
3475
3476static int
3477is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3478 int slen)
3479{
3480 int i;
3481
3482 for (i = 0; i < item_num; i++) {
3483 if (items[i].byte_len != slen) {
3484 return 1;
3485 }
3486 if (items[i].code_len != 1) {
3487 return 1;
3488 }
3489 }
3490 return 0;
3491}
3492
3493static int
3494expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3495 UChar *p, int slen, UChar *end,
3496 regex_t* reg, Node **rnode)
3497{
3498 int r, i, j, len, varlen;
3499 Node *anode, *var_anode, *snode, *xnode, *an;
3500 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3501
3502 *rnode = var_anode = NULL_NODE;
3503
3504 varlen = 0;
3505 for (i = 0; i < item_num; i++) {
3506 if (items[i].byte_len != slen) {
3507 varlen = 1;
3508 break;
3509 }
3510 }
3511
3512 if (varlen != 0) {
3513 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3514 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3515
3516 xnode = onig_node_new_list(NULL, NULL);
3517 if (IS_NULL(xnode)) goto mem_err;
3518 NCAR(var_anode) = xnode;
3519
3520 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3521 if (IS_NULL(anode)) goto mem_err;
3522 NCAR(xnode) = anode;
3523 }
3524 else {
3525 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3526 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3527 }
3528
3529 snode = onig_node_new_str(p, p + slen);
3530 if (IS_NULL(snode)) goto mem_err;
3531
3532 NCAR(anode) = snode;
3533
3534 for (i = 0; i < item_num; i++) {
3535 snode = onig_node_new_str(NULL, NULL);
3536 if (IS_NULL(snode)) goto mem_err;
3537
3538 for (j = 0; j < items[i].code_len; j++) {
3539 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3540 if (len < 0) {
3541 r = len;
3542 goto mem_err2;
3543 }
3544
3545 r = onig_node_str_cat(snode, buf, buf + len);
3546 if (r != 0) goto mem_err2;
3547 }
3548
3549 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3550 if (IS_NULL(an)) {
3551 goto mem_err2;
3552 }
3553
3554 if (items[i].byte_len != slen) {
3555 Node *rem;
3556 UChar *q = p + items[i].byte_len;
3557
3558 if (q < end) {
3559 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3560 if (r != 0) {
3561 onig_node_free(an);
3562 goto mem_err2;
3563 }
3564
3565 xnode = onig_node_list_add(NULL_NODE, snode);
3566 if (IS_NULL(xnode)) {
3567 onig_node_free(an);
3568 onig_node_free(rem);
3569 goto mem_err2;
3570 }
3571 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3572 onig_node_free(an);
3573 onig_node_free(xnode);
3574 onig_node_free(rem);
3575 goto mem_err;
3576 }
3577
3578 NCAR(an) = xnode;
3579 }
3580 else {
3581 NCAR(an) = snode;
3582 }
3583
3584 NCDR(var_anode) = an;
3585 var_anode = an;
3586 }
3587 else {
3588 NCAR(an) = snode;
3589 NCDR(anode) = an;
3590 anode = an;
3591 }
3592 }
3593
3594 return varlen;
3595
3596 mem_err2:
3597 onig_node_free(snode);
3598
3599 mem_err:
3600 onig_node_free(*rnode);
3601
3602 return ONIGERR_MEMORY;
3603}
3604
3605#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3606
3607static int
3608expand_case_fold_string(Node* node, regex_t* reg, int state)
3609{
3610 int r, n, len, alt_num;
3611 int varlen = 0;
3612 int is_in_look_behind;
3613 UChar *start, *end, *p;
3614 Node *top_root, *root, *snode, *prev_node;
3615 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3616 StrNode* sn;
3617
3618 if (NSTRING_IS_AMBIG(node)) return 0;
3619
3620 sn = NSTR(node);
3621
3622 start = sn->s;
3623 end = sn->end;
3624 if (start >= end) return 0;
3625
3626 is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3627
3628 r = 0;
3629 top_root = root = prev_node = snode = NULL_NODE;
3630 alt_num = 1;
3631 p = start;
3632 while (p < end) {
3633 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3634 p, end, items);
3635 if (n < 0) {
3636 r = n;
3637 goto err;
3638 }
3639
3640 len = enclen(reg->enc, p, end);
3641
3642 varlen = is_case_fold_variable_len(n, items, len);
3643 if (n == 0 || varlen == 0 || is_in_look_behind) {
3644 if (IS_NULL(snode)) {
3645 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3646 onig_node_free(top_root);
3647 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3648 if (IS_NULL(root)) {
3649 onig_node_free(prev_node);
3650 goto mem_err;
3651 }
3652 }
3653
3654 prev_node = snode = onig_node_new_str(NULL, NULL);
3655 if (IS_NULL(snode)) goto mem_err;
3656 if (IS_NOT_NULL(root)) {
3657 if (IS_NULL(onig_node_list_add(root, snode))) {
3658 onig_node_free(snode);
3659 goto mem_err;
3660 }
3661 }
3662 }
3663
3664 r = onig_node_str_cat(snode, p, p + len);
3665 if (r != 0) goto err;
3666 }
3667 else {
3668 alt_num *= (n + 1);
3669 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3670
3671 if (IS_NOT_NULL(snode)) {
3672 r = update_string_node_case_fold(reg, snode);
3673 if (r == 0) {
3674 NSTRING_SET_AMBIG(snode);
3675 }
3676 }
3677 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3678 onig_node_free(top_root);
3679 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3680 if (IS_NULL(root)) {
3681 onig_node_free(prev_node);
3682 goto mem_err;
3683 }
3684 }
3685
3686 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3687 if (r < 0) goto mem_err;
3688 if (r == 1) {
3689 if (IS_NULL(root)) {
3690 top_root = prev_node;
3691 }
3692 else {
3693 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3694 onig_node_free(prev_node);
3695 goto mem_err;
3696 }
3697 }
3698
3699 root = NCAR(prev_node);
3700 }
3701 else { /* r == 0 */
3702 if (IS_NOT_NULL(root)) {
3703 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3704 onig_node_free(prev_node);
3705 goto mem_err;
3706 }
3707 }
3708 }
3709
3710 snode = NULL_NODE;
3711 }
3712
3713 p += len;
3714 }
3715 if (IS_NOT_NULL(snode)) {
3716 r = update_string_node_case_fold(reg, snode);
3717 if (r == 0) {
3718 NSTRING_SET_AMBIG(snode);
3719 }
3720 }
3721
3722 if (p < end) {
3723 Node *srem;
3724
3725 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3726 if (r != 0) goto mem_err;
3727
3728 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3729 onig_node_free(top_root);
3730 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3731 if (IS_NULL(root)) {
3732 onig_node_free(srem);
3733 onig_node_free(prev_node);
3734 goto mem_err;
3735 }
3736 }
3737
3738 if (IS_NULL(root)) {
3739 prev_node = srem;
3740 }
3741 else {
3742 if (IS_NULL(onig_node_list_add(root, srem))) {
3743 onig_node_free(srem);
3744 goto mem_err;
3745 }
3746 }
3747 }
3748
3749 /* ending */
3750 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3751 swap_node(node, top_root);
3752 onig_node_free(top_root);
3753 return 0;
3754
3755 mem_err:
3756 r = ONIGERR_MEMORY;
3757
3758 err:
3759 onig_node_free(top_root);
3760 return r;
3761}
3762
3763
3764#ifdef USE_COMBINATION_EXPLOSION_CHECK
3765
3766# define CEC_THRES_NUM_BIG_REPEAT 512
3767# define CEC_INFINITE_NUM 0x7fffffff
3768
3769# define CEC_IN_INFINITE_REPEAT (1<<0)
3770# define CEC_IN_FINITE_REPEAT (1<<1)
3771# define CEC_CONT_BIG_REPEAT (1<<2)
3772
3773static int
3774setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3775{
3776 int type;
3777 int r = state;
3778
3779 type = NTYPE(node);
3780 switch (type) {
3781 case NT_LIST:
3782 {
3783 do {
3784 r = setup_comb_exp_check(NCAR(node), r, env);
3785 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3786 }
3787 break;
3788
3789 case NT_ALT:
3790 {
3791 int ret;
3792 do {
3793 ret = setup_comb_exp_check(NCAR(node), state, env);
3794 r |= ret;
3795 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3796 }
3797 break;
3798
3799 case NT_QTFR:
3800 {
3801 int child_state = state;
3802 int add_state = 0;
3803 QtfrNode* qn = NQTFR(node);
3804 Node* target = qn->target;
3805 int var_num;
3806
3807 if (! IS_REPEAT_INFINITE(qn->upper)) {
3808 if (qn->upper > 1) {
3809 /* {0,1}, {1,1} are allowed */
3810 child_state |= CEC_IN_FINITE_REPEAT;
3811
3812 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3813 if (env->backrefed_mem == 0) {
3814 if (NTYPE(qn->target) == NT_ENCLOSE) {
3815 EncloseNode* en = NENCLOSE(qn->target);
3816 if (en->type == ENCLOSE_MEMORY) {
3817 if (NTYPE(en->target) == NT_QTFR) {
3818 QtfrNode* q = NQTFR(en->target);
3819 if (IS_REPEAT_INFINITE(q->upper)
3820 && q->greedy == qn->greedy) {
3821 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3822 if (qn->upper == 1)
3823 child_state = state;
3824 }
3825 }
3826 }
3827 }
3828 }
3829 }
3830 }
3831
3832 if (state & CEC_IN_FINITE_REPEAT) {
3833 qn->comb_exp_check_num = -1;
3834 }
3835 else {
3836 if (IS_REPEAT_INFINITE(qn->upper)) {
3837 var_num = CEC_INFINITE_NUM;
3838 child_state |= CEC_IN_INFINITE_REPEAT;
3839 }
3840 else {
3841 var_num = qn->upper - qn->lower;
3842 }
3843
3844 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3845 add_state |= CEC_CONT_BIG_REPEAT;
3846
3847 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3848 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3849 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3850 if (qn->comb_exp_check_num == 0) {
3851 env->num_comb_exp_check++;
3852 qn->comb_exp_check_num = env->num_comb_exp_check;
3853 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3854 env->comb_exp_max_regnum = env->curr_max_regnum;
3855 }
3856 }
3857 }
3858
3859 r = setup_comb_exp_check(target, child_state, env);
3860 r |= add_state;
3861 }
3862 break;
3863
3864 case NT_ENCLOSE:
3865 {
3866 EncloseNode* en = NENCLOSE(node);
3867
3868 switch (en->type) {
3869 case ENCLOSE_MEMORY:
3870 {
3871 if (env->curr_max_regnum < en->regnum)
3872 env->curr_max_regnum = en->regnum;
3873
3874 r = setup_comb_exp_check(en->target, state, env);
3875 }
3876 break;
3877
3878 default:
3879 r = setup_comb_exp_check(en->target, state, env);
3880 break;
3881 }
3882 }
3883 break;
3884
3885# ifdef USE_SUBEXP_CALL
3886 case NT_CALL:
3887 if (IS_CALL_RECURSION(NCALL(node)))
3888 env->has_recursion = 1;
3889 else
3890 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3891 break;
3892# endif
3893
3894 default:
3895 break;
3896 }
3897
3898 return r;
3899}
3900#endif
3901
3902/* setup_tree does the following work.
3903 1. check empty loop. (set qn->target_empty_info)
3904 2. expand ignore-case in char class.
3905 3. set memory status bit flags. (reg->mem_stats)
3906 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3907 5. find invalid patterns in look-behind.
3908 6. expand repeated string.
3909 */
3910static int
3911setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3912{
3913 int type;
3914 int r = 0;
3915
3916restart:
3917 type = NTYPE(node);
3918 switch (type) {
3919 case NT_LIST:
3920 {
3921 Node* prev = NULL_NODE;
3922 do {
3923 r = setup_tree(NCAR(node), reg, state, env);
3924 if (IS_NOT_NULL(prev) && r == 0) {
3925 r = next_setup(prev, NCAR(node), reg);
3926 }
3927 prev = NCAR(node);
3928 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3929 }
3930 break;
3931
3932 case NT_ALT:
3933 do {
3934 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3935 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3936 break;
3937
3938 case NT_CCLASS:
3939 break;
3940
3941 case NT_STR:
3942 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3943 r = expand_case_fold_string(node, reg, state);
3944 }
3945 break;
3946
3947 case NT_CTYPE:
3948 case NT_CANY:
3949 break;
3950
3951#ifdef USE_SUBEXP_CALL
3952 case NT_CALL:
3953 break;
3954#endif
3955
3956 case NT_BREF:
3957 {
3958 int i;
3959 int* p;
3960 Node** nodes = SCANENV_MEM_NODES(env);
3961 BRefNode* br = NBREF(node);
3962 p = BACKREFS_P(br);
3963 for (i = 0; i < br->back_num; i++) {
3964 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3965 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3966 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3967#ifdef USE_BACKREF_WITH_LEVEL
3968 if (IS_BACKREF_NEST_LEVEL(br)) {
3969 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3970 }
3971#endif
3972 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3973 }
3974 }
3975 break;
3976
3977 case NT_QTFR:
3978 {
3979 OnigDistance d;
3980 QtfrNode* qn = NQTFR(node);
3981 Node* target = qn->target;
3982
3983 if ((state & IN_REPEAT) != 0) {
3984 qn->state |= NST_IN_REPEAT;
3985 }
3986
3987 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3988 r = get_min_match_length(target, &d, env);
3989 if (r) break;
3990 if (d == 0) {
3991 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3992#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3993 r = quantifiers_memory_node_info(target);
3994 if (r < 0) break;
3995 if (r > 0) {
3996 qn->target_empty_info = r;
3997 }
3998#endif
3999#if 0
4000 r = get_max_match_length(target, &d, env);
4001 if (r == 0 && d == 0) {
4002 /* ()* ==> ()?, ()+ ==> () */
4003 qn->upper = 1;
4004 if (qn->lower > 1) qn->lower = 1;
4005 if (NTYPE(target) == NT_STR) {
4006 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
4007 }
4008 }
4009#endif
4010 }
4011 }
4012
4013 state |= IN_REPEAT;
4014 if (qn->lower != qn->upper)
4015 state |= IN_VAR_REPEAT;
4016 r = setup_tree(target, reg, state, env);
4017 if (r) break;
4018
4019 /* expand string */
4020#define EXPAND_STRING_MAX_LENGTH 100
4021 if (NTYPE(target) == NT_STR) {
4022 if (qn->lower > 1) {
4023 int i, n = qn->lower;
4024 OnigDistance len = NSTRING_LEN(target);
4025 StrNode* sn = NSTR(target);
4026 Node* np;
4027
4028 np = onig_node_new_str(sn->s, sn->end);
4029 if (IS_NULL(np)) return ONIGERR_MEMORY;
4030 NSTR(np)->flag = sn->flag;
4031
4032 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4033 r = onig_node_str_cat(np, sn->s, sn->end);
4034 if (r) {
4035 onig_node_free(np);
4036 return r;
4037 }
4038 }
4039 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4040 Node *np1, *np2;
4041
4042 qn->lower -= i;
4043 if (! IS_REPEAT_INFINITE(qn->upper))
4044 qn->upper -= i;
4045
4046 np1 = onig_node_new_list(np, NULL);
4047 if (IS_NULL(np1)) {
4048 onig_node_free(np);
4049 return ONIGERR_MEMORY;
4050 }
4051 swap_node(np1, node);
4052 np2 = onig_node_list_add(node, np1);
4053 if (IS_NULL(np2)) {
4054 onig_node_free(np1);
4055 return ONIGERR_MEMORY;
4056 }
4057 }
4058 else {
4059 swap_node(np, node);
4060 onig_node_free(np);
4061 }
4062 break; /* break case NT_QTFR: */
4063 }
4064 }
4065
4066#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4067 if (qn->greedy && (qn->target_empty_info != 0)) {
4068 if (NTYPE(target) == NT_QTFR) {
4069 QtfrNode* tqn = NQTFR(target);
4070 if (IS_NOT_NULL(tqn->head_exact)) {
4071 qn->head_exact = tqn->head_exact;
4072 tqn->head_exact = NULL;
4073 }
4074 }
4075 else {
4076 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4077 }
4078 }
4079#endif
4080 }
4081 break;
4082
4083 case NT_ENCLOSE:
4084 {
4085 EncloseNode* en = NENCLOSE(node);
4086
4087 switch (en->type) {
4088 case ENCLOSE_OPTION:
4089 {
4090 OnigOptionType options = reg->options;
4091 reg->options = NENCLOSE(node)->option;
4092 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4093 reg->options = options;
4094 }
4095 break;
4096
4097 case ENCLOSE_MEMORY:
4098 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4099 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4100 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4101 }
4102 if (IS_ENCLOSE_CALLED(en))
4103 state |= IN_CALL;
4104 if (IS_ENCLOSE_RECURSION(en))
4105 state |= IN_RECCALL;
4106 else if ((state & IN_RECCALL) != 0)
4107 SET_CALL_RECURSION(node);
4108 r = setup_tree(en->target, reg, state, env);
4109 break;
4110
4111 case ENCLOSE_STOP_BACKTRACK:
4112 {
4113 Node* target = en->target;
4114 r = setup_tree(target, reg, state, env);
4115 if (NTYPE(target) == NT_QTFR) {
4116 QtfrNode* tqn = NQTFR(target);
4117 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4118 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4119 int qtype = NTYPE(tqn->target);
4120 if (IS_NODE_TYPE_SIMPLE(qtype))
4121 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4122 }
4123 }
4124 }
4125 break;
4126
4127 case ENCLOSE_CONDITION:
4128#ifdef USE_NAMED_GROUP
4129 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4130 env->num_named > 0 &&
4131 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4132 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4133 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4134 }
4135#endif
4136 if (NENCLOSE(node)->regnum > env->num_mem)
4137 return ONIGERR_INVALID_BACKREF;
4138 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4139 break;
4140
4141 case ENCLOSE_ABSENT:
4142 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4143 break;
4144 }
4145 }
4146 break;
4147
4148 case NT_ANCHOR:
4149 {
4150 AnchorNode* an = NANCHOR(node);
4151
4152 switch (an->type) {
4153 case ANCHOR_PREC_READ:
4154 r = setup_tree(an->target, reg, state, env);
4155 break;
4156 case ANCHOR_PREC_READ_NOT:
4157 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4158 break;
4159
4160/* allowed node types in look-behind */
4161#define ALLOWED_TYPE_IN_LB \
4162 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4163 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4164
4165#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4166#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4167
4168#define ALLOWED_ANCHOR_IN_LB \
4169( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4170 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4171 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4172 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4173#define ALLOWED_ANCHOR_IN_LB_NOT \
4174( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4175 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4176 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4177 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4178
4179 case ANCHOR_LOOK_BEHIND:
4180 {
4181 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4182 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4183 if (r < 0) return r;
4184 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4185 if (NTYPE(node) != NT_ANCHOR) goto restart;
4186 r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4187 if (r != 0) return r;
4188 r = setup_look_behind(node, reg, env);
4189 }
4190 break;
4191
4192 case ANCHOR_LOOK_BEHIND_NOT:
4193 {
4194 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4195 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4196 if (r < 0) return r;
4197 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4198 if (NTYPE(node) != NT_ANCHOR) goto restart;
4199 r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4200 env);
4201 if (r != 0) return r;
4202 r = setup_look_behind(node, reg, env);
4203 }
4204 break;
4205 }
4206 }
4207 break;
4208
4209 default:
4210 break;
4211 }
4212
4213 return r;
4214}
4215
4216/* set skip map for Sunday's quick search */
4217static int
4218set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4219 UChar skip[], int** int_skip, int ignore_case)
4220{
4221 OnigDistance i, len;
4222 int clen, flen, n, j, k;
4223 UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4224 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4225 OnigEncoding enc = reg->enc;
4226
4227 len = end - s;
4228 if (len < ONIG_CHAR_TABLE_SIZE) {
4229 if (ignore_case) {
4230 for (i = 0; i < len; i += clen) {
4231 p = s + i;
4232 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4233 p, end, items);
4234 clen = enclen(enc, p, end);
4235 if (p + clen > end)
4236 clen = (int )(end - p);
4237
4238 for (j = 0; j < n; j++) {
4239 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4240 /* Different length isn't supported. Stop optimization at here. */
4241 end = p;
4242 goto endcheck;
4243 }
4244 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4245 if (flen != clen) {
4246 /* Different length isn't supported. Stop optimization at here. */
4247 end = p;
4248 goto endcheck;
4249 }
4250 }
4251 }
4252endcheck:
4253 ;
4254 }
4255
4256 len = end - s;
4257 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4258 skip[i] = (UChar )(len + 1);
4259 n = 0;
4260 for (i = 0; i < len; i += clen) {
4261 p = s + i;
4262 if (ignore_case)
4263 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4264 p, end, items);
4265 clen = enclen(enc, p, end);
4266 if (p + clen > end)
4267 clen = (int )(end - p);
4268
4269 for (j = 0; j < clen; j++) {
4270 skip[s[i + j]] = (UChar )(len - i - j);
4271 for (k = 0; k < n; k++) {
4272 ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4273 skip[buf[j]] = (UChar )(len - i - j);
4274 }
4275 }
4276 }
4277 }
4278 else {
4279# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4280 /* This should not happen. */
4281 return ONIGERR_TYPE_BUG;
4282# else
4283 if (IS_NULL(*int_skip)) {
4284 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4285 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4286 }
4287 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4288
4289 n = 0;
4290 for (i = 0; i < len; i += clen) {
4291 p = s + i;
4292 if (ignore_case)
4293 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4294 p, end, items);
4295 clen = enclen(enc, p, end);
4296 if (p + clen > end)
4297 clen = (int )(end - p);
4298
4299 for (j = 0; j < n; j++) {
4300 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4301 return 1; /* different length isn't supported. */
4302 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4303 if (flen != clen)
4304 return 1; /* different length isn't supported. */
4305 }
4306 for (j = 0; j < clen; j++) {
4307 (*int_skip)[s[i + j]] = (int )(len - i - j);
4308 for (k = 0; k < n; k++) {
4309 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4310 }
4311 }
4312 }
4313# endif
4314 }
4315 return (int)len;
4316}
4317
4318typedef struct {
4319 OnigDistance min; /* min byte length */
4320 OnigDistance max; /* max byte length */
4321} MinMaxLen;
4322
4323typedef struct {
4324 MinMaxLen mmd;
4325 OnigEncoding enc;
4326 OnigOptionType options;
4327 OnigCaseFoldType case_fold_flag;
4328 ScanEnv* scan_env;
4329} OptEnv;
4330
4331typedef struct {
4332 int left_anchor;
4333 int right_anchor;
4334} OptAncInfo;
4335
4336typedef struct {
4337 MinMaxLen mmd; /* info position */
4338 OptAncInfo anc;
4339
4340 int reach_end;
4341 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4342 int len;
4343 UChar s[OPT_EXACT_MAXLEN];
4344} OptExactInfo;
4345
4346typedef struct {
4347 MinMaxLen mmd; /* info position */
4348 OptAncInfo anc;
4349
4350 int value; /* weighted value */
4351 UChar map[ONIG_CHAR_TABLE_SIZE];
4352} OptMapInfo;
4353
4354typedef struct {
4355 MinMaxLen len;
4356
4357 OptAncInfo anc;
4358 OptExactInfo exb; /* boundary */
4359 OptExactInfo exm; /* middle */
4360 OptExactInfo expr; /* prec read (?=...) */
4361
4362 OptMapInfo map; /* boundary */
4363} NodeOptInfo;
4364
4365
4366static int
4367map_position_value(OnigEncoding enc, int i)
4368{
4369 static const short int ByteValTable[] = {
4370 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4371 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4372 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4373 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4374 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4375 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4376 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4377 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4378 };
4379
4380 if (i < numberof(ByteValTable)) {
4381 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4382 return 20;
4383 else
4384 return (int )ByteValTable[i];
4385 }
4386 else
4387 return 4; /* Take it easy. */
4388}
4389
4390static int
4391distance_value(MinMaxLen* mm)
4392{
4393 /* 1000 / (min-max-dist + 1) */
4394 static const short int dist_vals[] = {
4395 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4396 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4397 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4398 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4399 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4400 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4401 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4402 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4403 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4404 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4405 };
4406
4407 OnigDistance d;
4408
4409 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4410
4411 d = mm->max - mm->min;
4412 if (d < numberof(dist_vals))
4413 /* return dist_vals[d] * 16 / (mm->min + 12); */
4414 return (int )dist_vals[d];
4415 else
4416 return 1;
4417}
4418
4419static int
4420comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4421{
4422 if (v2 <= 0) return -1;
4423 if (v1 <= 0) return 1;
4424
4425 v1 *= distance_value(d1);
4426 v2 *= distance_value(d2);
4427
4428 if (v2 > v1) return 1;
4429 if (v2 < v1) return -1;
4430
4431 if (d2->min < d1->min) return 1;
4432 if (d2->min > d1->min) return -1;
4433 return 0;
4434}
4435
4436static int
4437is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4438{
4439 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4440}
4441
4442
4443static void
4444set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4445{
4446 mml->min = min;
4447 mml->max = max;
4448}
4449
4450static void
4451clear_mml(MinMaxLen* mml)
4452{
4453 mml->min = mml->max = 0;
4454}
4455
4456static void
4457copy_mml(MinMaxLen* to, MinMaxLen* from)
4458{
4459 to->min = from->min;
4460 to->max = from->max;
4461}
4462
4463static void
4464add_mml(MinMaxLen* to, MinMaxLen* from)
4465{
4466 to->min = distance_add(to->min, from->min);
4467 to->max = distance_add(to->max, from->max);
4468}
4469
4470#if 0
4471static void
4472add_len_mml(MinMaxLen* to, OnigDistance len)
4473{
4474 to->min = distance_add(to->min, len);
4475 to->max = distance_add(to->max, len);
4476}
4477#endif
4478
4479static void
4480alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4481{
4482 if (to->min > from->min) to->min = from->min;
4483 if (to->max < from->max) to->max = from->max;
4484}
4485
4486static void
4487copy_opt_env(OptEnv* to, OptEnv* from)
4488{
4489 *to = *from;
4490}
4491
4492static void
4493clear_opt_anc_info(OptAncInfo* anc)
4494{
4495 anc->left_anchor = 0;
4496 anc->right_anchor = 0;
4497}
4498
4499static void
4500copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4501{
4502 *to = *from;
4503}
4504
4505static void
4506concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4507 OnigDistance left_len, OnigDistance right_len)
4508{
4509 clear_opt_anc_info(to);
4510
4511 to->left_anchor = left->left_anchor;
4512 if (left_len == 0) {
4513 to->left_anchor |= right->left_anchor;
4514 }
4515
4516 to->right_anchor = right->right_anchor;
4517 if (right_len == 0) {
4518 to->right_anchor |= left->right_anchor;
4519 }
4520 else {
4521 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4522 }
4523}
4524
4525static int
4526is_left_anchor(int anc)
4527{
4528 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4529 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4530 anc == ANCHOR_PREC_READ_NOT)
4531 return 0;
4532
4533 return 1;
4534}
4535
4536static int
4537is_set_opt_anc_info(OptAncInfo* to, int anc)
4538{
4539 if ((to->left_anchor & anc) != 0) return 1;
4540
4541 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4542}
4543
4544static void
4545add_opt_anc_info(OptAncInfo* to, int anc)
4546{
4547 if (is_left_anchor(anc))
4548 to->left_anchor |= anc;
4549 else
4550 to->right_anchor |= anc;
4551}
4552
4553static void
4554remove_opt_anc_info(OptAncInfo* to, int anc)
4555{
4556 if (is_left_anchor(anc))
4557 to->left_anchor &= ~anc;
4558 else
4559 to->right_anchor &= ~anc;
4560}
4561
4562static void
4563alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4564{
4565 to->left_anchor &= add->left_anchor;
4566 to->right_anchor &= add->right_anchor;
4567}
4568
4569static int
4570is_full_opt_exact_info(OptExactInfo* ex)
4571{
4572 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4573}
4574
4575static void
4576clear_opt_exact_info(OptExactInfo* ex)
4577{
4578 clear_mml(&ex->mmd);
4579 clear_opt_anc_info(&ex->anc);
4580 ex->reach_end = 0;
4581 ex->ignore_case = -1; /* unset */
4582 ex->len = 0;
4583 ex->s[0] = '\0';
4584}
4585
4586static void
4587copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4588{
4589 *to = *from;
4590}
4591
4592static void
4593concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4594{
4595 int i, j, len;
4596 UChar *p, *end;
4597 OptAncInfo tanc;
4598
4599 if (to->ignore_case < 0)
4600 to->ignore_case = add->ignore_case;
4601 else if (to->ignore_case != add->ignore_case)
4602 return ; /* avoid */
4603
4604 p = add->s;
4605 end = p + add->len;
4606 for (i = to->len; p < end; ) {
4607 len = enclen(enc, p, end);
4608 if (i + len > OPT_EXACT_MAXLEN) break;
4609 for (j = 0; j < len && p < end; j++)
4610 to->s[i++] = *p++;
4611 }
4612
4613 to->len = i;
4614 to->reach_end = (p == end ? add->reach_end : 0);
4615
4616 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4617 if (! to->reach_end) tanc.right_anchor = 0;
4618 copy_opt_anc_info(&to->anc, &tanc);
4619}
4620
4621static void
4622concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4623 int raw ARG_UNUSED, OnigEncoding enc)
4624{
4625 int i, j, len;
4626 UChar *p;
4627
4628 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4629 len = enclen(enc, p, end);
4630 if (i + len > OPT_EXACT_MAXLEN) break;
4631 for (j = 0; j < len && p < end; j++)
4632 to->s[i++] = *p++;
4633 }
4634
4635 to->len = i;
4636}
4637
4638static void
4639alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4640{
4641 int i, j, len;
4642
4643 if (add->len == 0 || to->len == 0) {
4644 clear_opt_exact_info(to);
4645 return ;
4646 }
4647
4648 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4649 clear_opt_exact_info(to);
4650 return ;
4651 }
4652
4653 for (i = 0; i < to->len && i < add->len; ) {
4654 if (to->s[i] != add->s[i]) break;
4655 len = enclen(env->enc, to->s + i, to->s + to->len);
4656
4657 for (j = 1; j < len; j++) {
4658 if (to->s[i+j] != add->s[i+j]) break;
4659 }
4660 if (j < len) break;
4661 i += len;
4662 }
4663
4664 if (! add->reach_end || i < add->len || i < to->len) {
4665 to->reach_end = 0;
4666 }
4667 to->len = i;
4668 if (to->ignore_case < 0)
4669 to->ignore_case = add->ignore_case;
4670 else if (add->ignore_case >= 0)
4671 to->ignore_case |= add->ignore_case;
4672
4673 alt_merge_opt_anc_info(&to->anc, &add->anc);
4674 if (! to->reach_end) to->anc.right_anchor = 0;
4675}
4676
4677static void
4678select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4679{
4680 int v1, v2;
4681
4682 v1 = now->len;
4683 v2 = alt->len;
4684
4685 if (v2 == 0) {
4686 return ;
4687 }
4688 else if (v1 == 0) {
4689 copy_opt_exact_info(now, alt);
4690 return ;
4691 }
4692 else if (v1 <= 2 && v2 <= 2) {
4693 /* ByteValTable[x] is big value --> low price */
4694 v2 = map_position_value(enc, now->s[0]);
4695 v1 = map_position_value(enc, alt->s[0]);
4696
4697 if (now->len > 1) v1 += 5;
4698 if (alt->len > 1) v2 += 5;
4699 }
4700
4701 if (now->ignore_case <= 0) v1 *= 2;
4702 if (alt->ignore_case <= 0) v2 *= 2;
4703
4704 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4705 copy_opt_exact_info(now, alt);
4706}
4707
4708static void
4709clear_opt_map_info(OptMapInfo* map)
4710{
4711 static const OptMapInfo clean_info = {
4712 {0, 0}, {0, 0}, 0,
4713 {
4714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4718 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4719 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4720 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4722 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4723 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4724 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4725 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4726 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4727 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4728 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4729 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4730 }
4731 };
4732
4733 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4734}
4735
4736static void
4737copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4738{
4739 *to = *from;
4740}
4741
4742static void
4743add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4744{
4745 if (map->map[c] == 0) {
4746 map->map[c] = 1;
4747 map->value += map_position_value(enc, c);
4748 }
4749}
4750
4751static int
4752add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4753 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4754{
4755 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4756 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4757 int i, n;
4758
4759 add_char_opt_map_info(map, p[0], enc);
4760
4761 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4762 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4763 if (n < 0) return n;
4764
4765 for (i = 0; i < n; i++) {
4766 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4767 add_char_opt_map_info(map, buf[0], enc);
4768 }
4769
4770 return 0;
4771}
4772
4773static void
4774select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4775{
4776 const int z = 1<<15; /* 32768: something big value */
4777
4778 int v1, v2;
4779
4780 if (alt->value == 0) return ;
4781 if (now->value == 0) {
4782 copy_opt_map_info(now, alt);
4783 return ;
4784 }
4785
4786 v1 = z / now->value;
4787 v2 = z / alt->value;
4788 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4789 copy_opt_map_info(now, alt);
4790}
4791
4792static int
4793comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4794{
4795#define COMP_EM_BASE 20
4796 int ve, vm;
4797
4798 if (m->value <= 0) return -1;
4799
4800 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4801 vm = COMP_EM_BASE * 5 * 2 / m->value;
4802 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4803}
4804
4805static void
4806alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4807{
4808 int i, val;
4809
4810 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4811 if (to->value == 0) return ;
4812 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4813 clear_opt_map_info(to);
4814 return ;
4815 }
4816
4817 alt_merge_mml(&to->mmd, &add->mmd);
4818
4819 val = 0;
4820 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4821 if (add->map[i])
4822 to->map[i] = 1;
4823
4824 if (to->map[i])
4825 val += map_position_value(enc, i);
4826 }
4827 to->value = val;
4828
4829 alt_merge_opt_anc_info(&to->anc, &add->anc);
4830}
4831
4832static void
4833set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4834{
4835 copy_mml(&(opt->exb.mmd), mmd);
4836 copy_mml(&(opt->expr.mmd), mmd);
4837 copy_mml(&(opt->map.mmd), mmd);
4838}
4839
4840static void
4841clear_node_opt_info(NodeOptInfo* opt)
4842{
4843 clear_mml(&opt->len);
4844 clear_opt_anc_info(&opt->anc);
4845 clear_opt_exact_info(&opt->exb);
4846 clear_opt_exact_info(&opt->exm);
4847 clear_opt_exact_info(&opt->expr);
4848 clear_opt_map_info(&opt->map);
4849}
4850
4851static void
4852copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4853{
4854 *to = *from;
4855}
4856
4857static void
4858concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4859{
4860 int exb_reach, exm_reach;
4861 OptAncInfo tanc;
4862
4863 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4864 copy_opt_anc_info(&to->anc, &tanc);
4865
4866 if (add->exb.len > 0 && to->len.max == 0) {
4867 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4868 to->len.max, add->len.max);
4869 copy_opt_anc_info(&add->exb.anc, &tanc);
4870 }
4871
4872 if (add->map.value > 0 && to->len.max == 0) {
4873 if (add->map.mmd.max == 0)
4874 add->map.anc.left_anchor |= to->anc.left_anchor;
4875 }
4876
4877 exb_reach = to->exb.reach_end;
4878 exm_reach = to->exm.reach_end;
4879
4880 if (add->len.max != 0)
4881 to->exb.reach_end = to->exm.reach_end = 0;
4882
4883 if (add->exb.len > 0) {
4884 if (exb_reach) {
4885 concat_opt_exact_info(&to->exb, &add->exb, enc);
4886 clear_opt_exact_info(&add->exb);
4887 }
4888 else if (exm_reach) {
4889 concat_opt_exact_info(&to->exm, &add->exb, enc);
4890 clear_opt_exact_info(&add->exb);
4891 }
4892 }
4893 select_opt_exact_info(enc, &to->exm, &add->exb);
4894 select_opt_exact_info(enc, &to->exm, &add->exm);
4895
4896 if (to->expr.len > 0) {
4897 if (add->len.max > 0) {
4898 if (to->expr.len > (int )add->len.max)
4899 to->expr.len = (int )add->len.max;
4900
4901 if (to->expr.mmd.max == 0)
4902 select_opt_exact_info(enc, &to->exb, &to->expr);
4903 else
4904 select_opt_exact_info(enc, &to->exm, &to->expr);
4905 }
4906 }
4907 else if (add->expr.len > 0) {
4908 copy_opt_exact_info(&to->expr, &add->expr);
4909 }
4910
4911 select_opt_map_info(&to->map, &add->map);
4912
4913 add_mml(&to->len, &add->len);
4914}
4915
4916static void
4917alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4918{
4919 alt_merge_opt_anc_info (&to->anc, &add->anc);
4920 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4921 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4922 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4923 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4924
4925 alt_merge_mml(&to->len, &add->len);
4926}
4927
4928
4929#define MAX_NODE_OPT_INFO_REF_COUNT 5
4930
4931static int
4932optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4933{
4934 int type;
4935 int r = 0;
4936
4937 clear_node_opt_info(opt);
4938 set_bound_node_opt_info(opt, &env->mmd);
4939
4940 type = NTYPE(node);
4941 switch (type) {
4942 case NT_LIST:
4943 {
4944 OptEnv nenv;
4945 NodeOptInfo nopt;
4946 Node* nd = node;
4947
4948 copy_opt_env(&nenv, env);
4949 do {
4950 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4951 if (r == 0) {
4952 add_mml(&nenv.mmd, &nopt.len);
4953 concat_left_node_opt_info(env->enc, opt, &nopt);
4954 }
4955 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4956 }
4957 break;
4958
4959 case NT_ALT:
4960 {
4961 NodeOptInfo nopt;
4962 Node* nd = node;
4963
4964 do {
4965 r = optimize_node_left(NCAR(nd), &nopt, env);
4966 if (r == 0) {
4967 if (nd == node) copy_node_opt_info(opt, &nopt);
4968 else alt_merge_node_opt_info(opt, &nopt, env);
4969 }
4970 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4971 }
4972 break;
4973
4974 case NT_STR:
4975 {
4976 StrNode* sn = NSTR(node);
4977 OnigDistance slen = sn->end - sn->s;
4978 int is_raw = NSTRING_IS_RAW(node);
4979
4980 if (! NSTRING_IS_AMBIG(node)) {
4981 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4982 is_raw, env->enc);
4983 opt->exb.ignore_case = 0;
4984 if (slen > 0) {
4985 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4986 }
4987 set_mml(&opt->len, slen, slen);
4988 }
4989 else {
4990 OnigDistance max;
4991
4992 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4993 int n = onigenc_strlen(env->enc, sn->s, sn->end);
4994 max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
4995 }
4996 else {
4997 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4998 is_raw, env->enc);
4999 opt->exb.ignore_case = 1;
5000
5001 if (slen > 0) {
5002 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5003 env->enc, env->case_fold_flag);
5004 if (r != 0) break;
5005 }
5006
5007 max = slen;
5008 }
5009
5010 set_mml(&opt->len, slen, max);
5011 }
5012
5013 if ((OnigDistance )opt->exb.len == slen)
5014 opt->exb.reach_end = 1;
5015 }
5016 break;
5017
5018 case NT_CCLASS:
5019 {
5020 int i, z;
5021 CClassNode* cc = NCCLASS(node);
5022
5023 /* no need to check ignore case. (set in setup_tree()) */
5024
5025 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5026 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5027 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5028
5029 set_mml(&opt->len, min, max);
5030 }
5031 else {
5032 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5033 z = BITSET_AT(cc->bs, i);
5034 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5035 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5036 }
5037 }
5038 set_mml(&opt->len, 1, 1);
5039 }
5040 }
5041 break;
5042
5043 case NT_CTYPE:
5044 {
5045 int i, min, max;
5046 int maxcode;
5047
5048 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5049
5050 if (max == 1) {
5051 min = 1;
5052
5053 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5054 switch (NCTYPE(node)->ctype) {
5055 case ONIGENC_CTYPE_WORD:
5056 if (NCTYPE(node)->not != 0) {
5057 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5058 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5059 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5060 }
5061 }
5062 }
5063 else {
5064 for (i = 0; i < maxcode; i++) {
5065 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5066 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5067 }
5068 }
5069 }
5070 break;
5071 }
5072 }
5073 else {
5074 min = ONIGENC_MBC_MINLEN(env->enc);
5075 }
5076 set_mml(&opt->len, min, max);
5077 }
5078 break;
5079
5080 case NT_CANY:
5081 {
5082 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5083 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5084 set_mml(&opt->len, min, max);
5085 }
5086 break;
5087
5088 case NT_ANCHOR:
5089 switch (NANCHOR(node)->type) {
5090 case ANCHOR_BEGIN_BUF:
5091 case ANCHOR_BEGIN_POSITION:
5092 case ANCHOR_BEGIN_LINE:
5093 case ANCHOR_END_BUF:
5094 case ANCHOR_SEMI_END_BUF:
5095 case ANCHOR_END_LINE:
5096 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5097 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5098 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5099 break;
5100
5101 case ANCHOR_PREC_READ:
5102 {
5103 NodeOptInfo nopt;
5104
5105 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5106 if (r == 0) {
5107 if (nopt.exb.len > 0)
5108 copy_opt_exact_info(&opt->expr, &nopt.exb);
5109 else if (nopt.exm.len > 0)
5110 copy_opt_exact_info(&opt->expr, &nopt.exm);
5111
5112 opt->expr.reach_end = 0;
5113
5114 if (nopt.map.value > 0)
5115 copy_opt_map_info(&opt->map, &nopt.map);
5116 }
5117 }
5118 break;
5119
5120 case ANCHOR_LOOK_BEHIND_NOT:
5121 break;
5122 }
5123 break;
5124
5125 case NT_BREF:
5126 {
5127 int i;
5128 int* backs;
5129 OnigDistance min, max, tmin, tmax;
5130 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5131 BRefNode* br = NBREF(node);
5132
5133 if (br->state & NST_RECURSION) {
5134 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5135 break;
5136 }
5137 backs = BACKREFS_P(br);
5138 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5139 if (r != 0) break;
5140 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5141 if (r != 0) break;
5142 for (i = 1; i < br->back_num; i++) {
5143 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5144 if (r != 0) break;
5145 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5146 if (r != 0) break;
5147 if (min > tmin) min = tmin;
5148 if (max < tmax) max = tmax;
5149 }
5150 if (r == 0) set_mml(&opt->len, min, max);
5151 }
5152 break;
5153
5154#ifdef USE_SUBEXP_CALL
5155 case NT_CALL:
5156 if (IS_CALL_RECURSION(NCALL(node)))
5157 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5158 else {
5159 OnigOptionType save = env->options;
5160 env->options = NENCLOSE(NCALL(node)->target)->option;
5161 r = optimize_node_left(NCALL(node)->target, opt, env);
5162 env->options = save;
5163 }
5164 break;
5165#endif
5166
5167 case NT_QTFR:
5168 {
5169 int i;
5170 OnigDistance min, max;
5171 NodeOptInfo nopt;
5172 QtfrNode* qn = NQTFR(node);
5173
5174 r = optimize_node_left(qn->target, &nopt, env);
5175 if (r) break;
5176
5177 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5178 if (env->mmd.max == 0 &&
5179 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5180 if (IS_MULTILINE(env->options))
5181 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5182 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5183 else
5184 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5185 }
5186 }
5187 else {
5188 if (qn->lower > 0) {
5189 copy_node_opt_info(opt, &nopt);
5190 if (nopt.exb.len > 0) {
5191 if (nopt.exb.reach_end) {
5192 for (i = 2; i <= qn->lower &&
5193 ! is_full_opt_exact_info(&opt->exb); i++) {
5194 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5195 }
5196 if (i < qn->lower) {
5197 opt->exb.reach_end = 0;
5198 }
5199 }
5200 }
5201
5202 if (qn->lower != qn->upper) {
5203 opt->exb.reach_end = 0;
5204 opt->exm.reach_end = 0;
5205 }
5206 if (qn->lower > 1)
5207 opt->exm.reach_end = 0;
5208 }
5209 }
5210
5211 min = distance_multiply(nopt.len.min, qn->lower);
5212 if (IS_REPEAT_INFINITE(qn->upper))
5213 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5214 else
5215 max = distance_multiply(nopt.len.max, qn->upper);
5216
5217 set_mml(&opt->len, min, max);
5218 }
5219 break;
5220
5221 case NT_ENCLOSE:
5222 {
5223 EncloseNode* en = NENCLOSE(node);
5224
5225 switch (en->type) {
5226 case ENCLOSE_OPTION:
5227 {
5228 OnigOptionType save = env->options;
5229
5230 env->options = en->option;
5231 r = optimize_node_left(en->target, opt, env);
5232 env->options = save;
5233 }
5234 break;
5235
5236 case ENCLOSE_MEMORY:
5237#ifdef USE_SUBEXP_CALL
5238 en->opt_count++;
5239 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5240 OnigDistance min, max;
5241
5242 min = 0;
5243 max = ONIG_INFINITE_DISTANCE;
5244 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5245 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5246 set_mml(&opt->len, min, max);
5247 }
5248 else
5249#endif
5250 {
5251 r = optimize_node_left(en->target, opt, env);
5252
5253 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5254 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5255 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5256 }
5257 }
5258 break;
5259
5260 case ENCLOSE_STOP_BACKTRACK:
5261 case ENCLOSE_CONDITION:
5262 r = optimize_node_left(en->target, opt, env);
5263 break;
5264
5265 case ENCLOSE_ABSENT:
5266 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5267 break;
5268 }
5269 }
5270 break;
5271
5272 default:
5273#ifdef ONIG_DEBUG
5274 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5275 NTYPE(node));
5276#endif
5277 r = ONIGERR_TYPE_BUG;
5278 break;
5279 }
5280
5281 return r;
5282}
5283
5284static int
5285set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5286{
5287 int allow_reverse;
5288
5289 if (e->len == 0) return 0;
5290
5291 reg->exact = (UChar* )xmalloc(e->len);
5292 CHECK_NULL_RETURN_MEMERR(reg->exact);
5293 xmemcpy(reg->exact, e->s, e->len);
5294 reg->exact_end = reg->exact + e->len;
5295
5296 allow_reverse =
5297 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5298
5299 if (e->ignore_case > 0) {
5300 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5301 e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5302 reg->map, &(reg->int_map), 1);
5303 reg->exact_end = reg->exact + e->len;
5304 if (e->len >= 3) {
5305 reg->optimize = (allow_reverse != 0
5306 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5307 }
5308 else if (e->len > 0) {
5309 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5310 }
5311 else
5312 return 0;
5313 }
5314 else {
5315 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5316 }
5317 }
5318 else {
5319 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5320 set_bm_skip(reg->exact, reg->exact_end, reg,
5321 reg->map, &(reg->int_map), 0);
5322 reg->optimize = (allow_reverse != 0
5323 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5324 }
5325 else {
5326 reg->optimize = ONIG_OPTIMIZE_EXACT;
5327 }
5328 }
5329
5330 reg->dmin = e->mmd.min;
5331 reg->dmax = e->mmd.max;
5332
5333 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5334 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5335 }
5336
5337 return 0;
5338}
5339
5340static void
5341set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5342{
5343 int i;
5344
5345 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5346 reg->map[i] = m->map[i];
5347
5348 reg->optimize = ONIG_OPTIMIZE_MAP;
5349 reg->dmin = m->mmd.min;
5350 reg->dmax = m->mmd.max;
5351
5352 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5353 reg->threshold_len = (int )(reg->dmin + 1);
5354 }
5355}
5356
5357static void
5358set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5359{
5360 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5361 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5362}
5363
5364#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5365static void print_optimize_info(FILE* f, regex_t* reg);
5366#endif
5367
5368static int
5369set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5370{
5371
5372 int r;
5373 NodeOptInfo opt;
5374 OptEnv env;
5375
5376 env.enc = reg->enc;
5377 env.options = reg->options;
5378 env.case_fold_flag = reg->case_fold_flag;
5379 env.scan_env = scan_env;
5380 clear_mml(&env.mmd);
5381
5382 r = optimize_node_left(node, &opt, &env);
5383 if (r) return r;
5384
5385 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5386 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5387 ANCHOR_LOOK_BEHIND);
5388
5389 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5390 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5391
5392 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5393 ANCHOR_PREC_READ_NOT);
5394
5395 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5396 reg->anchor_dmin = opt.len.min;
5397 reg->anchor_dmax = opt.len.max;
5398 }
5399
5400 if (opt.exb.len > 0 || opt.exm.len > 0) {
5401 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5402 if (opt.map.value > 0 &&
5403 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5404 goto set_map;
5405 }
5406 else {
5407 r = set_optimize_exact_info(reg, &opt.exb);
5408 set_sub_anchor(reg, &opt.exb.anc);
5409 }
5410 }
5411 else if (opt.map.value > 0) {
5412 set_map:
5413 set_optimize_map_info(reg, &opt.map);
5414 set_sub_anchor(reg, &opt.map.anc);
5415 }
5416 else {
5417 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5418 if (opt.len.max == 0)
5419 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5420 }
5421
5422#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5423 print_optimize_info(stderr, reg);
5424#endif
5425 return r;
5426}
5427
5428static void
5429clear_optimize_info(regex_t* reg)
5430{
5431 reg->optimize = ONIG_OPTIMIZE_NONE;
5432 reg->anchor = 0;
5433 reg->anchor_dmin = 0;
5434 reg->anchor_dmax = 0;
5435 reg->sub_anchor = 0;
5436 reg->exact_end = (UChar* )NULL;
5437 reg->threshold_len = 0;
5438 xfree(reg->exact);
5439 reg->exact = (UChar* )NULL;
5440}
5441
5442#ifdef ONIG_DEBUG
5443
5444static void print_enc_string(FILE* fp, OnigEncoding enc,
5445 const UChar *s, const UChar *end)
5446{
5447 fprintf(fp, "\nPATTERN: /");
5448
5449 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5450 const UChar *p;
5451 OnigCodePoint code;
5452
5453 p = s;
5454 while (p < end) {
5455 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5456 if (code >= 0x80) {
5457 fprintf(fp, " 0x%04x ", (int )code);
5458 }
5459 else {
5460 fputc((int )code, fp);
5461 }
5462
5463 p += enclen(enc, p, end);
5464 }
5465 }
5466 else {
5467 while (s < end) {
5468 fputc((int )*s, fp);
5469 s++;
5470 }
5471 }
5472
5473 fprintf(fp, "/ (%s)\n", enc->name);
5474}
5475#endif /* ONIG_DEBUG */
5476
5477#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5478static void
5479print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5480{
5481 if (a == ONIG_INFINITE_DISTANCE)
5482 fputs("inf", f);
5483 else
5484 fprintf(f, "(%"PRIuPTR")", a);
5485
5486 fputs("-", f);
5487
5488 if (b == ONIG_INFINITE_DISTANCE)
5489 fputs("inf", f);
5490 else
5491 fprintf(f, "(%"PRIuPTR")", b);
5492}
5493
5494static void
5495print_anchor(FILE* f, int anchor)
5496{
5497 int q = 0;
5498
5499 fprintf(f, "[");
5500
5501 if (anchor & ANCHOR_BEGIN_BUF) {
5502 fprintf(f, "begin-buf");
5503 q = 1;
5504 }
5505 if (anchor & ANCHOR_BEGIN_LINE) {
5506 if (q) fprintf(f, ", ");
5507 q = 1;
5508 fprintf(f, "begin-line");
5509 }
5510 if (anchor & ANCHOR_BEGIN_POSITION) {
5511 if (q) fprintf(f, ", ");
5512 q = 1;
5513 fprintf(f, "begin-pos");
5514 }
5515 if (anchor & ANCHOR_END_BUF) {
5516 if (q) fprintf(f, ", ");
5517 q = 1;
5518 fprintf(f, "end-buf");
5519 }
5520 if (anchor & ANCHOR_SEMI_END_BUF) {
5521 if (q) fprintf(f, ", ");
5522 q = 1;
5523 fprintf(f, "semi-end-buf");
5524 }
5525 if (anchor & ANCHOR_END_LINE) {
5526 if (q) fprintf(f, ", ");
5527 q = 1;
5528 fprintf(f, "end-line");
5529 }
5530 if (anchor & ANCHOR_ANYCHAR_STAR) {
5531 if (q) fprintf(f, ", ");
5532 q = 1;
5533 fprintf(f, "anychar-star");
5534 }
5535 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5536 if (q) fprintf(f, ", ");
5537 fprintf(f, "anychar-star-ml");
5538 }
5539
5540 fprintf(f, "]");
5541}
5542
5543static void
5544print_optimize_info(FILE* f, regex_t* reg)
5545{
5546 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5547 "EXACT_IC", "MAP",
5548 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5549
5550 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5551 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5552 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5553 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5554 fprintf(f, "\n");
5555
5556 if (reg->optimize) {
5557 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5558 fprintf(f, "\n");
5559 }
5560 fprintf(f, "\n");
5561
5562 if (reg->exact) {
5563 UChar *p;
5564 fprintf(f, "exact: [");
5565 for (p = reg->exact; p < reg->exact_end; p++) {
5566 fputc(*p, f);
5567 }
5568 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5569 }
5570 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5571 int c, i, n = 0;
5572
5573 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5574 if (reg->map[i]) n++;
5575
5576 fprintf(f, "map: n=%d\n", n);
5577 if (n > 0) {
5578 c = 0;
5579 fputc('[', f);
5580 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5581 if (reg->map[i] != 0) {
5582 if (c > 0) fputs(", ", f);
5583 c++;
5584 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5585 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5586 fputc(i, f);
5587 else
5588 fprintf(f, "%d", i);
5589 }
5590 }
5591 fprintf(f, "]\n");
5592 }
5593 }
5594}
5595#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5596
5597
5598extern void
5599onig_free_body(regex_t* reg)
5600{
5601 if (IS_NOT_NULL(reg)) {
5602 xfree(reg->p);
5603 xfree(reg->exact);
5604 xfree(reg->int_map);
5605 xfree(reg->int_map_backward);
5606 xfree(reg->repeat_range);
5607 onig_free(reg->chain);
5608
5609#ifdef USE_NAMED_GROUP
5610 onig_names_free(reg);
5611#endif
5612 }
5613}
5614
5615extern void
5616onig_free(regex_t* reg)
5617{
5618 if (IS_NOT_NULL(reg)) {
5619 onig_free_body(reg);
5620 xfree(reg);
5621 }
5622}
5623
5624static void*
5625dup_copy(const void *ptr, size_t size)
5626{
5627 void *newptr = xmalloc(size);
5628 if (IS_NOT_NULL(newptr)) {
5629 memcpy(newptr, ptr, size);
5630 }
5631 return newptr;
5632}
5633
5634extern int
5635onig_reg_copy(regex_t** nreg, regex_t* oreg)
5636{
5637 if (IS_NOT_NULL(oreg)) {
5638 regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
5639 if (IS_NULL(reg)) return ONIGERR_MEMORY;
5640
5641 *reg = *oreg;
5642
5643# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5644
5645 if (IS_NOT_NULL(reg->exact)) {
5646 size_t exact_size = reg->exact_end - reg->exact;
5647 if (COPY_FAILED(exact, exact_size))
5648 goto err;
5649 (reg)->exact_end = (reg)->exact + exact_size;
5650 }
5651
5652 if (IS_NOT_NULL(reg->int_map)) {
5653 if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5654 goto err_int_map;
5655 }
5656 if (IS_NOT_NULL(reg->int_map_backward)) {
5657 if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5658 goto err_int_map_backward;
5659 }
5660 if (IS_NOT_NULL(reg->p)) {
5661 if (COPY_FAILED(p, reg->alloc))
5662 goto err_p;
5663 }
5664 if (IS_NOT_NULL(reg->repeat_range)) {
5665 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
5666 goto err_repeat_range;
5667 }
5668 if (IS_NOT_NULL(reg->name_table)) {
5669 if (onig_names_copy(reg, oreg))
5670 goto err_name_table;
5671 }
5672 if (IS_NOT_NULL(reg->chain)) {
5673 if (onig_reg_copy(&reg->chain, reg->chain))
5674 goto err_chain;
5675 }
5676 return 0;
5677# undef COPY_FAILED
5678
5679 err_chain:
5680 onig_names_free(reg);
5681 err_name_table:
5682 xfree(reg->repeat_range);
5683 err_repeat_range:
5684 xfree(reg->p);
5685 err_p:
5686 xfree(reg->int_map_backward);
5687 err_int_map_backward:
5688 xfree(reg->int_map);
5689 err_int_map:
5690 xfree(reg->exact);
5691 err:
5692 xfree(reg);
5693 return ONIGERR_MEMORY;
5694 }
5695 return 0;
5696}
5697
5698#ifdef RUBY
5699size_t
5700onig_memsize(const regex_t *reg)
5701{
5702 size_t size = sizeof(regex_t);
5703 if (IS_NULL(reg)) return 0;
5704 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5705 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5706 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5707 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5708 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5709 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5710
5711 return size;
5712}
5713
5714size_t
5715onig_region_memsize(const OnigRegion *regs)
5716{
5717 size_t size = sizeof(*regs);
5718 if (IS_NULL(regs)) return 0;
5719 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5720 return size;
5721}
5722#endif
5723
5724#define REGEX_TRANSFER(to,from) do {\
5725 onig_free_body(to);\
5726 xmemcpy(to, from, sizeof(regex_t));\
5727 xfree(from);\
5728} while (0)
5729
5730#if 0
5731extern void
5732onig_transfer(regex_t* to, regex_t* from)
5733{
5734 REGEX_TRANSFER(to, from);
5735}
5736#endif
5737
5738#ifdef ONIG_DEBUG_COMPILE
5739static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5740#endif
5741#ifdef ONIG_DEBUG_PARSE_TREE
5742static void print_tree(FILE* f, Node* node);
5743#endif
5744
5745#ifdef RUBY
5746extern int
5747onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5748 OnigErrorInfo* einfo)
5749{
5750 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5751}
5752#endif
5753
5754#ifdef RUBY
5755extern int
5756onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5757 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5758#else
5759extern int
5760onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5761 OnigErrorInfo* einfo)
5762#endif
5763{
5764#define COMPILE_INIT_SIZE 20
5765
5766 int r;
5767 OnigDistance init_size;
5768 Node* root;
5769 ScanEnv scan_env = {0};
5770#ifdef USE_SUBEXP_CALL
5771 UnsetAddrList uslist;
5772#endif
5773
5774 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5775
5776#ifdef RUBY
5777 scan_env.sourcefile = sourcefile;
5778 scan_env.sourceline = sourceline;
5779#endif
5780
5781#ifdef ONIG_DEBUG
5782 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5783#endif
5784
5785 if (reg->alloc == 0) {
5786 init_size = (pattern_end - pattern) * 2;
5787 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5788 r = BBUF_INIT(reg, init_size);
5789 if (r != 0) goto end;
5790 }
5791 else
5792 reg->used = 0;
5793
5794 reg->num_mem = 0;
5795 reg->num_repeat = 0;
5796 reg->num_null_check = 0;
5797 reg->repeat_range_alloc = 0;
5798 reg->repeat_range = (OnigRepeatRange* )NULL;
5799#ifdef USE_COMBINATION_EXPLOSION_CHECK
5800 reg->num_comb_exp_check = 0;
5801#endif
5802
5803 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5804 if (r != 0) goto err;
5805
5806#ifdef ONIG_DEBUG_PARSE_TREE
5807# if 0
5808 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5809 print_tree(stderr, root);
5810# endif
5811#endif
5812
5813#ifdef USE_NAMED_GROUP
5814 /* mixed use named group and no-named group */
5815 if (scan_env.num_named > 0 &&
5816 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5817 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5818 if (scan_env.num_named != scan_env.num_mem)
5819 r = disable_noname_group_capture(&root, reg, &scan_env);
5820 else
5821 r = numbered_ref_check(root);
5822
5823 if (r != 0) goto err;
5824 }
5825#endif
5826
5827#ifdef USE_SUBEXP_CALL
5828 if (scan_env.num_call > 0) {
5829 r = unset_addr_list_init(&uslist, scan_env.num_call);
5830 if (r != 0) goto err;
5831 scan_env.unset_addr_list = &uslist;
5832 r = setup_subexp_call(root, &scan_env);
5833 if (r != 0) goto err_unset;
5834 r = subexp_recursive_check_trav(root, &scan_env);
5835 if (r < 0) goto err_unset;
5836 r = subexp_inf_recursive_check_trav(root, &scan_env);
5837 if (r != 0) goto err_unset;
5838
5839 reg->num_call = scan_env.num_call;
5840 }
5841 else
5842 reg->num_call = 0;
5843#endif
5844
5845 r = setup_tree(root, reg, 0, &scan_env);
5846 if (r != 0) goto err_unset;
5847
5848#ifdef ONIG_DEBUG_PARSE_TREE
5849 print_tree(stderr, root);
5850#endif
5851
5852 reg->capture_history = scan_env.capture_history;
5853 reg->bt_mem_start = scan_env.bt_mem_start;
5854 reg->bt_mem_start |= reg->capture_history;
5855 if (IS_FIND_CONDITION(reg->options))
5856 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5857 else {
5858 reg->bt_mem_end = scan_env.bt_mem_end;
5859 reg->bt_mem_end |= reg->capture_history;
5860 }
5861
5862#ifdef USE_COMBINATION_EXPLOSION_CHECK
5863 if (scan_env.backrefed_mem == 0
5864# ifdef USE_SUBEXP_CALL
5865 || scan_env.num_call == 0
5866# endif
5867 ) {
5868 setup_comb_exp_check(root, 0, &scan_env);
5869# ifdef USE_SUBEXP_CALL
5870 if (scan_env.has_recursion != 0) {
5871 scan_env.num_comb_exp_check = 0;
5872 }
5873 else
5874# endif
5875 if (scan_env.comb_exp_max_regnum > 0) {
5876 int i;
5877 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5878 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5879 scan_env.num_comb_exp_check = 0;
5880 break;
5881 }
5882 }
5883 }
5884 }
5885
5886 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5887#endif
5888
5889 clear_optimize_info(reg);
5890#ifndef ONIG_DONT_OPTIMIZE
5891 r = set_optimize_info_from_tree(root, reg, &scan_env);
5892 if (r != 0) goto err_unset;
5893#endif
5894
5895 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5896 xfree(scan_env.mem_nodes_dynamic);
5897 scan_env.mem_nodes_dynamic = (Node** )NULL;
5898 }
5899
5900 r = compile_tree(root, reg);
5901 if (r == 0) {
5902 r = add_opcode(reg, OP_END);
5903#ifdef USE_SUBEXP_CALL
5904 if (scan_env.num_call > 0) {
5905 r = unset_addr_list_fix(&uslist, reg);
5906 unset_addr_list_end(&uslist);
5907 if (r) goto err;
5908 }
5909#endif
5910
5911 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5912 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5913 else {
5914 if (reg->bt_mem_start != 0)
5915 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5916 else
5917 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5918 }
5919 }
5920#ifdef USE_SUBEXP_CALL
5921 else if (scan_env.num_call > 0) {
5922 unset_addr_list_end(&uslist);
5923 }
5924#endif
5925 onig_node_free(root);
5926
5927#ifdef ONIG_DEBUG_COMPILE
5928# ifdef USE_NAMED_GROUP
5929 onig_print_names(stderr, reg);
5930# endif
5931 print_compiled_byte_code_list(stderr, reg);
5932#endif
5933
5934 end:
5935 onig_reg_resize(reg);
5936 return r;
5937
5938 err_unset:
5939#ifdef USE_SUBEXP_CALL
5940 if (scan_env.num_call > 0) {
5941 unset_addr_list_end(&uslist);
5942 }
5943#endif
5944 err:
5945 if (IS_NOT_NULL(scan_env.error)) {
5946 if (IS_NOT_NULL(einfo)) {
5947 einfo->enc = scan_env.enc;
5948 einfo->par = scan_env.error;
5949 einfo->par_end = scan_env.error_end;
5950 }
5951 }
5952
5953 onig_node_free(root);
5954 xfree(scan_env.mem_nodes_dynamic);
5955
5956 return r;
5957}
5958
5959static int onig_inited = 0;
5960
5961extern int
5962onig_reg_init(regex_t* reg, OnigOptionType option,
5963 OnigCaseFoldType case_fold_flag,
5964 OnigEncoding enc, const OnigSyntaxType* syntax)
5965{
5966 if (! onig_inited)
5967 onig_init();
5968
5969 if (IS_NULL(reg))
5970 return ONIGERR_INVALID_ARGUMENT;
5971
5972 if (ONIGENC_IS_UNDEF(enc))
5973 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5974
5975 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5976 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5977 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5978 }
5979
5980 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5981 option |= syntax->options;
5982 option &= ~ONIG_OPTION_SINGLELINE;
5983 }
5984 else
5985 option |= syntax->options;
5986
5987 (reg)->enc = enc;
5988 (reg)->options = option;
5989 (reg)->syntax = syntax;
5990 (reg)->optimize = 0;
5991 (reg)->exact = (UChar* )NULL;
5992 (reg)->int_map = (int* )NULL;
5993 (reg)->int_map_backward = (int* )NULL;
5994 (reg)->chain = (regex_t* )NULL;
5995
5996 (reg)->p = (UChar* )NULL;
5997 (reg)->alloc = 0;
5998 (reg)->used = 0;
5999 (reg)->name_table = (void* )NULL;
6000
6001 (reg)->case_fold_flag = case_fold_flag;
6002
6003 (reg)->timelimit = 0;
6004
6005 return 0;
6006}
6007
6008extern int
6009onig_new_without_alloc(regex_t* reg, const UChar* pattern,
6010 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
6011 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6012{
6013 int r;
6014
6015 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6016 if (r) return r;
6017
6018 r = onig_compile(reg, pattern, pattern_end, einfo);
6019 return r;
6020}
6021
6022extern int
6023onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6024 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
6025 OnigErrorInfo* einfo)
6026{
6027 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6028 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6029
6030 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
6031 if (r) {
6032 onig_free(*reg);
6033 *reg = NULL;
6034 }
6035
6036 return r;
6037}
6038
6039extern int
6040onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
6041{
6042 return onig_init();
6043}
6044
6045extern int
6046onig_init(void)
6047{
6048 if (onig_inited != 0)
6049 return 0;
6050
6051 onig_inited = 1;
6052
6053#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6054 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6055#endif
6056
6057 onigenc_init();
6058 /* onigenc_set_default_caseconv_table((UChar* )0); */
6059
6060#ifdef ONIG_DEBUG_STATISTICS
6061 onig_statistics_init();
6062#endif
6063
6064 return 0;
6065}
6066
6067
6068static OnigEndCallListItemType* EndCallTop;
6069
6070extern void onig_add_end_call(void (*func)(void))
6071{
6073
6074 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6075 if (item == 0) return ;
6076
6077 item->next = EndCallTop;
6078 item->func = func;
6079
6080 EndCallTop = item;
6081}
6082
6083static void
6084exec_end_call_list(void)
6085{
6087 void (*func)(void);
6088
6089 while (EndCallTop != 0) {
6090 func = EndCallTop->func;
6091 (*func)();
6092
6093 prev = EndCallTop;
6094 EndCallTop = EndCallTop->next;
6095 xfree(prev);
6096 }
6097}
6098
6099extern int
6100onig_end(void)
6101{
6102 exec_end_call_list();
6103
6104#ifdef ONIG_DEBUG_STATISTICS
6105 onig_print_statistics(stderr);
6106#endif
6107
6108#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6109 _CrtDumpMemoryLeaks();
6110#endif
6111
6112 onig_inited = 0;
6113
6114 return 0;
6115}
6116
6117extern int
6118onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6119{
6120 OnigCodePoint n, *data;
6121 OnigCodePoint low, high, x;
6122
6123 GET_CODE_POINT(n, p);
6124 data = (OnigCodePoint* )p;
6125 data++;
6126
6127 for (low = 0, high = n; low < high; ) {
6128 x = (low + high) >> 1;
6129 if (code > data[x * 2 + 1])
6130 low = x + 1;
6131 else
6132 high = x;
6133 }
6134
6135 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6136}
6137
6138extern int
6139onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6140{
6141 int found;
6142
6143 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6144 if (IS_NULL(cc->mbuf)) {
6145 found = 0;
6146 }
6147 else {
6148 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6149 }
6150 }
6151 else {
6152 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6153 }
6154
6155 if (IS_NCCLASS_NOT(cc))
6156 return !found;
6157 else
6158 return found;
6159}
6160
6161extern int
6162onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6163{
6164 int len;
6165
6166 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6167 len = 2;
6168 }
6169 else {
6170 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6171 }
6172 return onig_is_code_in_cc_len(len, code, cc);
6173}
6174
6175
6176#ifdef ONIG_DEBUG
6177
6178/* arguments type */
6179# define ARG_SPECIAL -1
6180# define ARG_NON 0
6181# define ARG_RELADDR 1
6182# define ARG_ABSADDR 2
6183# define ARG_LENGTH 3
6184# define ARG_MEMNUM 4
6185# define ARG_OPTION 5
6186# define ARG_STATE_CHECK 6
6187
6188OnigOpInfoType OnigOpInfo[] = {
6189 { OP_FINISH, "finish", ARG_NON },
6190 { OP_END, "end", ARG_NON },
6191 { OP_EXACT1, "exact1", ARG_SPECIAL },
6192 { OP_EXACT2, "exact2", ARG_SPECIAL },
6193 { OP_EXACT3, "exact3", ARG_SPECIAL },
6194 { OP_EXACT4, "exact4", ARG_SPECIAL },
6195 { OP_EXACT5, "exact5", ARG_SPECIAL },
6196 { OP_EXACTN, "exactn", ARG_SPECIAL },
6197 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6198 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6199 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6200 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6201 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6202 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6203 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6204 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6205 { OP_CCLASS, "cclass", ARG_SPECIAL },
6206 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6207 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6208 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6209 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6210 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6211 { OP_ANYCHAR, "anychar", ARG_NON },
6212 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6213 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6214 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6215 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6216 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6217 { OP_WORD, "word", ARG_NON },
6218 { OP_NOT_WORD, "not-word", ARG_NON },
6219 { OP_WORD_BOUND, "word-bound", ARG_NON },
6220 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6221 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6222 { OP_WORD_END, "word-end", ARG_NON },
6223 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6224 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6225 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6226 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6227 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6228 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6229 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6230 { OP_END_BUF, "end-buf", ARG_NON },
6231 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6232 { OP_END_LINE, "end-line", ARG_NON },
6233 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6234 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6235 { OP_BACKREF1, "backref1", ARG_NON },
6236 { OP_BACKREF2, "backref2", ARG_NON },
6237 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6238 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6239 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6240 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6241 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6242 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6243 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6244 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6245 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6246 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6247 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6248 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6249 { OP_SET_OPTION, "set-option", ARG_OPTION },
6250 { OP_KEEP, "keep", ARG_NON },
6251 { OP_FAIL, "fail", ARG_NON },
6252 { OP_JUMP, "jump", ARG_RELADDR },
6253 { OP_PUSH, "push", ARG_RELADDR },
6254 { OP_POP, "pop", ARG_NON },
6255 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6256 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6257 { OP_REPEAT, "repeat", ARG_SPECIAL },
6258 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6259 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6260 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6261 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6262 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6263 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6264 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6265 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6266 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6267 { OP_PUSH_POS, "push-pos", ARG_NON },
6268 { OP_POP_POS, "pop-pos", ARG_NON },
6269 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6270 { OP_FAIL_POS, "fail-pos", ARG_NON },
6271 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6272 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6273 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6274 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6275 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6276 { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6277 { OP_ABSENT, "absent", ARG_RELADDR },
6278 { OP_ABSENT_END, "absent-end", ARG_NON },
6279 { OP_CALL, "call", ARG_ABSADDR },
6280 { OP_RETURN, "return", ARG_NON },
6281 { OP_CONDITION, "condition", ARG_SPECIAL },
6282 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6283 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6284 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6285 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6286 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6287 "state-check-anychar-ml*", ARG_STATE_CHECK },
6288 { -1, "", ARG_NON }
6289};
6290
6291static const char*
6292op2name(int opcode)
6293{
6294 int i;
6295
6296 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6297 if (opcode == OnigOpInfo[i].opcode)
6298 return OnigOpInfo[i].name;
6299 }
6300 return "";
6301}
6302
6303static int
6304op2arg_type(int opcode)
6305{
6306 int i;
6307
6308 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6309 if (opcode == OnigOpInfo[i].opcode)
6310 return OnigOpInfo[i].arg_type;
6311 }
6312 return ARG_SPECIAL;
6313}
6314
6315# ifdef ONIG_DEBUG_PARSE_TREE
6316static void
6317Indent(FILE* f, int indent)
6318{
6319 int i;
6320 for (i = 0; i < indent; i++) putc(' ', f);
6321}
6322# endif /* ONIG_DEBUG_PARSE_TREE */
6323
6324static void
6325p_string(FILE* f, ptrdiff_t len, UChar* s)
6326{
6327 fputs(":", f);
6328 while (len-- > 0) { fputc(*s++, f); }
6329}
6330
6331static void
6332p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6333{
6334 int x = len * mb_len;
6335
6336 fprintf(f, ":%d:", len);
6337 while (x-- > 0) { fputc(*s++, f); }
6338}
6339
6340extern void
6341onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6342 OnigEncoding enc)
6343{
6344 int i, n, arg_type;
6345 RelAddrType addr;
6346 LengthType len;
6347 MemNumType mem;
6348 StateCheckNumType scn;
6349 OnigCodePoint code;
6350 UChar *q;
6351
6352 fprintf(f, "[%s", op2name(*bp));
6353 arg_type = op2arg_type(*bp);
6354 if (arg_type != ARG_SPECIAL) {
6355 bp++;
6356 switch (arg_type) {
6357 case ARG_NON:
6358 break;
6359 case ARG_RELADDR:
6360 GET_RELADDR_INC(addr, bp);
6361 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6362 break;
6363 case ARG_ABSADDR:
6364 GET_ABSADDR_INC(addr, bp);
6365 fprintf(f, ":(%d)", addr);
6366 break;
6367 case ARG_LENGTH:
6368 GET_LENGTH_INC(len, bp);
6369 fprintf(f, ":%d", len);
6370 break;
6371 case ARG_MEMNUM:
6372 mem = *((MemNumType* )bp);
6373 bp += SIZE_MEMNUM;
6374 fprintf(f, ":%d", mem);
6375 break;
6376 case ARG_OPTION:
6377 {
6378 OnigOptionType option = *((OnigOptionType* )bp);
6379 bp += SIZE_OPTION;
6380 fprintf(f, ":%d", option);
6381 }
6382 break;
6383
6384 case ARG_STATE_CHECK:
6385 scn = *((StateCheckNumType* )bp);
6386 bp += SIZE_STATE_CHECK_NUM;
6387 fprintf(f, ":%d", scn);
6388 break;
6389 }
6390 }
6391 else {
6392 switch (*bp++) {
6393 case OP_EXACT1:
6394 case OP_ANYCHAR_STAR_PEEK_NEXT:
6395 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6396 p_string(f, 1, bp++); break;
6397 case OP_EXACT2:
6398 p_string(f, 2, bp); bp += 2; break;
6399 case OP_EXACT3:
6400 p_string(f, 3, bp); bp += 3; break;
6401 case OP_EXACT4:
6402 p_string(f, 4, bp); bp += 4; break;
6403 case OP_EXACT5:
6404 p_string(f, 5, bp); bp += 5; break;
6405 case OP_EXACTN:
6406 GET_LENGTH_INC(len, bp);
6407 p_len_string(f, len, 1, bp);
6408 bp += len;
6409 break;
6410
6411 case OP_EXACTMB2N1:
6412 p_string(f, 2, bp); bp += 2; break;
6413 case OP_EXACTMB2N2:
6414 p_string(f, 4, bp); bp += 4; break;
6415 case OP_EXACTMB2N3:
6416 p_string(f, 6, bp); bp += 6; break;
6417 case OP_EXACTMB2N:
6418 GET_LENGTH_INC(len, bp);
6419 p_len_string(f, len, 2, bp);
6420 bp += len * 2;
6421 break;
6422 case OP_EXACTMB3N:
6423 GET_LENGTH_INC(len, bp);
6424 p_len_string(f, len, 3, bp);
6425 bp += len * 3;
6426 break;
6427 case OP_EXACTMBN:
6428 {
6429 int mb_len;
6430
6431 GET_LENGTH_INC(mb_len, bp);
6432 GET_LENGTH_INC(len, bp);
6433 fprintf(f, ":%d:%d:", mb_len, len);
6434 n = len * mb_len;
6435 while (n-- > 0) { fputc(*bp++, f); }
6436 }
6437 break;
6438
6439 case OP_EXACT1_IC:
6440 len = enclen(enc, bp, bpend);
6441 p_string(f, len, bp);
6442 bp += len;
6443 break;
6444 case OP_EXACTN_IC:
6445 GET_LENGTH_INC(len, bp);
6446 p_len_string(f, len, 1, bp);
6447 bp += len;
6448 break;
6449
6450 case OP_CCLASS:
6451 n = bitset_on_num((BitSetRef )bp);
6452 bp += SIZE_BITSET;
6453 fprintf(f, ":%d", n);
6454 break;
6455
6456 case OP_CCLASS_NOT:
6457 n = bitset_on_num((BitSetRef )bp);
6458 bp += SIZE_BITSET;
6459 fprintf(f, ":%d", n);
6460 break;
6461
6462 case OP_CCLASS_MB:
6463 case OP_CCLASS_MB_NOT:
6464 GET_LENGTH_INC(len, bp);
6465 q = bp;
6466# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6467 ALIGNMENT_RIGHT(q);
6468# endif
6469 GET_CODE_POINT(code, q);
6470 bp += len;
6471 fprintf(f, ":%d:%d", (int )code, len);
6472 break;
6473
6474 case OP_CCLASS_MIX:
6475 case OP_CCLASS_MIX_NOT:
6476 n = bitset_on_num((BitSetRef )bp);
6477 bp += SIZE_BITSET;
6478 GET_LENGTH_INC(len, bp);
6479 q = bp;
6480# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6481 ALIGNMENT_RIGHT(q);
6482# endif
6483 GET_CODE_POINT(code, q);
6484 bp += len;
6485 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6486 break;
6487
6488 case OP_BACKREFN_IC:
6489 mem = *((MemNumType* )bp);
6490 bp += SIZE_MEMNUM;
6491 fprintf(f, ":%d", mem);
6492 break;
6493
6494 case OP_BACKREF_MULTI_IC:
6495 case OP_BACKREF_MULTI:
6496 fputs(" ", f);
6497 GET_LENGTH_INC(len, bp);
6498 for (i = 0; i < len; i++) {
6499 GET_MEMNUM_INC(mem, bp);
6500 if (i > 0) fputs(", ", f);
6501 fprintf(f, "%d", mem);
6502 }
6503 break;
6504
6505 case OP_BACKREF_WITH_LEVEL:
6506 {
6507 OnigOptionType option;
6508 LengthType level;
6509
6510 GET_OPTION_INC(option, bp);
6511 fprintf(f, ":%d", option);
6512 GET_LENGTH_INC(level, bp);
6513 fprintf(f, ":%d", level);
6514
6515 fputs(" ", f);
6516 GET_LENGTH_INC(len, bp);
6517 for (i = 0; i < len; i++) {
6518 GET_MEMNUM_INC(mem, bp);
6519 if (i > 0) fputs(", ", f);
6520 fprintf(f, "%d", mem);
6521 }
6522 }
6523 break;
6524
6525 case OP_REPEAT:
6526 case OP_REPEAT_NG:
6527 {
6528 mem = *((MemNumType* )bp);
6529 bp += SIZE_MEMNUM;
6530 addr = *((RelAddrType* )bp);
6531 bp += SIZE_RELADDR;
6532 fprintf(f, ":%d:%d", mem, addr);
6533 }
6534 break;
6535
6536 case OP_PUSH_OR_JUMP_EXACT1:
6537 case OP_PUSH_IF_PEEK_NEXT:
6538 addr = *((RelAddrType* )bp);
6539 bp += SIZE_RELADDR;
6540 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6541 p_string(f, 1, bp);
6542 bp += 1;
6543 break;
6544
6545 case OP_LOOK_BEHIND:
6546 GET_LENGTH_INC(len, bp);
6547 fprintf(f, ":%d", len);
6548 break;
6549
6550 case OP_PUSH_LOOK_BEHIND_NOT:
6551 GET_RELADDR_INC(addr, bp);
6552 GET_LENGTH_INC(len, bp);
6553 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6554 break;
6555
6556 case OP_STATE_CHECK_PUSH:
6557 case OP_STATE_CHECK_PUSH_OR_JUMP:
6558 scn = *((StateCheckNumType* )bp);
6559 bp += SIZE_STATE_CHECK_NUM;
6560 addr = *((RelAddrType* )bp);
6561 bp += SIZE_RELADDR;
6562 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6563 break;
6564
6565 case OP_CONDITION:
6566 GET_MEMNUM_INC(mem, bp);
6567 GET_RELADDR_INC(addr, bp);
6568 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6569 break;
6570
6571 default:
6572 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6573 bp[-1]);
6574 }
6575 }
6576 fputs("]", f);
6577 if (nextp) *nextp = bp;
6578}
6579
6580# ifdef ONIG_DEBUG_COMPILE
6581static void
6582print_compiled_byte_code_list(FILE* f, regex_t* reg)
6583{
6584 int ncode;
6585 UChar* bp = reg->p;
6586 UChar* end = reg->p + reg->used;
6587
6588 fprintf(f, "code length: %d", reg->used);
6589
6590 ncode = -1;
6591 while (bp < end) {
6592 ncode++;
6593 if (ncode % 5 == 0)
6594 fprintf(f, "\n%ld:", bp - reg->p);
6595 else
6596 fprintf(f, " %ld:", bp - reg->p);
6597 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6598 }
6599
6600 fprintf(f, "\n");
6601}
6602# endif /* ONIG_DEBUG_COMPILE */
6603
6604# ifdef ONIG_DEBUG_PARSE_TREE
6605static void
6606print_indent_tree(FILE* f, Node* node, int indent)
6607{
6608 int i, type, container_p = 0;
6609 int add = 3;
6610 UChar* p;
6611
6612 Indent(f, indent);
6613 if (IS_NULL(node)) {
6614 fprintf(f, "ERROR: null node!!!\n");
6615 exit (0);
6616 }
6617
6618 type = NTYPE(node);
6619 switch (type) {
6620 case NT_LIST:
6621 case NT_ALT:
6622 if (NTYPE(node) == NT_LIST)
6623 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6624 else
6625 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6626
6627 print_indent_tree(f, NCAR(node), indent + add);
6628 while (IS_NOT_NULL(node = NCDR(node))) {
6629 if (NTYPE(node) != type) {
6630 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6631 exit(0);
6632 }
6633 print_indent_tree(f, NCAR(node), indent + add);
6634 }
6635 break;
6636
6637 case NT_STR:
6638 fprintf(f, "<string%s:%"PRIxPTR">",
6639 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6640 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6641 if (*p >= 0x20 && *p < 0x7f)
6642 fputc(*p, f);
6643 else {
6644 fprintf(f, " 0x%02x", *p);
6645 }
6646 }
6647 break;
6648
6649 case NT_CCLASS:
6650 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6651 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6652 if (NCCLASS(node)->mbuf) {
6653 BBuf* bbuf = NCCLASS(node)->mbuf;
6654 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6655 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6656 fprintf(f, "%d", *data++);
6657 for (; data < end; data+=2) {
6658 fprintf(f, ",");
6659 fprintf(f, "%04x-%04x", data[0], data[1]);
6660 }
6661 }
6662 break;
6663
6664 case NT_CTYPE:
6665 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6666 switch (NCTYPE(node)->ctype) {
6667 case ONIGENC_CTYPE_WORD:
6668 if (NCTYPE(node)->not != 0)
6669 fputs("not word", f);
6670 else
6671 fputs("word", f);
6672 break;
6673
6674 default:
6675 fprintf(f, "ERROR: undefined ctype.\n");
6676 exit(0);
6677 }
6678 break;
6679
6680 case NT_CANY:
6681 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6682 break;
6683
6684 case NT_ANCHOR:
6685 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6686 switch (NANCHOR(node)->type) {
6687 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6688 case ANCHOR_END_BUF: fputs("end buf", f); break;
6689 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6690 case ANCHOR_END_LINE: fputs("end line", f); break;
6691 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6692 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6693
6694 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6695 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6696# ifdef USE_WORD_BEGIN_END
6697 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6698 case ANCHOR_WORD_END: fputs("word end", f); break;
6699# endif
6700 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6701 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6702 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6703 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6704 case ANCHOR_KEEP: fputs("keep",f); break;
6705
6706 default:
6707 fprintf(f, "ERROR: undefined anchor type.\n");
6708 break;
6709 }
6710 break;
6711
6712 case NT_BREF:
6713 {
6714 int* p;
6715 BRefNode* br = NBREF(node);
6716 p = BACKREFS_P(br);
6717 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6718 for (i = 0; i < br->back_num; i++) {
6719 if (i > 0) fputs(", ", f);
6720 fprintf(f, "%d", p[i]);
6721 }
6722 }
6723 break;
6724
6725# ifdef USE_SUBEXP_CALL
6726 case NT_CALL:
6727 {
6728 CallNode* cn = NCALL(node);
6729 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6730 p_string(f, cn->name_end - cn->name, cn->name);
6731 }
6732 break;
6733# endif
6734
6735 case NT_QTFR:
6736 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6737 NQTFR(node)->lower, NQTFR(node)->upper,
6738 (NQTFR(node)->greedy ? "" : "?"));
6739 print_indent_tree(f, NQTFR(node)->target, indent + add);
6740 break;
6741
6742 case NT_ENCLOSE:
6743 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6744 switch (NENCLOSE(node)->type) {
6745 case ENCLOSE_OPTION:
6746 fprintf(f, "option:%d", NENCLOSE(node)->option);
6747 break;
6748 case ENCLOSE_MEMORY:
6749 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6750 break;
6751 case ENCLOSE_STOP_BACKTRACK:
6752 fprintf(f, "stop-bt");
6753 break;
6754 case ENCLOSE_CONDITION:
6755 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6756 break;
6757 case ENCLOSE_ABSENT:
6758 fprintf(f, "absent");
6759 break;
6760
6761 default:
6762 break;
6763 }
6764 fprintf(f, "\n");
6765 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6766 break;
6767
6768 default:
6769 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6770 break;
6771 }
6772
6773 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6774 type != NT_ENCLOSE)
6775 fprintf(f, "\n");
6776
6777 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6778
6779 fflush(f);
6780}
6781
6782static void
6783print_tree(FILE* f, Node* node)
6784{
6785 print_indent_tree(f, node, 0);
6786}
6787# endif /* ONIG_DEBUG_PARSE_TREE */
6788#endif /* ONIG_DEBUG */
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
int len
Length of the buffer.
Definition io.h:8
VALUE type(ANYARGS)
ANYARGS-ed function type.