Ruby  3.4.0dev (2024-11-05 revision 348a53415339076afc4a02fcd09f3ae36e9c4c61)
onigmo.h
1 #ifndef ONIGMO_H
2 #define ONIGMO_H
3 /**********************************************************************
4  onigmo.h - Onigmo (Oniguruma-mod) (regular expression library)
5 **********************************************************************/
6 /*-
7  * Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8  * Copyright (c) 2011-2017 K.Takata <kentkt AT csc DOT jp>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifdef __cplusplus
34 extern "C" {
35 # if 0
36 } /* satisfy cc-mode */
37 # endif
38 #endif
39 
40 #define ONIGMO_VERSION_MAJOR 6
41 #define ONIGMO_VERSION_MINOR 1
42 #define ONIGMO_VERSION_TEENY 3
43 
44 #ifndef ONIG_EXTERN
45 # ifdef RUBY_EXTERN
46 # define ONIG_EXTERN RUBY_EXTERN
47 # else
48 # if defined(_WIN32) && !defined(__GNUC__)
49 # if defined(EXPORT) || defined(RUBY_EXPORT)
50 # define ONIG_EXTERN extern __declspec(dllexport)
51 # else
52 # define ONIG_EXTERN extern __declspec(dllimport)
53 # endif
54 # endif
55 # endif
56 #endif
57 
58 #ifndef ONIG_EXTERN
59 # define ONIG_EXTERN extern
60 #endif
61 
62 #ifndef RUBY
63 # ifndef RUBY_SYMBOL_EXPORT_BEGIN
64 # define RUBY_SYMBOL_EXPORT_BEGIN
65 # define RUBY_SYMBOL_EXPORT_END
66 # endif
67 #endif
68 
69 RUBY_SYMBOL_EXPORT_BEGIN
70 
71 #include <stddef.h> /* for size_t */
72 
73 /* PART: character encoding */
74 
75 #ifndef ONIG_ESCAPE_UCHAR_COLLISION
76 # define UChar OnigUChar
77 #endif
78 
79 typedef unsigned char OnigUChar;
80 typedef unsigned int OnigCodePoint;
81 typedef unsigned int OnigCtype;
82 typedef size_t OnigDistance;
83 typedef ptrdiff_t OnigPosition;
84 
85 #define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
86 
87 /*
88  * Onig casefold/case mapping flags and related definitions
89  *
90  * Subfields (starting with 0 at LSB):
91  * 0-2: Code point count in casefold.h
92  * 3-12: Index into SpecialCaseMapping array in casefold.h
93  * 13-22: Case folding/mapping flags
94  */
95 typedef unsigned int OnigCaseFoldType; /* case fold flag */
96 
97 ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
98 
99 /* bits for actual code point count; 3 bits is more than enough, currently only 2 used */
100 #define OnigCodePointMaskWidth 3
101 #define OnigCodePointMask ((1<<OnigCodePointMaskWidth)-1)
102 #define OnigCodePointCount(n) ((n)&OnigCodePointMask)
103 #define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask)
104 
105 /* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */
106 /* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */
107 
108 /* bits for index into table with separate titlecase mappings */
109 /* 10 bits provide 1024 values */
110 #define OnigSpecialIndexShift 3
111 #define OnigSpecialIndexWidth 10
112 
113 #define ONIGENC_CASE_UPCASE (1<<13) /* has/needs uppercase mapping */
114 #define ONIGENC_CASE_DOWNCASE (1<<14) /* has/needs lowercase mapping */
115 #define ONIGENC_CASE_TITLECASE (1<<15) /* has/needs (special) titlecase mapping */
116 #define ONIGENC_CASE_SPECIAL_OFFSET 3 /* offset in bits from ONIGENC_CASE to ONIGENC_CASE_SPECIAL */
117 #define ONIGENC_CASE_UP_SPECIAL (1<<16) /* has special upcase mapping */
118 #define ONIGENC_CASE_DOWN_SPECIAL (1<<17) /* has special downcase mapping */
119 #define ONIGENC_CASE_MODIFIED (1<<18) /* data has been modified */
120 #define ONIGENC_CASE_FOLD (1<<19) /* has/needs case folding */
121 
122 #define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */
123 
124 #define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) /* needs Lithuanian-specific mapping */
125 #define ONIGENC_CASE_ASCII_ONLY (1<<22) /* only modify ASCII range */
126 #define ONIGENC_CASE_IS_TITLECASE (1<<23) /* character itself is already titlecase */
127 
128 #define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */
129 
130 #define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
131 #define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
132 
133 
134 #define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
135 #define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
136 /* 13 => Unicode:0x1ffc */
137 
/*
 * code range
 *
 * Accessors for a code range table: element 0 is read as the number of
 * ranges, and range i occupies elements 2*i+1 (from) and 2*i+2 (to).
 * The table argument and the whole expansion are parenthesized so that an
 * expression such as `tbl + 1` can be passed as `range`; FROM/TO still
 * expand to lvalues.
 */
#define ONIGENC_CODE_RANGE_NUM(range)     ((int )((range)[0]))
#define ONIGENC_CODE_RANGE_FROM(range,i)  ((range)[((i)*2) + 1])
#define ONIGENC_CODE_RANGE_TO(range,i)    ((range)[((i)*2) + 2])
142 
143 typedef struct {
144  int byte_len; /* argument(original) character(s) byte length */
145  int code_len; /* number of code */
146  OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
148 
149 typedef struct {
150  OnigCodePoint esc;
151  OnigCodePoint anychar;
152  OnigCodePoint anytime;
153  OnigCodePoint zero_or_one_time;
154  OnigCodePoint one_or_more_time;
155  OnigCodePoint anychar_anytime;
157 
158 typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
159 
160 typedef struct OnigEncodingTypeST {
161  int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
162  const char* name;
163  int max_enc_len;
164  int min_enc_len;
165  int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
166  OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
167  int (*code_to_mbclen)(OnigCodePoint code, const struct OnigEncodingTypeST* enc);
168  int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, const struct OnigEncodingTypeST* enc);
169  int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, const struct OnigEncodingTypeST* enc);
170  int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, const struct OnigEncodingTypeST* enc);
171  int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], const struct OnigEncodingTypeST* enc);
172  int (*property_name_to_ctype)(const struct OnigEncodingTypeST* enc, const OnigUChar* p, const OnigUChar* end);
173  int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, const struct OnigEncodingTypeST* enc);
174  int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], const struct OnigEncodingTypeST* enc);
175  OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
176  int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
177  int (*case_map)(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
178  int ruby_encoding_index;
179  unsigned int flags;
181 
182 typedef const OnigEncodingType* OnigEncoding;
183 
184 ONIG_EXTERN const OnigEncodingType OnigEncodingASCII;
185 #ifndef RUBY
186 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_1;
187 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_2;
188 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_3;
189 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_4;
190 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_5;
191 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_6;
192 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_7;
193 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_8;
194 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_9;
195 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_10;
196 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_11;
197 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_13;
198 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_14;
199 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_15;
200 ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_16;
201 ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_8;
202 ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_16BE;
203 ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_16LE;
204 ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_32BE;
205 ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_32LE;
206 ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_JP;
207 ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_TW;
208 ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_KR;
209 ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_CN;
210 ONIG_EXTERN const OnigEncodingType OnigEncodingShift_JIS;
211 ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_31J;
212 /* ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8; */
213 ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8_R;
214 ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8_U;
215 ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1250;
216 ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1251;
217 ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1252;
218 ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1253;
219 ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1254;
220 ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1257;
221 ONIG_EXTERN const OnigEncodingType OnigEncodingBIG5;
222 ONIG_EXTERN const OnigEncodingType OnigEncodingGB18030;
223 #endif /* RUBY */
224 
225 #define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
226 #ifndef RUBY
227 # define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
228 # define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
229 # define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
230 # define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
231 # define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
232 # define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
233 # define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
234 # define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
235 # define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
236 # define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
237 # define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
238 # define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
239 # define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
240 # define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
241 # define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
242 # define ONIG_ENCODING_UTF_8 (&OnigEncodingUTF_8)
243 # define ONIG_ENCODING_UTF_16BE (&OnigEncodingUTF_16BE)
244 # define ONIG_ENCODING_UTF_16LE (&OnigEncodingUTF_16LE)
245 # define ONIG_ENCODING_UTF_32BE (&OnigEncodingUTF_32BE)
246 # define ONIG_ENCODING_UTF_32LE (&OnigEncodingUTF_32LE)
247 # define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
248 # define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
249 # define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
250 # define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
251 # define ONIG_ENCODING_SHIFT_JIS (&OnigEncodingShift_JIS)
252 # define ONIG_ENCODING_WINDOWS_31J (&OnigEncodingWindows_31J)
253 /* # define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) */
254 # define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
255 # define ONIG_ENCODING_KOI8_U (&OnigEncodingKOI8_U)
256 # define ONIG_ENCODING_WINDOWS_1250 (&OnigEncodingWindows_1250)
257 # define ONIG_ENCODING_WINDOWS_1251 (&OnigEncodingWindows_1251)
258 # define ONIG_ENCODING_WINDOWS_1252 (&OnigEncodingWindows_1252)
259 # define ONIG_ENCODING_WINDOWS_1253 (&OnigEncodingWindows_1253)
260 # define ONIG_ENCODING_WINDOWS_1254 (&OnigEncodingWindows_1254)
261 # define ONIG_ENCODING_WINDOWS_1257 (&OnigEncodingWindows_1257)
262 # define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
263 # define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
264 
265 /* old names */
266 # define ONIG_ENCODING_SJIS ONIG_ENCODING_SHIFT_JIS
267 # define ONIG_ENCODING_CP932 ONIG_ENCODING_WINDOWS_31J
268 # define ONIG_ENCODING_CP1250 ONIG_ENCODING_WINDOWS_1250
269 # define ONIG_ENCODING_CP1251 ONIG_ENCODING_WINDOWS_1251
270 # define ONIG_ENCODING_CP1252 ONIG_ENCODING_WINDOWS_1252
271 # define ONIG_ENCODING_CP1253 ONIG_ENCODING_WINDOWS_1253
272 # define ONIG_ENCODING_CP1254 ONIG_ENCODING_WINDOWS_1254
273 # define ONIG_ENCODING_CP1257 ONIG_ENCODING_WINDOWS_1257
274 # define ONIG_ENCODING_UTF8 ONIG_ENCODING_UTF_8
275 # define ONIG_ENCODING_UTF16_BE ONIG_ENCODING_UTF_16BE
276 # define ONIG_ENCODING_UTF16_LE ONIG_ENCODING_UTF_16LE
277 # define ONIG_ENCODING_UTF32_BE ONIG_ENCODING_UTF_32BE
278 # define ONIG_ENCODING_UTF32_LE ONIG_ENCODING_UTF_32LE
279 #endif /* RUBY */
280 
281 #define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
282 
283 /* this declaration needs to be here because it is used in string.c in Ruby */
284 ONIG_EXTERN
285 int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
286 
287 
288 /* work size */
289 #define ONIGENC_CODE_TO_MBC_MAXLEN 7
290 #define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
291 /* 18: 6(max-byte) * 3(case-fold chars) */
292 
293 /* character types */
294 #define ONIGENC_CTYPE_NEWLINE 0
295 #define ONIGENC_CTYPE_ALPHA 1
296 #define ONIGENC_CTYPE_BLANK 2
297 #define ONIGENC_CTYPE_CNTRL 3
298 #define ONIGENC_CTYPE_DIGIT 4
299 #define ONIGENC_CTYPE_GRAPH 5
300 #define ONIGENC_CTYPE_LOWER 6
301 #define ONIGENC_CTYPE_PRINT 7
302 #define ONIGENC_CTYPE_PUNCT 8
303 #define ONIGENC_CTYPE_SPACE 9
304 #define ONIGENC_CTYPE_UPPER 10
305 #define ONIGENC_CTYPE_XDIGIT 11
306 #define ONIGENC_CTYPE_WORD 12
307 #define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
308 #define ONIGENC_CTYPE_ASCII 14
309 #define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
310 
311 /* flags */
312 #define ONIGENC_FLAG_NONE 0U
313 #define ONIGENC_FLAG_UNICODE 1U
314 
315 #define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
316 
317 #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
318 #define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
319 #define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
320 #define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
321 #define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
322 #define ONIGENC_IS_MBC_WORD(enc,s,end) \
323  ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
324 #define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
325  onigenc_ascii_is_code_ctype( \
326  ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)
327 #define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
328 
329 
330 #define ONIGENC_NAME(enc) ((enc)->name)
331 
332 #define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
333  (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
334 #define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
335  (enc)->is_allowed_reverse_match(s,end,enc)
336 #define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
337  (enc)->left_adjust_char_head(start, s, end, enc)
338 #define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
339  (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
340 #define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
341  (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc)
342 #define ONIGENC_STEP_BACK(enc,start,s,end,n) \
343  onigenc_step_back((enc),(start),(s),(end),(n))
344 
/*
 * Encoding of a multibyte-length result r in a single int:
 *   r > 0    character found; r itself is the length
 *   r == -1  invalid
 *   r < -1   more bytes needed; built as -1-n and decoded back as -1-r
 */
345 #define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
346 #define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
347 #define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
348 
349 #define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
350 #define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
351 
352 #define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
353 #define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
354 #define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
355 
356 #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
357 
358 ONIG_EXTERN
359 int onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
360 
361 #define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen(p,e,enc)
362 #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
363 #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
364 #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
365 #define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc)
366 #define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc)
367 #define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
368 #define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
369 #define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
370  (enc)->property_name_to_ctype(enc,p,end)
371 
372 #define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc)
373 
374 #define ONIGENC_IS_CODE_NEWLINE(enc,code) \
375  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
376 #define ONIGENC_IS_CODE_GRAPH(enc,code) \
377  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
378 #define ONIGENC_IS_CODE_PRINT(enc,code) \
379  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
380 #define ONIGENC_IS_CODE_ALNUM(enc,code) \
381  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
382 #define ONIGENC_IS_CODE_ALPHA(enc,code) \
383  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
384 #define ONIGENC_IS_CODE_LOWER(enc,code) \
385  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
386 #define ONIGENC_IS_CODE_UPPER(enc,code) \
387  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
388 #define ONIGENC_IS_CODE_CNTRL(enc,code) \
389  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
390 #define ONIGENC_IS_CODE_PUNCT(enc,code) \
391  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
392 #define ONIGENC_IS_CODE_SPACE(enc,code) \
393  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
394 #define ONIGENC_IS_CODE_BLANK(enc,code) \
395  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
396 #define ONIGENC_IS_CODE_DIGIT(enc,code) \
397  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
398 #define ONIGENC_IS_CODE_XDIGIT(enc,code) \
399  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
400 #define ONIGENC_IS_CODE_WORD(enc,code) \
401  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
402 
403 #define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
404  (enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
405 
406 ONIG_EXTERN
407 OnigUChar* onigenc_step_back(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n);
408 
409 
410 /* encoding API */
411 ONIG_EXTERN
412 int onigenc_init(void);
413 ONIG_EXTERN
414 int onigenc_set_default_encoding(OnigEncoding enc);
415 ONIG_EXTERN
416 OnigEncoding onigenc_get_default_encoding(void);
417 ONIG_EXTERN
418 OnigUChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev);
419 ONIG_EXTERN
420 OnigUChar* onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
421 ONIG_EXTERN
422 OnigUChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
423 ONIG_EXTERN
424 OnigUChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
425 ONIG_EXTERN
426 int onigenc_strlen(OnigEncoding enc, const OnigUChar* p, const OnigUChar* end);
427 ONIG_EXTERN
428 int onigenc_strlen_null(OnigEncoding enc, const OnigUChar* p);
429 ONIG_EXTERN
430 int onigenc_str_bytelen_null(OnigEncoding enc, const OnigUChar* p);
431 
432 
433 
434 /* PART: regular expression */
435 
436 /* config parameters */
437 #define ONIG_NREGION 4
438 #define ONIG_MAX_CAPTURE_GROUP_NUM 32767
439 #define ONIG_MAX_BACKREF_NUM 1000
440 #define ONIG_MAX_REPEAT_NUM 100000
441 #define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
442 /* constants */
443 #define ONIG_MAX_ERROR_MESSAGE_LEN 90
444 
445 typedef unsigned int OnigOptionType;
446 
447 #define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
448 
449 /* options */
450 #define ONIG_OPTION_NONE 0U
451 #define ONIG_OPTION_IGNORECASE 1U
452 #define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
453 #define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
454 #define ONIG_OPTION_DOTALL ONIG_OPTION_MULTILINE
455 #define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
456 #define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
457 #define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
458 #define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
459 #define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
460 #define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
461 /* options (search time) */
462 #define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
463 #define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
464 #define ONIG_OPTION_NOTBOS (ONIG_OPTION_NOTEOL << 1)
465 #define ONIG_OPTION_NOTEOS (ONIG_OPTION_NOTBOS << 1)
466 /* options (ctype range) */
467 #define ONIG_OPTION_ASCII_RANGE (ONIG_OPTION_NOTEOS << 1)
468 #define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE (ONIG_OPTION_ASCII_RANGE << 1)
469 #define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
470 /* options (newline) */
471 #define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
472 #define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit */
473 
474 #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
475 #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
476 #define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
477 
478 /* syntax */
479 typedef struct {
480  unsigned int op;
481  unsigned int op2;
482  unsigned int behavior;
483  OnigOptionType options; /* default option */
484  OnigMetaCharTableType meta_char_table;
486 
487 ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS;
488 ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic;
489 ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended;
490 ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs;
491 ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep;
492 ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex;
493 ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava;
494 ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58;
495 ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58_NG;
496 ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl;
497 ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby;
498 ONIG_EXTERN const OnigSyntaxType OnigSyntaxPython;
499 
500 /* predefined syntaxes (see regsyntax.c) */
501 #define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
502 #define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
503 #define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
504 #define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
505 #define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
506 #define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
507 #define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
508 #define ONIG_SYNTAX_PERL58 (&OnigSyntaxPerl58)
509 #define ONIG_SYNTAX_PERL58_NG (&OnigSyntaxPerl58_NG)
510 #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
511 #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
512 #define ONIG_SYNTAX_PYTHON (&OnigSyntaxPython)
513 
514 /* default syntax */
515 ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
516 #define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
517 
518 /* syntax (operators) */
/*
 * Operator flags: each macro is a distinct single bit, 1U<<0 through 1U<<31.
 * The trailing comment shows the pattern syntax the flag refers to; ESC_
 * variants are the backslash-escaped form of the same operator.
 * NOTE(review): presumably combined into OnigSyntaxType.op - confirm.
 */
519 #define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
520 #define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
521 #define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
522 #define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) /* \* */
523 #define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
524 #define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) /* \+ */
525 #define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
526 #define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) /* \? */
527 #define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
528 #define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
529 #define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
530 #define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
531 #define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
532 #define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
533 #define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
534 #define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
535 #define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
536 #define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
537 #define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
538 #define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<, \> */
539 #define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
540 #define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
541 #define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
542 #define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
543 #define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
544 #define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
545 #define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
546 #define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
547 #define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
548 #define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
549 #define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
550 #define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{OOO} */
551 
552 #define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
553 #define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
554 #define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsxadlu), (?-imsx), (?^imsxalu) */
555 #define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imxadu), (?-imx) */
556 #define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
557 #define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
558 #define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
559 #define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
560 #define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
561 #define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
562 #define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
563 #define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
564 #define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
565 #define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
566 #define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
567 #define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
568 #define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
569 #define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
570 /* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
571 #define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
572 #define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
573 #define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK (1U<<21) /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
574 #define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER (1U<<22) /* \X */
575 #define ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE (1U<<23) /* \v, \V -- Perl */ /* NOTIMPL */
576 #define ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE (1U<<24) /* \h, \H -- Perl */ /* NOTIMPL */
577 #define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<25) /* \K */
578 #define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF (1U<<26) /* \g{name}, \g{n} */
579 #define ONIG_SYN_OP2_QMARK_SUBEXP_CALL (1U<<27) /* (?&name), (?n), (?R), (?0) */
580 #define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET (1U<<28) /* (?|...) */ /* NOTIMPL */
581 #define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION (1U<<29) /* (?(cond)yes...|no...) */
582 #define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP (1U<<30) /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
583 #define ONIG_SYN_OP2_QMARK_TILDE_ABSENT (1U<<31) /* (?~...) */
584 /* #define ONIG_SYN_OP2_OPTION_JAVA (1U<<xx) */ /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
585 
586 /* syntax (behavior) */
587 #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
588 #define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
589 #define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
590 #define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
591 #define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
592 #define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
593 #define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
594 #define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
595 #define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
596 #define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
597 #define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
598 #define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL (1U<<10) /* (?<x>)(?<x>)(?&x) */
599 #define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP (1U<<11) /* (?<x>)(?<x>)\k<x> */
600 
601 /* syntax (behavior) in char class [...] */
602 #define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
603 #define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
604 #define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
605 #define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
606 /* syntax (behavior) warning */
607 #define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
608 #define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
609 #define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
610 
611 /* meta character specifiers (onig_set_meta_char()) */
612 #define ONIG_META_CHAR_ESCAPE 0
613 #define ONIG_META_CHAR_ANYCHAR 1
614 #define ONIG_META_CHAR_ANYTIME 2
615 #define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
616 #define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
617 #define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
618 
619 #define ONIG_INEFFECTIVE_META_CHAR 0
620 
621 /* error codes: 0 is success, every other code defined below is negative */
622 #define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) /* true for -999..-100, which covers every -1xx, -2xx and -4xx code below; ecode is expanded twice, so pass a side-effect-free expression */
623 /* normal return */
624 #define ONIG_NORMAL 0
625 #define ONIG_MISMATCH -1
626 #define ONIG_NO_SUPPORT_CONFIG -2
627
628 /* internal error (-5..-23; outside the ONIG_IS_PATTERN_ERROR range) */
629 #define ONIGERR_MEMORY -5
630 #define ONIGERR_TYPE_BUG -6
631 #define ONIGERR_PARSER_BUG -11
632 #define ONIGERR_STACK_BUG -12
633 #define ONIGERR_UNDEFINED_BYTECODE -13
634 #define ONIGERR_UNEXPECTED_BYTECODE -14
635 #define ONIGERR_MATCH_STACK_LIMIT_OVER -15
636 #define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
637 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
638 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
639 #define ONIGERR_TIMEOUT -23
640 /* general error (also outside the ONIG_IS_PATTERN_ERROR range) */
641 #define ONIGERR_INVALID_ARGUMENT -30
642 /* syntax error (-107 and -120 are not assigned in this list) */
643 #define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
644 #define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
645 #define ONIGERR_EMPTY_CHAR_CLASS -102
646 #define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
647 #define ONIGERR_END_PATTERN_AT_ESCAPE -104
648 #define ONIGERR_END_PATTERN_AT_META -105
649 #define ONIGERR_END_PATTERN_AT_CONTROL -106
650 #define ONIGERR_META_CODE_SYNTAX -108
651 #define ONIGERR_CONTROL_CODE_SYNTAX -109
652 #define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
653 #define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
654 #define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
655 #define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
656 #define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
657 #define ONIGERR_NESTED_REPEAT_OPERATOR -115
658 #define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
659 #define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
660 #define ONIGERR_END_PATTERN_IN_GROUP -118
661 #define ONIGERR_UNDEFINED_GROUP_OPTION -119
662 #define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
663 #define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
664 #define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
665 #define ONIGERR_INVALID_CONDITION_PATTERN -124
666 /* values error (syntax error); -213 is not assigned in this list */
667 #define ONIGERR_TOO_BIG_NUMBER -200
668 #define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
669 #define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
670 #define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
671 #define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
672 #define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
673 #define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
674 #define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
675 #define ONIGERR_INVALID_BACKREF -208
676 #define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
677 #define ONIGERR_TOO_MANY_CAPTURE_GROUPS -210
678 #define ONIGERR_TOO_SHORT_DIGITS -211
679 #define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
680 #define ONIGERR_EMPTY_GROUP_NAME -214
681 #define ONIGERR_INVALID_GROUP_NAME -215
682 #define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
683 #define ONIGERR_UNDEFINED_NAME_REFERENCE -217
684 #define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
685 #define ONIGERR_MULTIPLEX_DEFINED_NAME -219
686 #define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
687 #define ONIGERR_NEVER_ENDING_RECURSION -221
688 #define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
689 #define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
690 #define ONIGERR_INVALID_CODE_POINT_VALUE -400
691 #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 /* same value as ONIGERR_INVALID_CODE_POINT_VALUE; the two names cannot be told apart at run time */
692 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
693 #define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
694 #define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
695 
696 /* errors related to thread */
697 /* #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 */
698 
699 
700 /* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
701 #define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
702 #define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
703  ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) /* non-zero when i is within the limit and (r)->list[i] is set; r and i are each expanded more than once and i has no lower-bound check; r must point to something with a `list` member (type not visible here) */
704 
705 #ifdef USE_CAPTURE_HISTORY /* the node type exists only in builds that define USE_CAPTURE_HISTORY */
706 typedef struct OnigCaptureTreeNodeStruct {
707  int group; /* group number */
708  OnigPosition beg; /* presumably where this capture starts -- confirm */
709  OnigPosition end; /* presumably where this capture ends -- confirm */
710  int allocated; /* presumably the capacity of childs[] -- confirm */
711  int num_childs; /* presumably the number of childs[] entries in use -- confirm */
712  struct OnigCaptureTreeNodeStruct** childs; /* pointers to nodes of the same type, which makes this a tree */
713 } OnigCaptureTreeNode;
714 #endif
715 
716 /* match result region type (aliased as OnigRegion further down) */
717 struct re_registers {
718  int allocated; /* presumably the capacity of beg[]/end[] -- confirm */
719  int num_regs; /* presumably the number of beg[]/end[] entries in use -- confirm */
720  OnigPosition* beg; /* start positions, presumably one per register -- confirm */
721  OnigPosition* end; /* end positions, presumably parallel to beg[] -- confirm */
722 #ifdef USE_CAPTURE_HISTORY
723  /* extended: this member changes the struct layout, so all users must agree on USE_CAPTURE_HISTORY */
724  OnigCaptureTreeNode* history_root; /* capture history tree root */
725 #endif
726 };
727 
728 /* capture tree traverse: bit flags, presumably for the `at` argument of onig_capture_tree_traverse() -- confirm */
729 #define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
730 #define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
731 #define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
732  ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) /* evaluates to 3 */
733
734
735 #define ONIG_REGION_NOTPOS -1 /* presumably stored in a region's beg/end for a group that did not take part in the match -- confirm */
736
737 typedef struct re_registers OnigRegion; /* library-prefixed alias for struct re_registers */
738 
739 typedef struct { /* error detail record; passed as the `einfo` argument of onig_new(), onig_new_without_alloc() and onig_new_deluxe() */
740  OnigEncoding enc; /* presumably the encoding of the text at par..par_end -- confirm */
741  OnigUChar* par; /* presumably the start of the text the error refers to -- confirm */
742  OnigUChar* par_end; /* presumably the end of that text -- confirm */
743 } OnigErrorInfo;
744 
/*
 * Bounds of one repeat range.  struct re_pattern_buffer refers to objects of
 * this type through its `repeat_range` member (OnigRepeatRange*).
 */
typedef struct {
  int lower;  /* lower bound, presumably the minimum repeat count -- TODO confirm */
  int upper;  /* upper bound, presumably the maximum repeat count -- TODO confirm */
} OnigRepeatRange;
749 
750 typedef void (*OnigWarnFunc)(const char* s); /* warning callback: takes a C string and returns nothing; accepted by onig_set_warn_func() and onig_set_verb_warn_func() */
751 extern void onig_null_warn(const char* s); /* function with the OnigWarnFunc signature; presumably ignores its argument -- confirm */
752 #define ONIG_NULL_WARN onig_null_warn /* macro alias for onig_null_warn */
753
754 #define ONIG_CHAR_TABLE_SIZE 256 /* element count of map[] in struct re_pattern_buffer */
755 
756 typedef struct re_pattern_buffer { /* compiled regex object; OnigRegex is a pointer to this type */
757  /* common members of BBuf(bytes-buffer) */
758  unsigned char* p; /* compiled pattern */
759  unsigned int used; /* used space for p */
760  unsigned int alloc; /* allocated space for p */
761
762  int num_mem; /* used memory(...) num counted from 1 */
763  int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
764  int num_null_check; /* OP_NULL_CHECK_START/END id counter */
765  int num_comb_exp_check; /* combination explosion check */
766  int num_call; /* number of subexp call */
767  unsigned int capture_history; /* (?@...) flag (1-31) */
768  unsigned int bt_mem_start; /* need backtrack flag */
769  unsigned int bt_mem_end; /* need backtrack flag */
770  int stack_pop_level; /* meaning not visible in this header -- see the matcher */
771  int repeat_range_alloc; /* presumably the capacity of repeat_range[] -- confirm */
772
773  OnigOptionType options; /* presumably the value onig_get_options() reports -- confirm */
774
775  OnigRepeatRange* repeat_range; /* presumably an array with repeat_range_alloc slots -- confirm */
776
777  OnigEncoding enc; /* presumably the value onig_get_encoding() reports -- confirm */
778  const OnigSyntaxType* syntax; /* presumably the value onig_get_syntax() reports -- confirm */
779  void* name_table; /* opaque here; presumably the group-name table behind the onig_*name* functions -- confirm */
780  OnigCaseFoldType case_fold_flag; /* presumably the value onig_get_case_fold_flag() reports -- confirm */
781
782  /* optimization info (string search, char-map and anchors) */
783  int optimize; /* optimize flag */
784  int threshold_len; /* search str-length for apply optimize */
785  int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
786  OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
787  OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
788  int sub_anchor; /* start-anchor for exact or map */
789  unsigned char *exact; /* presumably the start of the literal used for string search -- confirm */
790  unsigned char *exact_end; /* presumably the end of that literal -- confirm */
791  unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
792  int *int_map; /* BM skip for exact_len > 255 */
793  int *int_map_backward; /* BM skip for backward search */
794  OnigDistance dmin; /* min-distance of exact or map */
795  OnigDistance dmax; /* max-distance of exact or map */
796
797  /* rb_hrtime_t from hrtime.h; the member's type, and so the struct layout, depends on MY_RUBY_BUILD_MAY_TIME_TRAVEL */
798 #ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL
799  int128_t timelimit;
800 #else
801  uint64_t timelimit; /* presumably the time budget behind ONIGERR_TIMEOUT -- confirm */
802 #endif
803
804  /* regex_t link chain */
805  struct re_pattern_buffer* chain; /* escape compile-conflict */
806 } OnigRegexType;
807 
808 typedef OnigRegexType* OnigRegex; /* pointer typedef: an OnigRegex parameter is already a pointer, and OnigRegex* is a pointer to a pointer */
809
810 #ifndef ONIG_ESCAPE_REGEX_T_COLLISION /* define this macro before including the header to suppress the regex_t alias below */
811 typedef OnigRegexType regex_t;
812 #endif
813 
814 
815 typedef struct {
816  int num_of_elements;
817  OnigEncoding pattern_enc;
818  OnigEncoding target_enc;
819  const OnigSyntaxType* syntax;
820  OnigOptionType option;
821  OnigCaseFoldType case_fold_flag;
823 
824 /* Oniguruma Native API: each prototype below is preceded by a line holding the ONIG_EXTERN linkage macro */
825 ONIG_EXTERN
826 int onig_initialize(OnigEncoding encodings[], int n);
827 ONIG_EXTERN
828 int onig_init(void);
829 ONIG_EXTERN
830 int onig_error_code_to_str(OnigUChar* s, OnigPosition err_code, ...); /* variadic; which extra arguments are expected is not visible in this header */
831 ONIG_EXTERN
832 void onig_set_warn_func(OnigWarnFunc f);
833 ONIG_EXTERN
834 void onig_set_verb_warn_func(OnigWarnFunc f);
835 ONIG_EXTERN
836 int onig_new(OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo); /* first parameter is OnigRegex*, i.e. a pointer to the regex pointer */
837 ONIG_EXTERN
838 int onig_reg_init(OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax);
839 ONIG_EXTERN
840 int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo); /* takes OnigRegex by value, unlike onig_new() which takes OnigRegex* */
841 ONIG_EXTERN
842 int onig_new_deluxe(OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo);
843 ONIG_EXTERN
844 void onig_free(OnigRegex);
845 ONIG_EXTERN
846 void onig_free_body(OnigRegex);
847 ONIG_EXTERN
848 int onig_reg_copy(OnigRegex* reg, OnigRegex orig_reg);
849 ONIG_EXTERN
850 OnigPosition onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), void* callback_arg); /* callback_arg is void*, like the callback's last parameter; presumably forwarded to it -- confirm */
851 ONIG_EXTERN
852 OnigPosition onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option); /* presumably returns a position or a negative ONIG_* code such as ONIG_MISMATCH -- confirm */
853 ONIG_EXTERN
854 OnigPosition onig_search_gpos(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option); /* same parameters as onig_search() plus global_pos */
855 ONIG_EXTERN
856 OnigPosition onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option);
857 ONIG_EXTERN
858 int onig_check_linear_time(OnigRegex reg);
859 ONIG_EXTERN
860 OnigRegion* onig_region_new(void);
861 ONIG_EXTERN
862 void onig_region_init(OnigRegion* region);
863 ONIG_EXTERN
864 void onig_region_free(OnigRegion* region, int free_self); /* free_self: presumably non-zero also releases the OnigRegion object itself -- confirm */
865 ONIG_EXTERN
866 void onig_region_copy(OnigRegion* to, const OnigRegion* from);
867 ONIG_EXTERN
868 void onig_region_clear(OnigRegion* region);
869 ONIG_EXTERN
870 int onig_region_resize(OnigRegion* region, int n);
871 ONIG_EXTERN
872 int onig_region_set(OnigRegion* region, int at, int beg, int end); /* beg/end are int here, although struct re_registers stores OnigPosition values */
873 ONIG_EXTERN
874 int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums);
875 ONIG_EXTERN
876 int onig_name_to_backref_number(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, const OnigRegion *region);
877 ONIG_EXTERN
878 int onig_foreach_name(OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg); /* arg is void*, like func's last parameter; presumably forwarded to it -- confirm */
879 ONIG_EXTERN
880 int onig_number_of_names(const OnigRegexType *reg);
881 ONIG_EXTERN
882 int onig_number_of_captures(const OnigRegexType *reg);
883 ONIG_EXTERN
884 int onig_number_of_capture_histories(const OnigRegexType *reg);
885 #ifdef USE_CAPTURE_HISTORY /* guarded because the return type OnigCaptureTreeNode only exists under this macro */
886 ONIG_EXTERN
887 OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region);
888 #endif
889 ONIG_EXTERN
890 int onig_capture_tree_traverse(OnigRegion* region, int at, int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*), void* arg); /* declared even when USE_CAPTURE_HISTORY is undefined; `at` presumably takes ONIG_TRAVERSE_CALLBACK_AT_* -- confirm */
891 ONIG_EXTERN
892 int onig_noname_group_capture_is_active(const OnigRegexType *reg);
893 ONIG_EXTERN
894 OnigEncoding onig_get_encoding(const OnigRegexType *reg);
895 ONIG_EXTERN
896 OnigOptionType onig_get_options(const OnigRegexType *reg);
897 ONIG_EXTERN
898 OnigCaseFoldType onig_get_case_fold_flag(const OnigRegexType *reg);
899 ONIG_EXTERN
900 const OnigSyntaxType* onig_get_syntax(const OnigRegexType *reg);
901 ONIG_EXTERN
902 int onig_set_default_syntax(const OnigSyntaxType* syntax);
903 ONIG_EXTERN
904 void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from);
905 ONIG_EXTERN
906 unsigned int onig_get_syntax_op(const OnigSyntaxType* syntax);
907 ONIG_EXTERN
908 unsigned int onig_get_syntax_op2(const OnigSyntaxType* syntax);
909 ONIG_EXTERN
910 unsigned int onig_get_syntax_behavior(const OnigSyntaxType* syntax);
911 ONIG_EXTERN
912 OnigOptionType onig_get_syntax_options(const OnigSyntaxType* syntax);
913 ONIG_EXTERN
914 void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op);
915 ONIG_EXTERN
916 void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2);
917 ONIG_EXTERN
918 void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior); /* behavior is presumably an OR of ONIG_SYN_* behaviour bits -- confirm */
919 ONIG_EXTERN
920 void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options);
921 ONIG_EXTERN
922 int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code); /* `what` is presumably one of ONIG_META_CHAR_* -- confirm */
923 ONIG_EXTERN
924 void onig_copy_encoding(OnigEncodingType *to, OnigEncoding from);
925 ONIG_EXTERN
926 OnigCaseFoldType onig_get_default_case_fold_flag(void);
927 ONIG_EXTERN
928 int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag);
929 ONIG_EXTERN
930 unsigned int onig_get_match_stack_limit_size(void);
931 ONIG_EXTERN
932 int onig_set_match_stack_limit_size(unsigned int size);
933 ONIG_EXTERN
934 unsigned int onig_get_parse_depth_limit(void);
935 ONIG_EXTERN
936 int onig_set_parse_depth_limit(unsigned int depth);
937 ONIG_EXTERN
938 int onig_end(void);
939 ONIG_EXTERN
940 const char* onig_version(void);
941 ONIG_EXTERN
942 const char* onig_copyright(void);
943 
944 RUBY_SYMBOL_EXPORT_END /* presumably pairs with a RUBY_SYMBOL_EXPORT_BEGIN earlier in the header (not visible in this part) -- confirm */
945
946 #ifdef __cplusplus
947 # if 0
948 { /* satisfy cc-mode: never compiled, only balances the brace below for editors */
949 # endif
950 } /* closes the extern "C" block opened at the top of the header */
951 #endif
952
953 #endif /* ONIGMO_H */