33OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
42onigenc_get_default_encoding(
void)
44 return OnigEncDefaultCharEncoding;
50 OnigEncDefaultCharEncoding = enc;
55onigenc_mbclen(
const OnigUChar* p,
const OnigUChar* e,
OnigEncoding enc)
57 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
58 if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) {
59 ret = ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
60 if (p + ret > e) ret = (int)(e - p);
63 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) {
70onigenc_mbclen_approximate(
const OnigUChar* p,
const OnigUChar* e,
OnigEncoding enc)
72 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
73 if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
74 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
75 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
76 return (
int )(e - p) + ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
81onigenc_get_right_adjust_char_head(
OnigEncoding enc,
const UChar* start,
const UChar* s,
const UChar* end)
83 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
85 p += enclen(enc, p, end);
91onigenc_get_right_adjust_char_head_with_prev(
OnigEncoding enc,
92 const UChar* start,
const UChar* s,
const UChar* end,
const UChar** prev)
94 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
97 if (prev) *prev = (
const UChar* )p;
98 p += enclen(enc, p, end);
101 if (prev) *prev = (
const UChar* )NULL;
107onigenc_get_prev_char_head(
OnigEncoding enc,
const UChar* start,
const UChar* s,
const UChar* end)
110 return (UChar* )NULL;
112 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
116onigenc_step_back(
OnigEncoding enc,
const UChar* start,
const UChar* s,
const UChar* end,
int n)
118 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
120 return (UChar* )NULL;
122 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
128onigenc_step(
OnigEncoding enc,
const UChar* p,
const UChar* end,
int n)
130 UChar* q = (UChar* )p;
132 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
134 return (q <= end ? q : NULL);
138onigenc_strlen(
OnigEncoding enc,
const UChar* p,
const UChar* end)
141 UChar* q = (UChar* )p;
144 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
154 UChar* p = (UChar* )s;
160 int len = ONIGENC_MBC_MINLEN(enc);
162 if (
len == 1)
return n;
165 if (*q !=
'\0')
break;
169 if (
len == 1)
return n;
171 e = p + ONIGENC_MBC_MAXLEN(enc);
172 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
178onigenc_str_bytelen_null(
OnigEncoding enc,
const UChar* s)
180 UChar* start = (UChar* )s;
181 UChar* p = (UChar* )s;
187 int len = ONIGENC_MBC_MINLEN(enc);
189 if (
len == 1)
return (
int )(p - start);
192 if (*q !=
'\0')
break;
196 if (
len == 1)
return (
int )(p - start);
198 e = p + ONIGENC_MBC_MAXLEN(enc);
199 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
203const UChar OnigEncAsciiToLowerCaseTable[] = {
204 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
205 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
206 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
207 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
208 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
209 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
210 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
211 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
212 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
213 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
214 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
215 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
216 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
217 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
218 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
219 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
220 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
221 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
222 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
223 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
224 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
225 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
226 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
227 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
228 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
229 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
230 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
231 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
232 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
233 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
234 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
235 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
238#ifdef USE_UPPER_CASE_TABLE
239const UChar OnigEncAsciiToUpperCaseTable[256] = {
240 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
241 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
242 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
243 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
244 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
245 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
246 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
247 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
248 '\100',
'\101',
'\102',
'\103',
'\104',
'\105',
'\106',
'\107',
249 '\110',
'\111',
'\112',
'\113',
'\114',
'\115',
'\116',
'\117',
250 '\120',
'\121',
'\122',
'\123',
'\124',
'\125',
'\126',
'\127',
251 '\130',
'\131',
'\132',
'\133',
'\134',
'\135',
'\136',
'\137',
252 '\140',
'\101',
'\102',
'\103',
'\104',
'\105',
'\106',
'\107',
253 '\110',
'\111',
'\112',
'\113',
'\114',
'\115',
'\116',
'\117',
254 '\120',
'\121',
'\122',
'\123',
'\124',
'\125',
'\126',
'\127',
255 '\130',
'\131',
'\132',
'\173',
'\174',
'\175',
'\176',
'\177',
256 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
257 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
258 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
259 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
260 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
261 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
262 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
263 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
264 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
265 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
266 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
267 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
268 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
269 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
270 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
271 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
275const unsigned short OnigEncAsciiCtypeTable[256] = {
276 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
277 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
278 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
279 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
280 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
281 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
282 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
283 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
284 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
285 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
286 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
287 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
288 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
289 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
290 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
291 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
292 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
293 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
294 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
295 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
296 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
297 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
298 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
299 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
300 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
301 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
302 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
303 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
304 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
305 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
306 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
307 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
310const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
311 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
312 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
313 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
314 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
315 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
316 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
317 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
318 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
319 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
320 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
321 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
322 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
323 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
324 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
325 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
326 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
327 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
328 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
329 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
330 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
331 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
332 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
333 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
334 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
335 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
336 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
337 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\327',
338 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\337',
339 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
340 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
341 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
342 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377'
345#ifdef USE_UPPER_CASE_TABLE
346const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
347 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
348 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
349 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
350 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
351 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
352 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
353 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
354 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
355 '\100',
'\101',
'\102',
'\103',
'\104',
'\105',
'\106',
'\107',
356 '\110',
'\111',
'\112',
'\113',
'\114',
'\115',
'\116',
'\117',
357 '\120',
'\121',
'\122',
'\123',
'\124',
'\125',
'\126',
'\127',
358 '\130',
'\131',
'\132',
'\133',
'\134',
'\135',
'\136',
'\137',
359 '\140',
'\101',
'\102',
'\103',
'\104',
'\105',
'\106',
'\107',
360 '\110',
'\111',
'\112',
'\113',
'\114',
'\115',
'\116',
'\117',
361 '\120',
'\121',
'\122',
'\123',
'\124',
'\125',
'\126',
'\127',
362 '\130',
'\131',
'\132',
'\173',
'\174',
'\175',
'\176',
'\177',
363 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
364 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
365 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
366 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
367 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
368 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
369 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
370 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
371 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
372 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
373 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
374 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
375 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
376 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
377 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\367',
378 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\377',
384onigenc_set_default_caseconv_table(
const UChar* table ARG_UNUSED)
392onigenc_get_left_adjust_char_head(
OnigEncoding enc,
const UChar* start,
const UChar* s,
const UChar* end)
394 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
427onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
428 OnigApplyAllCaseFoldFunc f,
void* arg,
434 for (i = 0; i < numberof(OnigAsciiLowerMap); i++) {
435 code = OnigAsciiLowerMap[i].to;
436 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
437 if (r != 0)
return r;
439 code = OnigAsciiLowerMap[i].from;
440 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
441 if (r != 0)
return r;
448onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
449 const OnigUChar* p,
const OnigUChar* end ARG_UNUSED,
452 if (0x41 <= *p && *p <= 0x5a) {
453 items[0].byte_len = 1;
454 items[0].code_len = 1;
455 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
458 else if (0x61 <= *p && *p <= 0x7a) {
459 items[0].byte_len = 1;
460 items[0].code_len = 1;
461 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
469ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
470 OnigApplyAllCaseFoldFunc f,
void* arg)
472 OnigCodePoint ss[] = { 0x73, 0x73 };
474 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
478onigenc_apply_all_case_fold_with_map(
int map_size,
480 int ess_tsett_flag, OnigCaseFoldType flag,
481 OnigApplyAllCaseFoldFunc f,
void* arg)
486 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
487 if (r != 0)
return r;
489 for (i = 0; i < map_size; i++) {
491 r = (*f)(map[i].from, &code, 1, arg);
492 if (r != 0)
return r;
495 r = (*f)(map[i].to, &code, 1, arg);
496 if (r != 0)
return r;
499 if (ess_tsett_flag != 0)
500 return ss_apply_all_case_fold(flag, f, arg);
506onigenc_get_case_fold_codes_by_str_with_map(
int map_size,
508 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
511 if (0x41 <= *p && *p <= 0x5a) {
512 items[0].byte_len = 1;
513 items[0].code_len = 1;
514 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
515 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
516 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
518 items[1].byte_len = 2;
519 items[1].code_len = 1;
520 items[1].code[0] = (OnigCodePoint )0xdf;
526 else if (0x61 <= *p && *p <= 0x7a) {
527 items[0].byte_len = 1;
528 items[0].code_len = 1;
529 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
530 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
531 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
533 items[1].byte_len = 2;
534 items[1].code_len = 1;
535 items[1].code[0] = (OnigCodePoint )0xdf;
541 else if (*p == 0xdf && ess_tsett_flag != 0) {
542 items[0].byte_len = 1;
543 items[0].code_len = 2;
544 items[0].code[0] = (OnigCodePoint )
's';
545 items[0].code[1] = (OnigCodePoint )
's';
547 items[1].byte_len = 1;
548 items[1].code_len = 2;
549 items[1].code[0] = (OnigCodePoint )
'S';
550 items[1].code[1] = (OnigCodePoint )
'S';
552 items[2].byte_len = 1;
553 items[2].code_len = 2;
554 items[2].code[0] = (OnigCodePoint )
's';
555 items[2].code[1] = (OnigCodePoint )
'S';
557 items[3].byte_len = 1;
558 items[3].code_len = 2;
559 items[3].code[0] = (OnigCodePoint )
'S';
560 items[3].code[1] = (OnigCodePoint )
's';
567 for (i = 0; i < map_size; i++) {
568 if (*p == map[i].from) {
569 items[0].byte_len = 1;
570 items[0].code_len = 1;
571 items[0].code[0] = map[i].to;
574 else if (*p == map[i].to) {
575 items[0].byte_len = 1;
576 items[0].code_len = 1;
577 items[0].code[0] = map[i].from;
588onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
589 OnigCodePoint* sb_out ARG_UNUSED,
590 const OnigCodePoint* ranges[] ARG_UNUSED,
593 return ONIG_NO_SUPPORT_CONFIG;
597onigenc_is_mbc_newline_0x0a(
const UChar* p,
const UChar* end,
OnigEncoding enc ARG_UNUSED)
600 if (*p == 0x0a)
return 1;
607onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** p,
608 const UChar* end, UChar* lower,
OnigEncoding enc ARG_UNUSED)
610 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
618onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
619 const UChar** pp,
const UChar* end ARG_UNUSED)
621 const UChar* p = *pp;
624 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
629onigenc_single_byte_mbc_enc_len(
const UChar* p ARG_UNUSED,
const UChar* e ARG_UNUSED,
636onigenc_single_byte_mbc_to_code(
const UChar* p,
const UChar* end ARG_UNUSED,
639 return (OnigCodePoint )(*p);
643onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
649onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf,
OnigEncoding enc ARG_UNUSED)
655 *buf = (UChar )(code & 0xff);
660onigenc_single_byte_left_adjust_char_head(
const UChar* start ARG_UNUSED,
662 const UChar* end ARG_UNUSED,
669onigenc_always_true_is_allowed_reverse_match(
const UChar* s ARG_UNUSED,
670 const UChar* end ARG_UNUSED,
677onigenc_always_false_is_allowed_reverse_match(
const UChar* s ARG_UNUSED,
678 const UChar* end ARG_UNUSED,
685onigenc_ascii_is_code_ctype(OnigCodePoint code,
unsigned int ctype,
689 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
695onigenc_mbn_mbc_to_code(
OnigEncoding enc,
const UChar* p,
const UChar* end)
700 len = enclen(enc, p, end);
701 n = (OnigCodePoint )(*p++);
702 if (
len == 1)
return n;
704 for (i = 1; i <
len; i++) {
713onigenc_mbn_mbc_case_fold(
OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
714 const UChar** pp,
const UChar* end ARG_UNUSED,
718 const UChar *p = *pp;
720 if (ONIGENC_IS_MBC_ASCII(p)) {
721 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
728 len = enclen(enc, p, end);
729 for (i = 0; i <
len; i++) {
739onigenc_mbn_is_mbc_ambiguous(
OnigEncoding enc, OnigCaseFoldType flag,
740 const UChar** pp,
const UChar* end ARG_UNUSED)
742 const UChar* p = *pp;
744 if (ONIGENC_IS_MBC_ASCII(p)) {
746 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
749 (*pp) += enclen(enc, p);
755onigenc_mb2_code_to_mbclen(OnigCodePoint code,
OnigEncoding enc ARG_UNUSED)
757 if (code <= 0xff)
return 1;
758 if (code <= 0xffff)
return 2;
759 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
763onigenc_mb4_code_to_mbclen(OnigCodePoint code,
OnigEncoding enc ARG_UNUSED)
765 if ((code & 0xff000000) != 0)
return 4;
766 else if ((code & 0xff0000) != 0)
return 3;
767 else if ((code & 0xff00) != 0)
return 2;
772onigenc_mb2_code_to_mbc(
OnigEncoding enc, OnigCodePoint code, UChar *buf)
776 if ((code & 0xff00) != 0) {
777 *p++ = (UChar )((code >> 8) & 0xff);
779 *p++ = (UChar )(code & 0xff);
782 if (enclen(enc, buf, p) != (p - buf))
783 return ONIGERR_INVALID_CODE_POINT_VALUE;
785 return (
int )(p - buf);
789onigenc_mb4_code_to_mbc(
OnigEncoding enc, OnigCodePoint code, UChar *buf)
793 if ((code & 0xff000000) != 0) {
794 *p++ = (UChar )((code >> 24) & 0xff);
796 if ((code & 0xff0000) != 0 || p != buf) {
797 *p++ = (UChar )((code >> 16) & 0xff);
799 if ((code & 0xff00) != 0 || p != buf) {
800 *p++ = (UChar )((code >> 8) & 0xff);
802 *p++ = (UChar )(code & 0xff);
805 if (enclen(enc, buf, p) != (p - buf))
806 return ONIGERR_INVALID_CODE_POINT_VALUE;
808 return (
int )(p - buf);
812onigenc_minimum_property_name_to_ctype(
OnigEncoding enc,
const UChar* p,
const UChar* end)
815 POSIX_BRACKET_ENTRY_INIT(
"Alnum", ONIGENC_CTYPE_ALNUM),
816 POSIX_BRACKET_ENTRY_INIT(
"Alpha", ONIGENC_CTYPE_ALPHA),
817 POSIX_BRACKET_ENTRY_INIT(
"Blank", ONIGENC_CTYPE_BLANK),
818 POSIX_BRACKET_ENTRY_INIT(
"Cntrl", ONIGENC_CTYPE_CNTRL),
819 POSIX_BRACKET_ENTRY_INIT(
"Digit", ONIGENC_CTYPE_DIGIT),
820 POSIX_BRACKET_ENTRY_INIT(
"Graph", ONIGENC_CTYPE_GRAPH),
821 POSIX_BRACKET_ENTRY_INIT(
"Lower", ONIGENC_CTYPE_LOWER),
822 POSIX_BRACKET_ENTRY_INIT(
"Print", ONIGENC_CTYPE_PRINT),
823 POSIX_BRACKET_ENTRY_INIT(
"Punct", ONIGENC_CTYPE_PUNCT),
824 POSIX_BRACKET_ENTRY_INIT(
"Space", ONIGENC_CTYPE_SPACE),
825 POSIX_BRACKET_ENTRY_INIT(
"Upper", ONIGENC_CTYPE_UPPER),
826 POSIX_BRACKET_ENTRY_INIT(
"XDigit", ONIGENC_CTYPE_XDIGIT),
827 POSIX_BRACKET_ENTRY_INIT(
"ASCII", ONIGENC_CTYPE_ASCII),
828 POSIX_BRACKET_ENTRY_INIT(
"Word", ONIGENC_CTYPE_WORD),
834 len = onigenc_strlen(enc, p, end);
835 for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
836 if (
len == pb->len &&
837 onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
841 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
845onigenc_mb2_is_code_ctype(
OnigEncoding enc, OnigCodePoint code,
849 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
851 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
852 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
860onigenc_mb4_is_code_ctype(
OnigEncoding enc, OnigCodePoint code,
864 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
866 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
867 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
875onigenc_with_ascii_strncmp(
OnigEncoding enc,
const UChar* p,
const UChar* end,
876 const UChar* sascii ,
int n)
881 if (p >= end)
return (
int )(*sascii);
883 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
888 p += enclen(enc, p, end);
894onigenc_with_ascii_strnicmp(
OnigEncoding enc,
const UChar* p,
const UChar* end,
895 const UChar* sascii ,
int n)
900 if (p >= end)
return (
int )(*sascii);
902 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
903 if (ONIGENC_IS_ASCII_CODE(c))
904 c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
905 x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;
909 p += enclen(enc, p, end);
917resize_property_list(
int new_size,
const OnigCodePoint*** plist,
int* psize)
920 const OnigCodePoint **list = *plist;
922 size =
sizeof(OnigCodePoint*) * new_size;
924 list = (
const OnigCodePoint** )
xmalloc(size);
925 if (IS_NULL(list))
return ONIGERR_MEMORY;
928 const OnigCodePoint **tmp;
929 tmp = (
const OnigCodePoint** )
xrealloc((
void* )list, size);
930 if (IS_NULL(tmp))
return ONIGERR_MEMORY;
941onigenc_property_list_add_property(UChar* name,
const OnigCodePoint* prop,
942 hash_table_type **table,
const OnigCodePoint*** plist,
int *pnum,
945#define PROP_INIT_SIZE 16
949 if (*psize <= *pnum) {
950 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
951 r = resize_property_list(new_size, plist, psize);
952 if (r != 0)
return r;
955 (*plist)[*pnum] = prop;
957 if (ONIG_IS_NULL(*table)) {
958 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
959 if (ONIG_IS_NULL(*table))
return ONIGERR_MEMORY;
963 onig_st_insert_strend(*table, name, name + strlen((
char* )name),
964 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
970onigenc_ascii_only_case_map(OnigCaseFoldType* flagP,
const OnigUChar** pp,
const OnigUChar* end,
974 OnigUChar *to_start = to;
975 OnigCaseFoldType flags = *flagP;
976 int codepoint_length;
978 while (*pp < end && to < to_end) {
979 codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
980 if (codepoint_length < 0)
981 return codepoint_length;
982 code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
983 *pp += codepoint_length;
985 if (code >=
'a' && code <=
'z' && (flags & ONIGENC_CASE_UPCASE)) {
986 flags |= ONIGENC_CASE_MODIFIED;
988 }
else if (code >=
'A' && code <=
'Z' &&
989 (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
990 flags |= ONIGENC_CASE_MODIFIED;
993 to += ONIGENC_CODE_TO_MBC(enc, code, to);
994 if (flags & ONIGENC_CASE_TITLECASE)
995 flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
998 return (
int )(to - to_start);
1002onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP,
const OnigUChar** pp,
1003 const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
1007 OnigUChar *to_start = to;
1008 OnigCaseFoldType flags = *flagP;
1010 while (*pp < end && to < to_end) {
1013 if (code >=
'a' && code <=
'z' && (flags & ONIGENC_CASE_UPCASE)) {
1014 flags |= ONIGENC_CASE_MODIFIED;
1016 }
else if (code >=
'A' && code <=
'Z' &&
1017 (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
1018 flags |= ONIGENC_CASE_MODIFIED;
1022 if (flags & ONIGENC_CASE_TITLECASE)
1023 flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
1026 return (
int )(to - to_start);
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
VALUE rb_eRangeError
RangeError exception.
int len
Length of the buffer.