12 #include "ruby/internal/config.h"
19 #include "internal/encoding.h"
20 #include "internal/hash.h"
21 #include "internal/imemo.h"
22 #include "internal/re.h"
23 #include "internal/string.h"
24 #include "internal/object.h"
25 #include "internal/ractor.h"
26 #include "internal/variable.h"
/* Fixed-size char array of ONIG_MAX_ERROR_MESSAGE_LEN bytes; the functions in
 * this file take it as their `err` parameter to hand an error message back. */
34 typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
/* Copy the string `msg` into `err` with strlcpy, bounded by
 * ONIG_MAX_ERROR_MESSAGE_LEN (the size of an onig_errmsg_buffer). */
35 #define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
/* Shorthand for entry `no` of regs->beg / regs->end.  Both macros expand to an
 * access on a variable literally named `regs`, so one must be in scope at the
 * point of use. */
37 #define BEG(no) (regs->beg[(no)])
38 #define END(no) (regs->end[(no)])
41 static const char casetable[] = {
42 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
43 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
44 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
45 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
47 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
49 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
51 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
53 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
55 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
57 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
59 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
61 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
63 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
65 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
67 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
69 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
70 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
71 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
72 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
73 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
74 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
75 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
76 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
77 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
78 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
79 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
80 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
81 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
82 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
83 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
84 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
85 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
88 # error >>> "You lose. You will need a translation table for your character set." <<<
/* Global match time limit, initialised to 0.  rb_reg_timeout_p copies it into
 * its local `timelimit`.
 * NOTE(review): 0 presumably means "no limit" -- confirm against the full
 * rb_reg_timeout_p body. */
92 rb_hrtime_t rb_reg_match_time_limit = 0;
97 const unsigned char *p1 = x, *p2 = y;
101 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
109 rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
111 const unsigned char *y;
113 if ((y = memmem(ys, n, xs, m)) != NULL)
120 rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
122 const unsigned char *x = xs, *xe = xs + m;
123 const unsigned char *y = ys, *ye = ys + n;
124 #define VALUE_MAX ((VALUE)~(VALUE)0)
128 rb_bug(
"!!too long pattern string!!");
130 if (!(y = memchr(y, *x, n - m + 1)))
134 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
154 rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
156 const unsigned char *x = xs, *xe = xs + m;
157 const unsigned char *y = ys;
158 VALUE i, qstable[256];
161 for (i = 0; i < 256; ++i)
164 qstable[*x] = xe - x;
166 for (; y + m <= ys + n; y += *(qstable + y[m])) {
167 if (*xs == *y && memcmp(xs, y, m) == 0)
173 static inline unsigned int
174 rb_memsearch_qs_utf8_hash(
const unsigned char *x)
176 register const unsigned int mix = 8353;
177 register unsigned int h = *x;
202 return (
unsigned char)h;
206 rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
208 const unsigned char *x = xs, *xe = xs + m;
209 const unsigned char *y = ys;
210 VALUE i, qstable[512];
213 for (i = 0; i < 512; ++i) {
216 for (; x < xe; ++x) {
217 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
220 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
221 if (*xs == *y && memcmp(xs, y, m) == 0)
228 rb_memsearch_with_char_size(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n,
int char_size)
230 const unsigned char *x = xs, x0 = *xs, *y = ys;
232 for (n -= m; n >= 0; n -= char_size, y += char_size) {
233 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
240 rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
242 return rb_memsearch_with_char_size(xs, m, ys, n, 2);
246 rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
248 return rb_memsearch_with_char_size(xs, m, ys, n, 4);
254 const unsigned char *x = x0, *y = y0;
256 if (m > n)
return -1;
258 return memcmp(x0, y0, m) == 0 ? 0 : -1;
264 const unsigned char *ys = memchr(y, *x, n);
273 return rb_memsearch_ss(x0, m, y0, n);
276 return rb_memsearch_qs_utf8(x0, m, y0, n);
280 return rb_memsearch_wchar(x0, m, y0, n);
283 return rb_memsearch_qchar(x0, m, y0, n);
285 return rb_memsearch_qs(x0, m, y0, n);
/* Object flag bit tested on RBASIC(re)->flags; where it is set, the code adds
 * ARG_ENCODING_NONE to the rebuilt option word. */
288 #define REG_ENCODING_NONE FL_USER6
/* Object flag bit reported by rb_reg_fixed_encoding_p(); where it is set, the
 * code adds ARG_ENCODING_FIXED to the rebuilt option word. */
290 #define KCODE_FIXED FL_USER4
/* Union of the three Onigmo option bits (ignorecase, multiline, extend); used
 * to mask an option word down to just those bits. */
292 #define ARG_REG_OPTION_MASK \
293 (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
/* Extra bits (16 and 32) carried in the same `options` int as the Onigmo
 * option bits; they mirror KCODE_FIXED and REG_ENCODING_NONE respectively.
 * NOTE(review): assumes these values do not collide with the bits in
 * ARG_REG_OPTION_MASK -- confirm against the ONIG_OPTION_* definitions. */
294 #define ARG_ENCODING_FIXED 16
295 #define ARG_ENCODING_NONE 32
298 char_to_option(
int c)
304 val = ONIG_OPTION_IGNORECASE;
307 val = ONIG_OPTION_EXTEND;
310 val = ONIG_OPTION_MULTILINE;
/* Size of the buffer option_to_str() fills.  option_to_str appends at most
 * three letters ('m', 'i', 'x'); the fourth byte is presumably for the
 * terminating NUL -- confirm against the full option_to_str body. */
319 enum { OPTBUF_SIZE = 4 };
322 option_to_str(
char str[OPTBUF_SIZE],
int options)
325 if (options & ONIG_OPTION_MULTILINE) *p++ =
'm';
326 if (options & ONIG_OPTION_IGNORECASE) *p++ =
'i';
327 if (options & ONIG_OPTION_EXTEND) *p++ =
'x';
340 return (*option = ARG_ENCODING_NONE);
342 *kcode = ENCINDEX_EUC_JP;
345 *kcode = ENCINDEX_Windows_31J;
352 return (*option = char_to_option(c));
354 *option = ARG_ENCODING_FIXED;
359 rb_reg_check(
VALUE re)
367 rb_reg_expr_str(
VALUE str,
const char *s,
long len,
370 const char *p, *pend;
375 p = s; pend = p +
len;
382 p += mbclen(p, pend, enc);
410 if (c ==
'\\' && p+clen < pend) {
411 int n = clen + mbclen(p+clen, pend, enc);
419 c = (
unsigned char)*p;
425 rb_str_buf_cat_escaped_char(str, c, unicode_p);
432 else if (c == term) {
444 snprintf(b,
sizeof(b),
"\\x%02X", c);
456 rb_reg_desc(
VALUE re)
476 char opts[OPTBUF_SIZE];
478 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
480 if (
RBASIC(re)->flags & REG_ENCODING_NONE)
506 rb_reg_source(
VALUE re)
527 rb_reg_inspect(
VALUE re)
532 return rb_reg_desc(re);
/* Forward declaration: rb_reg_to_s() calls this with term '/' before the
 * definition appears further down in the file. */
535 static VALUE rb_reg_str_with_term(
VALUE re,
int term);
565 rb_reg_to_s(
VALUE re)
567 return rb_reg_str_with_term(re,
'/');
571 rb_reg_str_with_term(
VALUE re,
int term)
574 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
576 char optbuf[OPTBUF_SIZE + 1];
587 if (
len >= 4 &&
ptr[0] ==
'(' &&
ptr[1] ==
'?') {
590 if ((
len -= 2) > 0) {
592 opt = char_to_option((
int )*
ptr);
602 if (
len > 1 && *
ptr ==
'-') {
606 opt = char_to_option((
int )*
ptr);
628 err = onig_new(&rp,
ptr,
ptr +
len, options,
629 enc, OnigDefaultSyntax, NULL);
642 if ((options & embeddable) != embeddable) {
644 option_to_str(optbuf + 1, ~options);
650 rb_reg_expr_str(str, (
char*)
ptr,
len, enc, NULL, term);
670 rb_reg_expr_str(str, (
char*)
ptr,
len, enc, NULL, term);
/* Prototype for rb_reg_raise(err, re), declared through the NORETURN()
 * wrapper -- i.e. presumably marked as never returning to its caller. */
680 NORETURN(
static void rb_reg_raise(
const char *err,
VALUE re));
683 rb_reg_raise(
const char *err,
VALUE re)
685 VALUE desc = rb_reg_desc(re);
691 rb_enc_reg_error_desc(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
693 char opts[OPTBUF_SIZE + 1];
700 rb_reg_expr_str(desc, s,
len, enc, resenc,
'/');
702 option_to_str(opts + 1, options);
/* Prototype for rb_enc_reg_raise(), declared through the NORETURN() wrapper
 * (presumably: never returns).  Takes the pattern as a raw (s, len, enc)
 * triple plus the option word and the error message. */
707 NORETURN(
static void rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err));
710 rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
716 rb_reg_error_desc(
VALUE str,
int options,
const char *err)
/* Prototype for rb_reg_raise_str(), declared through the NORETURN() wrapper
 * (presumably: never returns).  Same role as rb_enc_reg_raise but takes the
 * pattern as a VALUE string. */
722 NORETURN(
static void rb_reg_raise_str(
VALUE str,
int options,
const char *err));
725 rb_reg_raise_str(
VALUE str,
int options,
const char *err)
745 rb_reg_casefold_p(
VALUE re)
748 return RBOOL(
RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
790 rb_reg_options_m(
VALUE re)
797 reg_names_iter(
const OnigUChar *name,
const OnigUChar *name_end,
798 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
819 rb_reg_names(
VALUE re)
824 onig_foreach_name(
RREGEXP_PTR(re), reg_names_iter, (
void*)ary);
829 reg_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
830 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
836 for (i = 0; i < back_num; i++)
863 rb_reg_named_captures(
VALUE re)
866 VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
867 onig_foreach_name(reg, reg_named_captures_iter, (
void*)hash);
872 onig_new_with_source(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
874 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
879 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
881 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
884 r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
894 make_regexp(
const char *s,
long len,
rb_encoding *enc,
int flags, onig_errmsg_buffer err,
895 const char *sourcefile,
int sourceline)
908 r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s +
len), flags,
909 enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
911 onig_error_code_to_str((UChar*)err, r, &einfo);
970 match_alloc(
VALUE klass)
974 NEWOBJ_OF(match,
struct RMatch, klass, flags, alloc_size, 0);
987 if (to->allocated)
return 0;
990 if (to->allocated)
return 0;
991 return ONIGERR_MEMORY;
1000 pair_byte_cmp(
const void *pair1,
const void *pair2)
1002 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
1003 #if SIZEOF_LONG > SIZEOF_INT
1004 return diff ? diff > 0 ? 1 : -1 : 0;
1011 update_char_offset(
VALUE match)
1015 int i, num_regs, num_pos;
1025 num_regs = rm->
regs.num_regs;
1034 for (i = 0; i < num_regs; i++) {
1043 for (i = 0; i < num_regs; i++) {
1046 pairs[num_pos++].byte_pos = BEG(i);
1047 pairs[num_pos++].byte_pos = END(i);
1049 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1053 for (i = 0; i < num_pos; i++) {
1054 q = s + pairs[i].byte_pos;
1056 pairs[i].char_pos = c;
1060 for (i = 0; i < num_regs; i++) {
1068 key.byte_pos = BEG(i);
1069 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1072 key.byte_pos = END(i);
1073 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1079 match_check(
VALUE match)
1081 if (!
RMATCH(match)->regexp) {
1098 rm = RMATCH_EXT(obj);
1102 if (RMATCH_EXT(orig)->char_offset_num_allocated) {
1128 match_regexp(
VALUE match)
1132 regexp =
RMATCH(match)->regexp;
1133 if (
NIL_P(regexp)) {
1163 match_names(
VALUE match)
1168 return rb_reg_names(
RMATCH(match)->regexp);
1184 match_size(
VALUE match)
/* Forward declaration.  Per the definition later in the file the parameters
 * are (regs, regexp, name, name_end); it returns -1 when regexp is nil and
 * otherwise the result of onig_name_to_backref_number(). */
1190 static int name_to_backref_number(
struct re_registers *,
VALUE,
const char*,
const char*);
/* Forward declaration, wrapped in NORETURN() (presumably: never returns). */
1191 NORETURN(
static void name_to_backref_error(
VALUE name));
1194 name_to_backref_error(
VALUE name)
1203 if (i < 0 || regs->num_regs <= i)
1208 match_backref_number(
VALUE match,
VALUE backref)
1225 num = name_to_backref_number(regs, regexp, name, name +
RSTRING_LEN(backref));
1228 name_to_backref_error(backref);
1237 return match_backref_number(match, backref);
1252 int i = match_backref_number(match, n);
1256 backref_number_check(regs, i);
1261 update_char_offset(match);
1263 LONG2NUM(RMATCH_EXT(match)->char_offset[i].end));
1287 int i = match_backref_number(match, n);
1291 backref_number_check(regs, i);
1311 int i = match_backref_number(match, n);
1315 backref_number_check(regs, i);
1335 int i = match_backref_number(match, n);
1339 backref_number_check(regs, i);
1359 int i = match_backref_number(match, n);
1363 backref_number_check(regs, i);
1368 update_char_offset(match);
1369 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg);
1385 int i = match_backref_number(match, n);
1389 backref_number_check(regs, i);
1394 update_char_offset(match);
1395 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].end);
1427 int i = match_backref_number(match, n);
1430 backref_number_check(regs, i);
1432 long start = BEG(i), end = END(i);
1471 int i = match_backref_number(match, n);
1475 backref_number_check(regs, i);
1480 update_char_offset(match);
1482 &RMATCH_EXT(match)->char_offset[i];
/* Object flag bit set on a match object via FL_SET(match, MATCH_BUSY). */
1486 #define MATCH_BUSY FL_USER2
1491 FL_SET(match, MATCH_BUSY);
1495 rb_match_unbusy(
VALUE match)
1501 rb_match_count(
VALUE match)
1504 if (
NIL_P(match))
return -1;
1506 if (!regs)
return -1;
1507 return regs->num_regs;
1518 int err = onig_region_resize(&rmatch->
regs, 1);
1520 rmatch->
regs.beg[0] = pos;
1521 rmatch->
regs.end[0] = pos +
len;
1525 rb_backref_set_string(
VALUE string,
long pos,
long len)
1531 match_set_string(match,
string, pos,
len);
1565 rb_reg_fixed_encoding_p(
VALUE re)
1567 return RBOOL(
FL_TEST(re, KCODE_FIXED));
1571 rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
1572 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options);
1580 "incompatible encoding regexp match (%s regexp with %s string)",
1599 int cr = str_coderange(
str);
1603 "invalid byte sequence in %s",
1616 reg_enc_error(re,
str);
1618 else if (rb_reg_fixed_encoding_p(re)) {
1621 reg_enc_error(re,
str);
1625 else if (warn && (
RBASIC(re)->flags & REG_ENCODING_NONE) &&
1628 rb_warn(
"historical binary regexp match /.../n against %s string",
1644 if (reg->enc == enc)
return reg;
1651 onig_errmsg_buffer err =
"";
1652 unescaped = rb_reg_preprocess(
1654 &fixed_enc, err, 0);
1656 if (
NIL_P(unescaped)) {
1661 rb_hrtime_t timelimit = reg->timelimit;
1668 if (
RREGEXP(re)->usecnt == 0) {
1670 r = onig_new_without_alloc(&tmp_reg, (UChar *)
ptr, (UChar *)(
ptr +
len),
1672 OnigDefaultSyntax, &einfo);
1676 onig_free_body(&tmp_reg);
1679 onig_free_body(reg);
1685 r = onig_new(®, (UChar *)
ptr, (UChar *)(
ptr +
len),
1687 OnigDefaultSyntax, &einfo);
1691 onig_error_code_to_str((UChar*)err, r, &einfo);
1692 rb_reg_raise(err, re);
1695 reg->timelimit = timelimit;
1710 if (!tmpreg)
RREGEXP(re)->usecnt++;
1712 OnigPosition result = match(reg,
str, regs, args);
1714 if (!tmpreg)
RREGEXP(re)->usecnt--;
1720 onig_region_free(regs, 0);
1725 case ONIGERR_TIMEOUT:
1726 rb_raise(rb_eRegexpTimeoutError,
"regexp match timeout");
1728 onig_errmsg_buffer err =
"";
1729 onig_error_code_to_str((UChar*)err, (
int)result);
1730 rb_reg_raise(err, re);
1745 enc = rb_reg_prepare_enc(re,
str, 0);
1754 if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos <
RSTRING_LEN(
str)) {
1758 p = onigenc_get_right_adjust_char_head(enc,
string,
string + pos,
string +
RSTRING_LEN(
str));
1761 p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,
string,
string + pos,
string +
RSTRING_LEN(
str));
1786 (UChar *)(
ptr + args->pos),
1787 (UChar *)(
ptr + args->range),
1794 rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1797 if (pos >
len || pos < 0) {
1804 .range = reverse ? 0 :
len,
1808 OnigPosition result =
rb_reg_onig_match(re, str, reg_onig_search, &args, ®s);
1810 if (result == ONIG_MISMATCH) {
1812 return ONIG_MISMATCH;
1819 if (set_backref_str) {
1832 if (set_match) *set_match = match;
1838 rb_reg_search0(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str)
1840 return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
1846 return rb_reg_search0(re, str, pos, reverse, 1);
1894 if (nth >= regs->num_regs) {
1898 nth += regs->num_regs;
1899 if (nth <= 0)
return Qnil;
1901 return RBOOL(BEG(nth) != -1);
1908 long start, end,
len;
1914 if (nth >= regs->num_regs) {
1918 nth += regs->num_regs;
1919 if (nth <= 0)
return Qnil;
1922 if (start == -1)
return Qnil;
1962 if (BEG(0) == -1)
return Qnil;
1996 if (BEG(0) == -1)
return Qnil;
1997 str =
RMATCH(match)->str;
2004 match_last_index(
VALUE match)
2009 if (
NIL_P(match))
return -1;
2012 if (BEG(0) == -1)
return -1;
2014 for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
2022 int i = match_last_index(match);
2023 if (i <= 0)
return Qnil;
2029 rb_reg_last_defined(
VALUE match)
2031 int i = match_last_index(match);
2032 if (i < 0)
return Qnil;
2037 last_match_getter(
ID _x,
VALUE *_y)
2043 prematch_getter(
ID _x,
VALUE *_y)
2049 postmatch_getter(
ID _x,
VALUE *_y)
2055 last_paren_match_getter(
ID _x,
VALUE *_y)
2061 match_array(
VALUE match,
int start)
2071 target =
RMATCH(match)->str;
2073 for (i=start; i<regs->num_regs; i++) {
2074 if (regs->beg[i] == -1) {
2101 match_to_a(
VALUE match)
2103 return match_array(match, 0);
2123 match_captures(
VALUE match)
2125 return match_array(match, 1);
2129 name_to_backref_number(
struct re_registers *regs,
VALUE regexp,
const char* name,
const char* name_end)
2131 if (
NIL_P(regexp))
return -1;
2132 return onig_name_to_backref_number(
RREGEXP_PTR(regexp),
2133 (
const unsigned char *)name, (
const unsigned char *)name_end, regs);
2136 #define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2138 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2139 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2152 num = NAME_TO_NUMBER(regs, re, name,
2155 name_to_backref_error(name);
2161 match_ary_subseq(
VALUE match,
long beg,
long len,
VALUE result)
2164 long j, end = olen < beg+
len ? olen : beg+
len;
2166 if (
len == 0)
return result;
2168 for (j = beg; j < end; j++) {
2171 if (beg +
len > j) {
2192 return match_ary_subseq(match, beg,
len, result);
2235 match_aref(
int argc,
VALUE *argv,
VALUE match)
2242 if (
NIL_P(length)) {
2247 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, idx);
2252 return match_ary_aref(match, idx,
Qnil);
2265 if (beg < 0)
return Qnil;
2267 else if (beg > num_regs) {
2270 if (beg+
len > num_regs) {
2271 len = num_regs - beg;
2273 return match_ary_subseq(match, beg,
len,
Qnil);
2304 match_values_at(
int argc,
VALUE *argv,
VALUE match)
2312 for (i=0; i<argc; i++) {
2317 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, argv[i]);
2322 match_ary_aref(match, argv[i], result);
2349 match_to_s(
VALUE match)
2358 match_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2359 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
2361 struct MEMO *memo = MEMO_CAST(arg);
2362 VALUE hash = memo->v1;
2363 VALUE match = memo->v2;
2364 long symbolize = memo->u3.state;
2368 if (symbolize > 0) {
2377 for (i = 0; i < back_num; i++) {
2425 match_named_captures(
int argc,
VALUE *argv,
VALUE match)
2435 VALUE symbolize_names = 0;
2440 static ID keyword_ids[1];
2442 VALUE symbolize_names_val;
2444 if (!keyword_ids[0]) {
2447 rb_get_kwargs(opt, keyword_ids, 0, 1, &symbolize_names_val);
2448 if (!UNDEF_P(symbolize_names_val) &&
RTEST(symbolize_names_val)) {
2449 symbolize_names = 1;
2454 memo = MEMO_NEW(hash, match, symbolize_names);
2456 onig_foreach_name(
RREGEXP(
RMATCH(match)->regexp)->
ptr, match_named_captures_iter, (
void*)memo);
2478 match_deconstruct_keys(
VALUE match,
VALUE keys)
2486 return rb_hash_new_with_size(0);
2490 h = rb_hash_new_with_size(onig_number_of_names(
RREGEXP_PTR(
RMATCH(match)->regexp)));
2493 memo = MEMO_NEW(h, match, 1);
2495 onig_foreach_name(
RREGEXP_PTR(
RMATCH(match)->regexp), match_named_captures_iter, (
void*)memo);
2503 return rb_hash_new_with_size(0);
2544 match_string(
VALUE match)
2547 return RMATCH(match)->str;
2556 match_inspect_name_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2557 int back_num,
int *back_refs,
OnigRegex regex,
void *arg0)
2562 for (i = 0; i < back_num; i++) {
2563 arg[back_refs[i]].name = name;
2564 arg[back_refs[i]].len = name_end - name;
2591 match_inspect(
VALUE match)
2597 int num_regs = regs->num_regs;
2602 return rb_sprintf(
"#<%"PRIsVALUE
":%p>", cname, (
void*)match);
2604 else if (
NIL_P(regexp)) {
2605 return rb_sprintf(
"#<%"PRIsVALUE
": %"PRIsVALUE
">",
2613 match_inspect_name_iter, names);
2618 for (i = 0; i < num_regs; i++) {
2643 read_escaped_byte(
const char **pp,
const char *end, onig_errmsg_buffer err)
2645 const char *p = *pp;
2647 int meta_prefix = 0, ctrl_prefix = 0;
2650 if (p == end || *p++ !=
'\\') {
2651 errcpy(err,
"too short escaped multibyte character");
2657 errcpy(err,
"too short escape sequence");
2661 case '\\': code =
'\\';
break;
2662 case 'n': code =
'\n';
break;
2663 case 't': code =
'\t';
break;
2664 case 'r': code =
'\r';
break;
2665 case 'f': code =
'\f';
break;
2666 case 'v': code =
'\013';
break;
2667 case 'a': code =
'\007';
break;
2668 case 'e': code =
'\033';
break;
2671 case '0':
case '1':
case '2':
case '3':
2672 case '4':
case '5':
case '6':
case '7':
2681 errcpy(err,
"invalid hex escape");
2689 errcpy(err,
"duplicate meta escape");
2693 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2703 errcpy(err,
"too short meta escape");
2707 if (p == end || *p++ !=
'-') {
2708 errcpy(err,
"too short control escape");
2713 errcpy(err,
"duplicate control escape");
2717 if (p < end && (*p & 0x80) == 0) {
2727 errcpy(err,
"too short control escape");
2731 errcpy(err,
"unexpected escape sequence");
2734 if (code < 0 || 0xff < code) {
2735 errcpy(err,
"invalid escape code");
2749 unescape_escaped_nonascii(
const char **pp,
const char *end,
rb_encoding *enc,
2752 const char *p = *pp;
2754 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2755 char *chbuf = (
char *)area;
2760 memset(chbuf, 0, chmaxlen);
2762 byte = read_escaped_byte(&p, end, err);
2767 area[chlen++] = byte;
2768 while (chlen < chmaxlen &&
2770 byte = read_escaped_byte(&p, end, err);
2774 area[chlen++] = byte;
2779 errcpy(err,
"invalid multibyte escape");
2782 if (1 < chlen || (area[0] & 0x80)) {
2787 else if (*encp != enc) {
2788 errcpy(err,
"escaped non ASCII character in UTF-8 regexp");
2794 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2802 check_unicode_range(
unsigned long code, onig_errmsg_buffer err)
2804 if ((0xd800 <= code && code <= 0xdfff) ||
2806 errcpy(err,
"invalid Unicode range");
2813 append_utf8(
unsigned long uv,
2816 if (check_unicode_range(uv, err) != 0)
2820 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2832 errcpy(err,
"UTF-8 character in non UTF-8 regexp");
2840 unescape_unicode_list(
const char **pp,
const char *end,
2843 const char *p = *pp;
2844 int has_unicode = 0;
2848 while (p < end &&
ISSPACE(*p)) p++;
2855 errcpy(err,
"invalid Unicode range");
2859 if (append_utf8(code, buf, encp, err) != 0)
2863 while (p < end &&
ISSPACE(*p)) p++;
2866 if (has_unicode == 0) {
2867 errcpy(err,
"invalid Unicode list");
2877 unescape_unicode_bmp(
const char **pp,
const char *end,
2880 const char *p = *pp;
2885 errcpy(err,
"invalid Unicode escape");
2890 errcpy(err,
"invalid Unicode escape");
2893 if (append_utf8(code, buf, encp, err) != 0)
2900 unescape_nonascii0(
const char **pp,
const char *end,
rb_encoding *enc,
2902 onig_errmsg_buffer err,
int options,
int recurse)
2904 const char *p = *pp;
2907 int in_char_class = 0;
2909 int extended_mode = options & ONIG_OPTION_EXTEND;
2916 errcpy(err,
"invalid multibyte character");
2920 if (1 < chlen || (*p & 0x80)) {
2926 else if (*encp != enc) {
2927 errcpy(err,
"non ASCII character in UTF-8 regexp");
2936 errcpy(err,
"too short escape sequence");
2941 goto invalid_multibyte;
2950 case '1':
case '2':
case '3':
2951 case '4':
case '5':
case '6':
case '7':
2953 size_t len = end-(p-1), octlen;
2970 if (rb_is_usascii_enc(enc)) {
2971 const char *pbeg = p;
2972 int byte = read_escaped_byte(&p, end, err);
2973 if (
byte == -1)
return -1;
2978 if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
2985 errcpy(err,
"too short escape sequence");
2991 if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
2993 if (p == end || *p++ !=
'}') {
2994 errcpy(err,
"invalid Unicode list");
3001 if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
3023 if (extended_mode && !in_char_class) {
3025 while ((p < end) && ((c = *p++) !=
'\n')) {
3039 if (in_char_class) {
3046 if (!in_char_class && recurse) {
3047 if (--parens == 0) {
3054 if (!in_char_class && p + 1 < end && *p ==
'?') {
3055 if (*(p+1) ==
'#') {
3057 const char *orig_p = p;
3060 while (cont && (p < end)) {
3063 if (!(c & 0x80))
break;
3072 goto invalid_multibyte;
3093 int local_extend = 0;
3100 for (s = p+1; s < end; s++) {
3103 local_extend = invert ? -1 : 1;
3110 if (local_extend == 0 ||
3111 (local_extend == -1 && !extended_mode) ||
3112 (local_extend == 1 && extended_mode)) {
3119 int local_options = options;
3120 if (local_extend == 1) {
3121 local_options |= ONIG_OPTION_EXTEND;
3124 local_options &= ~ONIG_OPTION_EXTEND;
3128 int ret = unescape_nonascii0(&p, end, enc, buf, encp,
3131 if (ret < 0)
return ret;
3136 extended_mode = local_extend == 1;
3153 else if (!in_char_class && recurse) {
3171 unescape_nonascii(
const char *p,
const char *end,
rb_encoding *enc,
3173 onig_errmsg_buffer err,
int options)
3175 return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
3180 rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
3181 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options)
3184 int has_property = 0;
3195 if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err, options) != 0)
3198 if (has_property && !*fixed_enc) {
3210 rb_reg_check_preprocess(
VALUE str)
3213 onig_errmsg_buffer err =
"";
3223 buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err, 0);
3227 return rb_reg_error_desc(str, 0, err);
3233 rb_reg_preprocess_dregexp(
VALUE ary,
int options)
3237 onig_errmsg_buffer err =
"";
3253 if (options & ARG_ENCODING_NONE &&
3254 src_enc != ascii8bit) {
3258 src_enc = ascii8bit;
3265 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err, options);
3270 if (fixed_enc != 0) {
3271 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
3275 regexp_enc = fixed_enc;
3291 rb_reg_initialize_check(
VALUE obj)
3293 rb_check_frozen(obj);
3301 int options, onig_errmsg_buffer err,
3302 const char *sourcefile,
int sourceline)
3309 rb_reg_initialize_check(obj);
3312 errcpy(err,
"can't make regexp with dummy encoding");
3316 unescaped = rb_reg_preprocess(s, s+
len, enc, &fixed_enc, err, options);
3317 if (
NIL_P(unescaped))
3321 if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
3322 (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
3323 errcpy(err,
"incompatible character encoding");
3326 if (fixed_enc != a_enc) {
3327 options |= ARG_ENCODING_FIXED;
3331 else if (!(options & ARG_ENCODING_FIXED)) {
3336 if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
3339 if (options & ARG_ENCODING_NONE) {
3344 options & ARG_REG_OPTION_MASK, err,
3345 sourcefile, sourceline);
3346 if (!re->
ptr)
return -1;
3355 if (regenc != enc) {
3362 rb_reg_initialize_str(
VALUE obj,
VALUE str,
int options, onig_errmsg_buffer err,
3363 const char *sourcefile,
int sourceline)
3367 if (options & ARG_ENCODING_NONE) {
3369 if (enc != ascii8bit) {
3371 errcpy(err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3378 options, err, sourcefile, sourceline);
3379 if (ret == 0) reg_set_source(obj, str, str_enc);
3384 rb_reg_s_alloc(
VALUE klass)
3404 return rb_reg_init_str(rb_reg_alloc(), s, options);
3408 rb_reg_init_str(
VALUE re,
VALUE s,
int options)
3410 onig_errmsg_buffer err =
"";
3412 if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
3413 rb_reg_raise_str(s, options, err);
3422 onig_errmsg_buffer err =
"";
3425 enc, options, err, NULL, 0) != 0) {
3426 rb_reg_raise_str(s, options, err);
3428 reg_set_source(re, s, enc);
3434 rb_reg_new_ary(
VALUE ary,
int opt)
3444 VALUE re = rb_reg_alloc();
3445 onig_errmsg_buffer err =
"";
3447 if (rb_reg_initialize(re, s,
len, enc, options, err, NULL, 0) != 0) {
3448 rb_enc_reg_raise(s,
len, enc, options, err);
3462 rb_reg_compile(
VALUE str,
int options,
const char *sourcefile,
int sourceline)
3464 VALUE re = rb_reg_alloc();
3465 onig_errmsg_buffer err =
"";
3468 if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
/* File-local VALUE with no explicit initializer.  Its uses are not visible in
 * this excerpt.
 * NOTE(review): by its name it presumably caches a Regexp object -- confirm. */
3476 static VALUE reg_cache;
/* Forward declaration: rb_reg_hash() and match_hash() both call reg_hash(). */
3489 static st_index_t reg_hash(
VALUE re);
3501 rb_reg_hash(
VALUE re)
3503 st_index_t hashval = reg_hash(re);
3536 if (re1 == re2)
return Qtrue;
3538 rb_reg_check(re1); rb_reg_check(re2);
3558 match_hash(
VALUE match)
3565 hashval =
rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3588 if (match1 == match2)
return Qtrue;
3592 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3595 if (regs1->num_regs != regs2->num_regs)
return Qfalse;
3596 if (memcmp(regs1->beg, regs2->beg, regs1->num_regs *
sizeof(*regs1->beg)))
return Qfalse;
3597 if (memcmp(regs1->end, regs2->end, regs1->num_regs *
sizeof(*regs1->end)))
return Qfalse;
3602 reg_operand(
VALUE s,
int check)
3624 *strp = str = reg_operand(str, TRUE);
3635 return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3697 long pos = reg_match_pos(re, &str, 0, NULL);
3698 if (pos < 0)
return Qnil;
3728 str = reg_operand(str, FALSE);
3734 return RBOOL(start >= 0);
3811 rb_reg_match_m(
int argc,
VALUE *argv,
VALUE re)
3816 if (
rb_scan_args(argc, argv,
"11", &str, &initpos) == 2) {
3823 pos = reg_match_pos(re, &str, pos, &result);
3852 rb_reg_match_m_p(
int argc,
VALUE *argv,
VALUE re)
3855 return rb_reg_match_p(re, argv[0], pos);
3859 rb_reg_match_p(
VALUE re,
VALUE str,
long pos)
3866 if (pos < 0)
return Qfalse;
3891 str_to_option(
VALUE str)
3897 if (
NIL_P(str))
return -1;
3899 for (
long i = 0; i <
len; ++i) {
3900 int f = char_to_option(
ptr[i]);
3910 set_timeout(rb_hrtime_t *hrt,
VALUE timeout)
3912 double timeout_d =
NIL_P(timeout) ? 0.0 :
NUM2DBL(timeout);
3913 if (!
NIL_P(timeout) && timeout_d <= 0) {
3916 double2hrtime(hrt, timeout_d);
3925 rb_reg_initialize_check(copy);
3926 if ((r = onig_reg_copy(&re,
RREGEXP_PTR(orig))) != 0) {
/* Local prototype for a variadic, externally linked function; neither its
 * definition nor a call to it is visible in this excerpt (presumably defined
 * in another translation unit -- confirm). */
3948 void rb_warn_deprecated_to_remove(
const char *removal,
const char *fmt,
const char *suggest, ...);
4005 rb_reg_initialize_m(
int argc,
VALUE *argv,
VALUE self)
4008 VALUE re = reg_extract_args(argc, argv, &args);
4017 set_timeout(&
RREGEXP_PTR(
self)->timelimit, args.timeout);
4032 args->timeout =
Qnil;
4033 if (!
NIL_P(kwargs)) {
4034 static ID keywords[1];
4055 else if ((f = str_to_option(opts)) >= 0) flags = f;
4056 else if (rb_bool_expected(opts,
"ignorecase", FALSE))
4057 flags = ONIG_OPTION_IGNORECASE;
4063 args->flags = flags;
4071 rb_reg_init_str_enc(
self, str, enc, flags);
4073 rb_reg_init_str(
self, str, flags);
4091 s += mbclen(s, send, enc);
4095 case '[':
case ']':
case '{':
case '}':
4096 case '(':
case ')':
case '|':
case '-':
4097 case '*':
case '.':
case '\\':
4098 case '?':
case '+':
case '^':
case '$':
4100 case '\t':
case '\f':
case '\v':
case '\n':
case '\r':
4122 memcpy(t, p, s - p);
4128 int n = mbclen(s, send, enc);
4136 case '[':
case ']':
case '{':
case '}':
4137 case '(':
case ')':
case '|':
case '-':
4138 case '*':
case '.':
case '\\':
4139 case '?':
case '+':
case '^':
case '$':
4203 options =
RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
4204 if (
RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
4205 if (
RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
4210 rb_check_regexp_type(
VALUE re)
4236 return rb_check_regexp_type(re);
4249 else if (argc == 1) {
4251 VALUE re = rb_check_regexp_type(arg);
4256 quoted = rb_reg_s_quote(
Qnil, arg);
4265 int has_asciionly = 0;
4269 for (i = 0; i < argc; i++) {
4276 v = rb_check_regexp_type(e);
4280 if (!has_ascii_incompat)
4281 has_ascii_incompat = enc;
4282 else if (has_ascii_incompat != enc)
4286 else if (rb_reg_fixed_encoding_p(v)) {
4287 if (!has_ascii_compat_fixed)
4288 has_ascii_compat_fixed = enc;
4289 else if (has_ascii_compat_fixed != enc)
4296 v = rb_reg_str_with_term(v, -1);
4303 if (!has_ascii_incompat)
4304 has_ascii_incompat = enc;
4305 else if (has_ascii_incompat != enc)
4313 if (!has_ascii_compat_fixed)
4314 has_ascii_compat_fixed = enc;
4315 else if (has_ascii_compat_fixed != enc)
4319 v = rb_reg_s_quote(
Qnil, e);
4321 if (has_ascii_incompat) {
4322 if (has_asciionly) {
4326 if (has_ascii_compat_fixed) {
4338 if (has_ascii_incompat) {
4339 result_enc = has_ascii_incompat;
4341 else if (has_ascii_compat_fixed) {
4342 result_enc = has_ascii_compat_fixed;
4393 return rb_reg_s_union(
self, v);
4395 return rb_reg_s_union(
self, args);
4420 rb_reg_s_linear_time_p(
int argc,
VALUE *argv,
VALUE self)
4423 VALUE re = reg_extract_args(argc, argv, &args);
4426 re =
reg_init_args(rb_reg_alloc(), args.str, args.enc, args.flags);
4429 return RBOOL(onig_check_linear_time(
RREGEXP_PTR(re)));
4438 return reg_copy(copy, re);
4451 #define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
4458 int c = ASCGET(s, e, &clen);
4462 s += mbclen(s, e, str_enc);
4468 if (c !=
'\\' || s == e)
continue;
4475 c = ASCGET(s, e, &clen);
4477 s += mbclen(s, e, str_enc);
4486 case '1':
case '2':
case '3':
case '4':
4487 case '5':
case '6':
case '7':
case '8':
case '9':
4488 if (!
NIL_P(regexp) && onig_noname_group_capture_is_active(
RREGEXP_PTR(regexp))) {
4497 if (s < e && ASCGET(s, e, &clen) ==
'<') {
4498 char *name, *name_end;
4500 name_end = name = s + clen;
4501 while (name_end < e) {
4502 c = ASCGET(name_end, e, &clen);
4503 if (c ==
'>')
break;
4504 name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
4508 (
long)(name_end - name));
4509 if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
4510 name_to_backref_error(n);
4512 p = s = name_end + clen;
4537 no = regs->num_regs-1;
4538 while (BEG(no) == -1 && no > 0) no--;
4539 if (no == 0)
continue;
4552 if (no >= regs->num_regs)
continue;
4553 if (BEG(no) == -1)
continue;
4558 if (!val)
return str;
4567 ignorecase_getter(
ID _x,
VALUE *_y)
4590 get_LAST_MATCH_INFO(
ID _x,
VALUE *_y)
4592 return match_getter();
4643 rb_reg_s_last_match(
int argc,
VALUE *argv,
VALUE _)
4649 n = match_backref_number(match, argv[0]);
4652 return match_getter();
4656 re_warn(
const char *s)
4663 rb_reg_timeout_p(
regex_t *reg,
void *end_time_)
4665 rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
4667 if (*end_time == 0) {
4671 rb_hrtime_t timelimit = reg->timelimit;
4675 timelimit = rb_reg_match_time_limit;
4679 *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
4683 *end_time = RB_HRTIME_MAX;
4687 if (*end_time < rb_hrtime_now()) {
4705 rb_reg_s_timeout_get(
VALUE dummy)
4707 double d = hrtime2double(rb_reg_match_time_limit);
4708 if (d == 0.0)
return Qnil;
4726 rb_reg_s_timeout_set(
VALUE dummy,
VALUE timeout)
4728 rb_ractor_ensure_main_ractor(
"can not access Regexp.timeout from non-main Ractors");
4730 set_timeout(&rb_reg_match_time_limit, timeout);
4751 rb_reg_timeout_get(
VALUE re)
4754 double d = hrtime2double(
RREGEXP_PTR(re)->timelimit);
4755 if (d == 0.0)
return Qnil;
4782 onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4783 onig_set_warn_func(re_warn);
4784 onig_set_verb_warn_func(re_warn);
4792 rb_gvar_ractor_local(
"$~");
4793 rb_gvar_ractor_local(
"$&");
4794 rb_gvar_ractor_local(
"$`");
4795 rb_gvar_ractor_local(
"$'");
4796 rb_gvar_ractor_local(
"$+");
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isspace(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isspace(), except it additionally takes an encoding.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define rb_str_new2
Old name of rb_str_new_cstr.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define REALLOC_N
Old name of RB_REALLOC_N.
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define FIX2INT
Old name of RB_FIX2INT.
#define NUM2DBL
Old name of rb_num2dbl.
#define rb_str_new3
Old name of rb_str_new_shared.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define rb_exc_new3
Old name of rb_exc_new_str.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define NUM2INT
Old name of RB_NUM2INT.
#define INT2NUM
Old name of RB_INT2NUM.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define DBL2NUM
Old name of rb_float_new.
#define T_MATCH
Old name of RUBY_T_MATCH.
#define FL_TEST
Old name of RB_FL_TEST.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define rb_str_new4
Old name of rb_str_new_frozen.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eStandardError
StandardError exception.
void rb_set_errinfo(VALUE err)
Sets the current exception ($!) to the given value.
VALUE rb_eRegexpError
RegexpError exception.
#define ruby_verbose
This variable controls whether the interpreter is in verbose mode.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports unless $VERBOSE is nil.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_check_convert_type(VALUE val, int type, const char *name, const char *mid)
Identical to rb_convert_type(), except it returns RUBY_Qnil instead of raising exceptions, in case of conversion failure.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
VALUE rb_cMatch
MatchData class.
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords, if any, to the #initialize method.
VALUE rb_cRegexp
Regexp class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT a.k.a. binary.
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Converts a character option to its encoding.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_obj_encoding(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_memerror(void)
Triggers out-of-memory error.
#define RGENGC_WB_PROTECTED_MATCH
This is a compile-time flag to enable/disable write barrier for struct RMatch.
void rb_gc(void)
Triggers a GC process.
void rb_global_variable(VALUE *)
An alias for rb_gc_register_address().
#define RGENGC_WB_PROTECTED_REGEXP
This is a compile-time flag to enable/disable write barrier for struct RRegexp.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
VALUE rb_ary_resize(VALUE ary, long len)
Expands or shrinks the passed array to the passed length.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_entry(VALUE ary, long off)
Queries an element of an array.
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Identical to rb_ary_new_from_values(), except it expects exactly two parameters.
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Inserts or replaces ("upsert"s) the objects into the given hash table.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_lastline_get(void)
Queries the last line, or the $_.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that was captured at the very last.
VALUE rb_reg_match2(VALUE re)
Identical to rb_reg_match(), except it matches against rb_lastline_get() (or, the $_).
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
int rb_memcicmp(const void *s1, const void *s2, long n)
Identical to st_locale_insensitive_strcasecmp(), except it is timing safe and returns something different.
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
#define rb_hash_end(h)
Just another name of st_hash_end.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_buf_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ASCII string.
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method, if any.
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
void rb_define_virtual_variable(const char *name, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter)
Defines a global variable that is purely function-backended.
void rb_define_const(VALUE klass, const char *name, VALUE val)
Defines a Ruby level constant under a namespace.
char * ptr
Pointer to the underlying memory region, of at least capa bytes.
int len
Length of the buffer.
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the given string.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
OnigPosition rb_reg_onig_match(VALUE re, VALUE str, OnigPosition(*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), void *args, struct re_registers *regs)
Runs a regular expression match using function match.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
unsigned long ruby_scan_hex(const char *str, size_t len, size_t *ret)
Interprets the passed string as a hexadecimal unsigned integer.
unsigned long ruby_scan_oct(const char *str, size_t len, size_t *consumed)
Interprets the passed string as an octal unsigned integer.
VALUE rb_sprintf(const char *fmt,...)
Ruby's extended sprintf(3).
VALUE rb_str_catf(VALUE dst, const char *fmt,...)
Identical to rb_sprintf(), except it renders the output to the specified object rather than creating a new one.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
#define RARRAY_LEN
Just another name of rb_array_len.
#define RARRAY_AREF(a, i)
#define RBASIC(obj)
Convenient casting macro.
#define RMATCH(obj)
Convenient casting macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
#define RREGEXP(obj)
Convenient casting macro.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
static char * RREGEXP_SRC_PTR(VALUE rexp)
Convenient getter function.
#define RREGEXP_PTR(obj)
Convenient accessor macro.
static long RREGEXP_SRC_LEN(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C string.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
Regular expression execution context.
VALUE regexp
The expression of this match.
VALUE str
The target string that the match was made against.
Ruby's regular expression.
struct RBasic basic
Basic part, including flags and class.
const VALUE src
Source code of this expression.
unsigned long usecnt
Reference count.
struct re_pattern_buffer * ptr
The pattern buffer.
struct rmatch_offset * char_offset
Capture group offsets, in C array.
int char_offset_num_allocated
Number of rmatch_offset that ::rmatch::char_offset holds.
struct re_registers regs
"Registers" of a match.
Represents the region of a capture group.
long beg
Beginning of a group.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directives.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.