12#include "ruby/internal/config.h"
19#include "internal/bignum.h"
20#include "internal/encoding.h"
21#include "internal/error.h"
22#include "internal/hash.h"
23#include "internal/imemo.h"
24#include "internal/re.h"
25#include "internal/string.h"
26#include "internal/object.h"
27#include "internal/ractor.h"
28#include "internal/variable.h"
33#include "ractor_core.h"
/* Fixed-size char array type for regexp error messages; its capacity is
 * ONIG_MAX_ERROR_MESSAGE_LEN bytes.  Functions in this file take it as an
 * `err` out-parameter and fill it via errcpy() or onig_error_code_to_str(). */
37typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
/* Copy the message `msg` into the error buffer `err` with strlcpy(), bounded
 * by ONIG_MAX_ERROR_MESSAGE_LEN.  The strlcpy() result is not exposed in a
 * usable way by callers in this file, so truncation goes unreported. */
38#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
/* Start position recorded for group `no`.  Expands to an access on a variable
 * literally named `regs`, which must be in scope at the point of use; callers
 * in this file compare the value against -1. */
40#define BEG(no) (regs->beg[(no)])
/* End position recorded for group `no`.  Like BEG(), this relies on a variable
 * literally named `regs` being in scope at the point of use. */
41#define END(no) (regs->end[(no)])
/*
 * Byte folding table, indexed by an unsigned byte value (octal 000..377).
 * Entries at indexes 0101..0132 hold 0141..0172, i.e. on an ASCII character
 * set 'A'..'Z' map to 'a'..'z'; every other entry holds its own index.
 * rb_memcicmp() subtracts two looked-up entries to compare bytes.
 *
 * The element type is plain char, so whether entries above '\177' are
 * negative depends on the platform's char signedness.
 */
44static const char casetable[] = {
45 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
46 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
47 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
48 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
50 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
52 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
54 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
56 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
/* index 0100 maps to itself; indexes 0101..0132 below map to 0141..0172 */
58 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
60 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
62 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
64 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
/* from index 0140 onward every entry is the identity mapping */
66 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
68 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
70 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
72 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
73 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
74 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
75 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
76 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
77 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
78 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
79 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
80 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
81 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
82 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
83 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
84 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
85 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
86 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
87 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
88 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
91# error >>> "You lose. You will need a translation table for your character set." <<<
/* Global (external-linkage) regexp match time limit, stored as rb_hrtime_t and
 * initialised to 0.
 * NOTE(review): 0 presumably means "no limit" - the code that reads this
 * variable is not visible here; confirm before relying on it. */
95rb_hrtime_t rb_reg_match_time_limit = 0;
98rb_memcicmp(
const void *x,
const void *y,
long len)
100 const unsigned char *p1 = x, *p2 = y;
104 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
110#if defined(HAVE_MEMMEM) && !defined(__APPLE__)
112rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
114 const unsigned char *y;
116 if ((y = memmem(ys, n, xs, m)) != NULL)
123rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
125 const unsigned char *x = xs, *xe = xs + m;
126 const unsigned char *y = ys, *ye = ys + n;
127#define VALUE_MAX ((VALUE)~(VALUE)0)
131 rb_bug(
"!!too long pattern string!!");
133 if (!(y = memchr(y, *x, n - m + 1)))
137 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
157rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
159 const unsigned char *x = xs, *xe = xs + m;
160 const unsigned char *y = ys;
161 VALUE i, qstable[256];
164 for (i = 0; i < 256; ++i)
167 qstable[*x] = xe - x;
169 for (; y + m <= ys + n; y += *(qstable + y[m])) {
170 if (*xs == *y && memcmp(xs, y, m) == 0)
176static inline unsigned int
177rb_memsearch_qs_utf8_hash(
const unsigned char *x)
179 register const unsigned int mix = 8353;
180 register unsigned int h = *x;
205 return (
unsigned char)h;
209rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
211 const unsigned char *x = xs, *xe = xs + m;
212 const unsigned char *y = ys;
213 VALUE i, qstable[512];
216 for (i = 0; i < 512; ++i) {
219 for (; x < xe; ++x) {
220 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
223 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
224 if (*xs == *y && memcmp(xs, y, m) == 0)
231rb_memsearch_with_char_size(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n,
int char_size)
233 const unsigned char *x = xs, x0 = *xs, *y = ys;
235 for (n -= m; n >= 0; n -= char_size, y += char_size) {
236 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
243rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
245 return rb_memsearch_with_char_size(xs, m, ys, n, 2);
249rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
251 return rb_memsearch_with_char_size(xs, m, ys, n, 4);
257 const unsigned char *x = x0, *y = y0;
259 if (m > n)
return -1;
261 return memcmp(x0, y0, m) == 0 ? 0 : -1;
267 const unsigned char *ys = memchr(y, *x, n);
274 else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
276 return rb_memsearch_ss(x0, m, y0, n);
278 else if (enc == rb_utf8_encoding()){
279 return rb_memsearch_qs_utf8(x0, m, y0, n);
282 else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
283 return rb_memsearch_wchar(x0, m, y0, n);
285 else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
286 return rb_memsearch_qchar(x0, m, y0, n);
288 return rb_memsearch_qs(x0, m, y0, n);
/* Per-regexp flag bit (FL_USER6).  Code in this file tests it with
 * `RBASIC(re)->flags & REG_ENCODING_NONE`, including on the path that warns
 * about a "historical binary regexp match /.../n". */
291#define REG_ENCODING_NONE FL_USER6
/* Per-regexp flag bit (FL_USER4).  rb_reg_fixed_encoding_p() reports whether
 * it is set, via FL_TEST(re, KCODE_FIXED). */
293#define KCODE_FIXED FL_USER4
302 val = ONIG_OPTION_IGNORECASE;
305 val = ONIG_OPTION_EXTEND;
308 val = ONIG_OPTION_MULTILINE;
/* Size of the buffer option_to_str() fills.  That function can emit up to
 * three flag letters ('m', 'i', 'x').
 * NOTE(review): the fourth byte is presumably for a terminating NUL - the
 * terminator write is not visible here; confirm.
 * Callers that want a leading character declare OPTBUF_SIZE + 1 bytes and
 * pass buf + 1. */
317enum { OPTBUF_SIZE = 4 };
320option_to_str(
char str[OPTBUF_SIZE],
int options)
323 if (options & ONIG_OPTION_MULTILINE) *p++ =
'm';
324 if (options & ONIG_OPTION_IGNORECASE) *p++ =
'i';
325 if (options & ONIG_OPTION_EXTEND) *p++ =
'x';
337 *kcode = rb_ascii8bit_encindex();
338 return (*option = ARG_ENCODING_NONE);
340 *kcode = ENCINDEX_EUC_JP;
343 *kcode = ENCINDEX_Windows_31J;
346 *kcode = rb_utf8_encindex();
350 return (*option = char_to_option(c));
352 *option = ARG_ENCODING_FIXED;
357rb_reg_check(
VALUE re)
365rb_reg_expr_str(
VALUE str,
const char *s,
long len,
368 const char *p, *pend;
373 p = s; pend = p +
len;
377 c = rb_enc_ascget(p, pend, &clen, enc);
380 p += mbclen(p, pend, enc);
404 int unicode_p = rb_enc_unicode_p(enc);
407 c = rb_enc_ascget(p, pend, &clen, enc);
408 if (c ==
'\\' && p+clen < pend) {
409 int n = clen + mbclen(p+clen, pend, enc);
415 clen = rb_enc_precise_mbclen(p, pend, enc);
417 c = (
unsigned char)*p;
422 unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
423 rb_str_buf_cat_escaped_char(str, c, unicode_p);
430 else if (c == term) {
438 else if (!rb_enc_isspace(c, enc)) {
442 snprintf(b,
sizeof(b),
"\\x%02X", c);
458 rb_encoding *resenc = rb_default_internal_encoding();
459 if (resenc == NULL) resenc = rb_default_external_encoding();
461 if (re && rb_enc_asciicompat(enc)) {
462 rb_enc_copy(str, re);
465 rb_enc_associate(str, rb_usascii_encoding());
469 rb_reg_expr_str(str, RSTRING_PTR(src_str), RSTRING_LEN(src_str), enc, resenc,
'/');
474 char opts[OPTBUF_SIZE];
476 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
478 if (
RBASIC(re)->flags & REG_ENCODING_NONE)
504rb_reg_source(
VALUE re)
525rb_reg_inspect(
VALUE re)
530 return rb_reg_desc(re);
533static VALUE rb_reg_str_with_term(
VALUE re,
int term);
565 return rb_reg_str_with_term(re,
'/');
569rb_reg_str_with_term(
VALUE re,
int term)
572 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
574 char optbuf[OPTBUF_SIZE + 1];
579 rb_enc_copy(str, re);
582 const UChar *ptr = (UChar *)RSTRING_PTR(src_str);
583 long len = RSTRING_LEN(src_str);
585 if (
len >= 4 && ptr[0] ==
'(' && ptr[1] ==
'?') {
588 if ((
len -= 2) > 0) {
590 opt = char_to_option((
int )*ptr);
600 if (
len > 1 && *ptr ==
'-') {
604 opt = char_to_option((
int )*ptr);
619 if (*ptr ==
':' && ptr[
len-1] ==
')') {
626 err = onig_new(&rp, ptr, ptr +
len, options,
627 enc, OnigDefaultSyntax, NULL);
640 if ((options & embeddable) != embeddable) {
642 option_to_str(optbuf + 1, ~options);
647 if (rb_enc_asciicompat(enc)) {
648 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
656 rb_enc_associate(str, rb_usascii_encoding());
660 s = RSTRING_PTR(str);
666 rb_str_resize(str, RSTRING_LEN(str) - n);
668 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
671 rb_enc_copy(str, re);
678NORETURN(
static void rb_reg_raise(
const char *err,
VALUE re));
681rb_reg_raise(
const char *err,
VALUE re)
683 VALUE desc = rb_reg_desc(re);
689rb_enc_reg_error_desc(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
691 char opts[OPTBUF_SIZE + 1];
693 rb_encoding *resenc = rb_default_internal_encoding();
694 if (resenc == NULL) resenc = rb_default_external_encoding();
696 rb_enc_associate(desc, enc);
698 rb_reg_expr_str(desc, s,
len, enc, resenc,
'/');
700 option_to_str(opts + 1, options);
705NORETURN(
static void rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err));
708rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
714rb_reg_error_desc(
VALUE str,
int options,
const char *err)
716 return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
717 rb_enc_get(str), options, err);
720NORETURN(
static void rb_reg_raise_str(
VALUE str,
int options,
const char *err));
723rb_reg_raise_str(
VALUE str,
int options,
const char *err)
743rb_reg_casefold_p(
VALUE re)
746 return RBOOL(
RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
788rb_reg_options_m(
VALUE re)
795reg_names_iter(
const OnigUChar *name,
const OnigUChar *name_end,
796 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
799 rb_ary_push(ary, rb_enc_str_new((
const char *)name, name_end-name, regex->enc));
817rb_reg_names(
VALUE re)
822 onig_foreach_name(
RREGEXP_PTR(re), reg_names_iter, (
void*)ary);
827reg_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
828 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
834 for (i = 0; i < back_num; i++)
837 rb_hash_aset(hash,
rb_str_new((
const char*)name, name_end-name),ary);
861rb_reg_named_captures(
VALUE re)
864 VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
865 onig_foreach_name(reg, reg_named_captures_iter, (
void*)hash);
870onig_new_with_source(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
872 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
877 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
879 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
882 r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
892make_regexp(
const char *s,
long len,
rb_encoding *enc,
int flags, onig_errmsg_buffer err,
893 const char *sourcefile,
int sourceline)
906 r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s +
len), flags,
907 enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
909 onig_error_code_to_str((UChar*)err, r, &einfo);
968match_alloc(
VALUE klass)
972 NEWOBJ_OF(match,
struct RMatch, klass, flags, alloc_size, 0);
985 if (to->allocated)
return 0;
988 if (to->allocated)
return 0;
989 return ONIGERR_MEMORY;
998pair_byte_cmp(
const void *pair1,
const void *pair2)
1000 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
1001#if SIZEOF_LONG > SIZEOF_INT
1002 return diff ? diff > 0 ? 1 : -1 : 0;
1009update_char_offset(
VALUE match)
1013 int i, num_regs, num_pos;
1024 num_regs = rm->
regs.num_regs;
1031 enc = rb_enc_get(
RMATCH(match)->str);
1033 for (i = 0; i < num_regs; i++) {
1042 for (i = 0; i < num_regs; i++) {
1045 pairs[num_pos++].byte_pos = BEG(i);
1046 pairs[num_pos++].byte_pos = END(i);
1048 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1050 s = p = RSTRING_PTR(
RMATCH(match)->str);
1052 for (i = 0; i < num_pos; i++) {
1053 q = s + pairs[i].byte_pos;
1055 pairs[i].char_pos = c;
1059 for (i = 0; i < num_regs; i++) {
1067 key.byte_pos = BEG(i);
1068 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1071 key.byte_pos = END(i);
1072 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1080match_check(
VALUE match)
1082 if (!
RMATCH(match)->regexp) {
1099 rm = RMATCH_EXT(obj);
1103 if (RMATCH_EXT(orig)->char_offset_num_allocated) {
1129match_regexp(
VALUE match)
1133 regexp =
RMATCH(match)->regexp;
1134 if (
NIL_P(regexp)) {
1164match_names(
VALUE match)
1169 return rb_reg_names(
RMATCH(match)->regexp);
1185match_size(
VALUE match)
1191static int name_to_backref_number(
const struct re_registers *,
VALUE,
const char*,
const char*);
1192NORETURN(
static void name_to_backref_error(
VALUE name));
1195name_to_backref_error(
VALUE name)
1197 rb_raise(
rb_eIndexError,
"undefined group name reference: % "PRIsVALUE,
1204 if (i < 0 || regs->num_regs <= i)
1209match_backref_number(
VALUE match,
VALUE backref)
1226 num = name_to_backref_number(regs, regexp, name, name + RSTRING_LEN(backref));
1229 name_to_backref_error(backref);
1238 return match_backref_number(match, backref);
1253 int i = match_backref_number(match, n);
1257 backref_number_check(regs, i);
1262 update_char_offset(match);
1264 LONG2NUM(RMATCH_EXT(match)->char_offset[i].end));
1288 int i = match_backref_number(match, n);
1292 backref_number_check(regs, i);
1312 int i = match_backref_number(match, n);
1316 backref_number_check(regs, i);
1336 int i = match_backref_number(match, n);
1340 backref_number_check(regs, i);
1360 int i = match_backref_number(match, n);
1364 backref_number_check(regs, i);
1369 update_char_offset(match);
1370 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg);
1386 int i = match_backref_number(match, n);
1390 backref_number_check(regs, i);
1395 update_char_offset(match);
1396 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].end);
1428 int i = match_backref_number(match, n);
1431 backref_number_check(regs, i);
1433 long start = BEG(i), end = END(i);
1472 int i = match_backref_number(match, n);
1476 backref_number_check(regs, i);
1481 update_char_offset(match);
1483 &RMATCH_EXT(match)->char_offset[i];
/* Per-match-object flag bit (FL_USER2); set in this file with
 * FL_SET(match, MATCH_BUSY).
 * NOTE(review): presumably cleared by rb_match_unbusy() - its body is not
 * visible here; confirm. */
1487#define MATCH_BUSY FL_USER2
1492 FL_SET(match, MATCH_BUSY);
1496rb_match_unbusy(
VALUE match)
1502rb_match_count(
VALUE match)
1505 if (
NIL_P(match))
return -1;
1507 if (!regs)
return -1;
1508 return regs->num_regs;
1519 int err = onig_region_resize(&rmatch->
regs, 1);
1520 if (err) rb_memerror();
1521 rmatch->
regs.beg[0] = pos;
1522 rmatch->
regs.end[0] = pos +
len;
1526rb_backref_set_string(
VALUE string,
long pos,
long len)
1532 match_set_string(match,
string, pos,
len);
1567rb_reg_fixed_encoding_p(
VALUE re)
1569 return RBOOL(
FL_TEST(re, KCODE_FIXED));
1573rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
1574 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options);
1582 "incompatible encoding regexp match (%s regexp with %s string)",
1583 rb_enc_inspect_name(rb_enc_get(re)),
1584 rb_enc_inspect_name(rb_enc_get(
str)));
1601 int cr = str_coderange(
str);
1604 rb_raise(rb_eArgError,
1605 "invalid byte sequence in %s",
1606 rb_enc_name(rb_enc_get(
str)));
1610 enc = rb_enc_get(
str);
1617 else if (!rb_enc_asciicompat(enc)) {
1618 reg_enc_error(re,
str);
1620 else if (rb_reg_fixed_encoding_p(re)) {
1623 reg_enc_error(re,
str);
1627 else if (warn && (
RBASIC(re)->flags & REG_ENCODING_NONE) &&
1628 enc != rb_ascii8bit_encoding() &&
1630 rb_warn(
"historical binary regexp match /.../n against %s string",
1646 if (reg->enc == enc)
return reg;
1651 const char *pattern = RSTRING_PTR(src_str);
1653 onig_errmsg_buffer err =
"";
1654 unescaped = rb_reg_preprocess(
1655 pattern, pattern + RSTRING_LEN(src_str), enc,
1656 &fixed_enc, err, 0);
1658 if (
NIL_P(unescaped)) {
1659 rb_raise(rb_eArgError,
"regexp preprocess failed: %s", err);
1663 rb_hrtime_t timelimit = reg->timelimit;
1670 if (ruby_single_main_ractor &&
RREGEXP(re)->usecnt == 0) {
1672 r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr +
len),
1674 OnigDefaultSyntax, &einfo);
1678 onig_free_body(&tmp_reg);
1681 onig_free_body(reg);
1687 r = onig_new(®, (UChar *)ptr, (UChar *)(ptr +
len),
1689 OnigDefaultSyntax, &einfo);
1693 onig_error_code_to_str((UChar*)err, r, &einfo);
1694 rb_reg_raise(err, re);
1697 reg->timelimit = timelimit;
1712 if (!tmpreg)
RREGEXP(re)->usecnt++;
1714 OnigPosition result = match(reg,
str, regs, args);
1716 if (!tmpreg)
RREGEXP(re)->usecnt--;
1722 onig_region_free(regs, 0);
1727 case ONIGERR_TIMEOUT:
1728 rb_raise(rb_eRegexpTimeoutError,
"regexp match timeout");
1730 onig_errmsg_buffer err =
"";
1731 onig_error_code_to_str((UChar*)err, (
int)result);
1732 rb_reg_raise(err, re);
1747 enc = rb_reg_prepare_enc(re,
str, 0);
1753 range = RSTRING_LEN(
str) - pos;
1756 if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(
str)) {
1757 string = (UChar*)RSTRING_PTR(
str);
1760 p = onigenc_get_right_adjust_char_head(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1763 p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1787 (UChar *)(ptr +
len),
1788 (UChar *)(ptr + args->pos),
1789 (UChar *)(ptr + args->range),
1796rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1798 long len = RSTRING_LEN(str);
1799 if (pos >
len || pos < 0) {
1806 .range = reverse ? 0 :
len,
1810 OnigPosition result =
rb_reg_onig_match(re, str, reg_onig_search, &args, ®s);
1812 if (result == ONIG_MISMATCH) {
1814 return ONIG_MISMATCH;
1834 onig_region_free(&RMATCH_EXT(match)->regs,
false);
1840 if (set_backref_str) {
1854 if (set_match) *set_match = match;
1860rb_reg_search0(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *match)
1862 return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, match);
1868 return rb_reg_search_set_match(re, str, pos, reverse, 1, NULL);
1881 (UChar *)(ptr +
len),
1916 if (nth >= regs->num_regs) {
1920 nth += regs->num_regs;
1921 if (nth <= 0)
return Qnil;
1923 return RBOOL(BEG(nth) != -1);
1930 long start, end,
len;
1936 if (nth >= regs->num_regs) {
1940 nth += regs->num_regs;
1941 if (nth <= 0)
return Qnil;
1944 if (start == -1)
return Qnil;
1984 if (BEG(0) == -1)
return Qnil;
2018 if (BEG(0) == -1)
return Qnil;
2019 str =
RMATCH(match)->str;
2026match_last_index(
VALUE match)
2031 if (
NIL_P(match))
return -1;
2034 if (BEG(0) == -1)
return -1;
2036 for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
2044 int i = match_last_index(match);
2045 if (i <= 0)
return Qnil;
2051rb_reg_last_defined(
VALUE match)
2053 int i = match_last_index(match);
2054 if (i < 0)
return Qnil;
2059last_match_getter(
ID _x,
VALUE *_y)
2065prematch_getter(
ID _x,
VALUE *_y)
2071postmatch_getter(
ID _x,
VALUE *_y)
2077last_paren_match_getter(
ID _x,
VALUE *_y)
2083match_array(
VALUE match,
int start)
2093 target =
RMATCH(match)->str;
2095 for (i=start; i<regs->num_regs; i++) {
2096 if (regs->beg[i] == -1) {
2123match_to_a(
VALUE match)
2125 return match_array(match, 0);
2145match_captures(
VALUE match)
2147 return match_array(match, 1);
2151name_to_backref_number(
const struct re_registers *regs,
VALUE regexp,
const char* name,
const char* name_end)
2153 if (
NIL_P(regexp))
return -1;
2154 return onig_name_to_backref_number(
RREGEXP_PTR(regexp),
2155 (
const unsigned char *)name, (
const unsigned char *)name_end, regs);
2158#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2160 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2161 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2174 num = NAME_TO_NUMBER(regs, re, name,
2177 name_to_backref_error(name);
2183match_ary_subseq(
VALUE match,
long beg,
long len,
VALUE result)
2186 long j, end = olen < beg+
len ? olen : beg+
len;
2188 if (
len == 0)
return result;
2190 for (j = beg; j < end; j++) {
2193 if (beg +
len > j) {
2214 return match_ary_subseq(match, beg,
len, result);
2257match_aref(
int argc,
VALUE *argv,
VALUE match)
2264 if (
NIL_P(length)) {
2269 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, idx);
2274 return match_ary_aref(match, idx,
Qnil);
2287 if (beg < 0)
return Qnil;
2289 else if (beg > num_regs) {
2292 if (beg+
len > num_regs) {
2293 len = num_regs - beg;
2295 return match_ary_subseq(match, beg,
len,
Qnil);
2326match_values_at(
int argc,
VALUE *argv,
VALUE match)
2334 for (i=0; i<argc; i++) {
2339 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, argv[i]);
2344 match_ary_aref(match, argv[i], result);
2371match_to_s(
VALUE match)
2380match_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2381 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
2383 struct MEMO *memo = MEMO_CAST(arg);
2384 VALUE hash = memo->v1;
2385 VALUE match = memo->v2;
2386 long symbolize = memo->u3.state;
2388 VALUE key = rb_enc_str_new((
const char *)name, name_end-name, regex->enc);
2390 if (symbolize > 0) {
2399 for (i = 0; i < back_num; i++) {
2402 rb_hash_aset(hash, key, value);
2408 rb_hash_aset(hash, key,
Qnil);
2447match_named_captures(
int argc,
VALUE *argv,
VALUE match)
2454 return rb_hash_new();
2457 VALUE symbolize_names = 0;
2462 static ID keyword_ids[1];
2464 VALUE symbolize_names_val;
2466 if (!keyword_ids[0]) {
2469 rb_get_kwargs(opt, keyword_ids, 0, 1, &symbolize_names_val);
2470 if (!UNDEF_P(symbolize_names_val) &&
RTEST(symbolize_names_val)) {
2471 symbolize_names = 1;
2475 hash = rb_hash_new();
2476 memo = rb_imemo_memo_new(hash, match, symbolize_names);
2478 onig_foreach_name(
RREGEXP(
RMATCH(match)->regexp)->ptr, match_named_captures_iter, (
void*)memo);
2500match_deconstruct_keys(
VALUE match,
VALUE keys)
2508 return rb_hash_new_with_size(0);
2512 h = rb_hash_new_with_size(onig_number_of_names(
RREGEXP_PTR(
RMATCH(match)->regexp)));
2515 memo = rb_imemo_memo_new(h, match, 1);
2517 onig_foreach_name(
RREGEXP_PTR(
RMATCH(match)->regexp), match_named_captures_iter, (
void*)memo);
2525 return rb_hash_new_with_size(0);
2566match_string(
VALUE match)
2569 return RMATCH(match)->str;
2578match_inspect_name_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2579 int back_num,
int *back_refs,
OnigRegex regex,
void *arg0)
2584 for (i = 0; i < back_num; i++) {
2585 arg[back_refs[i]].name = name;
2586 arg[back_refs[i]].len = name_end - name;
2613match_inspect(
VALUE match)
2619 int num_regs = regs->num_regs;
2625 return rb_sprintf(
"#<%"PRIsVALUE
":%p>", cname, (
void*)match);
2627 else if (
NIL_P(regexp)) {
2628 return rb_sprintf(
"#<%"PRIsVALUE
": %"PRIsVALUE
">",
2636 match_inspect_name_iter, names);
2641 for (i = 0; i < num_regs; i++) {
2648 rb_str_catf(str,
"%d", i);
2667read_escaped_byte(
const char **pp,
const char *end, onig_errmsg_buffer err)
2669 const char *p = *pp;
2671 int meta_prefix = 0, ctrl_prefix = 0;
2674 if (p == end || *p++ !=
'\\') {
2675 errcpy(err,
"too short escaped multibyte character");
2681 errcpy(err,
"too short escape sequence");
2685 case '\\': code =
'\\';
break;
2686 case 'n': code =
'\n';
break;
2687 case 't': code =
'\t';
break;
2688 case 'r': code =
'\r';
break;
2689 case 'f': code =
'\f';
break;
2690 case 'v': code =
'\013';
break;
2691 case 'a': code =
'\007';
break;
2692 case 'e': code =
'\033';
break;
2695 case '0':
case '1':
case '2':
case '3':
2696 case '4':
case '5':
case '6':
case '7':
2705 errcpy(err,
"invalid hex escape");
2713 errcpy(err,
"duplicate meta escape");
2717 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2727 errcpy(err,
"too short meta escape");
2731 if (p == end || *p++ !=
'-') {
2732 errcpy(err,
"too short control escape");
2737 errcpy(err,
"duplicate control escape");
2741 if (p < end && (*p & 0x80) == 0) {
2751 errcpy(err,
"too short control escape");
2755 errcpy(err,
"unexpected escape sequence");
2758 if (code < 0 || 0xff < code) {
2759 errcpy(err,
"invalid escape code");
2773unescape_escaped_nonascii(
const char **pp,
const char *end,
rb_encoding *enc,
2776 const char *p = *pp;
2778 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2779 char *chbuf = (
char *)area;
2784 memset(chbuf, 0, chmaxlen);
2786 byte = read_escaped_byte(&p, end, err);
2791 area[chlen++] = byte;
2792 while (chlen < chmaxlen &&
2794 byte = read_escaped_byte(&p, end, err);
2798 area[chlen++] = byte;
2801 l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
2803 errcpy(err,
"invalid multibyte escape");
2806 if (1 < chlen || (area[0] & 0x80)) {
2811 else if (*encp != enc) {
2812 errcpy(err,
"escaped non ASCII character in UTF-8 regexp");
2818 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2826check_unicode_range(
unsigned long code, onig_errmsg_buffer err)
2828 if ((0xd800 <= code && code <= 0xdfff) ||
2830 errcpy(err,
"invalid Unicode range");
2837append_utf8(
unsigned long uv,
2840 if (check_unicode_range(uv, err) != 0)
2844 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2854 *encp = rb_utf8_encoding();
2855 else if (*encp != rb_utf8_encoding()) {
2856 errcpy(err,
"UTF-8 character in non UTF-8 regexp");
2864unescape_unicode_list(
const char **pp,
const char *end,
2867 const char *p = *pp;
2868 int has_unicode = 0;
2872 while (p < end &&
ISSPACE(*p)) p++;
2875 code = ruby_scan_hex(p, end-p, &
len);
2879 errcpy(err,
"invalid Unicode range");
2883 if (append_utf8(code, buf, encp, err) != 0)
2887 while (p < end &&
ISSPACE(*p)) p++;
2890 if (has_unicode == 0) {
2891 errcpy(err,
"invalid Unicode list");
2901unescape_unicode_bmp(
const char **pp,
const char *end,
2904 const char *p = *pp;
2909 errcpy(err,
"invalid Unicode escape");
2912 code = ruby_scan_hex(p, 4, &
len);
2914 errcpy(err,
"invalid Unicode escape");
2917 if (append_utf8(code, buf, encp, err) != 0)
2924unescape_nonascii0(
const char **pp,
const char *end,
rb_encoding *enc,
2926 onig_errmsg_buffer err,
int options,
int recurse)
2928 const char *p = *pp;
2931 int in_char_class = 0;
2933 int extended_mode = options & ONIG_OPTION_EXTEND;
2937 int chlen = rb_enc_precise_mbclen(p, end, enc);
2940 errcpy(err,
"invalid multibyte character");
2944 if (1 < chlen || (*p & 0x80)) {
2950 else if (*encp != enc) {
2951 errcpy(err,
"non ASCII character in UTF-8 regexp");
2960 errcpy(err,
"too short escape sequence");
2963 chlen = rb_enc_precise_mbclen(p, end, enc);
2965 goto invalid_multibyte;
2974 case '1':
case '2':
case '3':
2975 case '4':
case '5':
case '6':
case '7':
2977 size_t len = end-(p-1), octlen;
2978 if (ruby_scan_oct(p-1,
len < 3 ?
len : 3, &octlen) <= 0177) {
2994 if (rb_is_usascii_enc(enc)) {
2995 const char *pbeg = p;
2996 int byte = read_escaped_byte(&p, end, err);
2997 if (
byte == -1)
return -1;
3002 if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
3009 errcpy(err,
"too short escape sequence");
3015 if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
3017 if (p == end || *p++ !=
'}') {
3018 errcpy(err,
"invalid Unicode list");
3025 if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
3047 if (extended_mode && !in_char_class) {
3049 while ((p < end) && ((c = *p++) !=
'\n')) {
3050 if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
3063 if (in_char_class) {
3070 if (!in_char_class && recurse) {
3071 if (--parens == 0) {
3078 if (!in_char_class && p + 1 < end && *p ==
'?') {
3079 if (*(p+1) ==
'#') {
3081 const char *orig_p = p;
3084 while (cont && (p < end)) {
3087 if (!(c & 0x80))
break;
3088 if (!*encp && enc == rb_utf8_encoding()) {
3094 chlen = rb_enc_precise_mbclen(p, end, enc);
3096 goto invalid_multibyte;
3117 int local_extend = 0;
3124 for (s = p+1; s < end; s++) {
3127 local_extend = invert ? -1 : 1;
3134 if (local_extend == 0 ||
3135 (local_extend == -1 && !extended_mode) ||
3136 (local_extend == 1 && extended_mode)) {
3143 int local_options = options;
3144 if (local_extend == 1) {
3145 local_options |= ONIG_OPTION_EXTEND;
3148 local_options &= ~ONIG_OPTION_EXTEND;
3152 int ret = unescape_nonascii0(&p, end, enc, buf, encp,
3155 if (ret < 0)
return ret;
3160 extended_mode = local_extend == 1;
3177 else if (!in_char_class && recurse) {
3195unescape_nonascii(
const char *p,
const char *end,
rb_encoding *enc,
3197 onig_errmsg_buffer err,
int options)
3199 return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
3204rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
3205 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options)
3208 int has_property = 0;
3212 if (rb_enc_asciicompat(enc))
3216 rb_enc_associate(buf, enc);
3219 if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err, options) != 0)
3222 if (has_property && !*fixed_enc) {
3227 rb_enc_associate(buf, *fixed_enc);
3234rb_reg_check_preprocess(
VALUE str)
3237 onig_errmsg_buffer err =
"";
3243 p = RSTRING_PTR(str);
3244 end = p + RSTRING_LEN(str);
3245 enc = rb_enc_get(str);
3247 buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err, 0);
3251 return rb_reg_error_desc(str, 0, err);
3257rb_reg_preprocess_dregexp(
VALUE ary,
int options)
3261 onig_errmsg_buffer err =
"";
3267 rb_raise(rb_eArgError,
"no arguments given");
3276 src_enc = rb_enc_get(str);
3277 if (options & ARG_ENCODING_NONE &&
3278 src_enc != ascii8bit) {
3280 rb_raise(
rb_eRegexpError,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3282 src_enc = ascii8bit;
3286 p = RSTRING_PTR(str);
3287 end = p + RSTRING_LEN(str);
3289 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err, options);
3292 rb_raise(rb_eArgError,
"%s", err);
3294 if (fixed_enc != 0) {
3295 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
3296 rb_raise(
rb_eRegexpError,
"encoding mismatch in dynamic regexp : %s and %s",
3297 rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
3299 regexp_enc = fixed_enc;
3308 rb_enc_associate(result, regexp_enc);
3315rb_reg_initialize_check(
VALUE obj)
3317 rb_check_frozen(obj);
3325 int options, onig_errmsg_buffer err,
3326 const char *sourcefile,
int sourceline)
3333 rb_reg_initialize_check(obj);
3335 if (rb_enc_dummy_p(enc)) {
3336 errcpy(err,
"can't make regexp with dummy encoding");
3340 unescaped = rb_reg_preprocess(s, s+
len, enc, &fixed_enc, err, options);
3341 if (
NIL_P(unescaped))
3345 if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
3346 (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
3347 errcpy(err,
"incompatible character encoding");
3350 if (fixed_enc != a_enc) {
3351 options |= ARG_ENCODING_FIXED;
3355 else if (!(options & ARG_ENCODING_FIXED)) {
3356 enc = rb_usascii_encoding();
3359 rb_enc_associate((
VALUE)re, enc);
3360 if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
3363 if (options & ARG_ENCODING_NONE) {
3367 re->
ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
3368 options & ARG_REG_OPTION_MASK, err,
3369 sourcefile, sourceline);
3370 if (!re->
ptr)
return -1;
3383 if (regenc != enc) {
3385 str = rb_enc_associate(dup, enc = regenc);
3387 str = rb_fstring(str);
3392rb_reg_initialize_str(
VALUE obj,
VALUE str,
int options, onig_errmsg_buffer err,
3393 const char *sourcefile,
int sourceline)
3396 rb_encoding *str_enc = rb_enc_get(str), *enc = str_enc;
3397 if (options & ARG_ENCODING_NONE) {
3399 if (enc != ascii8bit) {
3401 errcpy(err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3407 ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
3408 options, err, sourcefile, sourceline);
3409 if (ret == 0) reg_set_source(obj, str, str_enc);
3414rb_reg_s_alloc(
VALUE klass)
3434 return rb_reg_init_str(rb_reg_alloc(), s, options);
3438rb_reg_init_str(
VALUE re,
VALUE s,
int options)
3440 onig_errmsg_buffer err =
"";
3442 if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
3443 rb_reg_raise_str(s, options, err);
3452 onig_errmsg_buffer err =
"";
3454 if (rb_reg_initialize(re, RSTRING_PTR(s), RSTRING_LEN(s),
3455 enc, options, err, NULL, 0) != 0) {
3456 rb_reg_raise_str(s, options, err);
3458 reg_set_source(re, s, enc);
3464rb_reg_new_ary(
VALUE ary,
int opt)
3472 VALUE re = rb_reg_alloc();
3473 onig_errmsg_buffer err =
"";
3475 if (rb_reg_initialize(re, s,
len, enc, options, err, NULL, 0) != 0) {
3476 rb_enc_reg_raise(s,
len, enc, options, err);
3490rb_reg_compile(
VALUE str,
int options,
const char *sourcefile,
int sourceline)
3492 VALUE re = rb_reg_alloc();
3493 onig_errmsg_buffer err =
"";
3496 if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
3497 rb_set_errinfo(rb_reg_error_desc(str, options, err));
3503static VALUE reg_cache;
3508 if (rb_ractor_main_p()) {
3511 && memcmp(
RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
3521static st_index_t reg_hash(
VALUE re);
3533rb_reg_hash(
VALUE re)
3535 st_index_t hashval = reg_hash(re);
3568 if (re1 == re2)
return Qtrue;
3570 rb_reg_check(re1); rb_reg_check(re2);
3590match_hash(
VALUE match)
3597 hashval =
rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3620 if (match1 == match2)
return Qtrue;
3624 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3627 if (regs1->num_regs != regs2->num_regs)
return Qfalse;
3628 if (memcmp(regs1->beg, regs2->beg, regs1->num_regs *
sizeof(*regs1->beg)))
return Qfalse;
3629 if (memcmp(regs1->end, regs2->end, regs1->num_regs *
sizeof(*regs1->end)))
return Qfalse;
3668match_integer_at(
int argc,
VALUE *argv,
VALUE match)
3680 else if ((nth = namev_to_backref_number(regs,
RMATCH(match)->regexp, idx)) < 0) {
3681 name_to_backref_error(idx);
3684 if (argc > 1 && (base =
NUM2INT(argv[1])) < 0) {
3685 rb_raise(rb_eArgError,
"invalid radix %d", base);
3688 if (nth >= regs->num_regs)
return Qnil;
3689 if (nth < 0 && (nth += regs->num_regs) <= 0)
return Qnil;
3691 long start = BEG(nth), end = END(nth);
3692 if (start < 0)
return Qnil;
3693 RUBY_ASSERT(start <= end, "%ld > %ld
", start, end);
3695 VALUE str = RMATCH(match)->str;
3696 RUBY_ASSERT(end <= RSTRING_LEN(str), "%ld > %ld
", end, RSTRING_LEN(str));
3699 return rb_int_parse_cstr(RSTRING_PTR(str) + start, end - start, &endp, NULL,
3700 base, RB_INT_PARSE_DEFAULT);
3704reg_operand(VALUE s, int check)
3707 return rb_sym2str(s);
3709 else if (RB_TYPE_P(s, T_STRING)) {
3713 return check ? rb_str_to_str(s) : rb_check_string_type(s);
3718reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match)
3723 rb_backref_set(Qnil);
3726 *strp = str = reg_operand(str, TRUE);
3729 VALUE l = rb_str_length(str);
3735 pos = rb_str_offset(str, pos);
3737 return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3742 * self =~ other -> integer or nil
3744 * Returns the integer index (in characters) of the first match
3745 * for +self+ and +other+, or +nil+ if none;
3746 * updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables].
3748 * /at/ =~ 'input data' # => 7
3749 * $~ # => #<MatchData "at
">
3750 * /ax/ =~ 'input data' # => nil
3753 * Assigns named captures to local variables of the same names
3754 * if and only if +self+:
3756 * - Is a regexp literal;
3757 * see {Regexp Literals}[rdoc-ref:syntax/literals.rdoc@Regexp+Literals].
3758 * - Does not contain interpolations;
3759 * see {Regexp interpolation}[rdoc-ref:Regexp@Interpolation+Mode].
3760 * - Is at the left of the expression.
3764 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = y '
3768 * Assigns +nil+ if not matched:
3770 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = '
3774 * Does not make local variable assignments if +self+ is not a regexp literal:
3776 * r = /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
3778 * p foo # Undefined local variable
3779 * p bar # Undefined local variable
3781 * The assignment does not occur if the regexp is not at the left:
3783 * ' x = y ' =~ /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
3784 * p foo, bar # Undefined local variables
3786 * A regexp interpolation, <tt>#{}</tt>, also disables
3790 * /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
3791 * p foo # Undefined local variable
3796rb_reg_match(VALUE re, VALUE str)
3798 long pos = reg_match_pos(re, &str, 0, NULL);
3799 if (pos < 0) return Qnil;
3800 pos = rb_str_sublen(str, pos);
3801 return LONG2FIX(pos);
3806 * self === other -> true or false
3808 * Returns whether +self+ finds a match in +other+:
3810 * /^[a-z]*$/ === 'HELLO' # => false
3811 * /^[A-Z]*$/ === 'HELLO' # => true
3813 * This method is called in case statements:
3817 * when /\A[a-z]*\z/; print "Lower
case\n
"
3818 * when /\A[A-Z]*\z/; print "Upper
case\n
"
3819 * else print "Mixed
case\n
"
3820 * end # => "Upper
case"
3825rb_reg_eqq(VALUE re, VALUE str)
3829 str = reg_operand(str, FALSE);
3831 rb_backref_set(Qnil);
3834 start = rb_reg_search(re, str, 0, 0);
3835 return RBOOL(start >= 0);
3841 * ~ rxp -> integer or nil
3843 * Equivalent to <tt><i>rxp</i> =~ $_</tt>:
3851rb_reg_match2(VALUE re)
3854 VALUE line = rb_lastline_get();
3856 if (!RB_TYPE_P(line, T_STRING)) {
3857 rb_backref_set(Qnil);
3861 start = rb_reg_search(re, line, 0, 0);
3865 start = rb_str_sublen(line, start);
3866 return LONG2FIX(start);
3872 * match(string, offset = 0) -> matchdata or nil
3873 * match(string, offset = 0) {|matchdata| ... } -> object
3875 * With no block given, returns the MatchData object
3876 * that describes the match, if any, or +nil+ if none;
3877 * the search begins at the given character +offset+ in +string+:
3879 * /abra/.match('abracadabra') # => #<MatchData "abra
">
3880 * /abra/.match('abracadabra', 4) # => #<MatchData "abra
">
3881 * /abra/.match('abracadabra', 8) # => nil
3882 * /abra/.match('abracadabra', 800) # => nil
3884 * string = "\u{5d0 5d1 5e8 5d0}cadabra
"
3885 * /abra/.match(string, 7) #=> #<MatchData "abra
">
3886 * /abra/.match(string, 8) #=> nil
3887 * /abra/.match(string.b, 8) #=> #<MatchData "abra
">
3889 * With a block given, calls the block if and only if a match is found;
3890 * returns the block's value:
3892 * /abra/.match('abracadabra') {|matchdata| p matchdata }
3893 * # => #<MatchData "abra
">
3894 * /abra/.match('abracadabra', 4) {|matchdata| p matchdata }
3895 * # => #<MatchData "abra
">
3896 * /abra/.match('abracadabra', 8) {|matchdata| p matchdata }
3898 * /abra/.match('abracadabra', 8) {|matchdata| fail 'Cannot happen' }
3901 * Output (from the first two blocks above):
3903 * #<MatchData "abra
">
3904 * #<MatchData "abra
">
3906 * /(.)(.)(.)/.match("abc
")[2] # => "b
"
3907 * /(.)(.)/.match("abc
", 1)[2] # => "c
"
3912rb_reg_match_m(int argc, VALUE *argv, VALUE re)
3914 VALUE result = Qnil, str, initpos;
3917 if (rb_scan_args(argc, argv, "11
", &str, &initpos) == 2) {
3918 pos = NUM2LONG(initpos);
3924 pos = reg_match_pos(re, &str, pos, &result);
3926 rb_backref_set(Qnil);
3929 rb_match_busy(result);
3930 if (!NIL_P(result) && rb_block_given_p()) {
3931 return rb_yield(result);
3938 * match?(string) -> true or false
3939 * match?(string, offset = 0) -> true or false
3941 * Returns <code>true</code> or <code>false</code> to indicate whether the
3942 * regexp is matched or not without updating $~ and other related variables.
3943 * If the second parameter is present, it specifies the position in the string
3944 * to begin the search.
3946 * /R.../.match?("Ruby
") # => true
3947 * /R.../.match?("Ruby
", 1) # => false
3948 * /P.../.match?("Ruby
") # => false
3953rb_reg_match_m_p(int argc, VALUE *argv, VALUE re)
3955 long pos = rb_check_arity(argc, 1, 2) > 1 ? NUM2LONG(argv[1]) : 0;
3956 return rb_reg_match_p(re, argv[0], pos);
3960rb_reg_match_p(VALUE re, VALUE str, long pos)
3962 if (NIL_P(str)) return Qfalse;
3963 str = SYMBOL_P(str) ? rb_sym2str(str) : StringValue(str);
3966 pos += NUM2LONG(rb_str_length(str));
3967 if (pos < 0) return Qfalse;
3971 const char *beg = rb_str_subpos(str, pos, &len);
3972 if (!beg) return Qfalse;
3973 pos = beg - RSTRING_PTR(str);
3977 struct reg_onig_search_args args = {
3979 .range = RSTRING_LEN(str),
3982 return rb_reg_onig_match(re, str, reg_onig_search, &args, NULL) == ONIG_MISMATCH ? Qfalse : Qtrue;
3986 * Document-method: compile
3988 * Alias for Regexp.new
3992str_to_option(VALUE str)
3997 str = rb_check_string_type(str);
3998 if (NIL_P(str)) return -1;
3999 RSTRING_GETMEM(str, ptr, len);
4000 for (long i = 0; i < len; ++i) {
4001 int f = char_to_option(ptr[i]);
4003 rb_raise(rb_eArgError, "unknown regexp option: %
"PRIsVALUE, str);
4011set_timeout(rb_hrtime_t *hrt, VALUE timeout)
4013 double timeout_d = NIL_P(timeout) ? 0.0 : NUM2DBL(timeout);
4014 if (!NIL_P(timeout) && timeout_d <= 0) {
4015 rb_raise(rb_eArgError, "invalid timeout: %
"PRIsVALUE, timeout);
4017 double2hrtime(hrt, timeout_d);
4021reg_copy(VALUE copy, VALUE orig)
4026 rb_reg_initialize_check(copy);
4027 if ((r = onig_reg_copy(&re, RREGEXP_PTR(orig))) != 0) {
4028 /* ONIGERR_MEMORY only */
4029 rb_raise(rb_eRegexpError, "%s
", onig_error_code_to_format(r));
4031 RREGEXP_PTR(copy) = re;
4032 RB_OBJ_WRITE(copy, &RREGEXP(copy)->src, RREGEXP(orig)->src);
4033 RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit;
4034 rb_enc_copy(copy, orig);
4035 FL_SET_RAW(copy, FL_TEST_RAW(orig, KCODE_FIXED|REG_ENCODING_NONE));
4036 if (RBASIC_CLASS(copy) == rb_cRegexp) {
4043struct reg_init_args {
4050static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
4051static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
4055 * Regexp.new(string, options = 0, timeout: nil) -> regexp
4056 * Regexp.new(regexp, timeout: nil) -> regexp
4058 * With argument +string+ given, returns a new regexp with the given string
4061 * r = Regexp.new('foo') # => /foo/
4062 * r.source # => "foo
"
4065 * Optional argument +options+ is one of the following:
4067 * - A String of options:
4069 * Regexp.new('foo', 'i') # => /foo/i
4070 * Regexp.new('foo', 'im') # => /foo/im
4072 * - The bit-wise OR of one or more of the constants
4073 * Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
4074 * Regexp::NOENCODING:
4076 * Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
4077 * Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
4078 * Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
4079 * Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
4080 * flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
4081 * Regexp.new('foo', flags) # => /foo/mix
4083 * - +nil+ or +false+, which is ignored.
4084 * - Any other truthy value, in which case the regexp will be
4087 * If optional keyword argument +timeout+ is given,
4088 * its float value overrides the timeout interval for the class,
4090 * If +nil+ is passed as +timeout+, it uses the timeout interval
4091 * for the class, Regexp.timeout.
4093 * With argument +regexp+ given, returns a new regexp. The source,
4094 * options, timeout are the same as +regexp+. +options+ and +n_flag+
4095 * arguments are ineffective. The timeout can be overridden by
4096 * +timeout+ keyword.
4098 * options = Regexp::MULTILINE
4099 * r = Regexp.new('foo', options, timeout: 1.1) # => /foo/m
4100 * r2 = Regexp.new(r) # => /foo/m
4101 * r2.timeout # => 1.1
4102 * r3 = Regexp.new(r, timeout: 3.14) # => /foo/m
4103 * r3.timeout # => 3.14
4108rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
4110 struct reg_init_args args;
4111 VALUE re = reg_extract_args(argc, argv, &args);
4114 reg_init_args(self, args.str, args.enc, args.flags);
4120 set_timeout(&RREGEXP_PTR(self)->timelimit, args.timeout);
4121 if (RBASIC_CLASS(self) == rb_cRegexp) {
4129reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
4132 rb_encoding *enc = 0;
4133 VALUE str, src, opts = Qundef, kwargs;
4136 rb_scan_args(argc, argv, "11:
", &src, &opts, &kwargs);
4138 args->timeout = Qnil;
4139 if (!NIL_P(kwargs)) {
4140 static ID keywords[1];
4142 keywords[0] = rb_intern_const("timeout
");
4144 rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
4147 if (RB_TYPE_P(src, T_REGEXP)) {
4151 rb_warn("flags ignored
");
4154 flags = rb_reg_options(re);
4155 str = RREGEXP_SRC(re);
4160 if (FIXNUM_P(opts)) flags = FIX2INT(opts);
4161 else if ((f = str_to_option(opts)) >= 0) flags = f;
4162 else if (rb_bool_expected(opts, "ignorecase
", FALSE))
4163 flags = ONIG_OPTION_IGNORECASE;
4165 str = StringValue(src);
4169 args->flags = flags;
4174reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags)
4176 if (enc && rb_enc_get(str) != enc)
4177 rb_reg_init_str_enc(self, str, enc, flags);
4179 rb_reg_init_str(self, str, flags);
4184rb_reg_quote(VALUE str)
4186 rb_encoding *enc = rb_enc_get(str);
4190 int ascii_only = rb_enc_str_asciionly_p(str);
4192 s = RSTRING_PTR(str);
4193 send = s + RSTRING_LEN(str);
4195 c = rb_enc_ascget(s, send, &clen, enc);
4197 s += mbclen(s, send, enc);
4201 case '[': case ']': case '{': case '}':
4202 case '(': case ')': case '|': case '-':
4203 case '*': case '.': case '\\':
4204 case '?': case '+': case '^': case '$':
4206 case '\t': case '\f': case '\v': case '\n': case '\r':
4211 tmp = rb_str_new3(str);
4213 rb_enc_associate(tmp, rb_usascii_encoding());
4218 tmp = rb_str_new(0, RSTRING_LEN(str)*2);
4220 rb_enc_associate(tmp, rb_usascii_encoding());
4223 rb_enc_copy(tmp, str);
4225 t = RSTRING_PTR(tmp);
4226 /* copy up to metacharacter */
4227 const char *p = RSTRING_PTR(str);
4228 memcpy(t, p, s - p);
4232 c = rb_enc_ascget(s, send, &clen, enc);
4234 int n = mbclen(s, send, enc);
4242 case '[': case ']': case '{': case '}':
4243 case '(': case ')': case '|': case '-':
4244 case '*': case '.': case '\\':
4245 case '?': case '+': case '^': case '$':
4247 t += rb_enc_mbcput('\\', t, enc);
4250 t += rb_enc_mbcput('\\', t, enc);
4251 t += rb_enc_mbcput(' ', t, enc);
4254 t += rb_enc_mbcput('\\', t, enc);
4255 t += rb_enc_mbcput('t', t, enc);
4258 t += rb_enc_mbcput('\\', t, enc);
4259 t += rb_enc_mbcput('n', t, enc);
4262 t += rb_enc_mbcput('\\', t, enc);
4263 t += rb_enc_mbcput('r', t, enc);
4266 t += rb_enc_mbcput('\\', t, enc);
4267 t += rb_enc_mbcput('f', t, enc);
4270 t += rb_enc_mbcput('\\', t, enc);
4271 t += rb_enc_mbcput('v', t, enc);
4274 t += rb_enc_mbcput(c, t, enc);
4276 rb_str_resize(tmp, t - RSTRING_PTR(tmp));
4283 * Regexp.escape(string) -> new_string
4285 * Returns a new string that escapes any characters
4286 * that have special meaning in a regular expression:
4288 * s = Regexp.escape('\*?{}.') # => "\\\\\\*\\?\\{\\}\\.
"
4290 * For any string +s+, this call returns a MatchData object:
4292 * r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
4293 * r.match(s) # => #<MatchData "\\\\\\*\\?\\{\\}\\.
">
4298rb_reg_s_quote(VALUE c, VALUE str)
4300 return rb_reg_quote(reg_operand(str, TRUE));
4304rb_reg_options(VALUE re)
4309 options = RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
4310 if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
4311 if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
4316rb_check_regexp_type(VALUE re)
4318 return rb_check_convert_type(re, T_REGEXP, "Regexp
", "to_regexp
");
4323 * Regexp.try_convert(object) -> regexp or nil
4325 * Returns +object+ if it is a regexp:
4327 * Regexp.try_convert(/re/) # => /re/
4329 * Otherwise if +object+ responds to <tt>:to_regexp</tt>,
4330 * calls <tt>object.to_regexp</tt> and returns the result.
4332 * Returns +nil+ if +object+ does not respond to <tt>:to_regexp</tt>.
4334 * Regexp.try_convert('re') # => nil
4336 * Raises an exception unless <tt>object.to_regexp</tt> returns a regexp.
4340rb_reg_s_try_convert(VALUE dummy, VALUE re)
4342 return rb_check_regexp_type(re);
4346rb_reg_s_union(VALUE self, VALUE args0)
4348 long argc = RARRAY_LEN(args0);
4352 args[0] = rb_str_new2("(?!)
");
4353 return rb_class_new_instance(1, args, rb_cRegexp);
4355 else if (argc == 1) {
4356 VALUE arg = rb_ary_entry(args0, 0);
4357 VALUE re = rb_check_regexp_type(arg);
4362 quoted = rb_reg_s_quote(Qnil, arg);
4363 return rb_reg_new_str(quoted, 0);
4368 VALUE source = rb_str_buf_new(0);
4369 rb_encoding *result_enc;
4371 int has_asciionly = 0;
4372 rb_encoding *has_ascii_compat_fixed = 0;
4373 rb_encoding *has_ascii_incompat = 0;
4375 for (i = 0; i < argc; i++) {
4377 VALUE e = rb_ary_entry(args0, i);
4380 rb_str_buf_cat_ascii(source, "|
");
4382 v = rb_check_regexp_type(e);
4384 rb_encoding *enc = rb_enc_get(v);
4385 if (!rb_enc_asciicompat(enc)) {
4386 if (!has_ascii_incompat)
4387 has_ascii_incompat = enc;
4388 else if (has_ascii_incompat != enc)
4389 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4390 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4392 else if (rb_reg_fixed_encoding_p(v)) {
4393 if (!has_ascii_compat_fixed)
4394 has_ascii_compat_fixed = enc;
4395 else if (has_ascii_compat_fixed != enc)
4396 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4397 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4402 v = rb_reg_str_with_term(v, -1);
4407 enc = rb_enc_get(e);
4408 if (!rb_enc_asciicompat(enc)) {
4409 if (!has_ascii_incompat)
4410 has_ascii_incompat = enc;
4411 else if (has_ascii_incompat != enc)
4412 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4413 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4415 else if (rb_enc_str_asciionly_p(e)) {
4419 if (!has_ascii_compat_fixed)
4420 has_ascii_compat_fixed = enc;
4421 else if (has_ascii_compat_fixed != enc)
4422 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4423 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4425 v = rb_reg_s_quote(Qnil, e);
4427 if (has_ascii_incompat) {
4428 if (has_asciionly) {
4429 rb_raise(rb_eArgError, "ASCII incompatible encoding: %s
",
4430 rb_enc_name(has_ascii_incompat));
4432 if (has_ascii_compat_fixed) {
4433 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4434 rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
4439 rb_enc_copy(source, v);
4441 rb_str_append(source, v);
4444 if (has_ascii_incompat) {
4445 result_enc = has_ascii_incompat;
4447 else if (has_ascii_compat_fixed) {
4448 result_enc = has_ascii_compat_fixed;
4451 result_enc = rb_ascii8bit_encoding();
4454 rb_enc_associate(source, result_enc);
4455 return rb_class_new_instance(1, &source, rb_cRegexp);
4461 * Regexp.union(*patterns) -> regexp
4462 * Regexp.union(array_of_patterns) -> regexp
4464 * Returns a new regexp that is the union of the given patterns:
4466 * r = Regexp.union(%w[cat dog]) # => /cat|dog/
4467 * r.match('cat') # => #<MatchData "cat
">
4468 * r.match('dog') # => #<MatchData "dog
">
4469 * r.match('cog') # => nil
4471 * For each pattern that is a string, <tt>Regexp.new(pattern)</tt> is used:
4473 * Regexp.union('penzance') # => /penzance/
4474 * Regexp.union('a+b*c') # => /a\+b\*c/
4475 * Regexp.union('skiing', 'sledding') # => /skiing|sledding/
4476 * Regexp.union(['skiing', 'sledding']) # => /skiing|sledding/
4478 * For each pattern that is a regexp, it is used as is,
4479 * including its flags:
4481 * Regexp.union(/foo/i, /bar/m, /baz/x)
4482 * # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
4483 * Regexp.union([/foo/i, /bar/m, /baz/x])
4484 * # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
4486 * With no arguments, returns <tt>/(?!)/</tt>:
4488 * Regexp.union # => /(?!)/
4490 * If any regexp pattern contains captures, the behavior is unspecified.
4494rb_reg_s_union_m(VALUE self, VALUE args)
4497 if (RARRAY_LEN(args) == 1 &&
4498 !NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
4499 return rb_reg_s_union(self, v);
4501 return rb_reg_s_union(self, args);
4506 * Regexp.linear_time?(re)
4507 * Regexp.linear_time?(string, options = 0)
4509 * Returns +true+ if matching against <tt>re</tt> can be
4510 * done in linear time to the input string.
4512 * Regexp.linear_time?(/re/) # => true
4514 * Note that this is a property of the ruby interpreter, not of the argument
4515 * regular expression. Identical regexp can or cannot run in linear time
4516 * depending on your ruby binary. Neither forward nor backward compatibility
4517 * is guaranteed about the return value of this method. Our current algorithm
4518 * is (*1) but this is subject to change in the future. Alternative
4519 * implementations can also behave differently. They might always return
4520 * false for everything.
4522 * (*1): https://doi.org/10.1109/SP40001.2021.00032
4526rb_reg_s_linear_time_p(int argc, VALUE *argv, VALUE self)
4528 struct reg_init_args args;
4529 VALUE re = reg_extract_args(argc, argv, &args);
4532 re = reg_init_args(rb_reg_alloc(), args.str, args.enc, args.flags);
4535 return RBOOL(onig_check_linear_time(RREGEXP_PTR(re)));
4540rb_reg_init_copy(VALUE copy, VALUE re)
4542 if (!OBJ_INIT_COPY(copy, re)) return copy;
4544 return reg_copy(copy, re);
4548rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
4553 rb_encoding *str_enc = rb_enc_get(str);
4554 rb_encoding *src_enc = rb_enc_get(src);
4555 int acompat = rb_enc_asciicompat(str_enc);
4557#define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
4559 RSTRING_GETMEM(str, s, n);
4564 int c = ASCGET(s, e, &clen);
4568 s += mbclen(s, e, str_enc);
4574 if (c != '\\' || s == e) continue;
4577 val = rb_str_buf_new(ss-p);
4579 rb_enc_str_buf_cat(val, p, ss-p, str_enc);
4581 c = ASCGET(s, e, &clen);
4583 s += mbclen(s, e, str_enc);
4584 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4592 case '1': case '2': case '3': case '4':
4593 case '5': case '6': case '7': case '8': case '9':
4594 if (!NIL_P(regexp) && onig_noname_group_capture_is_active(RREGEXP_PTR(regexp))) {
4603 if (s < e && ASCGET(s, e, &clen) == '<') {
4604 char *name, *name_end;
4606 name_end = name = s + clen;
4607 while (name_end < e) {
4608 c = ASCGET(name_end, e, &clen);
4609 if (c == '>') break;
4610 name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
4613 VALUE n = rb_str_subseq(str, (long)(name - RSTRING_PTR(str)),
4614 (long)(name_end - name));
4615 if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
4616 name_to_backref_error(n);
4618 p = s = name_end + clen;
4622 rb_raise(rb_eRuntimeError, "invalid group name reference format
");
4626 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4635 rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
4639 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
4643 no = regs->num_regs-1;
4644 while (BEG(no) == -1 && no > 0) no--;
4645 if (no == 0) continue;
4649 rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
4653 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4658 if (no >= regs->num_regs) continue;
4659 if (BEG(no) == -1) continue;
4660 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
4664 if (!val) return str;
4666 rb_enc_str_buf_cat(val, p, e-p, str_enc);
4673ignorecase_getter(ID _x, VALUE *_y)
4675 rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective
");
4680ignorecase_setter(VALUE val, ID id, VALUE *_)
4682 rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective; ignored
");
4688 VALUE match = rb_backref_get();
4690 if (NIL_P(match)) return Qnil;
4691 rb_match_busy(match);
4696get_LAST_MATCH_INFO(ID _x, VALUE *_y)
4698 return match_getter();
4702match_setter(VALUE val, ID _x, VALUE *_y)
4705 Check_Type(val, T_MATCH);
4707 rb_backref_set(val);
4712 * Regexp.last_match -> matchdata or nil
4713 * Regexp.last_match(n) -> string or nil
4714 * Regexp.last_match(name) -> string or nil
4716 * With no argument, returns the value of <tt>$~</tt>,
4717 * which is the result of the most recent pattern match
4718 * (see {Regexp global variables}[rdoc-ref:Regexp@Global+Variables]):
4720 * /c(.)t/ =~ 'cat' # => 0
4721 * Regexp.last_match # => #<MatchData "cat
" 1:"a
">
4722 * /a/ =~ 'foo' # => nil
4723 * Regexp.last_match # => nil
4725 * With non-negative integer argument +n+, returns the _n_th field in the
4726 * matchdata, if any, or nil if none:
4728 * /c(.)t/ =~ 'cat' # => 0
4729 * Regexp.last_match(0) # => "cat
"
4730 * Regexp.last_match(1) # => "a
"
4731 * Regexp.last_match(2) # => nil
4733 * With negative integer argument +n+, counts backwards from the last field:
4735 * Regexp.last_match(-1) # => "a
"
4737 * With string or symbol argument +name+,
4738 * returns the string value for the named capture, if any:
4740 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ 'var = val'
4741 * Regexp.last_match # => #<MatchData "var = val
" lhs:"var
"rhs:"val
">
4742 * Regexp.last_match(:lhs) # => "var
"
4743 * Regexp.last_match('rhs') # => "val
"
4744 * Regexp.last_match('foo') # Raises IndexError.
4749rb_reg_s_last_match(int argc, VALUE *argv, VALUE _)
4751 if (rb_check_arity(argc, 0, 1) == 1) {
4752 VALUE match = rb_backref_get();
4754 if (NIL_P(match)) return Qnil;
4755 n = match_backref_number(match, argv[0]);
4756 return rb_reg_nth_match(n, match);
4758 return match_getter();
4762re_warn(const char *s)
4767// This function is periodically called during regexp matching
4769rb_reg_timeout_p(regex_t *reg, void *end_time_)
4771 rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
4773 if (*end_time == 0) {
4774 // This is the first time to check interrupts;
4775 // just measure the current time and determine the end time
4776 // if timeout is set.
4777 rb_hrtime_t timelimit = reg->timelimit;
4780 // no per-object timeout.
4781 timelimit = rb_reg_match_time_limit;
4785 *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
4788 // no timeout is set
4789 *end_time = RB_HRTIME_MAX;
4793 if (*end_time < rb_hrtime_now()) {
4794 // Timeout has exceeded
4804 * Regexp.timeout -> float or nil
4806 * It returns the current default timeout interval for Regexp matching in seconds.
4807 * +nil+ means no default timeout configuration.
4811rb_reg_s_timeout_get(VALUE dummy)
4813 double d = hrtime2double(rb_reg_match_time_limit);
4814 if (d == 0.0) return Qnil;
4820 * Regexp.timeout = float or nil
4822 * It sets the default timeout interval for Regexp matching in seconds.
4823 * +nil+ means no default timeout configuration.
4824 * This configuration is process-global. If you want to set timeout for
4825 * each Regexp, use +timeout+ keyword for <code>Regexp.new</code>.
4827 * Regexp.timeout = 1
4828 * /^a*b?a*$/ =~ "a
" * 100000 + "x
" #=> regexp match timeout (RuntimeError)
4832rb_reg_s_timeout_set(VALUE dummy, VALUE timeout)
4834 rb_ractor_ensure_main_ractor("can not access
Regexp.timeout from non-main Ractors
");
4836 set_timeout(&rb_reg_match_time_limit, timeout);
4843 * rxp.timeout -> float or nil
4845 * It returns the timeout interval for Regexp matching in seconds.
4846 * +nil+ means no default timeout configuration.
4848 * This configuration is per-object. The global configuration set by
4849 * Regexp.timeout= is ignored if per-object configuration is set.
4851 * re = Regexp.new("^a*b?a*$
", timeout: 1)
4852 * re.timeout #=> 1.0
4853 * re =~ "a
" * 100000 + "x
" #=> regexp match timeout (RuntimeError)
4857rb_reg_timeout_get(VALUE re)
4860 double d = hrtime2double(RREGEXP_PTR(re)->timelimit);
4861 if (d == 0.0) return Qnil;
4866 * Document-class: RegexpError
4868 * Raised when given an invalid regexp expression.
4872 * <em>raises the exception:</em>
4874 * RegexpError: target of repeat operator is not specified: /?/
4878 * Document-class: Regexp
4880 * :include: doc/_regexp.rdoc
4886 rb_eRegexpError = rb_define_class("RegexpError
", rb_eStandardError);
4888 onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4889 onig_set_warn_func(re_warn);
4890 onig_set_verb_warn_func(re_warn);
4892 rb_define_virtual_variable("$~
", get_LAST_MATCH_INFO, match_setter);
4893 rb_define_virtual_variable("$&
", last_match_getter, 0);
4894 rb_define_virtual_variable("$`
", prematch_getter, 0);
4895 rb_define_virtual_variable("$
'", postmatch_getter, 0);
4896 rb_define_virtual_variable("$+", last_paren_match_getter, 0);
4898 rb_gvar_ractor_local("$~");
4899 rb_gvar_ractor_local("$&");
4900 rb_gvar_ractor_local("$`");
4901 rb_gvar_ractor_local("$'");
4902 rb_gvar_ractor_local("$+
");
4904 rb_define_virtual_variable("$=
", ignorecase_getter, ignorecase_setter);
4906 rb_cRegexp = rb_define_class("Regexp", rb_cObject);
4907 rb_define_alloc_func(rb_cRegexp, rb_reg_s_alloc);
4908 rb_define_singleton_method(rb_cRegexp, "compile
", rb_class_new_instance_pass_kw, -1);
4909 rb_define_singleton_method(rb_cRegexp, "quote
", rb_reg_s_quote, 1);
4910 rb_define_singleton_method(rb_cRegexp, "escape
", rb_reg_s_quote, 1);
4911 rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
4912 rb_define_singleton_method(rb_cRegexp, "last_match
", rb_reg_s_last_match, -1);
4913 rb_define_singleton_method(rb_cRegexp, "try_convert
", rb_reg_s_try_convert, 1);
4914 rb_define_singleton_method(rb_cRegexp, "linear_time?
", rb_reg_s_linear_time_p, -1);
4916 rb_define_method(rb_cRegexp, "initialize
", rb_reg_initialize_m, -1);
4917 rb_define_method(rb_cRegexp, "initialize_copy
", rb_reg_init_copy, 1);
4918 rb_define_method(rb_cRegexp, "hash
", rb_reg_hash, 0);
4919 rb_define_method(rb_cRegexp, "eql?
", rb_reg_equal, 1);
4920 rb_define_method(rb_cRegexp, "==
", rb_reg_equal, 1);
4921 rb_define_method(rb_cRegexp, "=~
", rb_reg_match, 1);
4922 rb_define_method(rb_cRegexp, "===
", rb_reg_eqq, 1);
4923 rb_define_method(rb_cRegexp, "~
", rb_reg_match2, 0);
4924 rb_define_method(rb_cRegexp, "match
", rb_reg_match_m, -1);
4925 rb_define_method(rb_cRegexp, "match?
", rb_reg_match_m_p, -1);
4926 rb_define_method(rb_cRegexp, "to_s
", rb_reg_to_s, 0);
4927 rb_define_method(rb_cRegexp, "inspect
", rb_reg_inspect, 0);
4928 rb_define_method(rb_cRegexp, "source
", rb_reg_source, 0);
4929 rb_define_method(rb_cRegexp, "casefold?
", rb_reg_casefold_p, 0);
4930 rb_define_method(rb_cRegexp, "options
", rb_reg_options_m, 0);
4931 rb_define_method(rb_cRegexp, "encoding
", rb_obj_encoding, 0); /* in encoding.c */
4932 rb_define_method(rb_cRegexp, "fixed_encoding?
", rb_reg_fixed_encoding_p, 0);
4933 rb_define_method(rb_cRegexp, "names
", rb_reg_names, 0);
4934 rb_define_method(rb_cRegexp, "named_captures
", rb_reg_named_captures, 0);
4935 rb_define_method(rb_cRegexp, "timeout
", rb_reg_timeout_get, 0);
4937 /* Raised when regexp matching timed out. */
4938 rb_eRegexpTimeoutError = rb_define_class_under(rb_cRegexp, "TimeoutError
", rb_eRegexpError);
4939 rb_define_singleton_method(rb_cRegexp, "timeout
", rb_reg_s_timeout_get, 0);
4940 rb_define_singleton_method(rb_cRegexp, "timeout=
", rb_reg_s_timeout_set, 1);
4942 /* see Regexp.options and Regexp.new */
4943 rb_define_const(rb_cRegexp, "IGNORECASE
", INT2FIX(ONIG_OPTION_IGNORECASE));
4944 /* see Regexp.options and Regexp.new */
4945 rb_define_const(rb_cRegexp, "EXTENDED
", INT2FIX(ONIG_OPTION_EXTEND));
4946 /* see Regexp.options and Regexp.new */
4947 rb_define_const(rb_cRegexp, "MULTILINE
", INT2FIX(ONIG_OPTION_MULTILINE));
4948 /* see Regexp.options and Regexp.new */
4949 rb_define_const(rb_cRegexp, "FIXEDENCODING
", INT2FIX(ARG_ENCODING_FIXED));
4950 /* see Regexp.options and Regexp.new */
4951 rb_define_const(rb_cRegexp, "NOENCODING
", INT2FIX(ARG_ENCODING_NONE));
4953 rb_global_variable(&reg_cache);
4955 rb_cMatch = rb_define_class("MatchData
", rb_cObject);
4956 rb_define_alloc_func(rb_cMatch, match_alloc);
4957 rb_undef_method(CLASS_OF(rb_cMatch), "new");
4958 rb_undef_method(CLASS_OF(rb_cMatch), "allocate
");
4960 rb_define_method(rb_cMatch, "initialize_copy
", match_init_copy, 1);
4961 rb_define_method(rb_cMatch, "regexp
", match_regexp, 0);
4962 rb_define_method(rb_cMatch, "names
", match_names, 0);
4963 rb_define_method(rb_cMatch, "size
", match_size, 0);
4964 rb_define_method(rb_cMatch, "length
", match_size, 0);
4965 rb_define_method(rb_cMatch, "offset
", match_offset, 1);
4966 rb_define_method(rb_cMatch, "byteoffset
", match_byteoffset, 1);
4967 rb_define_method(rb_cMatch, "bytebegin
", match_bytebegin, 1);
4968 rb_define_method(rb_cMatch, "byteend
", match_byteend, 1);
4969 rb_define_method(rb_cMatch, "begin
", match_begin, 1);
4970 rb_define_method(rb_cMatch, "end
", match_end, 1);
4971 rb_define_method(rb_cMatch, "match
", match_nth, 1);
4972 rb_define_method(rb_cMatch, "match_length
", match_nth_length, 1);
4973 rb_define_method(rb_cMatch, "to_a
", match_to_a, 0);
4974 rb_define_method(rb_cMatch, "[]
", match_aref, -1);
4975 rb_define_method(rb_cMatch, "captures
", match_captures, 0);
4976 rb_define_alias(rb_cMatch, "deconstruct
", "captures
");
4977 rb_define_method(rb_cMatch, "named_captures
", match_named_captures, -1);
4978 rb_define_method(rb_cMatch, "deconstruct_keys
", match_deconstruct_keys, 1);
4979 rb_define_method(rb_cMatch, "values_at
", match_values_at, -1);
4980 rb_define_method(rb_cMatch, "pre_match
", rb_reg_match_pre, 0);
4981 rb_define_method(rb_cMatch, "post_match
", rb_reg_match_post, 0);
4982 rb_define_method(rb_cMatch, "to_s
", match_to_s, 0);
4983 rb_define_method(rb_cMatch, "inspect
", match_inspect, 0);
4984 rb_define_method(rb_cMatch, "string", match_string, 0);
4985 rb_define_method(rb_cMatch, "hash
", match_hash, 0);
4986 rb_define_method(rb_cMatch, "eql?
", match_equal, 1);
4987 rb_define_method(rb_cMatch, "==
", match_equal, 1);
4988 rb_define_method(rb_cMatch, "integer_at
", match_integer_at, -1);
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define rb_str_buf_cat2
Just another name of rb_str_cat_cstr.
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define INT2FIX
Old name of RB_INT2FIX.
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define FIX2INT
Old name of RB_FIX2INT.
#define rb_str_new3
Old name of rb_str_new_shared.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_SET
Old name of RB_FL_SET.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define rb_exc_new3
Old name of rb_exc_new_str.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define NUM2INT
Old name of RB_NUM2INT.
#define INT2NUM
Old name of RB_INT2NUM.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define T_MATCH
Old name of RUBY_T_MATCH.
#define FL_TEST
Old name of RB_FL_TEST.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define rb_str_new4
Old name of rb_str_new_frozen.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
VALUE rb_eRegexpError
RegexpError exception.
#define ruby_verbose
This variable controls whether the interpreter is in debug mode.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports unless $VERBOSE is nil.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_obj_reveal(VALUE obj, VALUE klass)
Make a hidden object visible again.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_cMatch
MatchData class.
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
VALUE rb_cRegexp
Regexp class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Converts a character option to its encoding.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
#define RGENGC_WB_PROTECTED_MATCH
This is a compile-time flag to enable/disable write barrier for struct RMatch.
#define RGENGC_WB_PROTECTED_REGEXP
This is a compile-time flag to enable/disable write barrier for struct RRegexp.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
VALUE rb_ary_resize(VALUE ary, long len)
Expands or shrinks the passed array to the passed length.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Identical to rb_ary_new_from_values(), except it expects exactly two parameters.
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that was captured last.
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
#define rb_hash_end(h)
Just another name of st_hash_end.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver is an instance of RString.
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the given string.
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
OnigPosition rb_reg_onig_match(VALUE re, VALUE str, OnigPosition(*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), void *args, struct re_registers *regs)
Runs a regular expression match using function match.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define RB_ALLOCV_N(type, v, n)
Allocates a memory region, possibly on stack.
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
#define RB_ALLOCV_END(v)
Polite way to declare that the given array is not used any longer.
#define RARRAY_LEN
Just another name of rb_array_len.
#define RARRAY_AREF(a, i)
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define RMATCH(obj)
Convenient casting macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
#define RREGEXP(obj)
Convenient casting macro.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define RREGEXP_PTR(obj)
Convenient accessor macro.
static long RREGEXP_SRC_LEN(VALUE rexp)
Convenient getter function.
static char * RREGEXP_SRC_PTR(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks the contents for viability as a C string.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
Regular expression execution context.
VALUE regexp
The expression of this match.
VALUE str
The target string that the match was made against.
Ruby's regular expression.
struct RBasic basic
Basic part, including flags and class.
const VALUE src
Source code of this expression.
unsigned long usecnt
Reference count.
struct re_pattern_buffer * ptr
The pattern buffer.
struct rmatch_offset * char_offset
Capture group offsets, in C array.
int char_offset_num_allocated
Number of rmatch_offset that ::rmatch::char_offset holds.
struct re_registers regs
"Registers" of a match.
Represents the region of a capture group.
long beg
Beginning of a group.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directives.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.