12#include "ruby/internal/config.h"
19#include "internal/bignum.h"
20#include "internal/encoding.h"
21#include "internal/error.h"
22#include "internal/hash.h"
23#include "internal/imemo.h"
24#include "internal/re.h"
25#include "internal/string.h"
26#include "internal/object.h"
27#include "internal/ractor.h"
28#include "internal/variable.h"
33#include "ractor_core.h"
37typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
38#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
40#define BEG(no) (regs->beg[(no)])
41#define END(no) (regs->end[(no)])
44static const char casetable[] = {
45 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
46 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
47 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
48 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
50 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
52 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
54 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
56 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
58 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
60 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
62 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
64 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
66 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
68 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
70 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
72 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
73 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
74 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
75 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
76 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
77 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
78 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
79 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
80 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
81 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
82 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
83 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
84 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
85 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
86 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
87 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
88 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
91# error >>> "You lose. You will need a translation table for your character set." <<<
95rb_hrtime_t rb_reg_match_time_limit = 0;
98rb_memcicmp(
const void *x,
const void *y,
long len)
100 const unsigned char *p1 = x, *p2 = y;
104 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
110#if defined(HAVE_MEMMEM) && !defined(__APPLE__)
112rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
114 const unsigned char *y;
116 if ((y = memmem(ys, n, xs, m)) != NULL)
123rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
125 const unsigned char *x = xs, *xe = xs + m;
126 const unsigned char *y = ys, *ye = ys + n;
127#define VALUE_MAX ((VALUE)~(VALUE)0)
131 rb_bug(
"!!too long pattern string!!");
133 if (!(y = memchr(y, *x, n - m + 1)))
137 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
157rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
159 const unsigned char *x = xs, *xe = xs + m;
160 const unsigned char *y = ys;
161 VALUE i, qstable[256];
164 for (i = 0; i < 256; ++i)
167 qstable[*x] = xe - x;
169 for (; y + m <= ys + n; y += *(qstable + y[m])) {
170 if (*xs == *y && memcmp(xs, y, m) == 0)
176static inline unsigned int
177rb_memsearch_qs_utf8_hash(
const unsigned char *x)
179 register const unsigned int mix = 8353;
180 register unsigned int h = *x;
205 return (
unsigned char)h;
209rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
211 const unsigned char *x = xs, *xe = xs + m;
212 const unsigned char *y = ys;
213 VALUE i, qstable[512];
216 for (i = 0; i < 512; ++i) {
219 for (; x < xe; ++x) {
220 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
223 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
224 if (*xs == *y && memcmp(xs, y, m) == 0)
231rb_memsearch_with_char_size(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n,
int char_size)
233 const unsigned char *x = xs, x0 = *xs, *y = ys;
235 for (n -= m; n >= 0; n -= char_size, y += char_size) {
236 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
243rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
245 return rb_memsearch_with_char_size(xs, m, ys, n, 2);
249rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
251 return rb_memsearch_with_char_size(xs, m, ys, n, 4);
257 const unsigned char *x = x0, *y = y0;
259 if (m > n)
return -1;
261 return memcmp(x0, y0, m) == 0 ? 0 : -1;
267 const unsigned char *ys = memchr(y, *x, n);
274 else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
276 return rb_memsearch_ss(x0, m, y0, n);
278 else if (enc == rb_utf8_encoding()){
279 return rb_memsearch_qs_utf8(x0, m, y0, n);
282 else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
283 return rb_memsearch_wchar(x0, m, y0, n);
285 else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
286 return rb_memsearch_qchar(x0, m, y0, n);
288 return rb_memsearch_qs(x0, m, y0, n);
291#define REG_ENCODING_NONE FL_USER6
293#define KCODE_FIXED FL_USER4
302 val = ONIG_OPTION_IGNORECASE;
305 val = ONIG_OPTION_EXTEND;
308 val = ONIG_OPTION_MULTILINE;
317enum { OPTBUF_SIZE = 4 };
320option_to_str(
char str[OPTBUF_SIZE],
int options)
323 if (options & ONIG_OPTION_MULTILINE) *p++ =
'm';
324 if (options & ONIG_OPTION_IGNORECASE) *p++ =
'i';
325 if (options & ONIG_OPTION_EXTEND) *p++ =
'x';
337 *kcode = rb_ascii8bit_encindex();
338 return (*option = ARG_ENCODING_NONE);
340 *kcode = ENCINDEX_EUC_JP;
343 *kcode = ENCINDEX_Windows_31J;
346 *kcode = rb_utf8_encindex();
350 return (*option = char_to_option(c));
352 *option = ARG_ENCODING_FIXED;
357rb_reg_check(
VALUE re)
365rb_reg_expr_str(
VALUE str,
const char *s,
long len,
368 const char *p, *pend;
373 p = s; pend = p +
len;
377 c = rb_enc_ascget(p, pend, &clen, enc);
380 p += mbclen(p, pend, enc);
404 int unicode_p = rb_enc_unicode_p(enc);
407 c = rb_enc_ascget(p, pend, &clen, enc);
408 if (c ==
'\\' && p+clen < pend) {
409 int n = clen + mbclen(p+clen, pend, enc);
415 clen = rb_enc_precise_mbclen(p, pend, enc);
417 c = (
unsigned char)*p;
422 unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
423 rb_str_buf_cat_escaped_char(str, c, unicode_p);
430 else if (c == term) {
438 else if (!rb_enc_isspace(c, enc)) {
442 snprintf(b,
sizeof(b),
"\\x%02X", c);
458 rb_encoding *resenc = rb_default_internal_encoding();
459 if (resenc == NULL) resenc = rb_default_external_encoding();
461 if (re && rb_enc_asciicompat(enc)) {
462 rb_enc_copy(str, re);
465 rb_enc_associate(str, rb_usascii_encoding());
469 rb_reg_expr_str(str, RSTRING_PTR(src_str), RSTRING_LEN(src_str), enc, resenc,
'/');
474 char opts[OPTBUF_SIZE];
476 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
478 if (
RBASIC(re)->flags & REG_ENCODING_NONE)
504rb_reg_source(
VALUE re)
525rb_reg_inspect(
VALUE re)
530 return rb_reg_desc(re);
533static VALUE rb_reg_str_with_term(
VALUE re,
int term);
565 return rb_reg_str_with_term(re,
'/');
569rb_reg_str_with_term(
VALUE re,
int term)
572 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
574 char optbuf[OPTBUF_SIZE + 1];
579 rb_enc_copy(str, re);
582 const UChar *ptr = (UChar *)RSTRING_PTR(src_str);
583 long len = RSTRING_LEN(src_str);
585 if (
len >= 4 && ptr[0] ==
'(' && ptr[1] ==
'?') {
588 if ((
len -= 2) > 0) {
590 opt = char_to_option((
int )*ptr);
600 if (
len > 1 && *ptr ==
'-') {
604 opt = char_to_option((
int )*ptr);
619 if (*ptr ==
':' && ptr[
len-1] ==
')') {
626 err = onig_new(&rp, ptr, ptr +
len, options,
627 enc, OnigDefaultSyntax, NULL);
640 if ((options & embeddable) != embeddable) {
642 option_to_str(optbuf + 1, ~options);
647 if (rb_enc_asciicompat(enc)) {
648 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
656 rb_enc_associate(str, rb_usascii_encoding());
660 s = RSTRING_PTR(str);
666 rb_str_resize(str, RSTRING_LEN(str) - n);
668 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
671 rb_enc_copy(str, re);
678NORETURN(
static void rb_reg_raise(
const char *err,
VALUE re));
681rb_reg_raise(
const char *err,
VALUE re)
683 VALUE desc = rb_reg_desc(re);
689rb_enc_reg_error_desc(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
691 char opts[OPTBUF_SIZE + 1];
693 rb_encoding *resenc = rb_default_internal_encoding();
694 if (resenc == NULL) resenc = rb_default_external_encoding();
696 rb_enc_associate(desc, enc);
698 rb_reg_expr_str(desc, s,
len, enc, resenc,
'/');
700 option_to_str(opts + 1, options);
705NORETURN(
static void rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err));
708rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
714rb_reg_error_desc(
VALUE str,
int options,
const char *err)
716 return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
717 rb_enc_get(str), options, err);
720NORETURN(
static void rb_reg_raise_str(
VALUE str,
int options,
const char *err));
723rb_reg_raise_str(
VALUE str,
int options,
const char *err)
743rb_reg_casefold_p(
VALUE re)
746 return RBOOL(
RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
788rb_reg_options_m(
VALUE re)
795reg_names_iter(
const OnigUChar *name,
const OnigUChar *name_end,
796 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
799 rb_ary_push(ary, rb_enc_str_new((
const char *)name, name_end-name, regex->enc));
817rb_reg_names(
VALUE re)
822 onig_foreach_name(
RREGEXP_PTR(re), reg_names_iter, (
void*)ary);
827reg_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
828 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
834 for (i = 0; i < back_num; i++)
837 rb_hash_aset(hash,
rb_str_new((
const char*)name, name_end-name),ary);
861rb_reg_named_captures(
VALUE re)
864 VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
865 onig_foreach_name(reg, reg_named_captures_iter, (
void*)hash);
870onig_new_with_source(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
872 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
877 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
879 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
882 r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
892make_regexp(
const char *s,
long len,
rb_encoding *enc,
int flags, onig_errmsg_buffer err,
893 const char *sourcefile,
int sourceline)
906 r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s +
len), flags,
907 enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
909 onig_error_code_to_str((UChar*)err, r, &einfo);
968match_alloc(
VALUE klass)
972 NEWOBJ_OF(match,
struct RMatch, klass, flags, alloc_size, 0);
985 if (to->allocated)
return 0;
988 if (to->allocated)
return 0;
989 return ONIGERR_MEMORY;
998pair_byte_cmp(
const void *pair1,
const void *pair2)
1000 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
1001#if SIZEOF_LONG > SIZEOF_INT
1002 return diff ? diff > 0 ? 1 : -1 : 0;
1009update_char_offset(
VALUE match)
1013 int i, num_regs, num_pos;
1024 num_regs = rm->
regs.num_regs;
1031 enc = rb_enc_get(
RMATCH(match)->str);
1033 for (i = 0; i < num_regs; i++) {
1042 for (i = 0; i < num_regs; i++) {
1045 pairs[num_pos++].byte_pos = BEG(i);
1046 pairs[num_pos++].byte_pos = END(i);
1048 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1050 s = p = RSTRING_PTR(
RMATCH(match)->str);
1052 for (i = 0; i < num_pos; i++) {
1053 q = s + pairs[i].byte_pos;
1055 pairs[i].char_pos = c;
1059 for (i = 0; i < num_regs; i++) {
1067 key.byte_pos = BEG(i);
1068 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1071 key.byte_pos = END(i);
1072 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1080match_check(
VALUE match)
1082 if (!
RMATCH(match)->regexp) {
1099 rm = RMATCH_EXT(obj);
1103 if (RMATCH_EXT(orig)->char_offset_num_allocated) {
1129match_regexp(
VALUE match)
1133 regexp =
RMATCH(match)->regexp;
1134 if (
NIL_P(regexp)) {
1164match_names(
VALUE match)
1169 return rb_reg_names(
RMATCH(match)->regexp);
1185match_size(
VALUE match)
1191static int name_to_backref_number(
const struct re_registers *,
VALUE,
const char*,
const char*);
1192NORETURN(
static void name_to_backref_error(
VALUE name));
1195name_to_backref_error(
VALUE name)
1197 rb_raise(
rb_eIndexError,
"undefined group name reference: % "PRIsVALUE,
1204 if (i < 0 || regs->num_regs <= i)
1209match_backref_number(
VALUE match,
VALUE backref)
1226 num = name_to_backref_number(regs, regexp, name, name + RSTRING_LEN(backref));
1229 name_to_backref_error(backref);
1238 return match_backref_number(match, backref);
1253 int i = match_backref_number(match, n);
1257 backref_number_check(regs, i);
1262 update_char_offset(match);
1264 LONG2NUM(RMATCH_EXT(match)->char_offset[i].end));
1288 int i = match_backref_number(match, n);
1292 backref_number_check(regs, i);
1312 int i = match_backref_number(match, n);
1316 backref_number_check(regs, i);
1336 int i = match_backref_number(match, n);
1340 backref_number_check(regs, i);
1360 int i = match_backref_number(match, n);
1364 backref_number_check(regs, i);
1369 update_char_offset(match);
1370 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg);
1386 int i = match_backref_number(match, n);
1390 backref_number_check(regs, i);
1395 update_char_offset(match);
1396 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].end);
1428 int i = match_backref_number(match, n);
1431 backref_number_check(regs, i);
1433 long start = BEG(i), end = END(i);
1472 int i = match_backref_number(match, n);
1476 backref_number_check(regs, i);
1481 update_char_offset(match);
1483 &RMATCH_EXT(match)->char_offset[i];
1487#define MATCH_BUSY FL_USER2
1492 FL_SET(match, MATCH_BUSY);
1496rb_match_unbusy(
VALUE match)
1502rb_match_count(
VALUE match)
1505 if (
NIL_P(match))
return -1;
1507 if (!regs)
return -1;
1508 return regs->num_regs;
1519 int err = onig_region_resize(&rmatch->
regs, 1);
1520 if (err) rb_memerror();
1521 rmatch->
regs.beg[0] = pos;
1522 rmatch->
regs.end[0] = pos +
len;
1526rb_backref_set_string(
VALUE string,
long pos,
long len)
1532 match_set_string(match,
string, pos,
len);
1567rb_reg_fixed_encoding_p(
VALUE re)
1569 return RBOOL(
FL_TEST(re, KCODE_FIXED));
1573rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
1574 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options);
1582 "incompatible encoding regexp match (%s regexp with %s string)",
1583 rb_enc_inspect_name(rb_enc_get(re)),
1584 rb_enc_inspect_name(rb_enc_get(
str)));
1591 int cr = rb_enc_str_coderange(
str);
1594 rb_raise(rb_eArgError,
1595 "invalid byte sequence in %s",
1596 rb_enc_name(rb_enc_get(
str)));
1600 enc = rb_enc_get(
str);
1607 else if (!rb_enc_asciicompat(enc)) {
1608 reg_enc_error(re,
str);
1610 else if (rb_reg_fixed_encoding_p(re)) {
1613 reg_enc_error(re,
str);
1617 else if (warn && (
RBASIC(re)->flags & REG_ENCODING_NONE) &&
1618 enc != rb_ascii8bit_encoding() &&
1620 rb_warn(
"historical binary regexp match /.../n against %s string",
1636 if (reg->enc == enc)
return reg;
1641 const char *pattern = RSTRING_PTR(src_str);
1643 onig_errmsg_buffer err =
"";
1644 unescaped = rb_reg_preprocess(
1645 pattern, pattern + RSTRING_LEN(src_str), enc,
1646 &fixed_enc, err, 0);
1648 if (
NIL_P(unescaped)) {
1649 rb_raise(rb_eArgError,
"regexp preprocess failed: %s", err);
1653 rb_hrtime_t timelimit = reg->timelimit;
1660 if (ruby_single_main_ractor &&
RREGEXP(re)->usecnt == 0) {
1662 r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr +
len),
1664 OnigDefaultSyntax, &einfo);
1668 onig_free_body(&tmp_reg);
1671 onig_free_body(reg);
1677 r = onig_new(®, (UChar *)ptr, (UChar *)(ptr +
len),
1679 OnigDefaultSyntax, &einfo);
1683 onig_error_code_to_str((UChar*)err, r, &einfo);
1684 rb_reg_raise(err, re);
1687 reg->timelimit = timelimit;
1702 if (!tmpreg)
RREGEXP(re)->usecnt++;
1704 OnigPosition result = match(reg,
str, regs, args);
1706 if (!tmpreg)
RREGEXP(re)->usecnt--;
1712 onig_region_free(regs, 0);
1717 case ONIGERR_TIMEOUT:
1718 rb_raise(rb_eRegexpTimeoutError,
"regexp match timeout");
1720 onig_errmsg_buffer err =
"";
1721 onig_error_code_to_str((UChar*)err, (
int)result);
1722 rb_reg_raise(err, re);
1737 enc = rb_reg_prepare_enc(re,
str, 0);
1743 range = RSTRING_LEN(
str) - pos;
1746 if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(
str)) {
1747 string = (UChar*)RSTRING_PTR(
str);
1750 p = onigenc_get_right_adjust_char_head(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1753 p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1777 (UChar *)(ptr +
len),
1778 (UChar *)(ptr + args->pos),
1779 (UChar *)(ptr + args->range),
1786rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1788 long len = RSTRING_LEN(str);
1789 if (pos >
len || pos < 0) {
1796 .range = reverse ? 0 :
len,
1800 OnigPosition result =
rb_reg_onig_match(re, str, reg_onig_search, &args, ®s);
1802 if (result == ONIG_MISMATCH) {
1804 return ONIG_MISMATCH;
1824 onig_region_free(&RMATCH_EXT(match)->regs,
false);
1830 if (set_backref_str) {
1844 if (set_match) *set_match = match;
1850rb_reg_search0(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *match)
1852 return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, match);
1858 return rb_reg_search_set_match(re, str, pos, reverse, 1, NULL);
1871 (UChar *)(ptr +
len),
1906 if (nth >= regs->num_regs) {
1910 nth += regs->num_regs;
1911 if (nth <= 0)
return Qnil;
1913 return RBOOL(BEG(nth) != -1);
1920 long start, end,
len;
1926 if (nth >= regs->num_regs) {
1930 nth += regs->num_regs;
1931 if (nth <= 0)
return Qnil;
1934 if (start == -1)
return Qnil;
1974 if (BEG(0) == -1)
return Qnil;
2008 if (BEG(0) == -1)
return Qnil;
2009 str =
RMATCH(match)->str;
2016match_last_index(
VALUE match)
2021 if (
NIL_P(match))
return -1;
2024 if (BEG(0) == -1)
return -1;
2026 for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
2034 int i = match_last_index(match);
2035 if (i <= 0)
return Qnil;
2041rb_reg_last_defined(
VALUE match)
2043 int i = match_last_index(match);
2044 if (i < 0)
return Qnil;
2049last_match_getter(
ID _x,
VALUE *_y)
2055prematch_getter(
ID _x,
VALUE *_y)
2061postmatch_getter(
ID _x,
VALUE *_y)
2067last_paren_match_getter(
ID _x,
VALUE *_y)
2073match_array(
VALUE match,
int start)
2083 target =
RMATCH(match)->str;
2085 for (i=start; i<regs->num_regs; i++) {
2086 if (regs->beg[i] == -1) {
2113match_to_a(
VALUE match)
2115 return match_array(match, 0);
2135match_captures(
VALUE match)
2137 return match_array(match, 1);
2141name_to_backref_number(
const struct re_registers *regs,
VALUE regexp,
const char* name,
const char* name_end)
2143 if (
NIL_P(regexp))
return -1;
2144 return onig_name_to_backref_number(
RREGEXP_PTR(regexp),
2145 (
const unsigned char *)name, (
const unsigned char *)name_end, regs);
2148#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2150 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2151 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2164 num = NAME_TO_NUMBER(regs, re, name,
2167 name_to_backref_error(name);
2173match_ary_subseq(
VALUE match,
long beg,
long len,
VALUE result)
2176 long j, end = olen < beg+
len ? olen : beg+
len;
2178 if (
len == 0)
return result;
2180 for (j = beg; j < end; j++) {
2183 if (beg +
len > j) {
2204 return match_ary_subseq(match, beg,
len, result);
2247match_aref(
int argc,
VALUE *argv,
VALUE match)
2254 if (
NIL_P(length)) {
2259 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, idx);
2264 return match_ary_aref(match, idx,
Qnil);
2277 if (beg < 0)
return Qnil;
2279 else if (beg > num_regs) {
2282 if (beg+
len > num_regs) {
2283 len = num_regs - beg;
2285 return match_ary_subseq(match, beg,
len,
Qnil);
2316match_values_at(
int argc,
VALUE *argv,
VALUE match)
2324 for (i=0; i<argc; i++) {
2329 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, argv[i]);
2334 match_ary_aref(match, argv[i], result);
2361match_to_s(
VALUE match)
2370match_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2371 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
2373 struct MEMO *memo = MEMO_CAST(arg);
2374 VALUE hash = memo->v1;
2375 VALUE match = memo->v2;
2376 long symbolize = memo->u3.state;
2378 VALUE key = rb_enc_str_new((
const char *)name, name_end-name, regex->enc);
2380 if (symbolize > 0) {
2389 for (i = 0; i < back_num; i++) {
2392 rb_hash_aset(hash, key, value);
2398 rb_hash_aset(hash, key,
Qnil);
2437match_named_captures(
int argc,
VALUE *argv,
VALUE match)
2444 return rb_hash_new();
2447 VALUE symbolize_names = 0;
2452 static ID keyword_ids[1];
2454 VALUE symbolize_names_val;
2456 if (!keyword_ids[0]) {
2459 rb_get_kwargs(opt, keyword_ids, 0, 1, &symbolize_names_val);
2460 if (!UNDEF_P(symbolize_names_val) &&
RTEST(symbolize_names_val)) {
2461 symbolize_names = 1;
2465 hash = rb_hash_new();
2466 memo = rb_imemo_memo_new(hash, match, symbolize_names);
2468 onig_foreach_name(
RREGEXP(
RMATCH(match)->regexp)->ptr, match_named_captures_iter, (
void*)memo);
2490match_deconstruct_keys(
VALUE match,
VALUE keys)
2498 return rb_hash_new_with_size(0);
2502 h = rb_hash_new_with_size(onig_number_of_names(
RREGEXP_PTR(
RMATCH(match)->regexp)));
2505 memo = rb_imemo_memo_new(h, match, 1);
2507 onig_foreach_name(
RREGEXP_PTR(
RMATCH(match)->regexp), match_named_captures_iter, (
void*)memo);
2515 return rb_hash_new_with_size(0);
2556match_string(
VALUE match)
2559 return RMATCH(match)->str;
2568match_inspect_name_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2569 int back_num,
int *back_refs,
OnigRegex regex,
void *arg0)
2574 for (i = 0; i < back_num; i++) {
2575 arg[back_refs[i]].name = name;
2576 arg[back_refs[i]].len = name_end - name;
2603match_inspect(
VALUE match)
2609 int num_regs = regs->num_regs;
2615 return rb_sprintf(
"#<%"PRIsVALUE
":%p>", cname, (
void*)match);
2617 else if (
NIL_P(regexp)) {
2618 return rb_sprintf(
"#<%"PRIsVALUE
": %"PRIsVALUE
">",
2626 match_inspect_name_iter, names);
2631 for (i = 0; i < num_regs; i++) {
2638 rb_str_catf(str,
"%d", i);
2657read_escaped_byte(
const char **pp,
const char *end, onig_errmsg_buffer err)
2659 const char *p = *pp;
2661 int meta_prefix = 0, ctrl_prefix = 0;
2664 if (p == end || *p++ !=
'\\') {
2665 errcpy(err,
"too short escaped multibyte character");
2671 errcpy(err,
"too short escape sequence");
2675 case '\\': code =
'\\';
break;
2676 case 'n': code =
'\n';
break;
2677 case 't': code =
'\t';
break;
2678 case 'r': code =
'\r';
break;
2679 case 'f': code =
'\f';
break;
2680 case 'v': code =
'\013';
break;
2681 case 'a': code =
'\007';
break;
2682 case 'e': code =
'\033';
break;
2685 case '0':
case '1':
case '2':
case '3':
2686 case '4':
case '5':
case '6':
case '7':
2695 errcpy(err,
"invalid hex escape");
2703 errcpy(err,
"duplicate meta escape");
2707 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2717 errcpy(err,
"too short meta escape");
2721 if (p == end || *p++ !=
'-') {
2722 errcpy(err,
"too short control escape");
2727 errcpy(err,
"duplicate control escape");
2731 if (p < end && (*p & 0x80) == 0) {
2741 errcpy(err,
"too short control escape");
2745 errcpy(err,
"unexpected escape sequence");
2748 if (code < 0 || 0xff < code) {
2749 errcpy(err,
"invalid escape code");
2763unescape_escaped_nonascii(
const char **pp,
const char *end,
rb_encoding *enc,
2766 const char *p = *pp;
2768 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2769 char *chbuf = (
char *)area;
2774 memset(chbuf, 0, chmaxlen);
2776 byte = read_escaped_byte(&p, end, err);
2781 area[chlen++] = byte;
2782 while (chlen < chmaxlen &&
2784 byte = read_escaped_byte(&p, end, err);
2788 area[chlen++] = byte;
2791 l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
2793 errcpy(err,
"invalid multibyte escape");
2796 if (1 < chlen || (area[0] & 0x80)) {
2801 else if (*encp != enc) {
2802 errcpy(err,
"escaped non ASCII character in UTF-8 regexp");
2808 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2816check_unicode_range(
unsigned long code, onig_errmsg_buffer err)
2818 if ((0xd800 <= code && code <= 0xdfff) ||
2820 errcpy(err,
"invalid Unicode range");
2827append_utf8(
unsigned long uv,
2830 if (check_unicode_range(uv, err) != 0)
2834 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2844 *encp = rb_utf8_encoding();
2845 else if (*encp != rb_utf8_encoding()) {
2846 errcpy(err,
"UTF-8 character in non UTF-8 regexp");
2854unescape_unicode_list(
const char **pp,
const char *end,
2857 const char *p = *pp;
2858 int has_unicode = 0;
2862 while (p < end &&
ISSPACE(*p)) p++;
2865 code = ruby_scan_hex(p, end-p, &
len);
2869 errcpy(err,
"invalid Unicode range");
2873 if (append_utf8(code, buf, encp, err) != 0)
2877 while (p < end &&
ISSPACE(*p)) p++;
2880 if (has_unicode == 0) {
2881 errcpy(err,
"invalid Unicode list");
2891unescape_unicode_bmp(
const char **pp,
const char *end,
2894 const char *p = *pp;
2899 errcpy(err,
"invalid Unicode escape");
2902 code = ruby_scan_hex(p, 4, &
len);
2904 errcpy(err,
"invalid Unicode escape");
2907 if (append_utf8(code, buf, encp, err) != 0)
2914unescape_nonascii0(
const char **pp,
const char *end,
rb_encoding *enc,
2916 onig_errmsg_buffer err,
int options,
int recurse)
2918 const char *p = *pp;
2921 int in_char_class = 0;
2923 int extended_mode = options & ONIG_OPTION_EXTEND;
2927 int chlen = rb_enc_precise_mbclen(p, end, enc);
2930 errcpy(err,
"invalid multibyte character");
2934 if (1 < chlen || (*p & 0x80)) {
2940 else if (*encp != enc) {
2941 errcpy(err,
"non ASCII character in UTF-8 regexp");
2950 errcpy(err,
"too short escape sequence");
2953 chlen = rb_enc_precise_mbclen(p, end, enc);
2955 goto invalid_multibyte;
2964 case '1':
case '2':
case '3':
2965 case '4':
case '5':
case '6':
case '7':
2967 size_t len = end-(p-1), octlen;
2968 if (ruby_scan_oct(p-1,
len < 3 ?
len : 3, &octlen) <= 0177) {
2984 if (rb_is_usascii_enc(enc)) {
2985 const char *pbeg = p;
2986 int byte = read_escaped_byte(&p, end, err);
2987 if (
byte == -1)
return -1;
2992 if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
2999 errcpy(err,
"too short escape sequence");
3005 if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
3007 if (p == end || *p++ !=
'}') {
3008 errcpy(err,
"invalid Unicode list");
3015 if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
3037 if (extended_mode && !in_char_class) {
3039 while ((p < end) && ((c = *p++) !=
'\n')) {
3040 if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
3053 if (in_char_class) {
3060 if (!in_char_class && recurse) {
3061 if (--parens == 0) {
3068 if (!in_char_class && p + 1 < end && *p ==
'?') {
3069 if (*(p+1) ==
'#') {
3071 const char *orig_p = p;
3074 while (cont && (p < end)) {
3077 if (!(c & 0x80))
break;
3078 if (!*encp && enc == rb_utf8_encoding()) {
3084 chlen = rb_enc_precise_mbclen(p, end, enc);
3086 goto invalid_multibyte;
3107 int local_extend = 0;
3114 for (s = p+1; s < end; s++) {
3117 local_extend = invert ? -1 : 1;
3124 if (local_extend == 0 ||
3125 (local_extend == -1 && !extended_mode) ||
3126 (local_extend == 1 && extended_mode)) {
3133 int local_options = options;
3134 if (local_extend == 1) {
3135 local_options |= ONIG_OPTION_EXTEND;
3138 local_options &= ~ONIG_OPTION_EXTEND;
3142 int ret = unescape_nonascii0(&p, end, enc, buf, encp,
3145 if (ret < 0)
return ret;
3150 extended_mode = local_extend == 1;
3167 else if (!in_char_class && recurse) {
3185unescape_nonascii(
const char *p,
const char *end,
rb_encoding *enc,
3187 onig_errmsg_buffer err,
int options)
3189 return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
3194rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
3195 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options)
3198 int has_property = 0;
3202 if (rb_enc_asciicompat(enc))
3206 rb_enc_associate(buf, enc);
3209 if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err, options) != 0)
3212 if (has_property && !*fixed_enc) {
3217 rb_enc_associate(buf, *fixed_enc);
3224rb_reg_check_preprocess(
VALUE str)
3227 onig_errmsg_buffer err =
"";
3233 p = RSTRING_PTR(str);
3234 end = p + RSTRING_LEN(str);
3235 enc = rb_enc_get(str);
3237 buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err, 0);
3241 return rb_reg_error_desc(str, 0, err);
3247rb_reg_preprocess_dregexp(
VALUE ary,
int options)
3251 onig_errmsg_buffer err =
"";
3257 rb_raise(rb_eArgError,
"no arguments given");
3266 src_enc = rb_enc_get(str);
3267 if (options & ARG_ENCODING_NONE &&
3268 src_enc != ascii8bit) {
3270 rb_raise(
rb_eRegexpError,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3272 src_enc = ascii8bit;
3276 p = RSTRING_PTR(str);
3277 end = p + RSTRING_LEN(str);
3279 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err, options);
3282 rb_raise(rb_eArgError,
"%s", err);
3284 if (fixed_enc != 0) {
3285 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
3286 rb_raise(
rb_eRegexpError,
"encoding mismatch in dynamic regexp : %s and %s",
3287 rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
3289 regexp_enc = fixed_enc;
3298 rb_enc_associate(result, regexp_enc);
3305rb_reg_initialize_check(
VALUE obj)
3307 rb_check_frozen(obj);
3315 int options, onig_errmsg_buffer err,
3316 const char *sourcefile,
int sourceline)
3323 rb_reg_initialize_check(obj);
3325 if (rb_enc_dummy_p(enc)) {
3326 errcpy(err,
"can't make regexp with dummy encoding");
3330 unescaped = rb_reg_preprocess(s, s+
len, enc, &fixed_enc, err, options);
3331 if (
NIL_P(unescaped))
3335 if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
3336 (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
3337 errcpy(err,
"incompatible character encoding");
3340 if (fixed_enc != a_enc) {
3341 options |= ARG_ENCODING_FIXED;
3345 else if (!(options & ARG_ENCODING_FIXED)) {
3346 enc = rb_usascii_encoding();
3349 rb_enc_associate((
VALUE)re, enc);
3350 if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
3353 if (options & ARG_ENCODING_NONE) {
3357 re->
ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
3358 options & ARG_REG_OPTION_MASK, err,
3359 sourcefile, sourceline);
3360 if (!re->
ptr)
return -1;
3373 if (regenc != enc) {
3375 str = rb_enc_associate(dup, enc = regenc);
3377 str = rb_fstring(str);
3382rb_reg_initialize_str(
VALUE obj,
VALUE str,
int options, onig_errmsg_buffer err,
3383 const char *sourcefile,
int sourceline)
3386 rb_encoding *str_enc = rb_enc_get(str), *enc = str_enc;
3387 if (options & ARG_ENCODING_NONE) {
3389 if (enc != ascii8bit) {
3391 errcpy(err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3397 ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
3398 options, err, sourcefile, sourceline);
3399 if (ret == 0) reg_set_source(obj, str, str_enc);
3404rb_reg_s_alloc(
VALUE klass)
3424 return rb_reg_init_str(rb_reg_alloc(), s, options);
3428rb_reg_init_str(
VALUE re,
VALUE s,
int options)
3430 onig_errmsg_buffer err =
"";
3432 if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
3433 rb_reg_raise_str(s, options, err);
3442 onig_errmsg_buffer err =
"";
3444 if (rb_reg_initialize(re, RSTRING_PTR(s), RSTRING_LEN(s),
3445 enc, options, err, NULL, 0) != 0) {
3446 rb_reg_raise_str(s, options, err);
3448 reg_set_source(re, s, enc);
3454rb_reg_new_ary(
VALUE ary,
int opt)
3462 VALUE re = rb_reg_alloc();
3463 onig_errmsg_buffer err =
"";
3465 if (rb_reg_initialize(re, s,
len, enc, options, err, NULL, 0) != 0) {
3466 rb_enc_reg_raise(s,
len, enc, options, err);
3480rb_reg_compile(
VALUE str,
int options,
const char *sourcefile,
int sourceline)
3482 VALUE re = rb_reg_alloc();
3483 onig_errmsg_buffer err =
"";
3486 if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
3487 rb_set_errinfo(rb_reg_error_desc(str, options, err));
3493static VALUE reg_cache;
3498 if (rb_ractor_main_p()) {
3501 && memcmp(
RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
3511static st_index_t reg_hash(
VALUE re);
3523rb_reg_hash(
VALUE re)
3525 st_index_t hashval = reg_hash(re);
3558 if (re1 == re2)
return Qtrue;
3560 rb_reg_check(re1); rb_reg_check(re2);
3580match_hash(
VALUE match)
3587 hashval =
rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3610 if (match1 == match2)
return Qtrue;
3614 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3617 if (regs1->num_regs != regs2->num_regs)
return Qfalse;
3618 if (memcmp(regs1->beg, regs2->beg, regs1->num_regs *
sizeof(*regs1->beg)))
return Qfalse;
3619 if (memcmp(regs1->end, regs2->end, regs1->num_regs *
sizeof(*regs1->end)))
return Qfalse;
3658match_integer_at(
int argc,
VALUE *argv,
VALUE match)
3670 else if ((nth = namev_to_backref_number(regs,
RMATCH(match)->regexp, idx)) < 0) {
3671 name_to_backref_error(idx);
3674 if (argc > 1 && (base =
NUM2INT(argv[1])) < 0) {
3675 rb_raise(rb_eArgError,
"invalid radix %d", base);
3678 if (nth >= regs->num_regs)
return Qnil;
3679 if (nth < 0 && (nth += regs->num_regs) <= 0)
return Qnil;
3681 long start = BEG(nth), end = END(nth);
3682 if (start < 0)
return Qnil;
3683 RUBY_ASSERT(start <= end, "%ld > %ld
", start, end);
3685 VALUE str = RMATCH(match)->str;
3686 RUBY_ASSERT(end <= RSTRING_LEN(str), "%ld > %ld
", end, RSTRING_LEN(str));
3689 return rb_int_parse_cstr(RSTRING_PTR(str) + start, end - start, &endp, NULL,
3690 base, RB_INT_PARSE_DEFAULT);
3694reg_operand(VALUE s, int check)
3697 return rb_sym2str(s);
3699 else if (RB_TYPE_P(s, T_STRING)) {
3703 return check ? rb_str_to_str(s) : rb_check_string_type(s);
3708reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match)
3713 rb_backref_set(Qnil);
3716 *strp = str = reg_operand(str, TRUE);
3719 VALUE l = rb_str_length(str);
3725 pos = rb_str_offset(str, pos);
3727 return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3732 * self =~ other -> integer or nil
3734 * Returns the integer index (in characters) of the first match
3735 * for +self+ and +other+, or +nil+ if none;
3736 * updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables].
3738 * /at/ =~ 'input data' # => 7
3739 * $~ # => #<MatchData "at">
3740 * /ax/ =~ 'input data' # => nil
3743 * Assigns named captures to local variables of the same names
3744 * if and only if +self+:
3746 * - Is a regexp literal;
3747 * see {Regexp Literals}[rdoc-ref:syntax/literals.rdoc@Regexp+Literals].
3748 * - Does not contain interpolations;
3749 * see {Regexp interpolation}[rdoc-ref:Regexp@Interpolation+Mode].
3750 * - Is at the left of the expression.
3754 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = y '
3758 * Assigns +nil+ if not matched:
3760 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = '
3764 * Does not make local variable assignments if +self+ is not a regexp literal:
3766 * r = /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
3768 * p foo # Undefined local variable
3769 * p bar # Undefined local variable
3771 * The assignment does not occur if the regexp is not at the left:
3773 * ' x = y ' =~ /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
3774 * p foo, bar # Undefined local variables
3776 * A regexp interpolation, <tt>#{}</tt>, also disables
3780 * /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
3781 * p foo # Undefined local variable
3786rb_reg_match(VALUE re, VALUE str)
3788 long pos = reg_match_pos(re, &str, 0, NULL);
3789 if (pos < 0) return Qnil;
3790 pos = rb_str_sublen(str, pos);
3791 return LONG2FIX(pos);
3796 * self === other -> true or false
3798 * Returns whether +self+ finds a match in +other+:
3800 * /^[a-z]*$/ === 'HELLO' # => false
3801 * /^[A-Z]*$/ === 'HELLO' # => true
3803 * This method is called in case statements:
3807 * when /\A[a-z]*\z/; print "Lower case\n"
3808 * when /\A[A-Z]*\z/; print "Upper case\n"
3809 * else print "Mixed case\n"
3810 * end # => "Upper case"
3815rb_reg_eqq(VALUE re, VALUE str)
3819 str = reg_operand(str, FALSE);
3821 rb_backref_set(Qnil);
3824 start = rb_reg_search(re, str, 0, 0);
3825 return RBOOL(start >= 0);
3831 * ~ rxp -> integer or nil
3833 * Equivalent to <tt><i>rxp</i> =~ $_</tt>:
3841rb_reg_match2(VALUE re)
3844 VALUE line = rb_lastline_get();
3846 if (!RB_TYPE_P(line, T_STRING)) {
3847 rb_backref_set(Qnil);
3851 start = rb_reg_search(re, line, 0, 0);
3855 start = rb_str_sublen(line, start);
3856 return LONG2FIX(start);
3862 * match(string, offset = 0) -> matchdata or nil
3863 * match(string, offset = 0) {|matchdata| ... } -> object
3865 * With no block given, returns the MatchData object
3866 * that describes the match, if any, or +nil+ if none;
3867 * the search begins at the given character +offset+ in +string+:
3869 * /abra/.match('abracadabra') # => #<MatchData "abra">
3870 * /abra/.match('abracadabra', 4) # => #<MatchData "abra">
3871 * /abra/.match('abracadabra', 8) # => nil
3872 * /abra/.match('abracadabra', 800) # => nil
3874 * string = "\u{5d0 5d1 5e8 5d0}cadabra"
3875 * /abra/.match(string, 7) #=> #<MatchData "abra">
3876 * /abra/.match(string, 8) #=> nil
3877 * /abra/.match(string.b, 8) #=> #<MatchData "abra">
3879 * With a block given, calls the block if and only if a match is found;
3880 * returns the block's value:
3882 * /abra/.match('abracadabra') {|matchdata| p matchdata }
3883 * # => #<MatchData "abra">
3884 * /abra/.match('abracadabra', 4) {|matchdata| p matchdata }
3885 * # => #<MatchData "abra">
3886 * /abra/.match('abracadabra', 8) {|matchdata| p matchdata }
3888 * /abra/.match('abracadabra', 8) {|matchdata| fail 'Cannot happen' }
3891 * Output (from the first two blocks above):
3893 * #<MatchData "abra">
3894 * #<MatchData "abra">
3896 * /(.)(.)(.)/.match("abc")[2] # => "b"
3897 * /(.)(.)/.match("abc", 1)[2] # => "c"
3902rb_reg_match_m(int argc, VALUE *argv, VALUE re)
3904 VALUE result = Qnil, str, initpos;
3907 if (rb_scan_args(argc, argv, "11
", &str, &initpos) == 2) {
3908 pos = NUM2LONG(initpos);
3914 pos = reg_match_pos(re, &str, pos, &result);
3916 rb_backref_set(Qnil);
3919 rb_match_busy(result);
3920 if (!NIL_P(result) && rb_block_given_p()) {
3921 return rb_yield(result);
3928 * match?(string) -> true or false
3929 * match?(string, offset = 0) -> true or false
3931 * Returns <code>true</code> or <code>false</code> to indicate whether the
3932 * regexp is matched or not without updating $~ and other related variables.
3933 * If the second parameter is present, it specifies the position in the string
3934 * to begin the search.
3936 * /R.../.match?("Ruby") # => true
3937 * /R.../.match?("Ruby", 1) # => false
3938 * /P.../.match?("Ruby") # => false
3943rb_reg_match_m_p(int argc, VALUE *argv, VALUE re)
3945 long pos = rb_check_arity(argc, 1, 2) > 1 ? NUM2LONG(argv[1]) : 0;
3946 return rb_reg_match_p(re, argv[0], pos);
3950rb_reg_match_p(VALUE re, VALUE str, long pos)
3952 if (NIL_P(str)) return Qfalse;
3953 str = SYMBOL_P(str) ? rb_sym2str(str) : StringValue(str);
3956 pos += NUM2LONG(rb_str_length(str));
3957 if (pos < 0) return Qfalse;
3961 const char *beg = rb_str_subpos(str, pos, &len);
3962 if (!beg) return Qfalse;
3963 pos = beg - RSTRING_PTR(str);
3967 struct reg_onig_search_args args = {
3969 .range = RSTRING_LEN(str),
3972 return rb_reg_onig_match(re, str, reg_onig_search, &args, NULL) == ONIG_MISMATCH ? Qfalse : Qtrue;
3976 * Document-method: compile
3978 * Alias for Regexp.new
3982str_to_option(VALUE str)
3987 str = rb_check_string_type(str);
3988 if (NIL_P(str)) return -1;
3989 RSTRING_GETMEM(str, ptr, len);
3990 for (long i = 0; i < len; ++i) {
3991 int f = char_to_option(ptr[i]);
3993 rb_raise(rb_eArgError, "unknown regexp option: %
"PRIsVALUE, str);
4001set_timeout(rb_hrtime_t *hrt, VALUE timeout)
4003 double timeout_d = NIL_P(timeout) ? 0.0 : NUM2DBL(timeout);
4004 if (!NIL_P(timeout) && timeout_d <= 0) {
4005 rb_raise(rb_eArgError, "invalid timeout: %
"PRIsVALUE, timeout);
4007 double2hrtime(hrt, timeout_d);
4011reg_copy(VALUE copy, VALUE orig)
4016 rb_reg_initialize_check(copy);
4017 if ((r = onig_reg_copy(&re, RREGEXP_PTR(orig))) != 0) {
4018 /* ONIGERR_MEMORY only */
4019 rb_raise(rb_eRegexpError, "%s
", onig_error_code_to_format(r));
4021 RREGEXP_PTR(copy) = re;
4022 RB_OBJ_WRITE(copy, &RREGEXP(copy)->src, RREGEXP(orig)->src);
4023 RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit;
4024 rb_enc_copy(copy, orig);
4025 FL_SET_RAW(copy, FL_TEST_RAW(orig, KCODE_FIXED|REG_ENCODING_NONE));
4026 if (RBASIC_CLASS(copy) == rb_cRegexp) {
4033struct reg_init_args {
4040static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
4041static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
4045 * Regexp.new(string, options = 0, timeout: nil) -> regexp
4046 * Regexp.new(regexp, timeout: nil) -> regexp
4048 * With argument +string+ given, returns a new regexp with the given string
4051 * r = Regexp.new('foo') # => /foo/
4052 * r.source # => "foo"
4055 * Optional argument +options+ is one of the following:
4057 * - A String of options:
4059 * Regexp.new('foo', 'i') # => /foo/i
4060 * Regexp.new('foo', 'im') # => /foo/im
4062 * - The bit-wise OR of one or more of the constants
4063 * Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
4064 * Regexp::NOENCODING:
4066 * Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
4067 * Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
4068 * Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
4069 * Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
4070 * flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
4071 * Regexp.new('foo', flags) # => /foo/mix
4073 * - +nil+ or +false+, which is ignored.
4074 * - Any other truthy value, in which case the regexp will be
4077 * If optional keyword argument +timeout+ is given,
4078 * its float value overrides the timeout interval for the class,
4080 * If +nil+ is passed as +timeout+, it uses the timeout interval
4081 * for the class, Regexp.timeout.
4083 * With argument +regexp+ given, returns a new regexp. The source,
4084 * options, timeout are the same as +regexp+. +options+ and +n_flag+
4085 * arguments are ineffective. The timeout can be overridden by
4086 * +timeout+ keyword.
4088 * options = Regexp::MULTILINE
4089 * r = Regexp.new('foo', options, timeout: 1.1) # => /foo/m
4090 * r2 = Regexp.new(r) # => /foo/m
4091 * r2.timeout # => 1.1
4092 * r3 = Regexp.new(r, timeout: 3.14) # => /foo/m
4093 * r3.timeout # => 3.14
4098rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
4100 struct reg_init_args args;
4101 VALUE re = reg_extract_args(argc, argv, &args);
4104 reg_init_args(self, args.str, args.enc, args.flags);
4110 set_timeout(&RREGEXP_PTR(self)->timelimit, args.timeout);
4111 if (RBASIC_CLASS(self) == rb_cRegexp) {
4119reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
4122 rb_encoding *enc = 0;
4123 VALUE str, src, opts = Qundef, kwargs;
4126 rb_scan_args(argc, argv, "11:
", &src, &opts, &kwargs);
4128 args->timeout = Qnil;
4129 if (!NIL_P(kwargs)) {
4130 static ID keywords[1];
4132 keywords[0] = rb_intern_const("timeout
");
4134 rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
4137 if (RB_TYPE_P(src, T_REGEXP)) {
4141 rb_warn("flags ignored
");
4144 flags = rb_reg_options(re);
4145 str = RREGEXP_SRC(re);
4150 if (FIXNUM_P(opts)) flags = FIX2INT(opts);
4151 else if ((f = str_to_option(opts)) >= 0) flags = f;
4152 else if (rb_bool_expected(opts, "ignorecase
", FALSE))
4153 flags = ONIG_OPTION_IGNORECASE;
4155 str = StringValue(src);
4159 args->flags = flags;
4164reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags)
4166 if (enc && rb_enc_get(str) != enc)
4167 rb_reg_init_str_enc(self, str, enc, flags);
4169 rb_reg_init_str(self, str, flags);
4174rb_reg_quote(VALUE str)
4176 rb_encoding *enc = rb_enc_get(str);
4180 int ascii_only = rb_enc_str_asciionly_p(str);
4182 s = RSTRING_PTR(str);
4183 send = s + RSTRING_LEN(str);
4185 c = rb_enc_ascget(s, send, &clen, enc);
4187 s += mbclen(s, send, enc);
4191 case '[': case ']': case '{': case '}':
4192 case '(': case ')': case '|': case '-':
4193 case '*': case '.': case '\\':
4194 case '?': case '+': case '^': case '$':
4196 case '\t': case '\f': case '\v': case '\n': case '\r':
4201 tmp = rb_str_new3(str);
4203 rb_enc_associate(tmp, rb_usascii_encoding());
4208 tmp = rb_str_new(0, RSTRING_LEN(str)*2);
4210 rb_enc_associate(tmp, rb_usascii_encoding());
4213 rb_enc_copy(tmp, str);
4215 t = RSTRING_PTR(tmp);
4216 /* copy up to metacharacter */
4217 const char *p = RSTRING_PTR(str);
4218 memcpy(t, p, s - p);
4222 c = rb_enc_ascget(s, send, &clen, enc);
4224 int n = mbclen(s, send, enc);
4232 case '[': case ']': case '{': case '}':
4233 case '(': case ')': case '|': case '-':
4234 case '*': case '.': case '\\':
4235 case '?': case '+': case '^': case '$':
4237 t += rb_enc_mbcput('\\', t, enc);
4240 t += rb_enc_mbcput('\\', t, enc);
4241 t += rb_enc_mbcput(' ', t, enc);
4244 t += rb_enc_mbcput('\\', t, enc);
4245 t += rb_enc_mbcput('t', t, enc);
4248 t += rb_enc_mbcput('\\', t, enc);
4249 t += rb_enc_mbcput('n', t, enc);
4252 t += rb_enc_mbcput('\\', t, enc);
4253 t += rb_enc_mbcput('r', t, enc);
4256 t += rb_enc_mbcput('\\', t, enc);
4257 t += rb_enc_mbcput('f', t, enc);
4260 t += rb_enc_mbcput('\\', t, enc);
4261 t += rb_enc_mbcput('v', t, enc);
4264 t += rb_enc_mbcput(c, t, enc);
4266 rb_str_resize(tmp, t - RSTRING_PTR(tmp));
4273 * Regexp.escape(string) -> new_string
4275 * Returns a new string that escapes any characters
4276 * that have special meaning in a regular expression:
4278 * s = Regexp.escape('\*?{}.') # => "\\\\\\*\\?\\{\\}\\."
4280 * For any string +s+, this call returns a MatchData object:
4282 * r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
4283 * r.match(s) # => #<MatchData "\\\\\\*\\?\\{\\}\\.">
4288rb_reg_s_quote(VALUE c, VALUE str)
4290 return rb_reg_quote(reg_operand(str, TRUE));
4294rb_reg_options(VALUE re)
4299 options = RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
4300 if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
4301 if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
4306rb_check_regexp_type(VALUE re)
4308 return rb_check_convert_type(re, T_REGEXP, "Regexp
", "to_regexp
");
4313 * Regexp.try_convert(object) -> regexp or nil
4315 * Returns +object+ if it is a regexp:
4317 * Regexp.try_convert(/re/) # => /re/
4319 * Otherwise if +object+ responds to <tt>:to_regexp</tt>,
4320 * calls <tt>object.to_regexp</tt> and returns the result.
4322 * Returns +nil+ if +object+ does not respond to <tt>:to_regexp</tt>.
4324 * Regexp.try_convert('re') # => nil
4326 * Raises an exception unless <tt>object.to_regexp</tt> returns a regexp.
4330rb_reg_s_try_convert(VALUE dummy, VALUE re)
4332 return rb_check_regexp_type(re);
4336rb_reg_s_union(VALUE self, VALUE args0)
4338 long argc = RARRAY_LEN(args0);
4342 args[0] = rb_str_new2("(?!)
");
4343 return rb_class_new_instance(1, args, rb_cRegexp);
4345 else if (argc == 1) {
4346 VALUE arg = rb_ary_entry(args0, 0);
4347 VALUE re = rb_check_regexp_type(arg);
4352 quoted = rb_reg_s_quote(Qnil, arg);
4353 return rb_reg_new_str(quoted, 0);
4358 VALUE source = rb_str_buf_new(0);
4359 rb_encoding *result_enc;
4361 int has_asciionly = 0;
4362 rb_encoding *has_ascii_compat_fixed = 0;
4363 rb_encoding *has_ascii_incompat = 0;
4365 for (i = 0; i < argc; i++) {
4367 VALUE e = rb_ary_entry(args0, i);
4370 rb_str_buf_cat_ascii(source, "|
");
4372 v = rb_check_regexp_type(e);
4374 rb_encoding *enc = rb_enc_get(v);
4375 if (!rb_enc_asciicompat(enc)) {
4376 if (!has_ascii_incompat)
4377 has_ascii_incompat = enc;
4378 else if (has_ascii_incompat != enc)
4379 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4380 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4382 else if (rb_reg_fixed_encoding_p(v)) {
4383 if (!has_ascii_compat_fixed)
4384 has_ascii_compat_fixed = enc;
4385 else if (has_ascii_compat_fixed != enc)
4386 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4387 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4392 v = rb_reg_str_with_term(v, -1);
4397 enc = rb_enc_get(e);
4398 if (!rb_enc_asciicompat(enc)) {
4399 if (!has_ascii_incompat)
4400 has_ascii_incompat = enc;
4401 else if (has_ascii_incompat != enc)
4402 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4403 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4405 else if (rb_enc_str_asciionly_p(e)) {
4409 if (!has_ascii_compat_fixed)
4410 has_ascii_compat_fixed = enc;
4411 else if (has_ascii_compat_fixed != enc)
4412 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4413 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4415 v = rb_reg_s_quote(Qnil, e);
4417 if (has_ascii_incompat) {
4418 if (has_asciionly) {
4419 rb_raise(rb_eArgError, "ASCII incompatible encoding: %s
",
4420 rb_enc_name(has_ascii_incompat));
4422 if (has_ascii_compat_fixed) {
4423 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4424 rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
4429 rb_enc_copy(source, v);
4431 rb_str_append(source, v);
4434 if (has_ascii_incompat) {
4435 result_enc = has_ascii_incompat;
4437 else if (has_ascii_compat_fixed) {
4438 result_enc = has_ascii_compat_fixed;
4441 result_enc = rb_ascii8bit_encoding();
4444 rb_enc_associate(source, result_enc);
4445 return rb_class_new_instance(1, &source, rb_cRegexp);
4451 * Regexp.union(*patterns) -> regexp
4452 * Regexp.union(array_of_patterns) -> regexp
4454 * Returns a new regexp that is the union of the given patterns:
4456 * r = Regexp.union(%w[cat dog]) # => /cat|dog/
4457 * r.match('cat') # => #<MatchData "cat">
4458 * r.match('dog') # => #<MatchData "dog">
4459 * r.match('cog') # => nil
4461 * For each pattern that is a string, <tt>Regexp.new(pattern)</tt> is used:
4463 * Regexp.union('penzance') # => /penzance/
4464 * Regexp.union('a+b*c') # => /a\+b\*c/
4465 * Regexp.union('skiing', 'sledding') # => /skiing|sledding/
4466 * Regexp.union(['skiing', 'sledding']) # => /skiing|sledding/
4468 * For each pattern that is a regexp, it is used as is,
4469 * including its flags:
4471 * Regexp.union(/foo/i, /bar/m, /baz/x)
4472 * # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
4473 * Regexp.union([/foo/i, /bar/m, /baz/x])
4474 * # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
4476 * With no arguments, returns <tt>/(?!)/</tt>:
4478 * Regexp.union # => /(?!)/
4480 * If any regexp pattern contains captures, the behavior is unspecified.
4484rb_reg_s_union_m(VALUE self, VALUE args)
4487 if (RARRAY_LEN(args) == 1 &&
4488 !NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
4489 return rb_reg_s_union(self, v);
4491 return rb_reg_s_union(self, args);
4496 * Regexp.linear_time?(re)
4497 * Regexp.linear_time?(string, options = 0)
4499 * Returns +true+ if matching against <tt>re</tt> can be
4500 * done in linear time to the input string.
4502 * Regexp.linear_time?(/re/) # => true
4504 * Note that this is a property of the ruby interpreter, not of the argument
4505 * regular expression. Identical regexp can or cannot run in linear time
4506 * depending on your ruby binary. Neither forward nor backward compatibility
4507 * is guaranteed about the return value of this method. Our current algorithm
4508 * is (*1) but this is subject to change in the future. Alternative
4509 * implementations can also behave differently. They might always return
4510 * false for everything.
4512 * (*1): https://doi.org/10.1109/SP40001.2021.00032
4516rb_reg_s_linear_time_p(int argc, VALUE *argv, VALUE self)
4518 struct reg_init_args args;
4519 VALUE re = reg_extract_args(argc, argv, &args);
4522 re = reg_init_args(rb_reg_alloc(), args.str, args.enc, args.flags);
4525 return RBOOL(onig_check_linear_time(RREGEXP_PTR(re)));
4530rb_reg_init_copy(VALUE copy, VALUE re)
4532 if (!OBJ_INIT_COPY(copy, re)) return copy;
4534 return reg_copy(copy, re);
4538rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
4543 rb_encoding *str_enc = rb_enc_get(str);
4544 rb_encoding *src_enc = rb_enc_get(src);
4545 int acompat = rb_enc_asciicompat(str_enc);
4547#define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
4549 RSTRING_GETMEM(str, s, n);
4554 int c = ASCGET(s, e, &clen);
4558 s += mbclen(s, e, str_enc);
4564 if (c != '\\' || s == e) continue;
4567 val = rb_str_buf_new(ss-p);
4569 rb_enc_str_buf_cat(val, p, ss-p, str_enc);
4571 c = ASCGET(s, e, &clen);
4573 s += mbclen(s, e, str_enc);
4574 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4582 case '1': case '2': case '3': case '4':
4583 case '5': case '6': case '7': case '8': case '9':
4584 if (!NIL_P(regexp) && onig_noname_group_capture_is_active(RREGEXP_PTR(regexp))) {
4593 if (s < e && ASCGET(s, e, &clen) == '<') {
4594 char *name, *name_end;
4596 name_end = name = s + clen;
4597 while (name_end < e) {
4598 c = ASCGET(name_end, e, &clen);
4599 if (c == '>') break;
4600 name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
4603 VALUE n = rb_str_subseq(str, (long)(name - RSTRING_PTR(str)),
4604 (long)(name_end - name));
4605 if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
4606 name_to_backref_error(n);
4608 p = s = name_end + clen;
4612 rb_raise(rb_eRuntimeError, "invalid group name reference format
");
4616 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4625 rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
4629 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
4633 no = regs->num_regs-1;
4634 while (BEG(no) == -1 && no > 0) no--;
4635 if (no == 0) continue;
4639 rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
4643 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4648 if (no >= regs->num_regs) continue;
4649 if (BEG(no) == -1) continue;
4650 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
4654 if (!val) return str;
4656 rb_enc_str_buf_cat(val, p, e-p, str_enc);
4663ignorecase_getter(ID _x, VALUE *_y)
4665 rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective
");
4670ignorecase_setter(VALUE val, ID id, VALUE *_)
4672 rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective; ignored
");
4678 VALUE match = rb_backref_get();
4680 if (NIL_P(match)) return Qnil;
4681 rb_match_busy(match);
4686get_LAST_MATCH_INFO(ID _x, VALUE *_y)
4688 return match_getter();
4692match_setter(VALUE val, ID _x, VALUE *_y)
4695 Check_Type(val, T_MATCH);
4697 rb_backref_set(val);
4702 * Regexp.last_match -> matchdata or nil
4703 * Regexp.last_match(n) -> string or nil
4704 * Regexp.last_match(name) -> string or nil
4706 * With no argument, returns the value of <tt>$~</tt>,
4707 * which is the result of the most recent pattern match
4708 * (see {Regexp global variables}[rdoc-ref:Regexp@Global+Variables]):
4710 * /c(.)t/ =~ 'cat' # => 0
4711 * Regexp.last_match # => #<MatchData "cat" 1:"a">
4712 * /a/ =~ 'foo' # => nil
4713 * Regexp.last_match # => nil
4715 * With non-negative integer argument +n+, returns the _n_th field in the
4716 * matchdata, if any, or nil if none:
4718 * /c(.)t/ =~ 'cat' # => 0
4719 * Regexp.last_match(0) # => "cat"
4720 * Regexp.last_match(1) # => "a"
4721 * Regexp.last_match(2) # => nil
4723 * With negative integer argument +n+, counts backwards from the last field:
4725 * Regexp.last_match(-1) # => "a"
4727 * With string or symbol argument +name+,
4728 * returns the string value for the named capture, if any:
4730 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ 'var = val'
4731 * Regexp.last_match # => #<MatchData "var = val" lhs:"var" rhs:"val">
4732 * Regexp.last_match(:lhs) # => "var"
4733 * Regexp.last_match('rhs') # => "val"
4734 * Regexp.last_match('foo') # Raises IndexError.
4739rb_reg_s_last_match(int argc, VALUE *argv, VALUE _)
4741 if (rb_check_arity(argc, 0, 1) == 1) {
4742 VALUE match = rb_backref_get();
4744 if (NIL_P(match)) return Qnil;
4745 n = match_backref_number(match, argv[0]);
4746 return rb_reg_nth_match(n, match);
4748 return match_getter();
4752re_warn(const char *s)
4757// This function is periodically called during regexp matching
4759rb_reg_timeout_p(regex_t *reg, void *end_time_)
4761 rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
4763 if (*end_time == 0) {
4764 // This is the first time to check interrupts;
4765 // just measure the current time and determine the end time
4766 // if timeout is set.
4767 rb_hrtime_t timelimit = reg->timelimit;
4770 // no per-object timeout.
4771 timelimit = rb_reg_match_time_limit;
4775 *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
4778 // no timeout is set
4779 *end_time = RB_HRTIME_MAX;
4783 if (*end_time < rb_hrtime_now()) {
4784 // Timeout has exceeded
4794 * Regexp.timeout -> float or nil
4796 * It returns the current default timeout interval for Regexp matching in seconds.
4797 * +nil+ means no default timeout configuration.
4801rb_reg_s_timeout_get(VALUE dummy)
4803 double d = hrtime2double(rb_reg_match_time_limit);
4804 if (d == 0.0) return Qnil;
4810 * Regexp.timeout = float or nil
4812 * It sets the default timeout interval for Regexp matching in seconds.
4813 * +nil+ means no default timeout configuration.
4814 * This configuration is process-global. If you want to set timeout for
4815 * each Regexp, use +timeout+ keyword for <code>Regexp.new</code>.
4817 * Regexp.timeout = 1
4818 * /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
4822rb_reg_s_timeout_set(VALUE dummy, VALUE timeout)
4824 rb_ractor_ensure_main_ractor("can not access
Regexp.timeout from non-main Ractors
");
4826 set_timeout(&rb_reg_match_time_limit, timeout);
4833 * rxp.timeout -> float or nil
4835 * It returns the timeout interval for Regexp matching in seconds.
4836 * +nil+ means no per-object timeout configuration.
4838 * This configuration is per-object. The global configuration set by
4839 * Regexp.timeout= is ignored if per-object configuration is set.
4841 * re = Regexp.new("^a*b?a*$", timeout: 1)
4842 * re.timeout #=> 1.0
4843 * re =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
4847rb_reg_timeout_get(VALUE re)
4850 double d = hrtime2double(RREGEXP_PTR(re)->timelimit);
4851 if (d == 0.0) return Qnil;
4856 * Document-class: RegexpError
4858 * Raised when given an invalid regexp expression.
4862 * <em>raises the exception:</em>
4864 * RegexpError: target of repeat operator is not specified: /?/
4868 * Document-class: Regexp
4870 * :include: doc/_regexp.rdoc
4876 rb_eRegexpError = rb_define_class("RegexpError
", rb_eStandardError);
4878 onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4879 onig_set_warn_func(re_warn);
4880 onig_set_verb_warn_func(re_warn);
4882 rb_define_virtual_variable("$~
", get_LAST_MATCH_INFO, match_setter);
4883 rb_define_virtual_variable("$&
", last_match_getter, 0);
4884 rb_define_virtual_variable("$`
", prematch_getter, 0);
4885 rb_define_virtual_variable("$
'", postmatch_getter, 0);
4886 rb_define_virtual_variable("$+", last_paren_match_getter, 0);
4888 rb_gvar_ractor_local("$~");
4889 rb_gvar_ractor_local("$&");
4890 rb_gvar_ractor_local("$`");
4891 rb_gvar_ractor_local("$'");
4892 rb_gvar_ractor_local("$+
");
4893 rb_gvar_box_dynamic("$~
");
4894 rb_gvar_box_ready("$&
");
4895 rb_gvar_box_ready("$`
");
4896 rb_gvar_box_ready("$
'");
4897 rb_gvar_box_ready("$+");
4899 rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
4901 rb_cRegexp = rb_define_class("Regexp", rb_cObject);
4902 rb_define_alloc_func(rb_cRegexp, rb_reg_s_alloc);
4903 rb_define_singleton_method(rb_cRegexp, "compile", rb_class_new_instance_pass_kw, -1);
4904 rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, 1);
4905 rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, 1);
4906 rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
4907 rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1);
4908 rb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
4909 rb_define_singleton_method(rb_cRegexp, "linear_time?", rb_reg_s_linear_time_p, -1);
4911 rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);
4912 rb_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1);
4913 rb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
4914 rb_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1);
4915 rb_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
4916 rb_define_method(rb_cRegexp, "=~", rb_reg_match, 1);
4917 rb_define_method(rb_cRegexp, "===", rb_reg_eqq, 1);
4918 rb_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
4919 rb_define_method(rb_cRegexp, "match", rb_reg_match_m, -1);
4920 rb_define_method(rb_cRegexp, "match?", rb_reg_match_m_p, -1);
4921 rb_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
4922 rb_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0);
4923 rb_define_method(rb_cRegexp, "source", rb_reg_source, 0);
4924 rb_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
4925 rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
4926 rb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
4927 rb_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
4928 rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
4929 rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
4930 rb_define_method(rb_cRegexp, "timeout", rb_reg_timeout_get, 0);
4932 /* Raised when regexp matching timed out. */
4933 rb_eRegexpTimeoutError = rb_define_class_under(rb_cRegexp, "TimeoutError", rb_eRegexpError);
4934 rb_define_singleton_method(rb_cRegexp, "timeout", rb_reg_s_timeout_get, 0);
4935 rb_define_singleton_method(rb_cRegexp, "timeout=", rb_reg_s_timeout_set, 1);
4937 /* see Regexp.options and Regexp.new */
4938 rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
4939 /* see Regexp.options and Regexp.new */
4940 rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
4941 /* see Regexp.options and Regexp.new */
4942 rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
4943 /* see Regexp.options and Regexp.new */
4944 rb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));
4945 /* see Regexp.options and Regexp.new */
4946 rb_define_const(rb_cRegexp, "NOENCODING", INT2FIX(ARG_ENCODING_NONE));
4948 rb_global_variable(®_cache);
4950 rb_cMatch = rb_define_class("MatchData", rb_cObject);
4951 rb_define_alloc_func(rb_cMatch, match_alloc);
4952 rb_undef_method(CLASS_OF(rb_cMatch), "new");
4953 rb_undef_method(CLASS_OF(rb_cMatch), "allocate");
4955 rb_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1);
4956 rb_define_method(rb_cMatch, "regexp", match_regexp, 0);
4957 rb_define_method(rb_cMatch, "names", match_names, 0);
4958 rb_define_method(rb_cMatch, "size", match_size, 0);
4959 rb_define_method(rb_cMatch, "length", match_size, 0);
4960 rb_define_method(rb_cMatch, "offset", match_offset, 1);
4961 rb_define_method(rb_cMatch, "byteoffset", match_byteoffset, 1);
4962 rb_define_method(rb_cMatch, "bytebegin", match_bytebegin, 1);
4963 rb_define_method(rb_cMatch, "byteend", match_byteend, 1);
4964 rb_define_method(rb_cMatch, "begin", match_begin, 1);
4965 rb_define_method(rb_cMatch, "end", match_end, 1);
4966 rb_define_method(rb_cMatch, "match", match_nth, 1);
4967 rb_define_method(rb_cMatch, "match_length", match_nth_length, 1);
4968 rb_define_method(rb_cMatch, "to_a", match_to_a, 0);
4969 rb_define_method(rb_cMatch, "[]", match_aref, -1);
4970 rb_define_method(rb_cMatch, "captures", match_captures, 0);
4971 rb_define_alias(rb_cMatch, "deconstruct", "captures");
4972 rb_define_method(rb_cMatch, "named_captures", match_named_captures, -1);
4973 rb_define_method(rb_cMatch, "deconstruct_keys", match_deconstruct_keys, 1);
4974 rb_define_method(rb_cMatch, "values_at", match_values_at, -1);
4975 rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
4976 rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
4977 rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
4978 rb_define_method(rb_cMatch, "inspect", match_inspect, 0);
4979 rb_define_method(rb_cMatch, "string", match_string, 0);
4980 rb_define_method(rb_cMatch, "hash", match_hash, 0);
4981 rb_define_method(rb_cMatch, "eql?", match_equal, 1);
4982 rb_define_method(rb_cMatch, "==", match_equal, 1);
4983 rb_define_method(rb_cMatch, "integer_at", match_integer_at, -1);
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define INT2FIX
Old name of RB_INT2FIX.
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define FIX2INT
Old name of RB_FIX2INT.
#define rb_str_new3
Old name of rb_str_new_shared.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_SET
Old name of RB_FL_SET.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define rb_exc_new3
Old name of rb_exc_new_str.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define NUM2INT
Old name of RB_NUM2INT.
#define INT2NUM
Old name of RB_INT2NUM.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define T_MATCH
Old name of RUBY_T_MATCH.
#define FL_TEST
Old name of RB_FL_TEST.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define rb_str_new4
Old name of rb_str_new_frozen.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
VALUE rb_eRegexpError
RegexpError exception.
#define ruby_verbose
This variable controls whether the interpreter is in verbose mode ($VERBOSE).
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports unless $VERBOSE is nil.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_obj_reveal(VALUE obj, VALUE klass)
Make a hidden object visible again.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_cMatch
MatchData class.
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
VALUE rb_cRegexp
Regexp class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Converts a character option to its encoding.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
#define RGENGC_WB_PROTECTED_MATCH
This is a compile-time flag to enable/disable write barrier for struct RMatch.
#define RGENGC_WB_PROTECTED_REGEXP
This is a compile-time flag to enable/disable write barrier for struct RRegexp.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
VALUE rb_ary_resize(VALUE ary, long len)
Expands or shrinks the passed array to the passed length.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Identical to rb_ary_new_from_values(), except it expects exactly two parameters.
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that was captured by the very last group.
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
#define rb_hash_end(h)
Just another name of st_hash_end.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the given string.
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
OnigPosition rb_reg_onig_match(VALUE re, VALUE str, OnigPosition(*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), void *args, struct re_registers *regs)
Runs a regular expression match using function match.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define RB_ALLOCV_N(type, v, n)
Allocates a memory region, possibly on stack.
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
#define RB_ALLOCV_END(v)
Polite way to declare that the given array is not used any longer.
#define RARRAY_LEN
Just another name of rb_array_len.
#define RARRAY_AREF(a, i)
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define RMATCH(obj)
Convenient casting macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
#define RREGEXP(obj)
Convenient casting macro.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define RREGEXP_PTR(obj)
Convenient accessor macro.
static long RREGEXP_SRC_LEN(VALUE rexp)
Convenient getter function.
static char * RREGEXP_SRC_PTR(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C string.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
Regular expression execution context.
VALUE regexp
The expression of this match.
VALUE str
The target string that the match was made against.
Ruby's regular expression.
struct RBasic basic
Basic part, including flags and class.
const VALUE src
Source code of this expression.
unsigned long usecnt
Reference count.
struct re_pattern_buffer * ptr
The pattern buffer.
struct rmatch_offset * char_offset
Capture group offsets, in C array.
int char_offset_num_allocated
Number of rmatch_offset that ::rmatch::char_offset holds.
struct re_registers regs
"Registers" of a match.
Represents the region of a capture group.
long beg
Beginning of a group.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directives.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.