12#include "ruby/internal/config.h"
19#include "internal/bignum.h"
20#include "internal/encoding.h"
21#include "internal/error.h"
22#include "internal/hash.h"
23#include "internal/imemo.h"
24#include "internal/re.h"
25#include "internal/string.h"
26#include "internal/object.h"
27#include "internal/ractor.h"
28#include "internal/variable.h"
33#include "ractor_core.h"
/*
 * Fixed-size character buffer (ONIG_MAX_ERROR_MESSAGE_LEN bytes) that the
 * functions in this file use as an out-parameter to hand a regexp error
 * message back to their caller.  Callers declare one on the stack,
 * initialised to "".
 */
37typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
/*
 * Copy the message `msg` into the error buffer `err` with strlcpy(), passing
 * ONIG_MAX_ERROR_MESSAGE_LEN as the destination size.  `err` must therefore
 * be at least that large (an onig_errmsg_buffer is exactly that size).
 */
38#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
41static const char casetable[] = {
42 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
43 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
44 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
45 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
47 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
49 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
51 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
53 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
55 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
57 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
59 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
61 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
63 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
65 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
67 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
69 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
70 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
71 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
72 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
73 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
74 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
75 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
76 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
77 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
78 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
79 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
80 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
81 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
82 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
83 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
84 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
85 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
88# error >>> "You lose. You will need a translation table for your character set." <<<
/*
 * Global regexp match time limit, initialised to 0.
 * NOTE(review): 0 presumably means "no limit", and the unit is whatever
 * rb_hrtime_t counts -- neither is established in this part of the file;
 * confirm before relying on it.
 */
92rb_hrtime_t rb_reg_match_time_limit = 0;
95rb_memcicmp(
const void *x,
const void *y,
long len)
97 const unsigned char *p1 = x, *p2 = y;
101 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
107#if defined(HAVE_MEMMEM) && !defined(__APPLE__)
109rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
111 const unsigned char *y;
113 if ((y = memmem(ys, n, xs, m)) != NULL)
120rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
122 const unsigned char *x = xs, *xe = xs + m;
123 const unsigned char *y = ys, *ye = ys + n;
124#define VALUE_MAX ((VALUE)~(VALUE)0)
128 rb_bug(
"!!too long pattern string!!");
130 if (!(y = memchr(y, *x, n - m + 1)))
134 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
154rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
156 const unsigned char *x = xs, *xe = xs + m;
157 const unsigned char *y = ys;
158 VALUE i, qstable[256];
161 for (i = 0; i < 256; ++i)
164 qstable[*x] = xe - x;
166 for (; y + m <= ys + n; y += *(qstable + y[m])) {
167 if (*xs == *y && memcmp(xs, y, m) == 0)
173static inline unsigned int
174rb_memsearch_qs_utf8_hash(
const unsigned char *x)
176 register const unsigned int mix = 8353;
177 register unsigned int h = *x;
202 return (
unsigned char)h;
206rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
208 const unsigned char *x = xs, *xe = xs + m;
209 const unsigned char *y = ys;
210 VALUE i, qstable[512];
213 for (i = 0; i < 512; ++i) {
216 for (; x < xe; ++x) {
217 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
220 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
221 if (*xs == *y && memcmp(xs, y, m) == 0)
228rb_memsearch_with_char_size(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n,
int char_size)
230 const unsigned char *x = xs, x0 = *xs, *y = ys;
232 for (n -= m; n >= 0; n -= char_size, y += char_size) {
233 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
240rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
242 return rb_memsearch_with_char_size(xs, m, ys, n, 2);
246rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
248 return rb_memsearch_with_char_size(xs, m, ys, n, 4);
254 const unsigned char *x = x0, *y = y0;
256 if (m > n)
return -1;
258 return memcmp(x0, y0, m) == 0 ? 0 : -1;
264 const unsigned char *ys = memchr(y, *x, n);
271 else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
273 return rb_memsearch_ss(x0, m, y0, n);
275 else if (enc == rb_utf8_encoding()){
276 return rb_memsearch_qs_utf8(x0, m, y0, n);
279 else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
280 return rb_memsearch_wchar(x0, m, y0, n);
282 else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
283 return rb_memsearch_qchar(x0, m, y0, n);
285 return rb_memsearch_qs(x0, m, y0, n);
/*
 * Flag bit (FL_USER6) tested against RBASIC(re)->flags in this file.  When
 * it is set, the encoding check can emit the "historical binary regexp match
 * /.../n against %s string" warning, so it corresponds to a regexp written
 * with the /n option.
 */
288#define REG_ENCODING_NONE FL_USER6
/*
 * Flag bit (FL_USER4) on a Regexp object.  rb_reg_fixed_encoding_p() reports
 * whether this bit is set.
 */
290#define KCODE_FIXED FL_USER4
299 val = ONIG_OPTION_IGNORECASE;
302 val = ONIG_OPTION_EXTEND;
305 val = ONIG_OPTION_MULTILINE;
/*
 * Size of the character buffer that option_to_str() fills in.  That function
 * can write up to three option letters ('m', 'i', 'x').
 * NOTE(review): the fourth byte is presumably for the terminating NUL -- the
 * terminator write is not visible here; confirm.
 */
314enum { OPTBUF_SIZE = 4 };
317option_to_str(
char str[OPTBUF_SIZE],
int options)
320 if (options & ONIG_OPTION_MULTILINE) *p++ =
'm';
321 if (options & ONIG_OPTION_IGNORECASE) *p++ =
'i';
322 if (options & ONIG_OPTION_EXTEND) *p++ =
'x';
334 *kcode = rb_ascii8bit_encindex();
335 return (*option = ARG_ENCODING_NONE);
337 *kcode = ENCINDEX_EUC_JP;
340 *kcode = ENCINDEX_Windows_31J;
343 *kcode = rb_utf8_encindex();
347 return (*option = char_to_option(c));
349 *option = ARG_ENCODING_FIXED;
354rb_reg_check(
VALUE re)
362rb_reg_expr_str(
VALUE str,
const char *s,
long len,
365 const char *p, *pend;
370 p = s; pend = p +
len;
374 c = rb_enc_ascget(p, pend, &clen, enc);
377 p += mbclen(p, pend, enc);
401 int unicode_p = rb_enc_unicode_p(enc);
404 c = rb_enc_ascget(p, pend, &clen, enc);
405 if (c ==
'\\' && p+clen < pend) {
406 int n = clen + mbclen(p+clen, pend, enc);
412 clen = rb_enc_precise_mbclen(p, pend, enc);
414 c = (
unsigned char)*p;
419 unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
420 rb_str_buf_cat_escaped_char(str, c, unicode_p);
427 else if (c == term) {
435 else if (!rb_enc_isspace(c, enc)) {
439 snprintf(b,
sizeof(b),
"\\x%02X", c);
455 rb_encoding *resenc = rb_default_internal_encoding();
456 if (resenc == NULL) resenc = rb_default_external_encoding();
458 if (re && rb_enc_asciicompat(enc)) {
459 rb_enc_copy(str, re);
462 rb_enc_associate(str, rb_usascii_encoding());
466 rb_reg_expr_str(str, RSTRING_PTR(src_str), RSTRING_LEN(src_str), enc, resenc,
'/');
471 char opts[OPTBUF_SIZE];
473 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
475 if (
RBASIC(re)->flags & REG_ENCODING_NONE)
501rb_reg_source(
VALUE re)
522rb_reg_inspect(
VALUE re)
527 return rb_reg_desc(re);
/*
 * Forward declaration: rb_reg_str_with_term() is called (with '/' as `term`)
 * before its definition appears further down in this file.
 *
 * re:   the Regexp object to render as a string.
 * term: terminator character; the definition forwards it to
 *       rb_reg_expr_str() as that function's last argument.
 */
530static VALUE rb_reg_str_with_term(
VALUE re,
int term);
562 return rb_reg_str_with_term(re,
'/');
566rb_reg_str_with_term(
VALUE re,
int term)
569 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
571 char optbuf[OPTBUF_SIZE + 1];
576 rb_enc_copy(str, re);
579 const UChar *ptr = (UChar *)RSTRING_PTR(src_str);
580 long len = RSTRING_LEN(src_str);
582 if (
len >= 4 && ptr[0] ==
'(' && ptr[1] ==
'?') {
585 if ((
len -= 2) > 0) {
587 opt = char_to_option((
int )*ptr);
597 if (
len > 1 && *ptr ==
'-') {
601 opt = char_to_option((
int )*ptr);
616 if (*ptr ==
':' && ptr[
len-1] ==
')') {
623 err = onig_new(&rp, ptr, ptr +
len, options,
624 enc, OnigDefaultSyntax, NULL);
637 if ((options & embeddable) != embeddable) {
639 option_to_str(optbuf + 1, ~options);
644 if (rb_enc_asciicompat(enc)) {
645 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
653 rb_enc_associate(str, rb_usascii_encoding());
657 s = RSTRING_PTR(str);
663 rb_str_resize(str, RSTRING_LEN(str) - n);
665 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
668 rb_enc_copy(str, re);
/*
 * Declares rb_reg_raise() with the NORETURN attribute ahead of its
 * definition, i.e. the function is declared as never returning to its caller.
 *
 * err: error message text (callers in this file pass an onig_errmsg_buffer).
 * re:  the Regexp the error is about; the definition obtains its description
 *      via rb_reg_desc(re).
 */
675NORETURN(
static void rb_reg_raise(
const char *err,
VALUE re));
678rb_reg_raise(
const char *err,
VALUE re)
680 VALUE desc = rb_reg_desc(re);
686rb_enc_reg_error_desc(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
688 char opts[OPTBUF_SIZE + 1];
690 rb_encoding *resenc = rb_default_internal_encoding();
691 if (resenc == NULL) resenc = rb_default_external_encoding();
693 rb_enc_associate(desc, enc);
695 rb_reg_expr_str(desc, s,
len, enc, resenc,
'/');
697 option_to_str(opts + 1, options);
/*
 * Declares rb_enc_reg_raise() with the NORETURN attribute ahead of its
 * definition.  The parameter list is identical to rb_enc_reg_error_desc():
 * pattern bytes `s` of length `len` in encoding `enc`, the regexp `options`,
 * and the error message `err`.
 * NOTE(review): presumably raises the exception described by
 * rb_enc_reg_error_desc() -- the body is not visible here; confirm.
 */
702NORETURN(
static void rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err));
705rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
711rb_reg_error_desc(
VALUE str,
int options,
const char *err)
713 return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
714 rb_enc_get(str), options, err);
/*
 * Declares rb_reg_raise_str() with the NORETURN attribute ahead of its
 * definition.  The regexp initialisers in this file call it when
 * rb_reg_initialize_str() / rb_reg_initialize() return non-zero.
 *
 * str:     the source string the regexp was being built from.
 * options: the option bits that were passed to the initialiser.
 * err:     error message filled in by the failed initialiser.
 */
717NORETURN(
static void rb_reg_raise_str(
VALUE str,
int options,
const char *err));
720rb_reg_raise_str(
VALUE str,
int options,
const char *err)
740rb_reg_casefold_p(
VALUE re)
743 return RBOOL(
RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
785rb_reg_options_m(
VALUE re)
792reg_names_iter(
const OnigUChar *name,
const OnigUChar *name_end,
793 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
796 rb_ary_push(ary, rb_enc_str_new((
const char *)name, name_end-name, regex->enc));
814rb_reg_names(
VALUE re)
819 onig_foreach_name(
RREGEXP_PTR(re), reg_names_iter, (
void*)ary);
824reg_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
825 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
831 for (i = 0; i < back_num; i++)
834 rb_hash_aset(hash,
rb_str_new((
const char*)name, name_end-name),ary);
858rb_reg_named_captures(
VALUE re)
861 VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
862 onig_foreach_name(reg, reg_named_captures_iter, (
void*)hash);
867onig_new_with_source(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
869 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
874 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
876 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
879 r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
889make_regexp(
const char *s,
long len,
rb_encoding *enc,
int flags, onig_errmsg_buffer err,
890 const char *sourcefile,
int sourceline)
903 r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s +
len), flags,
904 enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
906 onig_error_code_to_str((UChar*)err, r, &einfo);
965match_alloc_n(
VALUE klass,
int num_regs)
967 int capa = num_regs * 2;
968 size_t alloc_size = offsetof(
struct RMatch, as) +
sizeof(OnigPosition) *
capa;
969 if (alloc_size <
sizeof(
struct RMatch)) {
970 alloc_size =
sizeof(
struct RMatch);
974 if (!rb_gc_size_allocatable_p(alloc_size)) {
975 alloc_size =
sizeof(
struct RMatch);
976 flags |= RMATCH_ONIG;
980 NEWOBJ_OF(match,
struct RMatch, klass, flags, alloc_size);
981 memset(((
char *)match) +
sizeof(
struct RBasic), 0, alloc_size -
sizeof(
struct RBasic));
987match_alloc(
VALUE klass)
989 return match_alloc_n(klass, 0);
996 if (to->allocated)
return 0;
999 if (to->allocated)
return 0;
1000 return ONIGERR_MEMORY;
1004match_to_onig(
VALUE match,
int num_regs,
const OnigPosition *src_beg,
const OnigPosition *src_end)
1008 if (onig_region_resize(&tmp, num_regs)) {
1011 memcpy(tmp.beg, src_beg, num_regs *
sizeof(OnigPosition));
1012 memcpy(tmp.end, src_end, num_regs *
sizeof(OnigPosition));
1018rb_match_ensure_onig(
VALUE match)
1023 match_to_onig(match, n, &rm->
as.embed[0], &rm->
as.embed[n]);
1030match_set_regs(
VALUE match,
int num_regs,
const OnigPosition *beg,
const OnigPosition *end)
1035 if (onig_region_resize(&rm->
as.onig,
num_regs)) {
1038 memcpy(rm->
as.onig.beg, beg,
num_regs *
sizeof(OnigPosition));
1039 memcpy(rm->
as.onig.end, end,
num_regs *
sizeof(OnigPosition));
1042 memcpy(&rm->
as.embed[0], beg,
num_regs *
sizeof(OnigPosition));
1046 match_to_onig(match,
num_regs, beg, end);
1057pair_byte_cmp(
const void *pair1,
const void *pair2)
1059 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
1060#if SIZEOF_LONG > SIZEOF_INT
1061 return diff ? diff > 0 ? 1 : -1 : 0;
1068update_char_offset(
VALUE match)
1101 if (RMATCH_BEG(match, i) < 0)
1103 pairs[num_pos++].byte_pos = RMATCH_BEG(match, i);
1104 pairs[num_pos++].byte_pos = RMATCH_END(match, i);
1106 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1108 s = p = RSTRING_PTR(
RMATCH(match)->
str);
1110 for (i = 0; i < num_pos; i++) {
1111 q = s + pairs[i].byte_pos;
1113 pairs[i].char_pos = c;
1119 if (RMATCH_BEG(match, i) < 0) {
1125 key.byte_pos = RMATCH_BEG(match, i);
1126 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1129 key.byte_pos = RMATCH_END(match, i);
1130 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1138match_check(
VALUE match)
1157 match_set_regs(obj, RMATCH_NREGS(orig), RMATCH_BEG_PTR(orig), RMATCH_END_PTR(orig));
1186match_regexp(
VALUE match)
1221match_names(
VALUE match)
1242match_size(
VALUE match)
1245 return INT2FIX(RMATCH_NREGS(match));
/*
 * Forward declaration: match_backref_number() calls this before the
 * definition appears further down in this file.
 *
 * match: the MatchData whose regexp's named groups are looked up.
 * name:  the group name (a String; its bytes are read via RSTRING_PTR).
 *
 * The definition returns -1 when the match has no regexp, passes a negative
 * lookup result through unchanged, and otherwise returns a group number.
 */
1248static int match_name_to_backref_number(
VALUE match,
VALUE name);
/*
 * Declares name_to_backref_error() with the NORETURN attribute ahead of its
 * definition.  The definition raises rb_eIndexError with the message
 * "undefined group name reference: ..." for the given group `name`.
 */
1249NORETURN(
static void name_to_backref_error(
VALUE name));
1252name_to_backref_error(
VALUE name)
1254 rb_raise(
rb_eIndexError,
"undefined group name reference: % "PRIsVALUE,
1259backref_number_check(
VALUE match,
int i)
1261 if (i < 0 || RMATCH_NREGS(match) <= i)
1266match_backref_number(
VALUE match,
VALUE backref)
1278 num = match_name_to_backref_number(match, backref);
1281 name_to_backref_error(backref);
1290 return match_backref_number(match, backref);
1305 int i = match_backref_number(match, n);
1308 backref_number_check(match, i);
1310 if (RMATCH_BEG(match, i) < 0)
1313 update_char_offset(match);
1339 int i = match_backref_number(match, n);
1342 backref_number_check(match, i);
1344 if (RMATCH_BEG(match, i) < 0)
1362 int i = match_backref_number(match, n);
1365 backref_number_check(match, i);
1367 if (RMATCH_BEG(match, i) < 0)
1369 return LONG2NUM(RMATCH_BEG(match, i));
1385 int i = match_backref_number(match, n);
1388 backref_number_check(match, i);
1390 if (RMATCH_BEG(match, i) < 0)
1392 return LONG2NUM(RMATCH_END(match, i));
1408 int i = match_backref_number(match, n);
1411 backref_number_check(match, i);
1413 if (RMATCH_BEG(match, i) < 0)
1416 update_char_offset(match);
1433 int i = match_backref_number(match, n);
1436 backref_number_check(match, i);
1438 if (RMATCH_BEG(match, i) < 0)
1441 update_char_offset(match);
1474 int i = match_backref_number(match, n);
1476 backref_number_check(match, i);
1478 long start = RMATCH_BEG(match, i), end = RMATCH_END(match, i);
1517 int i = match_backref_number(match, n);
1520 backref_number_check(match, i);
1522 if (RMATCH_BEG(match, i) < 0)
1525 update_char_offset(match);
1527 &
RMATCH(match)->char_offset[i];
/*
 * Flag bit (FL_USER2) on a MatchData object.  It is set with FL_SET in this
 * file, and match_alloc_or_reuse() requires it to be clear on an existing
 * match as one of its conditions.
 * NOTE(review): presumably a busy match must not be reused/overwritten --
 * the reuse branch itself is not visible here; confirm.
 */
1531#define MATCH_BUSY FL_USER2
1536 FL_SET(match, MATCH_BUSY);
1540rb_match_unbusy(
VALUE match)
1546rb_match_count(
VALUE match)
1548 if (
NIL_P(match))
return -1;
1549 return RMATCH_NREGS(match);
1553match_alloc_or_reuse(
VALUE existing,
int num_regs)
1555 if (!
NIL_P(existing) &&
1556 !
FL_TEST(existing, MATCH_BUSY) &&
1560 return match_alloc_n(
rb_cMatch, num_regs);
1570 OnigPosition beg = pos, end = pos +
len;
1571 match_set_regs(m, 1, &beg, &end);
1575rb_backref_set_string(
VALUE string,
long pos,
long len)
1578 match_set_string(match,
string, pos,
len);
1613rb_reg_fixed_encoding_p(
VALUE re)
1615 return RBOOL(
FL_TEST(re, KCODE_FIXED));
1619rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
1620 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options);
1628 "incompatible encoding regexp match (%s regexp with %s string)",
1629 rb_enc_inspect_name(rb_enc_get(re)),
1630 rb_enc_inspect_name(rb_enc_get(
str)));
1637 int cr = rb_enc_str_coderange(
str);
1640 rb_raise(rb_eArgError,
1641 "invalid byte sequence in %s",
1642 rb_enc_name(rb_enc_get(
str)));
1646 enc = rb_enc_get(
str);
1653 else if (!rb_enc_asciicompat(enc)) {
1654 reg_enc_error(re,
str);
1656 else if (rb_reg_fixed_encoding_p(re)) {
1659 reg_enc_error(re,
str);
1663 else if (warn && (
RBASIC(re)->flags & REG_ENCODING_NONE) &&
1664 enc != rb_ascii8bit_encoding() &&
1666 rb_warn(
"historical binary regexp match /.../n against %s string",
1682 if (reg->enc == enc)
return reg;
1687 const char *pattern = RSTRING_PTR(src_str);
1689 onig_errmsg_buffer err =
"";
1690 unescaped = rb_reg_preprocess(
1691 pattern, pattern + RSTRING_LEN(src_str), enc,
1692 &fixed_enc, err, 0);
1694 if (
NIL_P(unescaped)) {
1695 rb_raise(rb_eArgError,
"regexp preprocess failed: %s", err);
1699 rb_hrtime_t timelimit = reg->timelimit;
1706 if (ruby_single_main_ractor &&
RREGEXP(re)->usecnt == 0) {
1708 r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr +
len),
1710 OnigDefaultSyntax, &einfo);
1714 onig_free_body(&tmp_reg);
1717 onig_free_body(reg);
1723 r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr +
len),
1725 OnigDefaultSyntax, &einfo);
1729 onig_error_code_to_str((UChar*)err, r, &einfo);
1730 rb_reg_raise(err, re);
1733 reg->timelimit = timelimit;
1748 if (!tmpreg)
RREGEXP(re)->usecnt++;
1750 OnigPosition result = match(reg,
str, regs, args);
1752 if (!tmpreg)
RREGEXP(re)->usecnt--;
1761 case ONIGERR_TIMEOUT:
1762 rb_raise(rb_eRegexpTimeoutError,
"regexp match timeout");
1764 onig_errmsg_buffer err =
"";
1765 onig_error_code_to_str((UChar*)err, (
int)result);
1766 rb_reg_raise(err, re);
1781 enc = rb_reg_prepare_enc(re,
str, 0);
1787 range = RSTRING_LEN(
str) - pos;
1790 if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(
str)) {
1791 string = (UChar*)RSTRING_PTR(
str);
1794 p = onigenc_get_right_adjust_char_head(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1797 p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1821 (UChar *)(ptr +
len),
1822 (UChar *)(ptr + args->pos),
1823 (UChar *)(ptr + args->range),
1830rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1832 long len = RSTRING_LEN(str);
1833 if (pos >
len || pos < 0) {
1840 .range = reverse ? 0 :
len,
1848 int cap = n < ONIG_NREGION ? ONIG_NREGION : n;
1850 OnigPosition *buf =
ALLOCV_N(OnigPosition, regs_buf, (
size_t)cap * 2);
1858 OnigPosition result =
rb_reg_onig_match(re, str, reg_onig_search, &args, &regs);
1860 if (result == ONIG_MISMATCH) {
1863 return ONIG_MISMATCH;
1867 VALUE match = match_alloc_or_reuse(existing, regs.num_regs);
1869 match_set_regs(match, regs.num_regs, regs.beg, regs.end);
1872 if (set_backref_str) {
1886 if (set_match) *set_match = match;
1892rb_reg_search0(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *match)
1894 return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, match);
1900 return rb_reg_search_set_match(re, str, pos, reverse, 1, NULL);
1913 (UChar *)(ptr +
len),
1925 int cap = n < ONIG_NREGION ? ONIG_NREGION : n;
1927 OnigPosition *buf =
ALLOCV_N(OnigPosition, regs_buf, (
size_t)cap * 2);
1942 match_set_regs(match, regs.num_regs, regs.beg, regs.end);
1957 int num_regs = RMATCH_NREGS(match);
1958 if (nth >= num_regs) {
1963 if (nth <= 0)
return Qnil;
1965 return RBOOL(RMATCH_BEG(match, nth) != -1);
1972 long start, end,
len;
1976 int num_regs = RMATCH_NREGS(match);
1977 if (nth >= num_regs) {
1982 if (nth <= 0)
return Qnil;
1984 start = RMATCH_BEG(match, nth);
1985 if (start == -1)
return Qnil;
1986 end = RMATCH_END(match, nth);
2023 if (RMATCH_BEG(match, 0) == -1)
return Qnil;
2055 if (RMATCH_BEG(match, 0) == -1)
return Qnil;
2056 str =
RMATCH(match)->str;
2057 pos = RMATCH_END(match, 0);
2063match_last_index(
VALUE match)
2067 if (
NIL_P(match))
return -1;
2069 if (RMATCH_BEG(match, 0) == -1)
return -1;
2071 for (i = RMATCH_NREGS(match) - 1; RMATCH_BEG(match, i) == -1 && i > 0; i--)
2079 int i = match_last_index(match);
2080 if (i <= 0)
return Qnil;
2081 long start = RMATCH_BEG(match, i);
2086rb_reg_last_defined(
VALUE match)
2088 int i = match_last_index(match);
2089 if (i < 0)
return Qnil;
2094last_match_getter(
ID _x,
VALUE *_y)
2100prematch_getter(
ID _x,
VALUE *_y)
2106postmatch_getter(
ID _x,
VALUE *_y)
2112last_paren_match_getter(
ID _x,
VALUE *_y)
2118match_array(
VALUE match,
int start)
2125 int num_regs = RMATCH_NREGS(match);
2127 target =
RMATCH(match)->str;
2129 for (i = start; i < num_regs; i++) {
2130 long beg = RMATCH_BEG(match, i);
2158match_to_a(
VALUE match)
2160 return match_array(match, 0);
2180match_captures(
VALUE match)
2182 return match_array(match, 1);
2186name_to_backref_number(
const struct re_registers *regs,
VALUE regexp,
const char* name,
const char* name_end)
2188 if (
NIL_P(regexp))
return -1;
2189 return onig_name_to_backref_number(
RREGEXP_PTR(regexp),
2190 (
const unsigned char *)name, (
const unsigned char *)name_end, regs);
2193#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2195 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2196 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2199match_name_to_backref_number(
VALUE match,
VALUE name)
2202 if (
NIL_P(regexp))
return -1;
2205 int n = onig_name_to_group_numbers(
RREGEXP_PTR(regexp),
2206 (
const unsigned char *)RSTRING_PTR(name),
2208 if (n < 0)
return n;
2209 if (n == 0)
return ONIGERR_PARSER_BUG;
2210 if (n == 1)
return nums[0];
2211 for (
int i = n - 1; i >= 0; i--) {
2212 if (RMATCH_BEG(match, nums[i]) != ONIG_REGION_NOTPOS)
2219namev_to_backref_number(
VALUE match,
VALUE name)
2235 num = match_name_to_backref_number(match, name);
2238 name_to_backref_error(name);
2244match_ary_subseq(
VALUE match,
long beg,
long len,
VALUE result)
2246 long olen = RMATCH_NREGS(match);
2247 long j, end = olen < beg+
len ? olen : beg+
len;
2249 if (
len == 0)
return result;
2251 for (j = beg; j < end; j++) {
2254 if (beg +
len > j) {
2264 int num_regs = RMATCH_NREGS(match);
2275 return match_ary_subseq(match, beg,
len, result);
2318match_aref(
int argc,
VALUE *argv,
VALUE match)
2325 if (
NIL_P(length)) {
2330 int num = namev_to_backref_number(match, idx);
2335 return match_ary_aref(match, idx,
Qnil);
2342 long num_regs = RMATCH_NREGS(match);
2348 if (beg < 0)
return Qnil;
2350 else if (beg > num_regs) {
2353 if (beg+
len > num_regs) {
2354 len = num_regs - beg;
2356 return match_ary_subseq(match, beg,
len,
Qnil);
2387match_values_at(
int argc,
VALUE *argv,
VALUE match)
2395 for (i=0; i<argc; i++) {
2400 int num = namev_to_backref_number(match, argv[i]);
2405 match_ary_aref(match, argv[i], result);
2432match_to_s(
VALUE match)
2447match_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2448 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
2451 VALUE hash = data->hash;
2452 VALUE match = data->match;
2454 VALUE key = rb_enc_str_new((
const char *)name, name_end-name, regex->enc);
2456 if (data->symbolize) {
2465 for (i = 0; i < back_num; i++) {
2468 rb_hash_aset(hash, key, value);
2474 rb_hash_aset(hash, key,
Qnil);
2513match_named_captures(
int argc,
VALUE *argv,
VALUE match)
2519 return rb_hash_new();
2522 int symbolize_names = 0;
2527 static ID keyword_ids[1];
2529 VALUE symbolize_names_val;
2531 if (!keyword_ids[0]) {
2534 rb_get_kwargs(opt, keyword_ids, 0, 1, &symbolize_names_val);
2535 if (!UNDEF_P(symbolize_names_val) &&
RTEST(symbolize_names_val)) {
2536 symbolize_names = 1;
2540 hash = rb_hash_new();
2543 onig_foreach_name(
RREGEXP(
RMATCH(match)->regexp)->ptr, match_named_captures_iter, &data);
2565match_deconstruct_keys(
VALUE match,
VALUE keys)
2573 return rb_hash_new_with_size(0);
2577 h = rb_hash_new_with_size(onig_number_of_names(
RREGEXP_PTR(
RMATCH(match)->regexp)));
2581 onig_foreach_name(
RREGEXP_PTR(
RMATCH(match)->regexp), match_named_captures_iter, &data);
2589 return rb_hash_new_with_size(0);
2602 int num = match_name_to_backref_number(match, name);
2629match_string(
VALUE match)
2632 return RMATCH(match)->str;
2641match_inspect_name_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2642 int back_num,
int *back_refs,
OnigRegex regex,
void *arg0)
2647 for (i = 0; i < back_num; i++) {
2648 arg[back_refs[i]].name = name;
2649 arg[back_refs[i]].len = name_end - name;
2676match_inspect(
VALUE match)
2681 int num_regs = RMATCH_NREGS(match);
2687 return rb_sprintf(
"#<%"PRIsVALUE
":%p>", cname, (
void*)match);
2689 else if (
NIL_P(regexp)) {
2690 return rb_sprintf(
"#<%"PRIsVALUE
": %"PRIsVALUE
">",
2698 match_inspect_name_iter, names);
2703 for (i = 0; i < num_regs; i++) {
2710 rb_str_catf(str,
"%d", i);
2729read_escaped_byte(
const char **pp,
const char *end, onig_errmsg_buffer err)
2731 const char *p = *pp;
2733 int meta_prefix = 0, ctrl_prefix = 0;
2736 if (p == end || *p++ !=
'\\') {
2737 errcpy(err,
"too short escaped multibyte character");
2743 errcpy(err,
"too short escape sequence");
2747 case '\\': code =
'\\';
break;
2748 case 'n': code =
'\n';
break;
2749 case 't': code =
'\t';
break;
2750 case 'r': code =
'\r';
break;
2751 case 'f': code =
'\f';
break;
2752 case 'v': code =
'\013';
break;
2753 case 'a': code =
'\007';
break;
2754 case 'e': code =
'\033';
break;
2757 case '0':
case '1':
case '2':
case '3':
2758 case '4':
case '5':
case '6':
case '7':
2767 errcpy(err,
"invalid hex escape");
2775 errcpy(err,
"duplicate meta escape");
2779 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2789 errcpy(err,
"too short meta escape");
2793 if (p == end || *p++ !=
'-') {
2794 errcpy(err,
"too short control escape");
2799 errcpy(err,
"duplicate control escape");
2803 if (p < end && (*p & 0x80) == 0) {
2813 errcpy(err,
"too short control escape");
2817 errcpy(err,
"unexpected escape sequence");
2820 if (code < 0 || 0xff < code) {
2821 errcpy(err,
"invalid escape code");
2835unescape_escaped_nonascii(
const char **pp,
const char *end,
rb_encoding *enc,
2838 const char *p = *pp;
2840 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2841 char *chbuf = (
char *)area;
2846 memset(chbuf, 0, chmaxlen);
2848 byte = read_escaped_byte(&p, end, err);
2853 area[chlen++] = byte;
2854 while (chlen < chmaxlen &&
2856 byte = read_escaped_byte(&p, end, err);
2860 area[chlen++] = byte;
2863 l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
2865 errcpy(err,
"invalid multibyte escape");
2868 if (1 < chlen || (area[0] & 0x80)) {
2873 else if (*encp != enc) {
2874 errcpy(err,
"escaped non ASCII character in UTF-8 regexp");
2880 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2888check_unicode_range(
unsigned long code, onig_errmsg_buffer err)
2890 if ((0xd800 <= code && code <= 0xdfff) ||
2892 errcpy(err,
"invalid Unicode range");
2899append_utf8(
unsigned long uv,
2902 if (check_unicode_range(uv, err) != 0)
2906 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2916 *encp = rb_utf8_encoding();
2917 else if (*encp != rb_utf8_encoding()) {
2918 errcpy(err,
"UTF-8 character in non UTF-8 regexp");
2926unescape_unicode_list(
const char **pp,
const char *end,
2929 const char *p = *pp;
2930 int has_unicode = 0;
2934 while (p < end &&
ISSPACE(*p)) p++;
2937 code = ruby_scan_hex(p, end-p, &
len);
2941 errcpy(err,
"invalid Unicode range");
2945 if (append_utf8(code, buf, encp, err) != 0)
2949 while (p < end &&
ISSPACE(*p)) p++;
2952 if (has_unicode == 0) {
2953 errcpy(err,
"invalid Unicode list");
2963unescape_unicode_bmp(
const char **pp,
const char *end,
2966 const char *p = *pp;
2971 errcpy(err,
"invalid Unicode escape");
2974 code = ruby_scan_hex(p, 4, &
len);
2976 errcpy(err,
"invalid Unicode escape");
2979 if (append_utf8(code, buf, encp, err) != 0)
2986unescape_nonascii0(
const char **pp,
const char *end,
rb_encoding *enc,
2988 onig_errmsg_buffer err,
int options,
int recurse)
2990 const char *p = *pp;
2993 int in_char_class = 0;
2995 int extended_mode = options & ONIG_OPTION_EXTEND;
2999 int chlen = rb_enc_precise_mbclen(p, end, enc);
3002 errcpy(err,
"invalid multibyte character");
3006 if (1 < chlen || (*p & 0x80)) {
3012 else if (*encp != enc) {
3013 errcpy(err,
"non ASCII character in UTF-8 regexp");
3022 errcpy(err,
"too short escape sequence");
3025 chlen = rb_enc_precise_mbclen(p, end, enc);
3027 goto invalid_multibyte;
3036 case '1':
case '2':
case '3':
3037 case '4':
case '5':
case '6':
case '7':
3039 size_t len = end-(p-1), octlen;
3040 if (ruby_scan_oct(p-1,
len < 3 ?
len : 3, &octlen) <= 0177) {
3056 if (rb_is_usascii_enc(enc)) {
3057 const char *pbeg = p;
3058 int byte = read_escaped_byte(&p, end, err);
3059 if (
byte == -1)
return -1;
3064 if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
3071 errcpy(err,
"too short escape sequence");
3077 if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
3079 if (p == end || *p++ !=
'}') {
3080 errcpy(err,
"invalid Unicode list");
3087 if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
3109 if (extended_mode && !in_char_class) {
3111 while ((p < end) && ((c = *p++) !=
'\n')) {
3112 if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
3125 if (in_char_class) {
3132 if (!in_char_class && recurse) {
3133 if (--parens == 0) {
3140 if (!in_char_class && p + 1 < end && *p ==
'?') {
3141 if (*(p+1) ==
'#') {
3143 const char *orig_p = p;
3146 while (cont && (p < end)) {
3149 if (!(c & 0x80))
break;
3150 if (!*encp && enc == rb_utf8_encoding()) {
3156 chlen = rb_enc_precise_mbclen(p, end, enc);
3158 goto invalid_multibyte;
3179 int local_extend = 0;
3186 for (s = p+1; s < end; s++) {
3189 local_extend = invert ? -1 : 1;
3196 if (local_extend == 0 ||
3197 (local_extend == -1 && !extended_mode) ||
3198 (local_extend == 1 && extended_mode)) {
3205 int local_options = options;
3206 if (local_extend == 1) {
3207 local_options |= ONIG_OPTION_EXTEND;
3210 local_options &= ~ONIG_OPTION_EXTEND;
3214 int ret = unescape_nonascii0(&p, end, enc, buf, encp,
3217 if (ret < 0)
return ret;
3222 extended_mode = local_extend == 1;
3239 else if (!in_char_class && recurse) {
3257unescape_nonascii(
const char *p,
const char *end,
rb_encoding *enc,
3259 onig_errmsg_buffer err,
int options)
3261 return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
3266rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
3267 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options)
3270 int has_property = 0;
3274 if (rb_enc_asciicompat(enc))
3278 rb_enc_associate(buf, enc);
3281 if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err, options) != 0)
3284 if (has_property && !*fixed_enc) {
3289 rb_enc_associate(buf, *fixed_enc);
3296rb_reg_check_preprocess(
VALUE str)
3299 onig_errmsg_buffer err =
"";
3305 p = RSTRING_PTR(str);
3306 end = p + RSTRING_LEN(str);
3307 enc = rb_enc_get(str);
3309 buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err, 0);
3313 return rb_reg_error_desc(str, 0, err);
3319rb_reg_preprocess_dregexp(
VALUE ary,
int options)
3323 onig_errmsg_buffer err =
"";
3329 rb_raise(rb_eArgError,
"no arguments given");
3338 src_enc = rb_enc_get(str);
3339 if (options & ARG_ENCODING_NONE &&
3340 src_enc != ascii8bit) {
3342 rb_raise(
rb_eRegexpError,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3344 src_enc = ascii8bit;
3348 p = RSTRING_PTR(str);
3349 end = p + RSTRING_LEN(str);
3351 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err, options);
3354 rb_raise(rb_eArgError,
"%s", err);
3356 if (fixed_enc != 0) {
3357 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
3358 rb_raise(
rb_eRegexpError,
"encoding mismatch in dynamic regexp : %s and %s",
3359 rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
3361 regexp_enc = fixed_enc;
3370 rb_enc_associate(result, regexp_enc);
3377rb_reg_initialize_check(
VALUE obj)
3379 rb_check_frozen(obj);
3387 int options, onig_errmsg_buffer err,
3388 const char *sourcefile,
int sourceline)
3395 rb_reg_initialize_check(obj);
3397 if (rb_enc_dummy_p(enc)) {
3398 errcpy(err,
"can't make regexp with dummy encoding");
3402 unescaped = rb_reg_preprocess(s, s+
len, enc, &fixed_enc, err, options);
3403 if (
NIL_P(unescaped))
3407 if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
3408 (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
3409 errcpy(err,
"incompatible character encoding");
3412 if (fixed_enc != a_enc) {
3413 options |= ARG_ENCODING_FIXED;
3417 else if (!(options & ARG_ENCODING_FIXED)) {
3418 enc = rb_usascii_encoding();
3421 rb_enc_associate((
VALUE)re, enc);
3422 if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
3425 if (options & ARG_ENCODING_NONE) {
3429 re->
ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
3430 options & ARG_REG_OPTION_MASK, err,
3431 sourcefile, sourceline);
3432 if (!re->
ptr)
return -1;
3445 if (regenc != enc) {
3447 str = rb_enc_associate(dup, enc = regenc);
3449 str = rb_fstring(str);
3454rb_reg_initialize_str(
VALUE obj,
VALUE str,
int options, onig_errmsg_buffer err,
3455 const char *sourcefile,
int sourceline)
3458 rb_encoding *str_enc = rb_enc_get(str), *enc = str_enc;
3459 if (options & ARG_ENCODING_NONE) {
3461 if (enc != ascii8bit) {
3463 errcpy(err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3469 ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
3470 options, err, sourcefile, sourceline);
3471 if (ret == 0) reg_set_source(obj, str, str_enc);
3476rb_reg_s_alloc(
VALUE klass)
3496 return rb_reg_init_str(rb_reg_alloc(), s, options);
3500rb_reg_init_str(
VALUE re,
VALUE s,
int options)
3502 onig_errmsg_buffer err =
"";
3504 if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
3505 rb_reg_raise_str(s, options, err);
3514 onig_errmsg_buffer err =
"";
3516 if (rb_reg_initialize(re, RSTRING_PTR(s), RSTRING_LEN(s),
3517 enc, options, err, NULL, 0) != 0) {
3518 rb_reg_raise_str(s, options, err);
3520 reg_set_source(re, s, enc);
3526rb_reg_new_ary(
VALUE ary,
int opt)
3532rb_reg_new_from_values(
long cnt,
const VALUE *elements,
int opt)
3534 const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, elements);
3535 VALUE val = rb_reg_new_ary(ary, (
int)opt);
3543 VALUE re = rb_reg_alloc();
3544 onig_errmsg_buffer err =
"";
3546 if (rb_reg_initialize(re, s,
len, enc, options, err, NULL, 0) != 0) {
3547 rb_enc_reg_raise(s,
len, enc, options, err);
3561rb_reg_compile(
VALUE str,
int options,
const char *sourcefile,
int sourceline)
3563 VALUE re = rb_reg_alloc();
3564 onig_errmsg_buffer err =
"";
3567 if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
3568 rb_set_errinfo(rb_reg_error_desc(str, options, err));
3574static VALUE reg_cache;
3579 if (rb_ractor_main_p()) {
3582 && memcmp(
RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
3592static st_index_t reg_hash(
VALUE re);
3604rb_reg_hash(
VALUE re)
3606 st_index_t hashval = reg_hash(re);
3639 if (re1 == re2)
return Qtrue;
3641 rb_reg_check(re1); rb_reg_check(re2);
3661match_hash(
VALUE match)
3667 hashval =
rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3668 int num_regs = RMATCH_NREGS(match);
3688 if (match1 == match2)
return Qtrue;
3692 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3693 int num_regs = RMATCH_NREGS(match1);
3694 if (num_regs != RMATCH_NREGS(match2))
return Qfalse;
3695 if (memcmp(RMATCH_BEG_PTR(match1), RMATCH_BEG_PTR(match2), num_regs *
sizeof(OnigPosition)))
return Qfalse;
3696 if (memcmp(RMATCH_END_PTR(match1), RMATCH_END_PTR(match2), num_regs *
sizeof(OnigPosition)))
return Qfalse;
3735match_integer_at(
int argc,
VALUE *argv,
VALUE match)
3747 else if ((nth = namev_to_backref_number(match, idx)) < 0) {
3748 name_to_backref_error(idx);
3751 if (argc > 1 && (base =
NUM2INT(argv[1])) < 0) {
3752 rb_raise(rb_eArgError,
"invalid radix %d", base);
3755 if (nth >= RMATCH_NREGS(match))
return Qnil;
3756 if (nth < 0 && (nth += RMATCH_NREGS(match)) <= 0)
return Qnil;
3758 long start = RMATCH_BEG(match, nth), end = RMATCH_END(match, nth);
3759 if (start < 0)
return Qnil;
3760 RUBY_ASSERT(start <= end, "%ld > %ld
", start, end);
3762 VALUE str = RMATCH(match)->str;
3763 RUBY_ASSERT(end <= RSTRING_LEN(str), "%ld > %ld
", end, RSTRING_LEN(str));
3766 return rb_int_parse_cstr(RSTRING_PTR(str) + start, end - start, &endp, NULL,
3767 base, RB_INT_PARSE_DEFAULT);
3771reg_operand(VALUE s, int check)
3774 return rb_sym2str(s);
3776 else if (RB_TYPE_P(s, T_STRING)) {
3780 return check ? rb_str_to_str(s) : rb_check_string_type(s);
3785reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match)
3790 rb_backref_set(Qnil);
3793 *strp = str = reg_operand(str, TRUE);
3796 VALUE l = rb_str_length(str);
3802 pos = rb_str_offset(str, pos);
3804 return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3809 * self =~ other -> integer or nil
3811 * Returns the integer index (in characters) of the first match
3812 * for +self+ and +other+, or +nil+ if none;
3813 * updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables].
3815 * /at/ =~ 'input data' # => 7
3816 * $~ # => #<MatchData "at
">
3817 * /ax/ =~ 'input data' # => nil
3820 * Assigns named captures to local variables of the same names
3821 * if and only if +self+:
3823 * - Is a regexp literal;
3824 * see {Regexp Literals}[rdoc-ref:syntax/literals.rdoc@Regexp+Literals].
3825 * - Does not contain interpolations;
3826 * see {Regexp interpolation}[rdoc-ref:Regexp@Interpolation+Mode].
3827 * - Is at the left of the expression.
3831 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = y '
3835 * Assigns +nil+ if not matched:
3837 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = '
3841 * Does not make local variable assignments if +self+ is not a regexp literal:
3843 * r = /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
3845 * p foo # Undefined local variable
3846 * p bar # Undefined local variable
3848 * The assignment does not occur if the regexp is not at the left:
3850 * ' x = y ' =~ /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
3851 * p foo, bar # Undefined local variables
3853 * A regexp interpolation, <tt>#{}</tt>, also disables
3857 * /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
3858 * p foo # Undefined local variable
3863rb_reg_match(VALUE re, VALUE str)
3865 long pos = reg_match_pos(re, &str, 0, NULL);
3866 if (pos < 0) return Qnil;
3867 pos = rb_str_sublen(str, pos);
3868 return LONG2FIX(pos);
3873 * self === other -> true or false
3875 * Returns whether +self+ finds a match in +other+:
3877 * /^[a-z]*$/ === 'HELLO' # => false
3878 * /^[A-Z]*$/ === 'HELLO' # => true
3880 * This method is called in case statements:
3884 * when /\A[a-z]*\z/; print "Lower
case\n
"
3885 * when /\A[A-Z]*\z/; print "Upper
case\n
"
3886 * else print "Mixed
case\n
"
3887 * end # => "Upper
case"
3892rb_reg_eqq(VALUE re, VALUE str)
3896 str = reg_operand(str, FALSE);
3898 rb_backref_set(Qnil);
3901 start = rb_reg_search(re, str, 0, 0);
3902 return RBOOL(start >= 0);
3908 * ~ rxp -> integer or nil
3910 * Equivalent to <tt><i>rxp</i> =~ $_</tt>:
3918rb_reg_match2(VALUE re)
3921 VALUE line = rb_lastline_get();
3923 if (!RB_TYPE_P(line, T_STRING)) {
3924 rb_backref_set(Qnil);
3928 start = rb_reg_search(re, line, 0, 0);
3932 start = rb_str_sublen(line, start);
3933 return LONG2FIX(start);
3939 * match(string, offset = 0) -> matchdata or nil
3940 * match(string, offset = 0) {|matchdata| ... } -> object
3942 * With no block given, returns the MatchData object
3943 * that describes the match, if any, or +nil+ if none;
3944 * the search begins at the given character +offset+ in +string+:
3946 * /abra/.match('abracadabra') # => #<MatchData "abra
">
3947 * /abra/.match('abracadabra', 4) # => #<MatchData "abra
">
3948 * /abra/.match('abracadabra', 8) # => nil
3949 * /abra/.match('abracadabra', 800) # => nil
3951 * string = "\u{5d0 5d1 5e8 5d0}cadabra
"
3952 * /abra/.match(string, 7) #=> #<MatchData "abra
">
3953 * /abra/.match(string, 8) #=> nil
3954 * /abra/.match(string.b, 8) #=> #<MatchData "abra
">
3956 * With a block given, calls the block if and only if a match is found;
3957 * returns the block's value:
3959 * /abra/.match('abracadabra') {|matchdata| p matchdata }
3960 * # => #<MatchData "abra
">
3961 * /abra/.match('abracadabra', 4) {|matchdata| p matchdata }
3962 * # => #<MatchData "abra
">
3963 * /abra/.match('abracadabra', 8) {|matchdata| p matchdata }
3965 * /abra/.match('abracadabra', 8) {|matchdata| fail 'Cannot happen' }
3968 * Output (from the first two blocks above):
3970 * #<MatchData "abra
">
3971 * #<MatchData "abra
">
3973 * /(.)(.)(.)/.match("abc
")[2] # => "b
"
3974 * /(.)(.)/.match("abc
", 1)[2] # => "c
"
3979rb_reg_match_m(int argc, VALUE *argv, VALUE re)
3981 VALUE result = Qnil, str, initpos;
3984 if (rb_scan_args(argc, argv, "11
", &str, &initpos) == 2) {
3985 pos = NUM2LONG(initpos);
3991 pos = reg_match_pos(re, &str, pos, &result);
3993 rb_backref_set(Qnil);
3996 rb_match_busy(result);
3997 if (!NIL_P(result) && rb_block_given_p()) {
3998 return rb_yield(result);
4005 * match?(string) -> true or false
4006 * match?(string, offset = 0) -> true or false
4008 * Returns <code>true</code> or <code>false</code> to indicate whether the
4009 * regexp is matched or not without updating $~ and other related variables.
4010 * If the second parameter is present, it specifies the position in the string
4011 * to begin the search.
4013 * /R.../.match?("Ruby
") # => true
4014 * /R.../.match?("Ruby
", 1) # => false
4015 * /P.../.match?("Ruby
") # => false
4020rb_reg_match_m_p(int argc, VALUE *argv, VALUE re)
4022 long pos = rb_check_arity(argc, 1, 2) > 1 ? NUM2LONG(argv[1]) : 0;
4023 return rb_reg_match_p(re, argv[0], pos);
4027rb_reg_match_p(VALUE re, VALUE str, long pos)
4029 if (NIL_P(str)) return Qfalse;
4030 str = SYMBOL_P(str) ? rb_sym2str(str) : StringValue(str);
4033 pos += NUM2LONG(rb_str_length(str));
4034 if (pos < 0) return Qfalse;
4038 const char *beg = rb_str_subpos(str, pos, &len);
4039 if (!beg) return Qfalse;
4040 pos = beg - RSTRING_PTR(str);
4044 struct reg_onig_search_args args = {
4046 .range = RSTRING_LEN(str),
4049 return rb_reg_onig_match(re, str, reg_onig_search, &args, NULL) == ONIG_MISMATCH ? Qfalse : Qtrue;
4053 * Document-method: compile
4055 * Alias for Regexp.new
4059str_to_option(VALUE str)
4064 str = rb_check_string_type(str);
4065 if (NIL_P(str)) return -1;
4066 RSTRING_GETMEM(str, ptr, len);
4067 for (long i = 0; i < len; ++i) {
4068 int f = char_to_option(ptr[i]);
4070 rb_raise(rb_eArgError, "unknown regexp option: %
"PRIsVALUE, str);
4078set_timeout(rb_hrtime_t *hrt, VALUE timeout)
4080 double timeout_d = NIL_P(timeout) ? 0.0 : NUM2DBL(timeout);
4081 if (!NIL_P(timeout) && !(timeout_d > 0)) {
4082 rb_raise(rb_eArgError, "invalid timeout: %
"PRIsVALUE, timeout);
4084 double2hrtime(hrt, timeout_d);
4088reg_copy(VALUE copy, VALUE orig)
4093 rb_reg_initialize_check(copy);
4094 if ((r = onig_reg_copy(&re, RREGEXP_PTR(orig))) != 0) {
4095 /* ONIGERR_MEMORY only */
4096 rb_raise(rb_eRegexpError, "%s
", onig_error_code_to_format(r));
4098 RREGEXP_PTR(copy) = re;
4099 RB_OBJ_WRITE(copy, &RREGEXP(copy)->src, RREGEXP(orig)->src);
4100 RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit;
4101 rb_enc_copy(copy, orig);
4102 FL_SET_RAW(copy, FL_TEST_RAW(orig, KCODE_FIXED|REG_ENCODING_NONE));
4103 if (RBASIC_CLASS(copy) == rb_cRegexp) {
4110struct reg_init_args {
4117static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
4118static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
4122 * Regexp.new(string, options = 0, timeout: nil) -> regexp
4123 * Regexp.new(regexp, timeout: nil) -> regexp
4125 * With argument +string+ given, returns a new regexp with the given string
4128 * r = Regexp.new('foo') # => /foo/
4129 * r.source # => "foo
"
4132 * Optional argument +options+ is one of the following:
4134 * - A String of options:
4136 * Regexp.new('foo', 'i') # => /foo/i
4137 * Regexp.new('foo', 'im') # => /foo/im
4139 * - The bit-wise OR of one or more of the constants
4140 * Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
4141 * Regexp::NOENCODING:
4143 * Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
4144 * Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
4145 * Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
4146 * Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
4147 * flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
4148 * Regexp.new('foo', flags) # => /foo/mix
4150 * - +nil+ or +false+, which is ignored.
4151 * - Any other truthy value, in which case the regexp will be
4154 * If optional keyword argument +timeout+ is given,
4155 * its float value overrides the timeout interval for the class,
4157 * If +nil+ is passed as +timeout+, it uses the timeout interval
4158 * for the class, Regexp.timeout.
4160 * With argument +regexp+ given, returns a new regexp. The source,
4161 * options, timeout are the same as +regexp+. +options+ and +n_flag+
4162 * arguments are ineffective. The timeout can be overridden by
4163 * +timeout+ keyword.
4165 * options = Regexp::MULTILINE
4166 * r = Regexp.new('foo', options, timeout: 1.1) # => /foo/m
4167 * r2 = Regexp.new(r) # => /foo/m
4168 * r2.timeout # => 1.1
4169 * r3 = Regexp.new(r, timeout: 3.14) # => /foo/m
4170 * r3.timeout # => 3.14
4175rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
4177 struct reg_init_args args;
4178 VALUE re = reg_extract_args(argc, argv, &args);
4181 reg_init_args(self, args.str, args.enc, args.flags);
4187 set_timeout(&RREGEXP_PTR(self)->timelimit, args.timeout);
4188 if (RBASIC_CLASS(self) == rb_cRegexp) {
4196reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
4199 rb_encoding *enc = 0;
4200 VALUE str, src, opts = Qundef, kwargs;
4203 rb_scan_args(argc, argv, "11:
", &src, &opts, &kwargs);
4205 args->timeout = Qnil;
4206 if (!NIL_P(kwargs)) {
4207 static ID keywords[1];
4209 keywords[0] = rb_intern_const("timeout
");
4211 rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
4214 if (RB_TYPE_P(src, T_REGEXP)) {
4218 rb_warn("flags ignored
");
4221 flags = rb_reg_options(re);
4222 str = RREGEXP_SRC(re);
4227 if (FIXNUM_P(opts)) flags = FIX2INT(opts);
4228 else if ((f = str_to_option(opts)) >= 0) flags = f;
4229 else if (rb_bool_expected(opts, "ignorecase
", FALSE))
4230 flags = ONIG_OPTION_IGNORECASE;
4232 str = StringValue(src);
4236 args->flags = flags;
4241reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags)
4243 if (enc && rb_enc_get(str) != enc)
4244 rb_reg_init_str_enc(self, str, enc, flags);
4246 rb_reg_init_str(self, str, flags);
4251rb_reg_quote(VALUE str)
4253 rb_encoding *enc = rb_enc_get(str);
4257 int ascii_only = rb_enc_str_asciionly_p(str);
4259 s = RSTRING_PTR(str);
4260 send = s + RSTRING_LEN(str);
4262 c = rb_enc_ascget(s, send, &clen, enc);
4264 s += mbclen(s, send, enc);
4268 case '[': case ']': case '{': case '}':
4269 case '(': case ')': case '|': case '-':
4270 case '*': case '.': case '\\':
4271 case '?': case '+': case '^': case '$':
4273 case '\t': case '\f': case '\v': case '\n': case '\r':
4278 tmp = rb_str_new3(str);
4280 rb_enc_associate(tmp, rb_usascii_encoding());
4285 tmp = rb_str_new(0, RSTRING_LEN(str)*2);
4287 rb_enc_associate(tmp, rb_usascii_encoding());
4290 rb_enc_copy(tmp, str);
4292 t = RSTRING_PTR(tmp);
4293 /* copy up to the metacharacter */
4294 const char *p = RSTRING_PTR(str);
4295 memcpy(t, p, s - p);
4299 c = rb_enc_ascget(s, send, &clen, enc);
4301 int n = mbclen(s, send, enc);
4309 case '[': case ']': case '{': case '}':
4310 case '(': case ')': case '|': case '-':
4311 case '*': case '.': case '\\':
4312 case '?': case '+': case '^': case '$':
4314 t += rb_enc_mbcput('\\', t, enc);
4317 t += rb_enc_mbcput('\\', t, enc);
4318 t += rb_enc_mbcput(' ', t, enc);
4321 t += rb_enc_mbcput('\\', t, enc);
4322 t += rb_enc_mbcput('t', t, enc);
4325 t += rb_enc_mbcput('\\', t, enc);
4326 t += rb_enc_mbcput('n', t, enc);
4329 t += rb_enc_mbcput('\\', t, enc);
4330 t += rb_enc_mbcput('r', t, enc);
4333 t += rb_enc_mbcput('\\', t, enc);
4334 t += rb_enc_mbcput('f', t, enc);
4337 t += rb_enc_mbcput('\\', t, enc);
4338 t += rb_enc_mbcput('v', t, enc);
4341 t += rb_enc_mbcput(c, t, enc);
4343 rb_str_resize(tmp, t - RSTRING_PTR(tmp));
4350 * Regexp.escape(string) -> new_string
4352 * Returns a new string that escapes any characters
4353 * that have special meaning in a regular expression:
4355 * s = Regexp.escape('\*?{}.') # => "\\\\\\*\\?\\{\\}\\.
"
4357 * For any string +s+, this call returns a MatchData object:
4359 * r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
4360 * r.match(s) # => #<MatchData "\\\\\\*\\?\\{\\}\\.
">
4365rb_reg_s_quote(VALUE c, VALUE str)
4367 return rb_reg_quote(reg_operand(str, TRUE));
4371rb_reg_options(VALUE re)
4376 options = RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
4377 if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
4378 if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
4383rb_check_regexp_type(VALUE re)
4385 return rb_check_convert_type(re, T_REGEXP, "Regexp
", "to_regexp
");
4390 * Regexp.try_convert(object) -> regexp or nil
4392 * Returns +object+ if it is a regexp:
4394 * Regexp.try_convert(/re/) # => /re/
4396 * Otherwise if +object+ responds to <tt>:to_regexp</tt>,
4397 * calls <tt>object.to_regexp</tt> and returns the result.
4399 * Returns +nil+ if +object+ does not respond to <tt>:to_regexp</tt>.
4401 * Regexp.try_convert('re') # => nil
4403 * Raises an exception unless <tt>object.to_regexp</tt> returns a regexp.
4407rb_reg_s_try_convert(VALUE dummy, VALUE re)
4409 return rb_check_regexp_type(re);
4413rb_reg_s_union(VALUE self, VALUE args0)
4415 long argc = RARRAY_LEN(args0);
4419 args[0] = rb_str_new2("(?!)
");
4420 return rb_class_new_instance(1, args, rb_cRegexp);
4422 else if (argc == 1) {
4423 VALUE arg = rb_ary_entry(args0, 0);
4424 VALUE re = rb_check_regexp_type(arg);
4429 quoted = rb_reg_s_quote(Qnil, arg);
4430 return rb_reg_new_str(quoted, 0);
4435 VALUE source = rb_str_buf_new(0);
4436 rb_encoding *result_enc;
4438 int has_asciionly = 0;
4439 rb_encoding *has_ascii_compat_fixed = 0;
4440 rb_encoding *has_ascii_incompat = 0;
4442 for (i = 0; i < argc; i++) {
4444 VALUE e = rb_ary_entry(args0, i);
4447 rb_str_buf_cat_ascii(source, "|
");
4449 v = rb_check_regexp_type(e);
4451 rb_encoding *enc = rb_enc_get(v);
4452 if (!rb_enc_asciicompat(enc)) {
4453 if (!has_ascii_incompat)
4454 has_ascii_incompat = enc;
4455 else if (has_ascii_incompat != enc)
4456 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4457 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4459 else if (rb_reg_fixed_encoding_p(v)) {
4460 if (!has_ascii_compat_fixed)
4461 has_ascii_compat_fixed = enc;
4462 else if (has_ascii_compat_fixed != enc)
4463 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4464 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4469 v = rb_reg_str_with_term(v, -1);
4474 enc = rb_enc_get(e);
4475 if (!rb_enc_asciicompat(enc)) {
4476 if (!has_ascii_incompat)
4477 has_ascii_incompat = enc;
4478 else if (has_ascii_incompat != enc)
4479 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4480 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4482 else if (rb_enc_str_asciionly_p(e)) {
4486 if (!has_ascii_compat_fixed)
4487 has_ascii_compat_fixed = enc;
4488 else if (has_ascii_compat_fixed != enc)
4489 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4490 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4492 v = rb_reg_s_quote(Qnil, e);
4494 if (has_ascii_incompat) {
4495 if (has_asciionly) {
4496 rb_raise(rb_eArgError, "ASCII incompatible encoding: %s
",
4497 rb_enc_name(has_ascii_incompat));
4499 if (has_ascii_compat_fixed) {
4500 rb_raise(rb_eArgError, "incompatible encodings: %s and %s
",
4501 rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
4506 rb_enc_copy(source, v);
4508 rb_str_append(source, v);
4511 if (has_ascii_incompat) {
4512 result_enc = has_ascii_incompat;
4514 else if (has_ascii_compat_fixed) {
4515 result_enc = has_ascii_compat_fixed;
4518 result_enc = rb_ascii8bit_encoding();
4521 rb_enc_associate(source, result_enc);
4522 return rb_class_new_instance(1, &source, rb_cRegexp);
4528 * Regexp.union(*patterns) -> regexp
4529 * Regexp.union(array_of_patterns) -> regexp
4531 * Returns a new regexp that is the union of the given patterns:
4533 * r = Regexp.union(%w[cat dog]) # => /cat|dog/
4534 * r.match('cat') # => #<MatchData "cat
">
4535 * r.match('dog') # => #<MatchData "dog
">
4536 * r.match('cog') # => nil
4538 * For each pattern that is a string, <tt>Regexp.new(pattern)</tt> is used:
4540 * Regexp.union('penzance') # => /penzance/
4541 * Regexp.union('a+b*c') # => /a\+b\*c/
4542 * Regexp.union('skiing', 'sledding') # => /skiing|sledding/
4543 * Regexp.union(['skiing', 'sledding']) # => /skiing|sledding/
4545 * For each pattern that is a regexp, it is used as is,
4546 * including its flags:
4548 * Regexp.union(/foo/i, /bar/m, /baz/x)
4549 * # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
4550 * Regexp.union([/foo/i, /bar/m, /baz/x])
4551 * # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
4553 * With no arguments, returns <tt>/(?!)/</tt>:
4555 * Regexp.union # => /(?!)/
4557 * If any regexp pattern contains captures, the behavior is unspecified.
4561rb_reg_s_union_m(VALUE self, VALUE args)
4564 if (RARRAY_LEN(args) == 1 &&
4565 !NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
4566 return rb_reg_s_union(self, v);
4568 return rb_reg_s_union(self, args);
4573 * Regexp.linear_time?(re)
4574 * Regexp.linear_time?(string, options = 0)
4576 * Returns +true+ if matching against <tt>re</tt> can be
4577 * done in linear time to the input string.
4579 * Regexp.linear_time?(/re/) # => true
4581 * Note that this is a property of the ruby interpreter, not of the argument
4582 * regular expression. An identical regexp may or may not run in linear time
4583 * depending on your ruby binary. Neither forward nor backward compatibility
4584 * is guaranteed about the return value of this method. Our current algorithm
4585 * is (*1) but this is subject to change in the future. Alternative
4586 * implementations can also behave differently. They might always return
4587 * false for everything.
4589 * (*1): https://doi.org/10.1109/SP40001.2021.00032
4593rb_reg_s_linear_time_p(int argc, VALUE *argv, VALUE self)
4595 struct reg_init_args args;
4596 VALUE re = reg_extract_args(argc, argv, &args);
4599 re = reg_init_args(rb_reg_alloc(), args.str, args.enc, args.flags);
4602 return RBOOL(onig_check_linear_time(RREGEXP_PTR(re)));
4607rb_reg_init_copy(VALUE copy, VALUE re)
4609 if (!OBJ_INIT_COPY(copy, re)) return copy;
4611 return reg_copy(copy, re);
4615do_regsub(VALUE str, VALUE src, VALUE regexp, int num_regs, const OnigPosition *beg, const OnigPosition *end)
4620 rb_encoding *str_enc = rb_enc_get(str);
4621 rb_encoding *src_enc = rb_enc_get(src);
4622 int acompat = rb_enc_asciicompat(str_enc);
4624#define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
4626 RSTRING_GETMEM(str, s, n);
4631 int c = ASCGET(s, e, &clen);
4635 s += mbclen(s, e, str_enc);
4641 if (c != '\\' || s == e) continue;
4644 val = rb_str_buf_new(ss-p);
4646 rb_enc_str_buf_cat(val, p, ss-p, str_enc);
4648 c = ASCGET(s, e, &clen);
4650 s += mbclen(s, e, str_enc);
4651 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4659 case '1': case '2': case '3': case '4':
4660 case '5': case '6': case '7': case '8': case '9':
4661 if (!NIL_P(regexp) && onig_noname_group_capture_is_active(RREGEXP_PTR(regexp))) {
4670 if (s < e && ASCGET(s, e, &clen) == '<') {
4671 char *name, *name_end;
4673 name_end = name = s + clen;
4674 while (name_end < e) {
4675 c = ASCGET(name_end, e, &clen);
4676 if (c == '>') break;
4677 name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
4680 VALUE n = rb_str_subseq(str, (long)(name - RSTRING_PTR(str)),
4681 (long)(name_end - name));
4682 struct re_registers tmp = {
4683 .allocated = num_regs,
4684 .num_regs = num_regs,
4685 .beg = (OnigPosition *)beg,
4686 .end = (OnigPosition *)end,
4688 if ((no = NAME_TO_NUMBER(&tmp, regexp, n, name, name_end)) < 1) {
4689 name_to_backref_error(n);
4691 p = s = name_end + clen;
4695 rb_raise(rb_eRuntimeError, "invalid group name reference format
");
4699 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4708 rb_enc_str_buf_cat(val, RSTRING_PTR(src), beg[0], src_enc);
4712 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+end[0], RSTRING_LEN(src)-end[0], src_enc);
4717 while (beg[no] == -1 && no > 0) no--;
4718 if (no == 0) continue;
4722 rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
4726 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4731 if (no >= num_regs) continue;
4732 if (beg[no] == -1) continue;
4733 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+beg[no], end[no]-beg[no], src_enc);
4737 if (!val) return str;
4739 rb_enc_str_buf_cat(val, p, e-p, str_enc);
4747rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
4749 return do_regsub(str, src, regexp, regs->num_regs, regs->beg, regs->end);
4753rb_reg_regsub_match(VALUE str, VALUE src, VALUE match)
4755 return do_regsub(str, src, RMATCH(match)->regexp,
4756 RMATCH_NREGS(match), RMATCH_BEG_PTR(match), RMATCH_END_PTR(match));
4760ignorecase_getter(ID _x, VALUE *_y)
4762 rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective
");
4767ignorecase_setter(VALUE val, ID id, VALUE *_)
4769 rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective; ignored
");
4775 VALUE match = rb_backref_get();
4777 if (NIL_P(match)) return Qnil;
4778 rb_match_busy(match);
4783get_LAST_MATCH_INFO(ID _x, VALUE *_y)
4785 return match_getter();
4789match_setter(VALUE val, ID _x, VALUE *_y)
4792 Check_Type(val, T_MATCH);
4794 rb_backref_set(val);
4799 * Regexp.last_match -> matchdata or nil
4800 * Regexp.last_match(n) -> string or nil
4801 * Regexp.last_match(name) -> string or nil
4803 * With no argument, returns the value of <tt>$~</tt>,
4804 * which is the result of the most recent pattern match
4805 * (see {Regexp global variables}[rdoc-ref:Regexp@Global+Variables]):
4807 * /c(.)t/ =~ 'cat' # => 0
4808 * Regexp.last_match # => #<MatchData "cat
" 1:"a
">
4809 * /a/ =~ 'foo' # => nil
4810 * Regexp.last_match # => nil
4812 * With non-negative integer argument +n+, returns the _n_th field in the
4813 * matchdata, if any, or nil if none:
4815 * /c(.)t/ =~ 'cat' # => 0
4816 * Regexp.last_match(0) # => "cat
"
4817 * Regexp.last_match(1) # => "a
"
4818 * Regexp.last_match(2) # => nil
4820 * With negative integer argument +n+, counts backwards from the last field:
4822 * Regexp.last_match(-1) # => "a
"
4824 * With string or symbol argument +name+,
4825 * returns the string value for the named capture, if any:
4827 * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ 'var = val'
4828 * Regexp.last_match # => #<MatchData "var = val
" lhs:"var
"rhs:"val
">
4829 * Regexp.last_match(:lhs) # => "var
"
4830 * Regexp.last_match('rhs') # => "val
"
4831 * Regexp.last_match('foo') # Raises IndexError.
4836rb_reg_s_last_match(int argc, VALUE *argv, VALUE _)
4838 if (rb_check_arity(argc, 0, 1) == 1) {
4839 VALUE match = rb_backref_get();
4841 if (NIL_P(match)) return Qnil;
4842 n = match_backref_number(match, argv[0]);
4843 return rb_reg_nth_match(n, match);
4845 return match_getter();
4849re_warn(const char *s)
4854// This function is periodically called during regexp matching
4856rb_reg_timeout_p(regex_t *reg, void *end_time_)
4858 rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
4860 if (*end_time == 0) {
4861 // This is the first time to check interrupts;
4862 // just measure the current time and determine the end time
4863 // if timeout is set.
4864 rb_hrtime_t timelimit = reg->timelimit;
4867 // no per-object timeout.
4868 timelimit = rb_reg_match_time_limit;
4872 *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
4875 // no timeout is set
4876 *end_time = RB_HRTIME_MAX;
4880 if (*end_time < rb_hrtime_now()) {
4881 // Timeout has exceeded
4891 * Regexp.timeout -> float or nil
4893 * Returns the current default timeout interval for Regexp matching, in seconds.
4894 * +nil+ means no default timeout configuration.
4898rb_reg_s_timeout_get(VALUE dummy)
4900 double d = hrtime2double(rb_reg_match_time_limit);
4901 if (d == 0.0) return Qnil;
4907 * Regexp.timeout = float or nil
4909 * Sets the default timeout interval for Regexp matching, in seconds.
4910 * +nil+ means no default timeout configuration.
4911 * This configuration is process-global. To set a timeout for an
4912 * individual Regexp, use the +timeout+ keyword of <code>Regexp.new</code>.
4914 * Regexp.timeout = 1
4915 * /^a*b?a*$/ =~ "a
" * 100000 + "x
" #=> regexp match timeout (RuntimeError)
4919rb_reg_s_timeout_set(VALUE dummy, VALUE timeout)
4921 rb_ractor_ensure_main_ractor("can not access
Regexp.timeout from non-main Ractors
");
4923 set_timeout(&rb_reg_match_time_limit, timeout);
4930 * rxp.timeout -> float or nil
4932 * Returns the timeout interval for Regexp matching, in seconds.
4933 * +nil+ means no per-object timeout is set.
4935 * This configuration is per-object. The global configuration set by
4936 * Regexp.timeout= is ignored if per-object configuration is set.
4938 * re = Regexp.new("^a*b?a*$
", timeout: 1)
4939 * re.timeout #=> 1.0
4940 * re =~ "a
" * 100000 + "x
" #=> regexp match timeout (RuntimeError)
4944rb_reg_timeout_get(VALUE re)
4947 double d = hrtime2double(RREGEXP_PTR(re)->timelimit);
4948 if (d == 0.0) return Qnil;
4953 * Document-class: RegexpError
4955 * Raised when given an invalid regexp expression.
4959 * <em>raises the exception:</em>
4961 * RegexpError: target of repeat operator is not specified: /?/
4965 * Document-class: Regexp
4967 * :include: doc/_regexp.rdoc
4973 rb_eRegexpError = rb_define_class("RegexpError
", rb_eStandardError);
4975 onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4976 onig_set_warn_func(re_warn);
4977 onig_set_verb_warn_func(re_warn);
4979 rb_define_virtual_variable("$~
", get_LAST_MATCH_INFO, match_setter);
4980 rb_define_virtual_variable("$&
", last_match_getter, 0);
4981 rb_define_virtual_variable("$`
", prematch_getter, 0);
4982 rb_define_virtual_variable("$
'", postmatch_getter, 0);
4983 rb_define_virtual_variable("$+", last_paren_match_getter, 0);
4985 rb_gvar_ractor_local("$~");
4986 rb_gvar_ractor_local("$&");
4987 rb_gvar_ractor_local("$`");
4988 rb_gvar_ractor_local("$'");
4989 rb_gvar_ractor_local("$+
");
4990 rb_gvar_box_dynamic("$~
");
4991 rb_gvar_box_ready("$&
");
4992 rb_gvar_box_ready("$`
");
4993 rb_gvar_box_ready("$
'");
4994 rb_gvar_box_ready("$+");
4996 rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
4998 rb_cRegexp = rb_define_class("Regexp", rb_cObject);
4999 rb_define_alloc_func(rb_cRegexp, rb_reg_s_alloc);
5000 rb_define_singleton_method(rb_cRegexp, "compile", rb_class_new_instance_pass_kw, -1);
5001 rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, 1);
5002 rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, 1);
5003 rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
5004 rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1);
5005 rb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
5006 rb_define_singleton_method(rb_cRegexp, "linear_time?", rb_reg_s_linear_time_p, -1);
5008 rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);
5009 rb_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1);
5010 rb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
5011 rb_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1);
5012 rb_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
5013 rb_define_method(rb_cRegexp, "=~", rb_reg_match, 1);
5014 rb_define_method(rb_cRegexp, "===", rb_reg_eqq, 1);
5015 rb_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
5016 rb_define_method(rb_cRegexp, "match", rb_reg_match_m, -1);
5017 rb_define_method(rb_cRegexp, "match?", rb_reg_match_m_p, -1);
5018 rb_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
5019 rb_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0);
5020 rb_define_method(rb_cRegexp, "source", rb_reg_source, 0);
5021 rb_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
5022 rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
5023 rb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
5024 rb_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
5025 rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
5026 rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
5027 rb_define_method(rb_cRegexp, "timeout", rb_reg_timeout_get, 0);
5029 /* Raised when regexp matching timed out. */
5030 rb_eRegexpTimeoutError = rb_define_class_under(rb_cRegexp, "TimeoutError", rb_eRegexpError);
5031 rb_define_singleton_method(rb_cRegexp, "timeout", rb_reg_s_timeout_get, 0);
5032 rb_define_singleton_method(rb_cRegexp, "timeout=", rb_reg_s_timeout_set, 1);
5034 /* see Regexp.options and Regexp.new */
5035 rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
5036 /* see Regexp.options and Regexp.new */
5037 rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
5038 /* see Regexp.options and Regexp.new */
5039 rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
5040 /* see Regexp.options and Regexp.new */
5041 rb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));
5042 /* see Regexp.options and Regexp.new */
5043 rb_define_const(rb_cRegexp, "NOENCODING", INT2FIX(ARG_ENCODING_NONE));
5045 rb_global_variable(&reg_cache);
5047 rb_cMatch = rb_define_class("MatchData", rb_cObject);
5048 rb_define_alloc_func(rb_cMatch, match_alloc);
5049 rb_undef_method(CLASS_OF(rb_cMatch), "new");
5050 rb_undef_method(CLASS_OF(rb_cMatch), "allocate");
5052 rb_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1);
5053 rb_define_method(rb_cMatch, "regexp", match_regexp, 0);
5054 rb_define_method(rb_cMatch, "names", match_names, 0);
5055 rb_define_method(rb_cMatch, "size", match_size, 0);
5056 rb_define_method(rb_cMatch, "length", match_size, 0);
5057 rb_define_method(rb_cMatch, "offset", match_offset, 1);
5058 rb_define_method(rb_cMatch, "byteoffset", match_byteoffset, 1);
5059 rb_define_method(rb_cMatch, "bytebegin", match_bytebegin, 1);
5060 rb_define_method(rb_cMatch, "byteend", match_byteend, 1);
5061 rb_define_method(rb_cMatch, "begin", match_begin, 1);
5062 rb_define_method(rb_cMatch, "end", match_end, 1);
5063 rb_define_method(rb_cMatch, "match", match_nth, 1);
5064 rb_define_method(rb_cMatch, "match_length", match_nth_length, 1);
5065 rb_define_method(rb_cMatch, "to_a", match_to_a, 0);
5066 rb_define_method(rb_cMatch, "[]", match_aref, -1);
5067 rb_define_method(rb_cMatch, "captures", match_captures, 0);
5068 rb_define_alias(rb_cMatch, "deconstruct", "captures");
5069 rb_define_method(rb_cMatch, "named_captures", match_named_captures, -1);
5070 rb_define_method(rb_cMatch, "deconstruct_keys", match_deconstruct_keys, 1);
5071 rb_define_method(rb_cMatch, "values_at", match_values_at, -1);
5072 rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
5073 rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
5074 rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
5075 rb_define_method(rb_cMatch, "inspect", match_inspect, 0);
5076 rb_define_method(rb_cMatch, "string", match_string, 0);
5077 rb_define_method(rb_cMatch, "hash", match_hash, 0);
5078 rb_define_method(rb_cMatch, "eql?", match_equal, 1);
5079 rb_define_method(rb_cMatch, "==", match_equal, 1);
5080 rb_define_method(rb_cMatch, "integer_at", match_integer_at, -1);
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define INT2FIX
Old name of RB_INT2FIX.
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define FIX2INT
Old name of RB_FIX2INT.
#define rb_str_new3
Old name of rb_str_new_shared.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define rb_exc_new3
Old name of rb_exc_new_str.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define NUM2INT
Old name of RB_NUM2INT.
#define INT2NUM
Old name of RB_INT2NUM.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define T_MATCH
Old name of RUBY_T_MATCH.
#define FL_TEST
Old name of RB_FL_TEST.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define rb_str_new4
Old name of rb_str_new_frozen.
#define ALLOCV_END
Old name of RB_ALLOCV_END.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
VALUE rb_eRegexpError
RegexpError exception.
#define ruby_verbose
This variable controls whether the interpreter is in verbose mode ($VERBOSE).
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports unless $VERBOSE is nil.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_obj_reveal(VALUE obj, VALUE klass)
Make a hidden object visible again.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_cMatch
MatchData class.
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
VALUE rb_cRegexp
Regexp class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Converts a character option to its encoding.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
VALUE rb_ary_resize(VALUE ary, long len)
Expands or shrinks the passed array to the passed length.
VALUE rb_ary_clear(VALUE ary)
Destructively removes everything from an array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Identical to rb_ary_new_from_values(), except it expects exactly two parameters.
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that was captured at the very last.
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
#define rb_hash_end(h)
Just another name of st_hash_end.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
int capa
Designed capacity of the buffer.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the given string.
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
OnigPosition rb_reg_onig_match(VALUE re, VALUE str, OnigPosition(*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), void *args, struct re_registers *regs)
Runs a regular expression match using function match.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define RB_ALLOCV_N(type, v, n)
Allocates a memory region, possibly on stack.
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
#define RB_ALLOCV_END(v)
Polite way to declare that the given array is not used any longer.
#define RARRAY_LEN
Just another name of rb_array_len.
#define RARRAY_AREF(a, i)
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define RMATCH(obj)
Convenient casting macro.
#define RREGEXP(obj)
Convenient casting macro.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define RREGEXP_PTR(obj)
Convenient accessor macro.
static long RREGEXP_SRC_LEN(VALUE rexp)
Convenient getter function.
static char * RREGEXP_SRC_PTR(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
Ruby object's base components.
VALUE flags
Per-object flags.
Regular expression execution context.
VALUE regexp
The expression of this match.
union RMatch::@55 as
"Registers" of a match.
struct rmatch_offset * char_offset
Capture group offsets, in C array.
int char_offset_num_allocated
Number of rmatch_offset that ::rmatch::char_offset holds.
int num_regs
Number of capture-group registers.
int capa
Capacity of as.embed, in OnigPosition slots.
VALUE str
The target string that the match was made against.
Ruby's regular expression.
struct RBasic basic
Basic part, including flags and class.
const VALUE src
Source code of this expression.
unsigned long usecnt
Reference count.
struct re_pattern_buffer * ptr
The pattern buffer.
Represents the region of a capture group.
long beg
Beginning of a group.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directives.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.