14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
/*
 * Shorthand for entry `no` of the begin/end offset arrays of a match-register
 * structure.  Both macros expand to an access through a variable named `regs`,
 * which must therefore be in scope wherever they are used.
 */
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
/*
 * RUBY_MAX_CHAR_LEN is not referenced in the visible part of this file;
 * presumably an upper bound on the byte length of one character -- confirm.
 *
 * The STR_* names below are string-specific flag bits, each aliased to one of
 * the generic FL_USERn bits.  As used elsewhere in this file:
 *   STR_PRECOMPUTED_HASH - set by str_store_precomputed_hash() after it copies
 *                          a hash value just past the string's terminator.
 *   STR_SHARED_ROOT      - set on the string whose buffer other strings share
 *                          (see STR_SET_SHARED).
 *   STR_BORROWED         - set by STR_SET_SHARED on a shared root whose
 *                          RBASIC_CLASS is 0.
 *   STR_TMPLOCK          - tested by rb_check_lockedtmp(); part of
 *                          STR_UNMODIFIABLE_MASK.
 *   STR_NOFREE           - set by str_new_static(); strings carrying it are
 *                          skipped by the ruby_sized_xfree() paths.
 *   STR_FAKESTR          - when set, STR_SET_SHARED does nothing for that string.
 */
131#define RUBY_MAX_CHAR_LEN 16
132#define STR_PRECOMPUTED_HASH FL_USER4
133#define STR_SHARED_ROOT FL_USER5
134#define STR_BORROWED FL_USER6
135#define STR_TMPLOCK FL_USER7
136#define STR_NOFREE FL_USER18
137#define STR_FAKESTR FL_USER19
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
/*
 * Terminator length for `str`: 1 when str_enc_fastpath(str) is true, otherwise
 * rb_enc_mbminlen() of the string's encoding.  Note that `str` appears twice in
 * the expansion, so the argument should be free of side effects.
 */
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 FL_SET((shared_str), STR_SHARED_ROOT); \
209 if (RBASIC_CLASS((shared_str)) == 0) \
210 FL_SET_RAW((shared_str), STR_BORROWED); \
214#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
215#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
218#define STR_ENC_GET(str) get_encoding(str)
220#if !defined SHARABLE_MIDDLE_SUBSTRING
221# define SHARABLE_MIDDLE_SUBSTRING 0
223#if !SHARABLE_MIDDLE_SUBSTRING
224#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
226#define SHARABLE_SUBSTRING_P(beg, len, end) 1
231str_embed_capa(
VALUE str)
233 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
237rb_str_reembeddable_p(
VALUE str)
239 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
243rb_str_embed_size(
long capa,
long termlen)
251rb_str_size_as_embedded(
VALUE str)
254 if (STR_EMBED_P(str)) {
256 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
258 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
262 else if (rb_str_reembeddable_p(str)) {
264 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
266 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
269 real_size =
sizeof(
struct RString);
276STR_EMBEDDABLE_P(
long len,
long termlen)
278 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
/*
 * Forward declarations of file-local helpers whose definitions appear further
 * down in this file, so that earlier code can call them.
 */
283static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
284static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
286static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
287static inline void str_modifiable(
VALUE str);
292str_make_independent(
VALUE str)
294 long len = RSTRING_LEN(str);
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
/* Forward declaration; rb_str_make_independent() below calls this before its definition. */
299static inline int str_dependent_p(
VALUE str);
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_LEN(str,
len);
322 memcpy(RSTRING_PTR(str), buf,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
330rb_debug_rstring_null_ptr(
const char *func)
332 fprintf(stderr,
"%s is returning NULL!! "
333 "SIGSEGV is highly expected to follow immediately.\n"
334 "If you could reproduce, attach your debugger here, "
335 "and look at the passed string.\n",
340static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
343get_encoding(
VALUE str)
349mustnot_broken(
VALUE str)
351 if (is_broken_string(str)) {
352 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
357mustnot_wchar(
VALUE str)
360 if (rb_enc_mbminlen(enc) > 1) {
361 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
/*
 * Forward declaration; the definition later in this file looks `str` up in (or
 * inserts it into) the fstring table via rb_concurrent_set_find_or_insert().
 * `force_precompute_hash` is forwarded to the table's create callback.
 * NOTE(review): the use of `copy` is not visible in this excerpt -- confirm.
 */
365static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
367#if SIZEOF_LONG == SIZEOF_VOIDP
368#define PRECOMPUTED_FAKESTR_HASH 1
373BARE_STRING_P(
VALUE str)
378static inline st_index_t
379str_do_hash(
VALUE str)
381 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
383 if (e && !is_ascii_string(str)) {
390str_store_precomputed_hash(
VALUE str, st_index_t hash)
396 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
397 size_t free_bytes = str_embed_capa(str) - used_bytes;
401 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
403 FL_SET(str, STR_PRECOMPUTED_HASH);
416 if (
FL_TEST(str, RSTRING_FSTR))
419 bare = BARE_STRING_P(str);
421 if (STR_EMBED_P(str)) {
426 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
433 rb_str_resize(str, RSTRING_LEN(str));
435 fstr = register_fstring(str,
false,
false);
438 str_replace_shared_without_enc(str, fstr);
445static VALUE fstring_table_obj;
448fstring_concurrent_set_hash(
VALUE str)
450#ifdef PRECOMPUTED_FAKESTR_HASH
454 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
471 const char *aptr, *bptr;
478 return (alen == blen &&
480 memcmp(aptr, bptr, alen) == 0);
485 bool force_precompute_hash;
489fstring_concurrent_set_create(
VALUE str,
void *data)
499 long len = RSTRING_LEN(str);
500 long capa =
len +
sizeof(st_index_t);
501 int term_len = TERM_LEN(str);
503 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
505 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
506 STR_SET_LEN(new_str, RSTRING_LEN(str));
508 rb_enc_copy(new_str, str);
509 str_store_precomputed_hash(new_str, str_do_hash(str));
513 rb_enc_copy(new_str, str);
514#ifdef PRECOMPUTED_FAKESTR_HASH
515 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
516 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
530 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
533 if (STR_SHARED_P(str)) {
535 str_make_independent(str);
538 if (!BARE_STRING_P(str)) {
544 RBASIC(str)->flags |= RSTRING_FSTR;
546 RB_OBJ_SET_SHAREABLE(str);
560 .hash = fstring_concurrent_set_hash,
561 .cmp = fstring_concurrent_set_cmp,
562 .create = fstring_concurrent_set_create,
567Init_fstring_table(
void)
569 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
570 rb_gc_register_address(&fstring_table_obj);
574register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
578 .force_precompute_hash = force_precompute_hash
581#if SIZEOF_VOIDP == SIZEOF_LONG
585 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
589 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
591 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
603rb_obj_is_fstring_table(
VALUE obj)
607 return obj == fstring_table_obj;
611rb_gc_free_fstring(
VALUE obj)
613 ASSERT_vm_locking_with_barrier();
619 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
621 RB_DEBUG_COUNTER_INC(obj_str_fstr);
627rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
629 if (fstring_table_obj) {
630 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
635setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
638 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
651 return (
VALUE)fake_str;
660 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
669rb_fstring_new(
const char *ptr,
long len)
671 struct RString fake_str = {RBASIC_INIT};
672 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
678 struct RString fake_str = {RBASIC_INIT};
679 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
683rb_fstring_cstr(
const char *
ptr)
685 return rb_fstring_new(
ptr, strlen(
ptr));
689single_byte_optimizable(
VALUE str)
693 case ENCINDEX_ASCII_8BIT:
694 case ENCINDEX_US_ASCII:
716static inline const char *
717search_nonascii(
const char *p,
const char *e)
719 const uintptr_t *s, *t;
721#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
722# if SIZEOF_UINTPTR_T == 8
723# define NONASCII_MASK UINT64_C(0x8080808080808080)
724# elif SIZEOF_UINTPTR_T == 4
725# define NONASCII_MASK UINT32_C(0x80808080)
727# error "don't know what to do."
730# if SIZEOF_UINTPTR_T == 8
731# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
732# elif SIZEOF_UINTPTR_T == 4
733# define NONASCII_MASK 0x80808080UL
735# error "don't know what to do."
739 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
740#if !UNALIGNED_WORD_ACCESS
741 if ((uintptr_t)p % SIZEOF_VOIDP) {
742 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
747 case 7:
if (p[-7]&0x80)
return p-7;
748 case 6:
if (p[-6]&0x80)
return p-6;
749 case 5:
if (p[-5]&0x80)
return p-5;
750 case 4:
if (p[-4]&0x80)
return p-4;
752 case 3:
if (p[-3]&0x80)
return p-3;
753 case 2:
if (p[-2]&0x80)
return p-2;
754 case 1:
if (p[-1]&0x80)
return p-1;
759#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
760#define aligned_ptr(value) \
761 __builtin_assume_aligned((value), sizeof(uintptr_t))
763#define aligned_ptr(value) (uintptr_t *)(value)
766 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
769 if (*s & NONASCII_MASK) {
770#ifdef WORDS_BIGENDIAN
771 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
773 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
783 case 7:
if (e[-7]&0x80)
return e-7;
784 case 6:
if (e[-6]&0x80)
return e-6;
785 case 5:
if (e[-5]&0x80)
return e-5;
786 case 4:
if (e[-4]&0x80)
return e-4;
788 case 3:
if (e[-3]&0x80)
return e-3;
789 case 2:
if (e[-2]&0x80)
return e-2;
790 case 1:
if (e[-1]&0x80)
return e-1;
798 const char *e = p +
len;
800 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
802 p = search_nonascii(p, e);
806 if (rb_enc_asciicompat(enc)) {
807 p = search_nonascii(p, e);
810 int ret = rb_enc_precise_mbclen(p, e, enc);
814 p = search_nonascii(p, e);
820 int ret = rb_enc_precise_mbclen(p, e, enc);
836 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
839 p = search_nonascii(p, e);
843 else if (rb_enc_asciicompat(enc)) {
844 p = search_nonascii(p, e);
850 int ret = rb_enc_precise_mbclen(p, e, enc);
857 p = search_nonascii(p, e);
863 int ret = rb_enc_precise_mbclen(p, e, enc);
888 rb_enc_set_index(str1, rb_enc_get_index(str2));
896rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
901 str_enc_copy(dest, src);
902 if (RSTRING_LEN(dest) == 0) {
903 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
914 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
915 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
926rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
928 str_enc_copy(dest, src);
935 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
941 return enc_coderange_scan(str, enc);
950 cr = enc_coderange_scan(str, get_encoding(str));
957rb_enc_str_asciicompat(
VALUE str)
960 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
968 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
977str_mod_check(
VALUE s,
const char *p,
long len)
979 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
985str_capacity(
VALUE str,
const int termlen)
987 if (STR_EMBED_P(str)) {
988 return str_embed_capa(str) - termlen;
990 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
994 return RSTRING(str)->as.heap.aux.capa;
1001 return str_capacity(str, TERM_LEN(str));
1005must_not_null(
const char *
ptr)
1008 rb_raise(rb_eArgError,
"NULL pointer given");
1013str_alloc_embed(
VALUE klass,
size_t capa)
1015 size_t size = rb_str_embed_size(
capa, 0);
1019 NEWOBJ_OF(str,
struct RString, klass,
1023 str->as.embed.ary[0] = 0;
1029str_alloc_heap(
VALUE klass)
1031 NEWOBJ_OF(str,
struct RString, klass,
1035 str->as.heap.aux.capa = 0;
1036 str->as.heap.ptr = NULL;
1042empty_str_alloc(
VALUE klass)
1044 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1045 VALUE str = str_alloc_embed(klass, 0);
1046 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1057 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1061 enc = rb_ascii8bit_encoding();
1064 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1066 int termlen = rb_enc_mbminlen(enc);
1068 if (STR_EMBEDDABLE_P(
len, termlen)) {
1069 str = str_alloc_embed(klass,
len + termlen);
1075 str = str_alloc_heap(klass);
1081 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1084 rb_enc_raw_set(str, enc);
1087 memcpy(RSTRING_PTR(str),
ptr,
len);
1090 memset(RSTRING_PTR(str), 0,
len);
1093 STR_SET_LEN(str,
len);
1094 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1101 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1136 __msan_unpoison_string(
ptr);
1156 if (rb_enc_mbminlen(enc) != 1) {
1157 rb_raise(rb_eArgError,
"wchar encoding given");
1159 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1163str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1168 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1172 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1175 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1176 str = str_alloc_heap(klass);
1180 RBASIC(str)->flags |= STR_NOFREE;
1181 rb_enc_associate_index(str, encindex);
1210static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1212 int ecflags,
VALUE ecopts);
1217 int encidx = rb_enc_to_index(enc);
1218 if (rb_enc_get_index(str) == encidx)
1219 return is_ascii_string(str);
1230 if (!to)
return str;
1231 if (!from) from = rb_enc_get(str);
1232 if (from == to)
return str;
1233 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1234 rb_is_ascii8bit_enc(to)) {
1235 if (STR_ENC_GET(str) != to) {
1237 rb_enc_associate(str, to);
1244 from, to, ecflags, ecopts);
1245 if (
NIL_P(newstr)) {
1253rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1258 olen = RSTRING_LEN(newstr);
1259 if (ofs < -olen || olen < ofs)
1261 if (ofs < 0) ofs += olen;
1263 STR_SET_LEN(newstr, ofs);
1267 rb_str_modify(newstr);
1268 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1276 STR_SET_LEN(str, 0);
1277 rb_enc_associate(str, enc);
1283str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1285 int ecflags,
VALUE ecopts)
1290 VALUE econv_wrapper;
1291 const unsigned char *start, *sp;
1292 unsigned char *dest, *dp;
1293 size_t converted_output = (size_t)ofs;
1298 RBASIC_CLEAR_CLASS(econv_wrapper);
1300 if (!ec)
return Qnil;
1303 sp = (
unsigned char*)
ptr;
1305 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1306 (dp = dest + converted_output),
1310 size_t converted_input = sp - start;
1311 size_t rest =
len - converted_input;
1312 converted_output = dp - dest;
1314 if (converted_input && converted_output &&
1315 rest < (LONG_MAX / converted_output)) {
1316 rest = (rest * converted_output) / converted_input;
1321 olen += rest < 2 ? 2 : rest;
1322 rb_str_resize(newstr, olen);
1329 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1331 rb_enc_associate(newstr, to);
1350 const int eidx = rb_enc_to_index(eenc);
1353 return rb_enc_str_new(
ptr,
len, eenc);
1357 if ((eidx == rb_ascii8bit_encindex()) ||
1358 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1362 ienc = rb_default_internal_encoding();
1363 if (!ienc || eenc == ienc) {
1364 return rb_enc_str_new(
ptr,
len, eenc);
1368 if ((eidx == rb_ascii8bit_encindex()) ||
1369 (eidx == rb_usascii_encindex()) ||
1370 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1371 return rb_enc_str_new(
ptr,
len, ienc);
1374 str = rb_enc_str_new(NULL, 0, ienc);
1377 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1378 rb_str_initialize(str,
ptr,
len, eenc);
1386 int eidx = rb_enc_to_index(eenc);
1387 if (eidx == rb_usascii_encindex() &&
1388 !is_ascii_string(str)) {
1389 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1392 rb_enc_associate_index(str, eidx);
1451str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1453 const int termlen = TERM_LEN(str);
1458 if (str_embed_capa(str2) >=
len + termlen) {
1459 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1460 STR_SET_EMBED(str2);
1461 memcpy(ptr2, RSTRING_PTR(str),
len);
1462 TERM_FILL(ptr2+
len, termlen);
1466 if (STR_SHARED_P(str)) {
1467 root =
RSTRING(str)->as.heap.aux.shared;
1476 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1478 rb_fatal(
"about to free a possible shared root");
1480 char *ptr2 = STR_HEAP_PTR(str2);
1482 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1485 FL_SET(str2, STR_NOEMBED);
1487 STR_SET_SHARED(str2, root);
1490 STR_SET_LEN(str2,
len);
1498 str_replace_shared_without_enc(str2, str);
1499 rb_enc_cr_str_exact_copy(str2, str);
1506 return str_replace_shared(str_alloc_heap(klass), str);
1523rb_str_new_frozen_String(
VALUE orig)
1531rb_str_frozen_bare_string(
VALUE orig)
1533 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1538rb_str_tmp_frozen_acquire(
VALUE orig)
1541 return str_new_frozen_buffer(0, orig, FALSE);
1545rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1547 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1548 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1550 VALUE str = str_alloc_heap(0);
1553 FL_SET(str, STR_SHARED_ROOT);
1555 size_t capa = str_capacity(orig, TERM_LEN(orig));
1561 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1562 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1569 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1570 RBASIC(orig)->flags &= ~STR_NOFREE;
1571 STR_SET_SHARED(orig, str);
1573 RB_OBJ_SET_SHAREABLE(str);
1585rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1590 if (STR_EMBED_P(tmp)) {
1593 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1599 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1603 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1604 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1609 STR_SET_LEN(tmp, 0);
1617 return str_new_frozen_buffer(klass, orig, TRUE);
1627 VALUE str = str_alloc_heap(klass);
1628 STR_SET_LEN(str, RSTRING_LEN(orig));
1629 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1630 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1631 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1632 RBASIC(orig)->flags &= ~STR_NOFREE;
1633 STR_SET_SHARED(orig, str);
1640str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1644 long len = RSTRING_LEN(orig);
1645 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1646 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1648 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1649 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1655 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1656 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1662 if ((ofs > 0) || (rest > 0) ||
1665 str = str_new_shared(klass,
shared);
1667 RSTRING(str)->as.heap.ptr += ofs;
1668 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1676 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1677 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1679 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1680 STR_SET_LEN(str, RSTRING_LEN(orig));
1686 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1689 str = heap_str_make_shared(klass, orig);
1694 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1706str_new_empty_String(
VALUE str)
1709 rb_enc_copy(v, str);
/*
 * Lower bound for an explicitly requested buffer capacity: the capacity
 * handling later in this file raises any `capa` below this value up to it.
 */
1713#define STR_BUF_MIN_SIZE 63
1718 if (STR_EMBEDDABLE_P(
capa, 1)) {
1726 RSTRING(str)->as.heap.ptr[0] =
'\0';
1746 return str_new(0, 0,
len);
1752 if (STR_EMBED_P(str)) {
1753 RB_DEBUG_COUNTER_INC(obj_str_embed);
1755 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1756 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1757 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1760 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1761 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1766rb_str_memsize(
VALUE str)
1768 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1769 return STR_HEAP_SIZE(str);
1779 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/* Forward declarations of file-local helpers defined or used further down. */
1782static inline void str_discard(
VALUE str);
1783static void str_shared_replace(
VALUE str,
VALUE str2);
1788 if (str != str2) str_shared_replace(str, str2);
1799 enc = STR_ENC_GET(str2);
1802 termlen = rb_enc_mbminlen(enc);
1804 STR_SET_LEN(str, RSTRING_LEN(str2));
1806 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1808 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1809 rb_enc_associate(str, enc);
1813 if (STR_EMBED_P(str2)) {
1815 long len = RSTRING_LEN(str2);
1818 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1819 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1820 RSTRING(str2)->as.heap.ptr = new_ptr;
1821 STR_SET_LEN(str2,
len);
1823 STR_SET_NOEMBED(str2);
1826 STR_SET_NOEMBED(str);
1828 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1830 if (
FL_TEST(str2, STR_SHARED)) {
1832 STR_SET_SHARED(str,
shared);
1835 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1839 STR_SET_EMBED(str2);
1840 RSTRING_PTR(str2)[0] = 0;
1841 STR_SET_LEN(str2, 0);
1842 rb_enc_associate(str, enc);
1856 return rb_obj_as_string_result(str, obj);
1872 len = RSTRING_LEN(str2);
1873 if (STR_SHARED_P(str2)) {
1876 STR_SET_NOEMBED(str);
1877 STR_SET_LEN(str,
len);
1878 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1879 STR_SET_SHARED(str,
shared);
1880 rb_enc_cr_str_exact_copy(str, str2);
1883 str_replace_shared(str, str2);
1892 size_t size = rb_str_embed_size(
capa, 0);
1896 NEWOBJ_OF(str,
struct RString, klass,
1907 NEWOBJ_OF(str,
struct RString, klass,
1910 str->as.heap.aux.capa = 0;
1911 str->as.heap.ptr = NULL;
1921 encidx = rb_enc_get_index(str);
1922 flags &= ~ENCODING_MASK;
1925 if (encidx) rb_enc_associate_index(dup, encidx);
1935 long len = RSTRING_LEN(str);
1940 STR_SET_LEN(dup, RSTRING_LEN(str));
1941 return str_duplicate_setup_encoding(str, dup, flags);
1950 root =
RSTRING(str)->as.heap.aux.shared;
1953 root = str = str_new_frozen(klass, str);
1959 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1960 FL_SET(root, STR_SHARED_ROOT);
1962 flags |= RSTRING_NOEMBED | STR_SHARED;
1964 STR_SET_LEN(dup, RSTRING_LEN(str));
1965 return str_duplicate_setup_encoding(str, dup, flags);
1971 if (STR_EMBED_P(str)) {
1972 return str_duplicate_setup_embed(klass, str, dup);
1975 return str_duplicate_setup_heap(klass, str, dup);
1983 if (STR_EMBED_P(str)) {
1984 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1987 dup = str_alloc_heap(klass);
1990 return str_duplicate_setup(klass, str, dup);
2001rb_str_dup_m(
VALUE str)
2003 if (LIKELY(BARE_STRING_P(str))) {
2014 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2021 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2025 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2026 str_duplicate_setup_embed(klass, str, new_str);
2029 new_str = ec_str_alloc_heap(ec, klass);
2030 str_duplicate_setup_heap(klass, str, new_str);
2039rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2041 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2065 static ID keyword_ids[2];
2066 VALUE orig, opt, venc, vcapa;
2071 if (!keyword_ids[0]) {
2072 keyword_ids[0] = rb_id_encoding();
2073 CONST_ID(keyword_ids[1],
"capacity");
2081 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2082 enc = rb_to_encoding(venc);
2084 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2087 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2089 if (
capa < STR_BUF_MIN_SIZE) {
2090 capa = STR_BUF_MIN_SIZE;
2094 len = RSTRING_LEN(orig);
2098 if (orig == str) n = 0;
2100 str_modifiable(str);
2101 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2103 const size_t size = (size_t)
capa + termlen;
2104 const char *
const old_ptr = RSTRING_PTR(str);
2105 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2106 char *new_ptr =
ALLOC_N(
char, size);
2107 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2108 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2110 RSTRING(str)->as.heap.ptr = new_ptr;
2112 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2113 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2114 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2116 STR_SET_LEN(str,
len);
2119 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2120 rb_enc_cr_str_exact_copy(str, orig);
2122 FL_SET(str, STR_NOEMBED);
2129 rb_enc_associate(str, enc);
2141rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2147 static ID keyword_ids[2];
2157 keyword_ids[0] = rb_id_encoding();
2158 CONST_ID(keyword_ids[1],
"capacity");
2160 encoding = kwargs[0];
2161 capacity = kwargs[1];
2170 if (UNDEF_P(encoding)) {
2172 encoding = rb_obj_encoding(orig);
2176 if (!UNDEF_P(encoding)) {
2177 enc = rb_to_encoding(encoding);
2181 if (UNDEF_P(capacity)) {
2183 VALUE empty_str = str_new(klass,
"", 0);
2185 rb_enc_associate(empty_str, enc);
2189 VALUE copy = str_duplicate(klass, orig);
2190 rb_enc_associate(copy, enc);
2203 if (orig_capa >
capa) {
2208 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2209 STR_SET_LEN(str, 0);
/*
 * Nonzero unless the top two bits of `c` are binary 10, i.e. unless `c` has the
 * form of a UTF-8 continuation byte.  It is therefore also true for plain
 * ASCII bytes; enc_strlen() counts the bytes for which this holds.
 */
2220#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2235static inline uintptr_t
2236count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2241 d = (d>>6) | (~d>>7);
2242 d &= NONASCII_MASK >> 7;
2245#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2247 return rb_popcount_intptr(d);
2251# if SIZEOF_VOIDP == 8
2260enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2266 long diff = (long)(e - p);
2267 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2272 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2273 const uintptr_t *s, *t;
2274 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2275 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2276 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2277 while (p < (
const char *)s) {
2278 if (is_utf8_lead_byte(*p))
len++;
2282 len += count_utf8_lead_bytes_with_word(s);
2285 p = (
const char *)s;
2288 if (is_utf8_lead_byte(*p))
len++;
2294 else if (rb_enc_asciicompat(enc)) {
2299 q = search_nonascii(p, e);
2305 p += rb_enc_fast_mbclen(p, e, enc);
2312 q = search_nonascii(p, e);
2318 p += rb_enc_mbclen(p, e, enc);
2325 for (c=0; p<e; c++) {
2326 p += rb_enc_mbclen(p, e, enc);
2341rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2349 long diff = (long)(e - p);
2350 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2352 else if (rb_enc_asciicompat(enc)) {
2356 q = search_nonascii(p, e);
2364 ret = rb_enc_precise_mbclen(p, e, enc);
2379 for (c=0; p<e; c++) {
2380 ret = rb_enc_precise_mbclen(p, e, enc);
2387 if (p + rb_enc_mbminlen(enc) <= e)
2388 p += rb_enc_mbminlen(enc);
2404 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2405 if (!enc) enc = STR_ENC_GET(str);
2406 p = RSTRING_PTR(str);
2411 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2416 return enc_strlen(p, e, enc, cr);
2423 return str_strlen(str, NULL);
2437 return LONG2NUM(str_strlen(str, NULL));
2449rb_str_bytesize(
VALUE str)
2468rb_str_empty(
VALUE str)
2470 return RBOOL(RSTRING_LEN(str) == 0);
2489 char *ptr1, *ptr2, *ptr3;
2494 enc = rb_enc_check_str(str1, str2);
2497 termlen = rb_enc_mbminlen(enc);
2498 if (len1 > LONG_MAX - len2) {
2499 rb_raise(rb_eArgError,
"string size too big");
2501 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2502 ptr3 = RSTRING_PTR(str3);
2503 memcpy(ptr3, ptr1, len1);
2504 memcpy(ptr3+len1, ptr2, len2);
2505 TERM_FILL(&ptr3[len1+len2], termlen);
2521 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2524 int enc1 = rb_enc_get_index(str1);
2525 int enc2 = rb_enc_get_index(str2);
2530 else if (enc2 < 0) {
2533 else if (enc1 != enc2) {
2536 else if (len1 > LONG_MAX - len2) {
2570 rb_enc_copy(str2, str);
2575 rb_raise(rb_eArgError,
"negative argument");
2577 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2578 if (STR_EMBEDDABLE_P(
len, 1)) {
2580 memset(RSTRING_PTR(str2), 0,
len + 1);
2587 STR_SET_LEN(str2,
len);
2588 rb_enc_copy(str2, str);
2591 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2592 rb_raise(rb_eArgError,
"argument too big");
2595 len *= RSTRING_LEN(str);
2596 termlen = TERM_LEN(str);
2598 ptr2 = RSTRING_PTR(str2);
2600 n = RSTRING_LEN(str);
2601 memcpy(ptr2, RSTRING_PTR(str), n);
2602 while (n <=
len/2) {
2603 memcpy(ptr2 + n, ptr2, n);
2606 memcpy(ptr2 + n, ptr2,
len-n);
2608 STR_SET_LEN(str2,
len);
2609 TERM_FILL(&ptr2[
len], termlen);
2610 rb_enc_cr_str_copy_for_substr(str2, str);
2647rb_check_lockedtmp(
VALUE str)
2649 if (
FL_TEST(str, STR_TMPLOCK)) {
/*
 * Flags that make str_modifiable() leave its fast path: if any of these is set
 * it runs the chilled-string, temp-lock and frozen checks.
 */
2656#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2658str_modifiable(
VALUE str)
2662 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2663 if (CHILLED_STRING_P(str)) {
2664 CHILLED_STRING_MUTATED(str);
2666 rb_check_lockedtmp(str);
2667 rb_check_frozen(str);
2672str_dependent_p(
VALUE str)
2674 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/*
 * STR_UNMODIFIABLE_MASK plus the shared / no-free flags.  str_independent()
 * tests this single mask first and only calls str_modifiable() and
 * str_dependent_p() when at least one of the bits is set.
 */
2684#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2686str_independent(
VALUE str)
2690 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2691 str_modifiable(str);
2692 return !str_dependent_p(str);
2698str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2708 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2713 STR_SET_LEN(str,
len);
2718 oldptr = RSTRING_PTR(str);
2720 memcpy(
ptr, oldptr,
len);
2722 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2725 STR_SET_NOEMBED(str);
2726 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2727 TERM_FILL(
ptr +
len, termlen);
2729 STR_SET_LEN(str,
len);
2736 if (!str_independent(str))
2737 str_make_independent(str);
2746 int termlen = TERM_LEN(str);
2747 long len = RSTRING_LEN(str);
2750 rb_raise(rb_eArgError,
"negative expanding string size");
2752 if (expand >= LONG_MAX -
len) {
2753 rb_raise(rb_eArgError,
"string size too big");
2756 if (!str_independent(str)) {
2757 str_make_independent_expand(str,
len, expand, termlen);
2759 else if (expand > 0) {
2760 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2767str_modify_keep_cr(
VALUE str)
2769 if (!str_independent(str))
2770 str_make_independent(str);
/*
 * Throws away the contents of str, after str_modifiable() has checked that
 * the string may be changed.
 */
2777str_discard(
VALUE str)
2779 str_modifiable(str);
/* only a non-embedded buffer that is neither STR_SHARED nor STR_NOFREE is freed */
2780 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* release the heap buffer, passing along its recorded size */
2781 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/* clear pointer and length so str no longer refers to the freed memory */
2782 RSTRING(str)->as.heap.ptr = 0;
2783 STR_SET_LEN(str, 0);
2790 int encindex = rb_enc_get_index(str);
2792 if (RB_UNLIKELY(encindex == -1)) {
2796 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2801 if (!rb_enc_asciicompat(enc)) {
2823 return RSTRING_PTR(str);
2827zero_filled(
const char *s,
int n)
2829 for (; n > 0; --n) {
/*
 * Scans the len bytes at s for a position where zero_filled() accepts the
 * next minlen bytes, advancing one character at a time (rb_enc_mbclen() in
 * encoding enc).  Returns a pointer to the first such position; the result
 * when none is found is on a line not shown in this excerpt.
 */
2836str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2838 const char *e = s +
len;
/* stop as soon as fewer than minlen bytes remain before e */
2840 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2841 if (zero_filled(s, minlen))
return s;
/*
 * Makes sure the termlen bytes following the len bytes at s hold a
 * terminator.  s is the caller's view of str's buffer (callers in this file
 * pass RSTRING_PTR(str)).
 */
2847str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
/*
 * For a dependent string the terminator area is only inspected: if it is not
 * already all zero, str is made independent (with zero extra expansion).
 */
2852 if (str_dependent_p(str)) {
2853 if (!zero_filled(s +
len, termlen))
2854 str_make_independent_expand(str,
len, 0L, termlen);
/* write the terminator in place; the enclosing branch is on lines not shown here */
2857 TERM_FILL(s +
len, termlen);
/*
 * NOTE(review): the pointer is fetched again from str rather than returning
 * s, presumably because str_make_independent_expand() can replace the
 * buffer -- confirm.
 */
2860 return RSTRING_PTR(str);
2864rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2866 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2867 long len = RSTRING_LEN(str);
2871 rb_check_lockedtmp(str);
2872 str_make_independent_expand(str,
len, 0L, termlen);
2874 else if (str_dependent_p(str)) {
2875 if (termlen > oldtermlen)
2876 str_make_independent_expand(str,
len, 0L, termlen);
2879 if (!STR_EMBED_P(str)) {
2884 if (termlen > oldtermlen) {
2885 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2893str_null_check(
VALUE str,
int *w)
2895 char *s = RSTRING_PTR(str);
2896 long len = RSTRING_LEN(str);
2898 const int minlen = rb_enc_mbminlen(enc);
2902 if (str_null_char(s,
len, minlen, enc)) {
2905 return str_fill_term(str, s,
len, minlen);
2908 if (!s || memchr(s, 0,
len)) {
2912 s = str_fill_term(str, s,
len, minlen);
2918rb_str_to_cstr(
VALUE str)
2921 return str_null_check(str, &w);
2929 char *s = str_null_check(str, &w);
2932 rb_raise(rb_eArgError,
"string contains null char");
2934 rb_raise(rb_eArgError,
"string contains null byte");
/*
 * Thin wrapper: hands str's current pointer and length to str_fill_term()
 * with newminlen as the terminator length, and returns whatever that
 * function returns.
 */
2940rb_str_fill_terminator(
VALUE str,
const int newminlen)
2942 char *s = RSTRING_PTR(str);
2943 long len = RSTRING_LEN(str);
2944 return str_fill_term(str, s,
len, newminlen);
2950 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2976str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2985 else if (rb_enc_asciicompat(enc)) {
2986 const char *p2, *e2;
2989 while (p < e && 0 < nth) {
2996 p2 = search_nonascii(p, e2);
3005 n = rb_enc_mbclen(p, e, enc);
3016 while (p < e && nth--) {
3017 p += rb_enc_mbclen(p, e, enc);
3028 return str_nth_len(p, e, &nth, enc);
3032str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3037 p = str_nth_len(p, e, &nth, enc);
/*
 * Offset counterpart of str_nth(): locates the nth character within [p, e)
 * and reports the position as a distance from p.  The return for a non-NULL
 * result is on a line not shown in this excerpt.
 */
3046str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3048 const char *pp = str_nth(p, e, nth, enc, singlebyte);
/* str_nth() yielded NULL: answer with the whole span, e - p */
3049 if (!pp)
return e - p;
3056 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3057 STR_ENC_GET(str), single_byte_optimizable(str));
3062str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3065 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3066 const uintptr_t *s, *t;
3067 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3068 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3069 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3070 while (p < (
const char *)s) {
3071 if (is_utf8_lead_byte(*p)) nth--;
3075 nth -= count_utf8_lead_bytes_with_word(s);
3077 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3081 if (is_utf8_lead_byte(*p)) {
3082 if (nth == 0)
break;
3092str_utf8_offset(
const char *p,
const char *e,
long nth)
3094 const char *pp = str_utf8_nth(p, e, &nth);
3103 if (single_byte_optimizable(str) || pos < 0)
3106 char *p = RSTRING_PTR(str);
3107 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3112str_subseq(
VALUE str,
long beg,
long len)
3120 const int termlen = TERM_LEN(str);
3121 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3128 if (str_embed_capa(str2) >=
len + termlen) {
3129 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3130 STR_SET_EMBED(str2);
3131 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3132 TERM_FILL(ptr2+
len, termlen);
3134 STR_SET_LEN(str2,
len);
3138 str_replace_shared(str2, str);
3141 RSTRING(str2)->as.heap.ptr += beg;
3142 if (RSTRING_LEN(str2) >
len) {
3143 STR_SET_LEN(str2,
len);
3153 VALUE str2 = str_subseq(str, beg,
len);
3154 rb_enc_cr_str_copy_for_substr(str2, str);
3163 const long blen = RSTRING_LEN(str);
3165 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3167 if (
len < 0)
return 0;
3168 if (beg < 0 && -beg < 0)
return 0;
3172 if (single_byte_optimizable(str)) {
3173 if (beg > blen)
return 0;
3176 if (beg < 0)
return 0;
3178 if (
len > blen - beg)
3180 if (
len < 0)
return 0;
3185 if (
len > -beg)
len = -beg;
3189 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3192 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3198 slen = str_strlen(str, enc);
3200 if (beg < 0)
return 0;
3202 if (
len == 0)
goto end;
3205 else if (beg > 0 && beg > blen) {
3209 if (beg > str_strlen(str, enc))
return 0;
3214 enc == rb_utf8_encoding()) {
3215 p = str_utf8_nth(s, e, &beg);
3216 if (beg > 0)
return 0;
3217 len = str_utf8_offset(p, e,
len);
3223 p = s + beg * char_sz;
3227 else if (
len * char_sz > e - p)
3232 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3233 if (beg > 0)
return 0;
3237 len = str_offset(p, e,
len, enc, 0);
3245static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3250 return str_substr(str, beg,
len, TRUE);
3260str_substr(
VALUE str,
long beg,
long len,
int empty)
3264 if (!p)
return Qnil;
3265 if (!
len && !empty)
return Qnil;
3267 beg = p - RSTRING_PTR(str);
3269 VALUE str2 = str_subseq(str, beg,
len);
3270 rb_enc_cr_str_copy_for_substr(str2, str);
3278 if (CHILLED_STRING_P(str)) {
3283 rb_str_resize(str, RSTRING_LEN(str));
3301 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3344str_uminus(
VALUE str)
3349 return rb_fstring(str);
/* From this point on, rb_str_dup_frozen expands to rb_str_new_frozen. */
3353#define rb_str_dup_frozen rb_str_new_frozen
3358 rb_check_frozen(str);
3359 if (
FL_TEST(str, STR_TMPLOCK)) {
3362 FL_SET(str, STR_TMPLOCK);
3369 rb_check_frozen(str);
3370 if (!
FL_TEST(str, STR_TMPLOCK)) {
3390 const int termlen = TERM_LEN(str);
3392 str_modifiable(str);
3393 if (STR_SHARED_P(str)) {
3396 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3397 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3408 else if (
len > RSTRING_LEN(str)) {
3412 const char *
const new_end = RSTRING_PTR(str) +
len;
3422 else if (
len < RSTRING_LEN(str)) {
3430 STR_SET_LEN(str,
len);
3431 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3438 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3441 int independent = str_independent(str);
3442 long slen = RSTRING_LEN(str);
3443 const int termlen = TERM_LEN(str);
3445 if (slen >
len || (termlen != 1 && slen <
len)) {
3451 if (STR_EMBED_P(str)) {
3452 if (
len == slen)
return str;
3453 if (str_embed_capa(str) >=
len + termlen) {
3454 STR_SET_LEN(str,
len);
3458 str_make_independent_expand(str, slen,
len - slen, termlen);
3460 else if (str_embed_capa(str) >=
len + termlen) {
3461 char *
ptr = STR_HEAP_PTR(str);
3463 if (slen >
len) slen =
len;
3466 STR_SET_LEN(str,
len);
3467 if (independent) ruby_xfree(
ptr);
3470 else if (!independent) {
3471 if (
len == slen)
return str;
3472 str_make_independent_expand(str, slen,
len - slen, termlen);
3476 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3477 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3480 else if (
len == slen)
return str;
3481 STR_SET_LEN(str,
len);
/*
 * Prepares str to receive len more bytes after its current contents: checks
 * that the combined length fits in a long, then resizes the buffer when the
 * current capacity is too small.
 */
3488str_ensure_available_capa(
VALUE str,
long len)
3490 str_modify_keep_cr(str);
3492 const int termlen = TERM_LEN(str);
3493 long olen = RSTRING_LEN(str);
/* phrased as a subtraction so the check itself cannot overflow */
3495 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3496 rb_raise(rb_eArgError,
"string sizes too big");
3499 long total = olen +
len;
3500 long capa = str_capacity(str, termlen);
/*
 * capa is adjusted until it covers total; the adjustment statements inside
 * these two branches are on lines not shown in this excerpt.
 */
3503 if (total >= LONG_MAX / 2) {
3506 while (total >
capa) {
/* resize to the chosen capacity, keeping room for the terminator */
3509 RESIZE_CAPA_TERM(str,
capa, termlen);
3514str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3517 str_modify_keep_cr(str);
3522 if (
len == 0)
return 0;
3524 long total, olen,
off = -1;
3526 const int termlen = TERM_LEN(str);
3529 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3533 long capa = str_capacity(str, termlen);
3535 if (olen > LONG_MAX -
len) {
3536 rb_raise(rb_eArgError,
"string sizes too big");
3540 if (total >= LONG_MAX / 2) {
3543 while (total >
capa) {
3546 RESIZE_CAPA_TERM(str,
capa, termlen);
3547 sptr = RSTRING_PTR(str);
3552 memcpy(sptr + olen,
ptr,
len);
3553 STR_SET_LEN(str, total);
3554 TERM_FILL(sptr + total, termlen);
/* Appends len bytes at ptr through str_buf_cat4() with keep_cr set to false. */
3559#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
/*
 * Same as str_buf_cat(), with the length supplied by rb_strlen_lit(ptr).
 * NOTE(review): presumably ptr must therefore be a string literal -- confirm.
 */
3560#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3565 if (
len == 0)
return str;
3567 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3569 return str_buf_cat(str,
ptr,
len);
/*
 * Appends the single byte `byte` to str.  When the existing capacity already
 * has room the byte is stored directly; otherwise the work is handed to
 * str_buf_cat().
 */
3580rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
/* detach from any shared/unowned buffer before writing into it */
3585 if (UNLIKELY(!str_independent(str))) {
3586 str_make_independent(str);
/* placeholder value; the real length is assigned on lines not shown here */
3589 long string_length = -1;
/*
 * A one-byte terminator is assumed throughout this function.
 * NOTE(review): that only holds for encodings whose TERM_LEN is 1 -- confirm
 * the precondition on the elided lines.
 */
3590 const int null_terminator_length = 1;
/* string_length + 1 below must not overflow */
3595 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3596 rb_raise(rb_eArgError,
"string sizes too big");
3599 long string_capacity = str_capacity(str, null_terminator_length);
/* fast path: store the byte, bump the length, rewrite the terminator */
3605 if (LIKELY(string_capacity >= string_length + 1)) {
3607 sptr[string_length] = byte;
3608 STR_SET_LEN(str, string_length + 1);
3609 TERM_FILL(sptr + string_length + 1, null_terminator_length);
/* general path: append the byte as a 1-byte buffer */
3613 str_buf_cat(str, (
char *)&
byte, 1);
3629 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3640rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3641 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3650 if (str_encindex == ptr_encindex) {
3652 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3656 str_enc = rb_enc_from_index(str_encindex);
3657 ptr_enc = rb_enc_from_index(ptr_encindex);
3658 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3661 if (RSTRING_LEN(str) == 0) {
3664 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3670 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3679 *ptr_cr_ret = ptr_cr;
3681 if (str_encindex != ptr_encindex &&
3684 str_enc = rb_enc_from_index(str_encindex);
3685 ptr_enc = rb_enc_from_index(ptr_encindex);
3690 res_encindex = str_encindex;
3695 res_encindex = str_encindex;
3699 res_encindex = ptr_encindex;
3704 res_encindex = str_encindex;
3711 res_encindex = str_encindex;
3717 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3719 str_buf_cat(str,
ptr,
len);
3725 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3732 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3742 if (rb_enc_asciicompat(enc)) {
3743 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3749 unsigned int c = (
unsigned char)*
ptr;
3750 int len = rb_enc_codelen(c, enc);
3751 rb_enc_mbcput(c, buf, enc);
3752 rb_enc_cr_str_buf_cat(str, buf,
len,
3765 if (str_enc_fastpath(str)) {
3769 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3775 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3786 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3802rb_str_concat_literals(
size_t num,
const VALUE *strary)
3806 unsigned long len = 1;
3811 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3813 str_enc_copy_direct(str, strary[0]);
3815 for (i = s; i < num; ++i) {
3816 const VALUE v = strary[i];
3820 if (encidx != ENCINDEX_US_ASCII) {
3822 rb_enc_set_index(str, encidx);
3835rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3837 str_modifiable(str);
3842 else if (argc > 1) {
3845 rb_enc_copy(arg_str, str);
3846 for (i = 0; i < argc; i++) {
3881rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3883 long needed_capacity = 0;
3887 for (
int index = 0; index < argc; index++) {
3888 VALUE obj = argv[index];
3896 needed_capacity += RSTRING_LEN(obj);
3901 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3908 str_ensure_available_capa(str, needed_capacity);
3911 for (
int index = 0; index < argc; index++) {
3912 VALUE obj = argv[index];
3917 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3918 char byte = (char)(
NUM2INT(obj) & 0xFF);
3932 rb_bug(
"append_as_bytes arguments should have been validated");
3936 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3937 TERM_FILL(sptr, TERM_LEN(str));
3942 for (
int index = 0; index < argc; index++) {
3943 VALUE obj = argv[index];
3960 rb_bug(
"append_as_bytes arguments should have been validated");
4039 if (rb_num_to_uint(str2, &code) == 0) {
4052 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4055 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4058 long pos = RSTRING_LEN(str1);
4063 switch (
len = rb_enc_codelen(code, enc)) {
4064 case ONIGERR_INVALID_CODE_POINT_VALUE:
4065 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4067 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4073 rb_enc_mbcput(code, buf, enc);
4074 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4075 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4077 rb_str_resize(str1, pos+
len);
4078 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
/*
 * Looks at the index of enc and at code for the two encodings
 * ENCINDEX_ASCII_8BIT and ENCINDEX_US_ASCII.  The visible return hands back
 * ENCINDEX_ASCII_8BIT for a US-ASCII encoding combined with a code above
 * 127; the other return values are on lines not shown in this excerpt.
 */
4091rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4093 int encidx = rb_enc_to_index(enc);
/* only these two encodings are handled inside this branch */
4095 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
/* 127 is the largest 7-bit value; a larger code switches the answer to ASCII-8BIT */
4100 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4101 return ENCINDEX_ASCII_8BIT;
4123rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4125 str_modifiable(str);
4130 else if (argc > 1) {
4133 rb_enc_copy(arg_str, str);
4134 for (i = 0; i < argc; i++) {
4147 st_index_t precomputed_hash;
4148 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4150 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4151 return precomputed_hash;
4154 return str_do_hash(str);
4161 const char *ptr1, *ptr2;
4164 return (len1 != len2 ||
4166 memcmp(ptr1, ptr2, len1) != 0);
4178rb_str_hash_m(
VALUE str)
/*
 * Yields the smaller of a and b (b when they compare equal).  Each argument
 * appears twice in the expansion, so pass only side-effect-free expressions.
 */
4184#define lesser(a,b) (((a)>(b))?(b):(a))
4192 if (RSTRING_LEN(str1) == 0)
return TRUE;
4193 if (RSTRING_LEN(str2) == 0)
return TRUE;
4196 if (idx1 == idx2)
return TRUE;
4201 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4205 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4215 const char *ptr1, *ptr2;
4218 if (str1 == str2)
return 0;
4221 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4230 if (len1 > len2)
return 1;
4233 if (retval > 0)
return 1;
4267 if (str1 == str2)
return Qtrue;
4274 return rb_str_eql_internal(str1, str2);
4288 if (str1 == str2)
return Qtrue;
4290 return rb_str_eql_internal(str1, str2);
4322 return rb_invcmp(str1, str2);
4364 return str_casecmp(str1, s);
4372 const char *p1, *p1end, *p2, *p2end;
4374 enc = rb_enc_compatible(str1, str2);
4379 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4380 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4381 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4382 while (p1 < p1end && p2 < p2end) {
4384 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4385 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4387 return INT2FIX(c1 < c2 ? -1 : 1);
4394 while (p1 < p1end && p2 < p2end) {
4395 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4396 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4398 if (0 <= c1 && 0 <= c2) {
4402 return INT2FIX(c1 < c2 ? -1 : 1);
4406 l1 = rb_enc_mbclen(p1, p1end, enc);
4407 l2 = rb_enc_mbclen(p2, p2end, enc);
4408 len = l1 < l2 ? l1 : l2;
4409 r = memcmp(p1, p2,
len);
4411 return INT2FIX(r < 0 ? -1 : 1);
4413 return INT2FIX(l1 < l2 ? -1 : 1);
4419 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4420 if (p1 == p1end)
return INT2FIX(-1);
4453 return str_casecmp_p(str1, s);
4460 VALUE folded_str1, folded_str2;
4461 VALUE fold_opt = sym_fold;
4463 enc = rb_enc_compatible(str1, str2);
4468 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4469 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4471 return rb_str_eql(folded_str1, folded_str2);
/*
 * Core of the substring search: repeatedly runs rb_memsearch() for the
 * sub_len bytes at sub_ptr and returns the position of an accepted hit, with
 * the accumulated offset added.  Negative results signal "not found".
 * The loop header, and the statement that computes `t`, are on lines not
 * shown in this excerpt.
 */
4475strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4476 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4478 const char *search_start = str_ptr;
/* number of bytes still to be searched, i.e. everything past offset */
4479 long pos, search_len = str_len - offset;
4483 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
/* rb_memsearch() reported no hit: pass its negative result through */
4484 if (pos < 0)
return pos;
/*
 * NOTE(review): t is presumably the character boundary for the hit; a hit is
 * accepted only when it sits exactly on t -- confirm against the elided line.
 */
4486 if (t == search_start + pos)
break;
/* hit rejected: shrink the window by the distance up to t ... */
4487 search_len -= t - search_start;
4488 if (search_len <= 0)
return -1;
/* ... and account for the skipped bytes in the reported offset */
4489 offset += t - search_start;
4492 return pos + offset;
/* rb_strseq_index() with in_byte = 0. */
4496#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
/* rb_strseq_index() with in_byte = 1. */
4497#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4500rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4502 const char *str_ptr, *str_ptr_end, *sub_ptr;
4503 long str_len, sub_len;
4506 enc = rb_enc_check(str, sub);
4507 if (is_broken_string(sub))
return -1;
4509 str_ptr = RSTRING_PTR(str);
4511 str_len = RSTRING_LEN(str);
4512 sub_ptr = RSTRING_PTR(sub);
4513 sub_len = RSTRING_LEN(sub);
4515 if (str_len < sub_len)
return -1;
4518 long str_len_char, sub_len_char;
4519 int single_byte = single_byte_optimizable(str);
4520 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4521 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4523 offset += str_len_char;
4524 if (offset < 0)
return -1;
4526 if (str_len_char - offset < sub_len_char)
return -1;
4527 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4530 if (sub_len == 0)
return offset;
4533 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4546rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4553 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4554 long slen = str_strlen(str, enc);
4556 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4568 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4569 enc, single_byte_optimizable(str));
4580 pos = rb_str_index(str, sub, pos);
/*
 * Validates that byte offset pos of str falls on a character boundary.
 * Strings for which single_byte_optimizable() holds skip the check.
 * The call that emits the message below, and the definition of `e`, are on
 * lines not shown in this excerpt.
 */
4594str_ensure_byte_pos(
VALUE str,
long pos)
4596 if (!single_byte_optimizable(str)) {
4597 const char *s = RSTRING_PTR(str);
4599 const char *p = s + pos;
/* p = s + pos must be a boundary in str's own encoding */
4600 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4602 "offset %ld does not land on character boundary", pos);
4675rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4681 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4682 long slen = RSTRING_LEN(str);
4684 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4695 str_ensure_byte_pos(str, pos);
4707 pos = rb_str_byteindex(str, sub, pos);
4708 if (pos >= 0)
return LONG2NUM(pos);
/*
 * Backward byte search: examines the search_len bytes starting at
 * search_str from the last byte toward the first, and returns a pointer to
 * the first byte found that equals chr (that is, its last occurrence).
 * The not-found return is on a line not shown in this excerpt.
 */
4715memrchr(
const char *search_str,
int chr,
long search_len)
/* start one past the end; the pre-decrement below steps onto each byte */
4717 const char *ptr = search_str + search_len;
4718 while (ptr > search_str) {
/* the byte is read as unsigned char before being compared with chr */
4719 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4729 char *hit, *adjusted;
4731 long slen, searchlen;
4734 sbeg = RSTRING_PTR(str);
4735 slen = RSTRING_LEN(sub);
4736 if (slen == 0)
return s - sbeg;
4738 t = RSTRING_PTR(sub);
4740 searchlen = s - sbeg + 1;
4742 if (memcmp(s, t, slen) == 0) {
4747 hit = memrchr(sbeg, c, searchlen);
4750 if (hit != adjusted) {
4751 searchlen = adjusted - sbeg;
4754 if (memcmp(hit, t, slen) == 0)
4756 searchlen = adjusted - sbeg;
4757 }
while (searchlen > 0);
4771 enc = rb_enc_check(str, sub);
4772 if (is_broken_string(sub))
return -1;
4773 singlebyte = single_byte_optimizable(str);
4774 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4775 slen = str_strlen(sub, enc);
4778 if (
len < slen)
return -1;
4779 if (
len - pos < slen) pos =
len - slen;
4780 if (
len == 0)
return pos;
4782 sbeg = RSTRING_PTR(str);
4785 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4791 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4792 return str_rindex(str, sub, s, enc);
4804rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4809 long pos,
len = str_strlen(str, enc);
4811 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4813 if (pos < 0 && (pos +=
len) < 0) {
4819 if (pos >
len) pos =
len;
4827 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4828 enc, single_byte_optimizable(str));
4839 pos = rb_str_rindex(str, sub, pos);
4849rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4855 enc = rb_enc_check(str, sub);
4856 if (is_broken_string(sub))
return -1;
4857 len = RSTRING_LEN(str);
4858 slen = RSTRING_LEN(sub);
4861 if (
len < slen)
return -1;
4862 if (
len - pos < slen) pos =
len - slen;
4863 if (
len == 0)
return pos;
4865 sbeg = RSTRING_PTR(str);
4868 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4875 return str_rindex(str, sub, s, enc);
4965rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4969 long pos,
len = RSTRING_LEN(str);
4971 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4973 if (pos < 0 && (pos +=
len) < 0) {
4979 if (pos >
len) pos =
len;
4985 str_ensure_byte_pos(str, pos);
4997 pos = rb_str_byterindex(str, sub, pos);
4998 if (pos >= 0)
return LONG2NUM(pos);
5037 switch (OBJ_BUILTIN_TYPE(y)) {
5091rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5098 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5129rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5133 re = get_pat(argv[0]);
5134 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5143static enum neighbor_char
5149 if (rb_enc_mbminlen(enc) > 1) {
5151 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5153 return NEIGHBOR_NOT_CHAR;
5155 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5157 if (!l)
return NEIGHBOR_NOT_CHAR;
5158 if (l !=
len)
return NEIGHBOR_WRAPPED;
5159 rb_enc_mbcput(c, p, enc);
5160 r = rb_enc_precise_mbclen(p, p +
len, enc);
5162 return NEIGHBOR_NOT_CHAR;
5164 return NEIGHBOR_FOUND;
5167 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5170 return NEIGHBOR_WRAPPED;
5171 ++((
unsigned char*)p)[i];
5172 l = rb_enc_precise_mbclen(p, p+
len, enc);
5176 return NEIGHBOR_FOUND;
5179 memset(p+l, 0xff,
len-l);
5185 for (len2 =
len-1; 0 < len2; len2--) {
5186 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5190 memset(p+len2+1, 0xff,
len-(len2+1));
5195static enum neighbor_char
5200 if (rb_enc_mbminlen(enc) > 1) {
5202 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5204 return NEIGHBOR_NOT_CHAR;
5206 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5207 if (!c)
return NEIGHBOR_NOT_CHAR;
5210 if (!l)
return NEIGHBOR_NOT_CHAR;
5211 if (l !=
len)
return NEIGHBOR_WRAPPED;
5212 rb_enc_mbcput(c, p, enc);
5213 r = rb_enc_precise_mbclen(p, p +
len, enc);
5215 return NEIGHBOR_NOT_CHAR;
5217 return NEIGHBOR_FOUND;
5220 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5223 return NEIGHBOR_WRAPPED;
5224 --((
unsigned char*)p)[i];
5225 l = rb_enc_precise_mbclen(p, p+
len, enc);
5229 return NEIGHBOR_FOUND;
5232 memset(p+l, 0,
len-l);
5238 for (len2 =
len-1; 0 < len2; len2--) {
5239 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5243 memset(p+len2+1, 0,
len-(len2+1));
5257static enum neighbor_char
5258enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5260 enum neighbor_char ret;
5264 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5268 const int max_gaps = 1;
5270 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5272 ctype = ONIGENC_CTYPE_DIGIT;
5274 ctype = ONIGENC_CTYPE_ALPHA;
5276 return NEIGHBOR_NOT_CHAR;
5279 for (
try = 0;
try <= max_gaps; ++
try) {
5280 ret = enc_succ_char(p,
len, enc);
5281 if (ret == NEIGHBOR_FOUND) {
5282 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5284 return NEIGHBOR_FOUND;
5291 ret = enc_pred_char(p,
len, enc);
5292 if (ret == NEIGHBOR_FOUND) {
5293 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5306 return NEIGHBOR_NOT_CHAR;
5309 if (ctype != ONIGENC_CTYPE_DIGIT) {
5311 return NEIGHBOR_WRAPPED;
5315 enc_succ_char(carry,
len, enc);
5316 return NEIGHBOR_WRAPPED;
5334 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5335 rb_enc_cr_str_copy_for_substr(str, orig);
5336 return str_succ(str);
5343 char *sbeg, *s, *e, *last_alnum = 0;
5344 int found_alnum = 0;
5346 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5347 long carry_pos = 0, carry_len = 1;
5348 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5350 slen = RSTRING_LEN(str);
5351 if (slen == 0)
return str;
5353 enc = STR_ENC_GET(str);
5354 sbeg = RSTRING_PTR(str);
5355 s = e = sbeg + slen;
5357 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5358 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5364 l = rb_enc_precise_mbclen(s, e, enc);
5365 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5366 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5367 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5369 case NEIGHBOR_NOT_CHAR:
5371 case NEIGHBOR_FOUND:
5373 case NEIGHBOR_WRAPPED:
5378 carry_pos = s - sbeg;
5383 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5384 enum neighbor_char neighbor;
5385 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5386 l = rb_enc_precise_mbclen(s, e, enc);
5387 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5388 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5390 neighbor = enc_succ_char(tmp, l, enc);
5392 case NEIGHBOR_FOUND:
5396 case NEIGHBOR_WRAPPED:
5399 case NEIGHBOR_NOT_CHAR:
5402 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5404 enc_succ_char(s, l, enc);
5406 if (!rb_enc_asciicompat(enc)) {
5407 MEMCPY(carry, s,
char, l);
5410 carry_pos = s - sbeg;
5414 RESIZE_CAPA(str, slen + carry_len);
5415 sbeg = RSTRING_PTR(str);
5416 s = sbeg + carry_pos;
5417 memmove(s + carry_len, s, slen - carry_pos);
5418 memmove(s, carry, carry_len);
5420 STR_SET_LEN(str, slen);
5421 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5437rb_str_succ_bang(
VALUE str)
5445all_digits_p(
const char *s,
long len)
5473 VALUE end, exclusive;
5477 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5483 VALUE current, after_end;
5490 enc = rb_enc_check(beg, end);
5491 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5493 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5494 char c = RSTRING_PTR(beg)[0];
5495 char e = RSTRING_PTR(end)[0];
5497 if (c > e || (excl && c == e))
return beg;
5499 VALUE str = rb_enc_str_new(&c, 1, enc);
5501 if ((*each)(str, arg))
break;
5502 if (!excl && c == e)
break;
5504 if (excl && c == e)
break;
5509 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5510 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5511 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5516 b = rb_str_to_inum(beg, 10, FALSE);
5517 e = rb_str_to_inum(end, 10, FALSE);
5524 if (excl && bi == ei)
break;
5525 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5530 ID op = excl ?
'<' : idLE;
5531 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5536 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5537 b = rb_funcallv(b, succ, 0, 0);
5544 if (n > 0 || (excl && n == 0))
return beg;
5546 after_end = rb_funcallv(end, succ, 0, 0);
5551 next = rb_funcallv(current, succ, 0, 0);
5552 if ((*each)(current, arg))
break;
5553 if (
NIL_P(next))
break;
5557 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5572 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5573 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5574 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5576 b = rb_str_to_inum(beg, 10, FALSE);
5582 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5590 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5591 b = rb_funcallv(b, succ, 0, 0);
5597 VALUE next = rb_funcallv(current, succ, 0, 0);
5598 if ((*each)(current, arg))
break;
5601 if (RSTRING_LEN(current) == 0)
5612 if (!
rb_equal(str, *argp))
return 0;
5626 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5627 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5628 rb_enc_asciicompat(STR_ENC_GET(val))) {
5629 const char *bp = RSTRING_PTR(beg);
5630 const char *ep = RSTRING_PTR(end);
5631 const char *vp = RSTRING_PTR(val);
5632 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5633 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5641 if (b <= v && v < e)
return Qtrue;
5642 return RBOOL(!
RTEST(exclusive) && v == e);
5649 all_digits_p(bp, RSTRING_LEN(beg)) &&
5650 all_digits_p(ep, RSTRING_LEN(end))) {
5655 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5657 return RBOOL(
NIL_P(val));
5680 return rb_str_subpat(str, indx,
INT2FIX(0));
5683 if (rb_str_index(str, indx, 0) != -1)
5689 long beg,
len = str_strlen(str, NULL);
5701 return str_substr(str, idx, 1, FALSE);
5718rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5722 return rb_str_subpat(str, argv[0], argv[1]);
5725 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5729 return rb_str_aref(str, argv[0]);
5735 char *ptr = RSTRING_PTR(str);
5736 long olen = RSTRING_LEN(str), nlen;
5738 str_modifiable(str);
5739 if (
len > olen)
len = olen;
5741 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5743 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5745 ptr =
RSTRING(str)->as.embed.ary;
5746 memmove(ptr, oldptr +
len, nlen);
5747 if (fl == STR_NOEMBED)
xfree(oldptr);
5750 if (!STR_SHARED_P(str)) {
5752 rb_enc_cr_str_exact_copy(shared, str);
5757 STR_SET_LEN(str, nlen);
5759 if (!SHARABLE_MIDDLE_SUBSTRING) {
5760 TERM_FILL(ptr + nlen, TERM_LEN(str));
5767rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5773 if (beg == 0 && vlen == 0) {
5778 str_modify_keep_cr(str);
5782 RESIZE_CAPA(str, slen + vlen -
len);
5783 sptr = RSTRING_PTR(str);
5792 memmove(sptr + beg + vlen,
5794 slen - (beg +
len));
5796 if (vlen < beg &&
len < 0) {
5800 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5803 STR_SET_LEN(str, slen);
5804 TERM_FILL(&sptr[slen], TERM_LEN(str));
5811 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5820 int singlebyte = single_byte_optimizable(str);
5826 enc = rb_enc_check(str, val);
5827 slen = str_strlen(str, enc);
5829 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5838 if (
len > slen - beg) {
5841 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5846 beg = p - RSTRING_PTR(str);
5848 rb_str_update_0(str, beg,
len, val);
5849 rb_enc_associate(str, enc);
5860 long start, end,
len;
5870 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5874 nth += regs->num_regs;
5884 enc = rb_enc_check_str(str, val);
5885 rb_str_update_0(str, start,
len, val);
5886 rb_enc_associate(str, enc);
5894 switch (
TYPE(indx)) {
5896 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5900 beg = rb_str_index(str, indx, 0);
5939rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5943 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5951 return rb_str_aset(str, argv[0], argv[1]);
6003rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6011 str_modify_keep_cr(str);
6019 if ((nth += regs->num_regs) <= 0)
return Qnil;
6021 else if (nth >= regs->num_regs)
return Qnil;
6023 len = END(nth) - beg;
6026 else if (argc == 2) {
6035 beg = p - RSTRING_PTR(str);
6039 beg = rb_str_index(str, indx, 0);
6040 if (beg == -1)
return Qnil;
6041 len = RSTRING_LEN(indx);
6053 beg = p - RSTRING_PTR(str);
6062 beg = p - RSTRING_PTR(str);
6066 rb_enc_cr_str_copy_for_substr(result, str);
6074 char *sptr = RSTRING_PTR(str);
6075 long slen = RSTRING_LEN(str);
6076 if (beg +
len > slen)
6080 slen - (beg +
len));
6082 STR_SET_LEN(str, slen);
6083 TERM_FILL(&sptr[slen], TERM_LEN(str));
6094 switch (OBJ_BUILTIN_TYPE(pat)) {
6113get_pat_quoted(
VALUE pat,
int check)
6117 switch (OBJ_BUILTIN_TYPE(pat)) {
6131 if (check && is_broken_string(pat)) {
6138rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6141 pos = rb_str_byteindex(str, pat, pos);
6142 if (set_backref_str) {
6144 str = rb_str_new_frozen_String(str);
6145 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6147 *match = match_data;
6157 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6162rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6164 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6182rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6196 hash = rb_check_hash_type(argv[1]);
6202 pat = get_pat_quoted(argv[0], 1);
6204 str_modifiable(str);
6205 beg = rb_pat_search(pat, str, 0, 1);
6219 end0 = beg0 + RSTRING_LEN(pat);
6228 if (iter || !
NIL_P(hash)) {
6229 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6235 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6238 str_mod_check(str, p,
len);
6239 rb_check_frozen(str);
6245 enc = rb_enc_compatible(str, repl);
6248 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6252 rb_enc_inspect_name(str_enc),
6253 rb_enc_inspect_name(STR_ENC_GET(repl)));
6255 enc = STR_ENC_GET(repl);
6258 rb_enc_associate(str, enc);
6268 rlen = RSTRING_LEN(repl);
6269 len = RSTRING_LEN(str);
6271 RESIZE_CAPA(str,
len + rlen - plen);
6273 p = RSTRING_PTR(str);
6275 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6277 rp = RSTRING_PTR(repl);
6278 memmove(p + beg0, rp, rlen);
6280 STR_SET_LEN(str,
len);
6281 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6304 rb_str_sub_bang(argc, argv, str);
6309str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6312 long beg, beg0, end0;
6313 long offset, blen, slen,
len, last;
6314 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6316 int need_backref_str = -1;
6326 hash = rb_check_hash_type(argv[1]);
6330 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6338 rb_error_arity(argc, 1, 2);
6341 pat = get_pat_quoted(argv[0], 1);
6342 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6345 if (bang)
return Qnil;
6350 blen = RSTRING_LEN(str) + 30;
6352 sp = RSTRING_PTR(str);
6353 slen = RSTRING_LEN(str);
6355 str_enc = STR_ENC_GET(str);
6356 rb_enc_associate(dest, str_enc);
6363 end0 = beg0 + RSTRING_LEN(pat);
6377 struct RString fake_str = {RBASIC_INIT};
6379 if (mode == FAST_MAP) {
6388 val = rb_hash_aref(hash, key);
6391 str_mod_check(str, sp, slen);
6396 else if (need_backref_str) {
6398 if (need_backref_str < 0) {
6399 need_backref_str = val != repl;
6406 len = beg0 - offset;
6420 if (RSTRING_LEN(str) <= end0)
break;
6421 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6423 offset = end0 +
len;
6425 cp = RSTRING_PTR(str) + offset;
6426 if (offset > RSTRING_LEN(str))
break;
6429 if (mode != FAST_MAP && mode != STR) {
6432 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6437 if (RSTRING_LEN(str) > offset) {
6440 rb_pat_search0(pat, str, last, 1, &match);
6442 str_shared_replace(str, dest);
6467rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6469 str_modify_keep_cr(str);
6470 return str_gsub(argc, argv, str, 1);
6520 return str_gsub(argc, argv, str, 0);
6540 str_modifiable(str);
6541 if (str == str2)
return str;
6545 return str_replace(str, str2);
6562rb_str_clear(
VALUE str)
6566 STR_SET_LEN(str, 0);
6567 RSTRING_PTR(str)[0] = 0;
6568 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6584rb_str_chr(
VALUE str)
6602 pos += RSTRING_LEN(str);
6603 if (pos < 0 || RSTRING_LEN(str) <= pos)
6606 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6626 long len = RSTRING_LEN(str);
6627 char *
ptr, *head, *left = 0;
6631 if (pos < -
len ||
len <= pos)
6638 char byte = (char)(
NUM2INT(w) & 0xFF);
6640 if (!str_independent(str))
6641 str_make_independent(str);
6642 enc = STR_ENC_GET(str);
6643 head = RSTRING_PTR(str);
6645 if (!STR_EMBED_P(str)) {
6652 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6660 width = rb_enc_precise_mbclen(left, head+
len, enc);
6662 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6678str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6680 long n = RSTRING_LEN(str);
6682 if (beg > n ||
len < 0)
return Qnil;
6685 if (beg < 0)
return Qnil;
6690 if (!empty)
return Qnil;
6694 VALUE str2 = str_subseq(str, beg,
len);
6696 str_enc_copy_direct(str2, str);
6698 if (RSTRING_LEN(str2) == 0) {
6699 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6733 long beg,
len = RSTRING_LEN(str);
6741 return str_byte_substr(str, beg,
len, TRUE);
6746 return str_byte_substr(str, idx, 1, FALSE);
6758rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6763 return str_byte_substr(str, beg,
len, TRUE);
6766 return str_byte_aref(str, argv[0]);
6770str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6772 long end, slen = RSTRING_LEN(str);
6775 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6784 if (*
len > slen - *beg) {
6788 str_ensure_byte_pos(str, *beg);
6789 str_ensure_byte_pos(str, end);
6803rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6805 long beg,
len, vbeg, vlen;
6810 if (!(argc == 2 || argc == 3 || argc == 5)) {
6811 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6815 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6816 rb_builtin_class_name(argv[0]));
6823 vlen = RSTRING_LEN(val);
6828 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6829 rb_builtin_class_name(argv[2]));
6841 vlen = RSTRING_LEN(val);
6849 str_check_beg_len(str, &beg, &
len);
6850 str_check_beg_len(val, &vbeg, &vlen);
6851 str_modify_keep_cr(str);
6854 rb_enc_associate(str, rb_enc_check(str, val));
6857 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6879rb_str_reverse(
VALUE str)
6886 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6887 enc = STR_ENC_GET(str);
6893 if (RSTRING_LEN(str) > 1) {
6894 if (single_byte_optimizable(str)) {
6901 int clen = rb_enc_fast_mbclen(s, e, enc);
6909 cr = rb_enc_asciicompat(enc) ?
6912 int clen = rb_enc_mbclen(s, e, enc);
6921 STR_SET_LEN(rev, RSTRING_LEN(str));
6922 str_enc_copy_direct(rev, str);
6944rb_str_reverse_bang(
VALUE str)
6946 if (RSTRING_LEN(str) > 1) {
6947 if (single_byte_optimizable(str)) {
6950 str_modify_keep_cr(str);
6951 s = RSTRING_PTR(str);
6960 str_shared_replace(str, rb_str_reverse(str));
6964 str_modify_keep_cr(str);
6993 i = rb_str_index(str, arg, 0);
6995 return RBOOL(i != -1);
7039 rb_raise(rb_eArgError,
"invalid radix %d", base);
7041 return rb_str_to_inum(str, base, FALSE);
7066rb_str_to_f(
VALUE str)
7083rb_str_to_s(
VALUE str)
7095 char s[RUBY_MAX_CHAR_LEN];
7096 int n = rb_enc_codelen(c, enc);
7098 rb_enc_mbcput(c, s, enc);
7103#define CHAR_ESC_LEN 13
7106rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7108 char buf[CHAR_ESC_LEN + 1];
7116 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7118 else if (c < 0x10000) {
7119 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7122 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7127 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7130 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7133 l = (int)strlen(buf);
7139ruby_escaped_char(
int c)
7142 case '\0':
return "\\0";
7143 case '\n':
return "\\n";
7144 case '\r':
return "\\r";
7145 case '\t':
return "\\t";
7146 case '\f':
return "\\f";
7147 case '\013':
return "\\v";
7148 case '\010':
return "\\b";
7149 case '\007':
return "\\a";
7150 case '\033':
return "\\e";
7151 case '\x7f':
return "\\c?";
7157rb_str_escape(
VALUE str)
7161 const char *p = RSTRING_PTR(str);
7163 const char *prev = p;
7164 char buf[CHAR_ESC_LEN + 1];
7166 int unicode_p = rb_enc_unicode_p(enc);
7167 int asciicompat = rb_enc_asciicompat(enc);
7172 int n = rb_enc_precise_mbclen(p, pend, enc);
7174 if (p > prev) str_buf_cat(result, prev, p - prev);
7175 n = rb_enc_mbminlen(enc);
7177 n = (int)(pend - p);
7179 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7180 str_buf_cat(result, buf, strlen(buf));
7186 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7188 cc = ruby_escaped_char(c);
7190 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7191 str_buf_cat(result, cc, strlen(cc));
7194 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7197 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7198 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7202 if (p > prev) str_buf_cat(result, prev, p - prev);
7221 const char *p, *pend, *prev;
7222 char buf[CHAR_ESC_LEN + 1];
7224 rb_encoding *resenc = rb_default_internal_encoding();
7225 int unicode_p = rb_enc_unicode_p(enc);
7226 int asciicompat = rb_enc_asciicompat(enc);
7228 if (resenc == NULL) resenc = rb_default_external_encoding();
7229 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7230 rb_enc_associate(result, resenc);
7231 str_buf_cat2(result,
"\"");
7239 n = rb_enc_precise_mbclen(p, pend, enc);
7241 if (p > prev) str_buf_cat(result, prev, p - prev);
7242 n = rb_enc_mbminlen(enc);
7244 n = (int)(pend - p);
7246 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7247 str_buf_cat(result, buf, strlen(buf));
7253 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7255 if ((asciicompat || unicode_p) &&
7256 (c ==
'"'|| c ==
'\\' ||
7261 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7262 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7263 str_buf_cat2(result,
"\\");
7264 if (asciicompat || enc == resenc) {
7270 case '\n': cc =
'n';
break;
7271 case '\r': cc =
'r';
break;
7272 case '\t': cc =
't';
break;
7273 case '\f': cc =
'f';
break;
7274 case '\013': cc =
'v';
break;
7275 case '\010': cc =
'b';
break;
7276 case '\007': cc =
'a';
break;
7277 case 033: cc =
'e';
break;
7278 default: cc = 0;
break;
7281 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7284 str_buf_cat(result, buf, 2);
7297 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7301 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7302 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7307 if (p > prev) str_buf_cat(result, prev, p - prev);
7308 str_buf_cat2(result,
"\"");
7313#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7326 int encidx = rb_enc_get_index(str);
7329 const char *p, *pend;
7332 int u8 = (encidx == rb_utf8_encindex());
7333 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7336 if (!rb_enc_asciicompat(enc)) {
7338 len += strlen(enc->name);
7341 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7344 unsigned char c = *p++;
7347 case '"':
case '\\':
7348 case '\n':
case '\r':
7349 case '\t':
case '\f':
7350 case '\013':
case '\010':
case '\007':
case '\033':
7355 clen = IS_EVSTR(p, pend) ? 2 : 1;
7363 if (u8 && c > 0x7F) {
7364 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7366 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7369 else if (cc <= 0xFFFFF)
7382 if (clen > LONG_MAX -
len) {
7389 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7390 q = RSTRING_PTR(result); qend = q +
len + 1;
7394 unsigned char c = *p++;
7396 if (c ==
'"' || c ==
'\\') {
7400 else if (c ==
'#') {
7401 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7404 else if (c ==
'\n') {
7408 else if (c ==
'\r') {
7412 else if (c ==
'\t') {
7416 else if (c ==
'\f') {
7420 else if (c ==
'\013') {
7424 else if (c ==
'\010') {
7428 else if (c ==
'\007') {
7432 else if (c ==
'\033') {
7442 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7444 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7447 snprintf(q, qend-q,
"u%04X", cc);
7449 snprintf(q, qend-q,
"u{%X}", cc);
7454 snprintf(q, qend-q,
"x%02X", c);
7460 if (!rb_enc_asciicompat(enc)) {
7461 snprintf(q, qend-q, nonascii_suffix, enc->name);
7462 encidx = rb_ascii8bit_encindex();
7465 rb_enc_associate_index(result, encidx);
7471unescape_ascii(
unsigned int c)
7495undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7497 const char *s = *ss;
7501 unsigned char buf[6];
7519 *buf = unescape_ascii(*s);
7531 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7532 if (*penc != enc_utf8) {
7534 rb_enc_associate(undumped, enc_utf8);
7551 if (hexlen == 0 || hexlen > 6) {
7557 if (0xd800 <= c && c <= 0xdfff) {
7560 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7570 if (0xd800 <= c && c <= 0xdfff) {
7573 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7601static VALUE rb_str_is_ascii_only_p(
VALUE str);
7613str_undump(
VALUE str)
7615 const char *s = RSTRING_PTR(str);
7618 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7620 bool binary =
false;
7624 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7627 if (!str_null_check(str, &w)) {
7630 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7631 if (*s !=
'"')
goto invalid_format;
7649 static const char force_encoding_suffix[] =
".force_encoding(\"";
7650 static const char dup_suffix[] =
".dup";
7651 const char *encname;
7656 size =
sizeof(dup_suffix) - 1;
7657 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7659 size =
sizeof(force_encoding_suffix) - 1;
7660 if (s_end - s <= size)
goto invalid_format;
7661 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7665 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7669 s = memchr(s,
'"', s_end-s);
7671 if (!s)
goto invalid_format;
7672 if (s_end - s != 2)
goto invalid_format;
7673 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7675 encidx = rb_enc_find_index2(encname, (
long)size);
7679 rb_enc_associate_index(undumped, encidx);
7689 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7700 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7706 if (rb_enc_dummy_p(enc)) {
7713str_true_enc(
VALUE str)
7716 rb_str_check_dummy_enc(enc);
7720static OnigCaseFoldType
7721check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7726 rb_raise(rb_eArgError,
"too many options");
7727 if (argv[0]==sym_turkic) {
7728 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7730 if (argv[1]==sym_lithuanian)
7731 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7733 rb_raise(rb_eArgError,
"invalid second option");
7736 else if (argv[0]==sym_lithuanian) {
7737 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7739 if (argv[1]==sym_turkic)
7740 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7742 rb_raise(rb_eArgError,
"invalid second option");
7746 rb_raise(rb_eArgError,
"too many options");
7747 else if (argv[0]==sym_ascii)
7748 flags |= ONIGENC_CASE_ASCII_ONLY;
7749 else if (argv[0]==sym_fold) {
7750 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7751 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7753 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7756 rb_raise(rb_eArgError,
"invalid option");
7763 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
7769#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7770#ifndef CASEMAP_DEBUG
7771# define CASEMAP_DEBUG 0
7779 OnigUChar space[FLEX_ARY_LEN];
7783mapping_buffer_free(
void *p)
7787 while (current_buffer) {
7788 previous_buffer = current_buffer;
7789 current_buffer = current_buffer->next;
7790 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7796 {0, mapping_buffer_free,},
7797 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7805 const OnigUChar *source_current, *source_end;
7806 int target_length = 0;
7807 VALUE buffer_anchor;
7810 size_t buffer_count = 0;
7811 int buffer_length_or_invalid;
7813 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7815 source_current = (OnigUChar*)RSTRING_PTR(source);
7820 while (source_current < source_end) {
7822 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7823 if (CASEMAP_DEBUG) {
7824 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7827 *pre_buffer = current_buffer;
7828 pre_buffer = ¤t_buffer->next;
7829 current_buffer->next = NULL;
7830 current_buffer->capa =
capa;
7831 buffer_length_or_invalid = enc->case_map(flags,
7832 &source_current, source_end,
7833 current_buffer->space,
7834 current_buffer->space+current_buffer->capa,
7836 if (buffer_length_or_invalid < 0) {
7837 current_buffer =
DATA_PTR(buffer_anchor);
7839 mapping_buffer_free(current_buffer);
7840 rb_raise(rb_eArgError,
"input string invalid");
7842 target_length += current_buffer->used = buffer_length_or_invalid;
7844 if (CASEMAP_DEBUG) {
7845 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7848 if (buffer_count==1) {
7849 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7852 char *target_current;
7855 target_current = RSTRING_PTR(target);
7856 current_buffer =
DATA_PTR(buffer_anchor);
7857 while (current_buffer) {
7858 memcpy(target_current, current_buffer->space, current_buffer->used);
7859 target_current += current_buffer->used;
7860 current_buffer = current_buffer->next;
7863 current_buffer =
DATA_PTR(buffer_anchor);
7865 mapping_buffer_free(current_buffer);
7870 str_enc_copy_direct(target, source);
7879 const OnigUChar *source_current, *source_end;
7880 OnigUChar *target_current, *target_end;
7881 long old_length = RSTRING_LEN(source);
7882 int length_or_invalid;
7884 if (old_length == 0)
return Qnil;
7886 source_current = (OnigUChar*)RSTRING_PTR(source);
7888 if (source == target) {
7889 target_current = (OnigUChar*)source_current;
7890 target_end = (OnigUChar*)source_end;
7893 target_current = (OnigUChar*)RSTRING_PTR(target);
7897 length_or_invalid = onigenc_ascii_only_case_map(flags,
7898 &source_current, source_end,
7899 target_current, target_end, enc);
7900 if (length_or_invalid < 0)
7901 rb_raise(rb_eArgError,
"input string invalid");
7902 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7903 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7904 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7905 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7906 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7909 str_enc_copy(target, source);
7915upcase_single(
VALUE str)
7917 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7918 bool modified =
false;
7921 unsigned int c = *(
unsigned char*)s;
7923 if (
'a' <= c && c <=
'z') {
7924 *s =
'A' + (c -
'a');
7945rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7948 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7950 flags = check_case_options(argc, argv, flags);
7951 str_modify_keep_cr(str);
7952 enc = str_true_enc(str);
7953 if (case_option_single_p(flags, enc, str)) {
7954 if (upcase_single(str))
7955 flags |= ONIGENC_CASE_MODIFIED;
7957 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7958 rb_str_ascii_casemap(str, str, &flags, enc);
7960 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7962 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7975rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7978 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7981 flags = check_case_options(argc, argv, flags);
7982 enc = str_true_enc(str);
7983 if (case_option_single_p(flags, enc, str)) {
7984 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7985 str_enc_copy_direct(ret, str);
7988 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7990 rb_str_ascii_casemap(str, ret, &flags, enc);
7993 ret = rb_str_casemap(str, &flags, enc);
8000downcase_single(
VALUE str)
8002 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8003 bool modified =
false;
8006 unsigned int c = *(
unsigned char*)s;
8008 if (
'A' <= c && c <=
'Z') {
8009 *s =
'a' + (c -
'A');
8031rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8034 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8036 flags = check_case_options(argc, argv, flags);
8037 str_modify_keep_cr(str);
8038 enc = str_true_enc(str);
8039 if (case_option_single_p(flags, enc, str)) {
8040 if (downcase_single(str))
8041 flags |= ONIGENC_CASE_MODIFIED;
8043 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8044 rb_str_ascii_casemap(str, str, &flags, enc);
8046 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8048 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8062rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8065 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8068 flags = check_case_options(argc, argv, flags);
8069 enc = str_true_enc(str);
8070 if (case_option_single_p(flags, enc, str)) {
8071 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8072 str_enc_copy_direct(ret, str);
8073 downcase_single(ret);
8075 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8077 rb_str_ascii_casemap(str, ret, &flags, enc);
8080 ret = rb_str_casemap(str, &flags, enc);
8100rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8103 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8105 flags = check_case_options(argc, argv, flags);
8106 str_modify_keep_cr(str);
8107 enc = str_true_enc(str);
8108 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8109 if (flags&ONIGENC_CASE_ASCII_ONLY)
8110 rb_str_ascii_casemap(str, str, &flags, enc);
8112 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8114 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8128rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8131 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8134 flags = check_case_options(argc, argv, flags);
8135 enc = str_true_enc(str);
8136 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8137 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8139 rb_str_ascii_casemap(str, ret, &flags, enc);
8142 ret = rb_str_casemap(str, &flags, enc);
8161rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8164 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8166 flags = check_case_options(argc, argv, flags);
8167 str_modify_keep_cr(str);
8168 enc = str_true_enc(str);
8169 if (flags&ONIGENC_CASE_ASCII_ONLY)
8170 rb_str_ascii_casemap(str, str, &flags, enc);
8172 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8174 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8188rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8191 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8194 flags = check_case_options(argc, argv, flags);
8195 enc = str_true_enc(str);
8196 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8197 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8199 rb_str_ascii_casemap(str, ret, &flags, enc);
8202 ret = rb_str_casemap(str, &flags, enc);
8207typedef unsigned char *USTR;
8211 unsigned int now, max;
8223 if (t->p == t->pend)
return -1;
8224 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8227 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8229 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8231 if (t->p < t->pend) {
8232 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8235 if (t->now < 0x80 && c < 0x80) {
8236 rb_raise(rb_eArgError,
8237 "invalid range \"%c-%c\" in string transliteration",
8241 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8245 else if (t->now < c) {
8254 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8255 if (t->now == t->max) {
8260 if (t->now < t->max) {
8276 const unsigned int errc = -1;
8277 unsigned int trans[256];
8279 struct tr trsrc, trrepl;
8281 unsigned int c, c0, last = 0;
8282 int modify = 0, i, l;
8283 unsigned char *s, *send;
8285 int singlebyte = single_byte_optimizable(str);
8289#define CHECK_IF_ASCII(c) \
8290 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8291 (cr = ENC_CODERANGE_VALID) : 0)
8295 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8296 if (RSTRING_LEN(repl) == 0) {
8297 return rb_str_delete_bang(1, &src, str);
8301 e1 = rb_enc_check(str, src);
8302 e2 = rb_enc_check(str, repl);
8307 enc = rb_enc_check(src, repl);
8309 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8310 if (RSTRING_LEN(src) > 1 &&
8311 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8312 trsrc.p + l < trsrc.pend) {
8316 trrepl.p = RSTRING_PTR(repl);
8317 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8318 trsrc.gen = trrepl.gen = 0;
8319 trsrc.now = trrepl.now = 0;
8320 trsrc.max = trrepl.max = 0;
8323 for (i=0; i<256; i++) {
8326 while ((c = trnext(&trsrc, enc)) != errc) {
8331 if (!hash) hash = rb_hash_new();
8335 while ((c = trnext(&trrepl, enc)) != errc)
8338 for (i=0; i<256; i++) {
8339 if (trans[i] != errc) {
8347 for (i=0; i<256; i++) {
8350 while ((c = trnext(&trsrc, enc)) != errc) {
8351 r = trnext(&trrepl, enc);
8352 if (r == errc) r = trrepl.now;
8355 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8358 if (!hash) hash = rb_hash_new();
8366 str_modify_keep_cr(str);
8367 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8368 termlen = rb_enc_mbminlen(enc);
8371 long offset, max = RSTRING_LEN(str);
8372 unsigned int save = -1;
8373 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8378 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8381 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8384 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8386 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8395 if (cflag) c = last;
8398 else if (cflag) c = errc;
8404 if (c != (
unsigned int)-1) {
8410 tlen = rb_enc_codelen(c, enc);
8416 if (enc != e1) may_modify = 1;
8418 if ((offset = t - buf) + tlen > max) {
8419 size_t MAYBE_UNUSED(old) = max + termlen;
8420 max = offset + tlen + (send - s);
8421 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8424 rb_enc_mbcput(c, t, enc);
8425 if (may_modify && memcmp(s, t, tlen) != 0) {
8431 if (!STR_EMBED_P(str)) {
8432 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8434 TERM_FILL((
char *)t, termlen);
8435 RSTRING(str)->as.heap.ptr = (
char *)buf;
8436 STR_SET_LEN(str, t - buf);
8437 STR_SET_NOEMBED(str);
8438 RSTRING(str)->as.heap.aux.capa = max;
8442 c = (
unsigned char)*s;
8443 if (trans[c] != errc) {
8460 long offset, max = (long)((send - s) * 1.2);
8461 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8466 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8469 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8472 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8474 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8482 if (cflag) c = last;
8485 else if (cflag) c = errc;
8489 c = cflag ? last : errc;
8492 tlen = rb_enc_codelen(c, enc);
8497 if (enc != e1) may_modify = 1;
8499 if ((offset = t - buf) + tlen > max) {
8500 size_t MAYBE_UNUSED(old) = max + termlen;
8501 max = offset + tlen + (long)((send - s) * 1.2);
8502 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8506 rb_enc_mbcput(c, t, enc);
8507 if (may_modify && memcmp(s, t, tlen) != 0) {
8515 if (!STR_EMBED_P(str)) {
8516 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8518 TERM_FILL((
char *)t, termlen);
8519 RSTRING(str)->as.heap.ptr = (
char *)buf;
8520 STR_SET_LEN(str, t - buf);
8521 STR_SET_NOEMBED(str);
8522 RSTRING(str)->as.heap.aux.capa = max;
8528 rb_enc_associate(str, enc);
8550 return tr_trans(str, src, repl, 0);
8595 tr_trans(str, src, repl, 0);
8599#define TR_TABLE_MAX (UCHAR_MAX+1)
8600#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8602tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8605 const unsigned int errc = -1;
8606 char buf[TR_TABLE_MAX];
8609 VALUE table = 0, ptable = 0;
8610 int i, l, cflag = 0;
8612 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8613 tr.gen =
tr.now =
tr.max = 0;
8615 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8620 for (i=0; i<TR_TABLE_MAX; i++) {
8623 stable[TR_TABLE_MAX] = cflag;
8625 else if (stable[TR_TABLE_MAX] && !cflag) {
8626 stable[TR_TABLE_MAX] = 0;
8628 for (i=0; i<TR_TABLE_MAX; i++) {
8632 while ((c = trnext(&
tr, enc)) != errc) {
8633 if (c < TR_TABLE_MAX) {
8634 buf[(
unsigned char)c] = !cflag;
8639 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8642 table = ptable ? ptable : rb_hash_new();
8646 table = rb_hash_new();
8651 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8652 rb_hash_aset(table, key,
Qtrue);
8656 for (i=0; i<TR_TABLE_MAX; i++) {
8657 stable[i] = stable[i] && buf[i];
8659 if (!table && !cflag) {
8666tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8668 if (c < TR_TABLE_MAX) {
8669 return table[c] != 0;
8675 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8676 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8680 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8683 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8698rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8700 char squeez[TR_TABLE_SIZE];
8703 VALUE del = 0, nodel = 0;
8705 int i, ascompat, cr;
8707 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8709 for (i=0; i<argc; i++) {
8713 enc = rb_enc_check(str, s);
8714 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8717 str_modify_keep_cr(str);
8718 ascompat = rb_enc_asciicompat(enc);
8719 s = t = RSTRING_PTR(str);
8726 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8737 c = rb_enc_codepoint_len(s, send, &clen, enc);
8739 if (tr_find(c, squeez, del, nodel)) {
8743 if (t != s) rb_enc_mbcput(c, t, enc);
8750 TERM_FILL(t, TERM_LEN(str));
8751 STR_SET_LEN(str, t - RSTRING_PTR(str));
8754 if (modify)
return str;
8768rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8771 rb_str_delete_bang(argc, argv, str);
8789rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8791 char squeez[TR_TABLE_SIZE];
8793 VALUE del = 0, nodel = 0;
8794 unsigned char *s, *send, *t;
8796 int ascompat, singlebyte = single_byte_optimizable(str);
8800 enc = STR_ENC_GET(str);
8803 for (i=0; i<argc; i++) {
8807 enc = rb_enc_check(str, s);
8808 if (singlebyte && !single_byte_optimizable(s))
8810 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8814 str_modify_keep_cr(str);
8815 s = t = (
unsigned char *)RSTRING_PTR(str);
8816 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8819 ascompat = rb_enc_asciicompat(enc);
8823 unsigned int c = *s++;
8824 if (c != save || (argc > 0 && !squeez[c])) {
8834 if (ascompat && (c = *s) < 0x80) {
8835 if (c != save || (argc > 0 && !squeez[c])) {
8841 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8843 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8844 if (t != s) rb_enc_mbcput(c, t, enc);
8853 TERM_FILL((
char *)t, TERM_LEN(str));
8854 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8855 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8859 if (modify)
return str;
8873rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8876 rb_str_squeeze_bang(argc, argv, str);
8896 return tr_trans(str, src, repl, 1);
8924 tr_trans(str, src, repl, 1);
8937rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8939 char table[TR_TABLE_SIZE];
8941 VALUE del = 0, nodel = 0, tstr;
8951 enc = rb_enc_check(str, tstr);
8954 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8955 (ptstr = RSTRING_PTR(tstr),
8956 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8957 !is_broken_string(str)) {
8959 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8961 s = RSTRING_PTR(str);
8962 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8965 if (*(
unsigned char*)s++ == c) n++;
8971 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8972 for (i=1; i<argc; i++) {
8975 enc = rb_enc_check(str, tstr);
8976 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8979 s = RSTRING_PTR(str);
8980 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8982 ascompat = rb_enc_asciicompat(enc);
8986 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8994 c = rb_enc_codepoint_len(s, send, &clen, enc);
8995 if (tr_find(c, table, del, nodel)) {
9006rb_fs_check(
VALUE val)
9010 if (
NIL_P(val))
return 0;
9015static const char isspacetable[256] = {
9016 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9017 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9018 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9019 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9020 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9021 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9022 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9023 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9024 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9025 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9026 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9027 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9028 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9029 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9030 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9031 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9034#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9037split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9039 if (empty_count >= 0 &&
len == 0) {
9040 return empty_count + 1;
9042 if (empty_count > 0) {
9047 }
while (--empty_count > 0);
9051 rb_yield(str_new_empty_String(str));
9052 }
while (--empty_count > 0);
9066 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9070literal_split_pattern(
VALUE spat, split_type_t default_type)
9078 return SPLIT_TYPE_CHARS;
9080 else if (rb_enc_asciicompat(enc)) {
9081 if (
len == 1 && ptr[0] ==
' ') {
9082 return SPLIT_TYPE_AWK;
9087 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9088 return SPLIT_TYPE_AWK;
9091 return default_type;
9104rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9109 split_type_t split_type;
9110 long beg, end, i = 0, empty_count = -1;
9115 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9117 if (lim <= 0) limit =
Qnil;
9118 else if (lim == 1) {
9119 if (RSTRING_LEN(str) == 0)
9130 if (
NIL_P(limit) && !lim) empty_count = 0;
9132 enc = STR_ENC_GET(str);
9133 split_type = SPLIT_TYPE_REGEXP;
9135 spat = get_pat_quoted(spat, 0);
9137 else if (
NIL_P(spat = rb_fs)) {
9138 split_type = SPLIT_TYPE_AWK;
9140 else if (!(spat = rb_fs_check(spat))) {
9141 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9146 if (split_type != SPLIT_TYPE_AWK) {
9151 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9152 if (split_type == SPLIT_TYPE_AWK) {
9154 split_type = SPLIT_TYPE_STRING;
9159 mustnot_broken(spat);
9160 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9168#define SPLIT_STR(beg, len) ( \
9169 empty_count = split_string(result, str, beg, len, empty_count), \
9170 str_mod_check(str, str_start, str_len))
9173 char *ptr = RSTRING_PTR(str);
9174 char *
const str_start = ptr;
9175 const long str_len = RSTRING_LEN(str);
9176 char *
const eptr = str_start + str_len;
9177 if (split_type == SPLIT_TYPE_AWK) {
9184 if (is_ascii_string(str)) {
9185 while (ptr < eptr) {
9186 c = (
unsigned char)*ptr++;
9188 if (ascii_isspace(c)) {
9194 if (!
NIL_P(limit) && lim <= i)
break;
9197 else if (ascii_isspace(c)) {
9198 SPLIT_STR(beg, end-beg);
9201 if (!
NIL_P(limit)) ++i;
9209 while (ptr < eptr) {
9212 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9221 if (!
NIL_P(limit) && lim <= i)
break;
9225 SPLIT_STR(beg, end-beg);
9228 if (!
NIL_P(limit)) ++i;
9236 else if (split_type == SPLIT_TYPE_STRING) {
9237 char *substr_start = ptr;
9238 char *sptr = RSTRING_PTR(spat);
9239 long slen = RSTRING_LEN(spat);
9242 mustnot_broken(str);
9243 enc = rb_enc_check(str, spat);
9244 while (ptr < eptr &&
9245 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9248 if (t != ptr + end) {
9252 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9253 str_mod_check(spat, sptr, slen);
9256 if (!
NIL_P(limit) && lim <= ++i)
break;
9258 beg = ptr - str_start;
9260 else if (split_type == SPLIT_TYPE_CHARS) {
9264 mustnot_broken(str);
9265 enc = rb_enc_get(str);
9266 while (ptr < eptr &&
9267 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9268 SPLIT_STR(ptr - str_start, n);
9270 if (!
NIL_P(limit) && lim <= ++i)
break;
9272 beg = ptr - str_start;
9276 long len = RSTRING_LEN(str);
9284 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9289 if (start == end && BEG(0) == END(0)) {
9294 else if (last_null == 1) {
9295 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9302 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9308 SPLIT_STR(beg, end-beg);
9309 beg = start = END(0);
9313 for (idx=1; idx < regs->num_regs; idx++) {
9314 if (BEG(idx) == -1)
continue;
9315 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9317 if (!
NIL_P(limit) && lim <= ++i)
break;
9319 if (match) rb_match_unbusy(match);
9321 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9322 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9325 return result ? result : str;
9335 return rb_str_split_m(1, &sep, str);
9338#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9353#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9356chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9358 const char *prev = rb_enc_prev_char(p, e, e, enc);
9361 prev = rb_enc_prev_char(p, e, e, enc);
9362 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9374 RSTRING_LEN(rs) != 1 ||
9375 RSTRING_PTR(rs)[0] !=
'\n')) {
9381#define rb_rs get_rs()
9388 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9389 long pos,
len, rslen;
9395 static ID keywords[1];
9400 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9404 if (!ENUM_ELEM(ary, str)) {
9412 if (!RSTRING_LEN(str))
goto end;
9414 ptr = subptr = RSTRING_PTR(str);
9416 len = RSTRING_LEN(str);
9418 rslen = RSTRING_LEN(rs);
9421 enc = rb_enc_get(str);
9423 enc = rb_enc_check(str, rs);
9428 const char *eol = NULL;
9430 while (subend < pend) {
9431 long chomp_rslen = 0;
9433 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9435 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9437 if (eol == subend)
break;
9441 chomp_rslen = -rslen;
9445 if (!subptr) subptr = subend;
9449 }
while (subend < pend);
9451 if (rslen == 0) chomp_rslen = 0;
9453 subend - subptr + (chomp ? chomp_rslen : rslen));
9454 if (ENUM_ELEM(ary, line)) {
9455 str_mod_check(str, ptr,
len);
9457 subptr = eol = NULL;
9462 rsptr = RSTRING_PTR(rs);
9463 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9472 rsptr = RSTRING_PTR(rs);
9473 rslen = RSTRING_LEN(rs);
9476 while (subptr < pend) {
9477 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9481 if (hit != adjusted) {
9485 subend = hit += rslen;
9488 subend = chomp_newline(subptr, subend, enc);
9495 if (ENUM_ELEM(ary, line)) {
9496 str_mod_check(str, ptr,
len);
9501 if (subptr != pend) {
9504 pend = chomp_newline(subptr, pend, enc);
9506 else if (pend - subptr >= rslen &&
9507 memcmp(pend - rslen, rsptr, rslen) == 0) {
9512 ENUM_ELEM(ary, line);
9533rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9536 return rb_str_enumerate_lines(argc, argv, str, 0);
9591rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9593 VALUE ary = WANTARRAY(
"lines", 0);
9594 return rb_str_enumerate_lines(argc, argv, str, ary);
9608 for (i=0; i<RSTRING_LEN(str); i++) {
9609 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9627rb_str_each_byte(
VALUE str)
9630 return rb_str_enumerate_bytes(str, 0);
9642rb_str_bytes(
VALUE str)
9644 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9645 return rb_str_enumerate_bytes(str, ary);
9663 ptr = RSTRING_PTR(str);
9664 len = RSTRING_LEN(str);
9665 enc = rb_enc_get(str);
9668 for (i = 0; i <
len; i += n) {
9669 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9674 for (i = 0; i <
len; i += n) {
9675 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9696rb_str_each_char(
VALUE str)
9699 return rb_str_enumerate_chars(str, 0);
9711rb_str_chars(
VALUE str)
9714 return rb_str_enumerate_chars(str, ary);
9718rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9723 const char *ptr, *end;
9726 if (single_byte_optimizable(str))
9727 return rb_str_enumerate_bytes(str, ary);
9730 ptr = RSTRING_PTR(str);
9732 enc = STR_ENC_GET(str);
9735 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9756rb_str_each_codepoint(
VALUE str)
9759 return rb_str_enumerate_codepoints(str, 0);
9771rb_str_codepoints(
VALUE str)
9774 return rb_str_enumerate_codepoints(str, ary);
9780 int encidx = rb_enc_to_index(enc);
9782 const OnigUChar source_ascii[] =
"\\X";
9783 const OnigUChar *source = source_ascii;
9784 size_t source_len =
sizeof(source_ascii) - 1;
9787#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9788#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9789#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9790#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9791#define CASE_UTF(e) \
9792 case ENCINDEX_UTF_##e: { \
9793 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9794 source = source_UTF_##e; \
9795 source_len = sizeof(source_UTF_##e); \
9798 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9806 regex_t *reg_grapheme_cluster;
9808 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9809 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9811 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9812 onig_error_code_to_str(message, r, &einfo);
9813 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9816 return reg_grapheme_cluster;
9822 int encidx = rb_enc_to_index(enc);
9823 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9825 if (encidx == rb_utf8_encindex()) {
9826 if (!reg_grapheme_cluster_utf8) {
9827 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9830 return reg_grapheme_cluster_utf8;
9839 size_t grapheme_cluster_count = 0;
9841 const char *ptr, *end;
9843 if (!rb_enc_unicode_p(enc)) {
9847 bool cached_reg_grapheme_cluster =
true;
9848 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9849 if (!reg_grapheme_cluster) {
9850 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9851 cached_reg_grapheme_cluster =
false;
9854 ptr = RSTRING_PTR(str);
9858 OnigPosition
len = onig_match(reg_grapheme_cluster,
9859 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9860 (
const OnigUChar *)ptr, NULL, 0);
9861 if (
len <= 0)
break;
9862 grapheme_cluster_count++;
9866 if (!cached_reg_grapheme_cluster) {
9867 onig_free(reg_grapheme_cluster);
9870 return SIZET2NUM(grapheme_cluster_count);
9874rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9878 const char *ptr0, *ptr, *end;
9880 if (!rb_enc_unicode_p(enc)) {
9881 return rb_str_enumerate_chars(str, ary);
9886 bool cached_reg_grapheme_cluster =
true;
9887 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9888 if (!reg_grapheme_cluster) {
9889 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9890 cached_reg_grapheme_cluster =
false;
9893 ptr0 = ptr = RSTRING_PTR(str);
9897 OnigPosition
len = onig_match(reg_grapheme_cluster,
9898 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9899 (
const OnigUChar *)ptr, NULL, 0);
9900 if (
len <= 0)
break;
9905 if (!cached_reg_grapheme_cluster) {
9906 onig_free(reg_grapheme_cluster);
9926rb_str_each_grapheme_cluster(
VALUE str)
9929 return rb_str_enumerate_grapheme_clusters(str, 0);
9941rb_str_grapheme_clusters(
VALUE str)
9944 return rb_str_enumerate_grapheme_clusters(str, ary);
9948chopped_length(
VALUE str)
9951 const char *p, *p2, *beg, *end;
9953 beg = RSTRING_PTR(str);
9954 end = beg + RSTRING_LEN(str);
9955 if (beg >= end)
return 0;
9956 p = rb_enc_prev_char(beg, end, end, enc);
9958 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9959 p2 = rb_enc_prev_char(beg, p, end, enc);
9960 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9978rb_str_chop_bang(
VALUE str)
9980 str_modify_keep_cr(str);
9981 if (RSTRING_LEN(str) > 0) {
9983 len = chopped_length(str);
9984 STR_SET_LEN(str,
len);
9985 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10004rb_str_chop(
VALUE str)
10010smart_chomp(
VALUE str,
const char *e,
const char *p)
10013 if (rb_enc_mbminlen(enc) > 1) {
10018 pp = e - rb_enc_mbminlen(enc);
10021 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10029 if (--e > p && *(e-1) ==
'\r') {
10046 char *pp, *e, *rsptr;
10048 char *
const p = RSTRING_PTR(str);
10049 long len = RSTRING_LEN(str);
10051 if (
len == 0)
return 0;
10054 return smart_chomp(str, e, p);
10057 enc = rb_enc_get(str);
10060 if (rb_enc_mbminlen(enc) > 1) {
10065 pp -= rb_enc_mbminlen(enc);
10068 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10075 while (e > p && *(e-1) ==
'\n') {
10077 if (e > p && *(e-1) ==
'\r')
10083 if (rslen >
len)
return len;
10085 enc = rb_enc_get(rs);
10086 newline = rsptr[rslen-1];
10087 if (rslen == rb_enc_mbminlen(enc)) {
10089 if (newline ==
'\n')
10090 return smart_chomp(str, e, p);
10094 return smart_chomp(str, e, p);
10098 enc = rb_enc_check(str, rs);
10099 if (is_broken_string(rs)) {
10103 if (p[
len-1] == newline &&
10105 memcmp(rsptr, pp, rslen) == 0)) {
10106 if (at_char_boundary(p, pp, e, enc))
10107 return len - rslen;
10119chomp_rs(
int argc,
const VALUE *argv)
10123 VALUE rs = argv[0];
10135 long olen = RSTRING_LEN(str);
10136 long len = chompped_length(str, rs);
10137 if (
len >= olen)
return Qnil;
10138 str_modify_keep_cr(str);
10139 STR_SET_LEN(str,
len);
10140 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10160rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10163 str_modifiable(str);
10164 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10165 rs = chomp_rs(argc, argv);
10167 return rb_str_chomp_string(str, rs);
10180rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10182 VALUE rs = chomp_rs(argc, argv);
10190 const char *
const start = s;
10192 if (!s || s >= e)
return 0;
10195 if (single_byte_optimizable(str)) {
10196 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10201 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10223rb_str_lstrip_bang(
VALUE str)
10227 long olen, loffset;
10229 str_modify_keep_cr(str);
10230 enc = STR_ENC_GET(str);
10232 loffset = lstrip_offset(str, start, start+olen, enc);
10234 long len = olen-loffset;
10235 s = start + loffset;
10236 memmove(start, s,
len);
10237 STR_SET_LEN(str,
len);
10238 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10262rb_str_lstrip(
VALUE str)
10267 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10268 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10277 rb_str_check_dummy_enc(enc);
10281 if (!s || s >= e)
return 0;
10285 if (single_byte_optimizable(str)) {
10287 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10292 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10314rb_str_rstrip_bang(
VALUE str)
10318 long olen, roffset;
10320 str_modify_keep_cr(str);
10321 enc = STR_ENC_GET(str);
10323 roffset = rstrip_offset(str, start, start+olen, enc);
10325 long len = olen - roffset;
10327 STR_SET_LEN(str,
len);
10328 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10351rb_str_rstrip(
VALUE str)
10355 long olen, roffset;
10357 enc = STR_ENC_GET(str);
10359 roffset = rstrip_offset(str, start, start+olen, enc);
10361 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10379rb_str_strip_bang(
VALUE str)
10382 long olen, loffset, roffset;
10385 str_modify_keep_cr(str);
10386 enc = STR_ENC_GET(str);
10388 loffset = lstrip_offset(str, start, start+olen, enc);
10389 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10391 if (loffset > 0 || roffset > 0) {
10392 long len = olen-roffset;
10395 memmove(start, start + loffset,
len);
10397 STR_SET_LEN(str,
len);
10398 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10421rb_str_strip(
VALUE str)
10424 long olen, loffset, roffset;
10428 loffset = lstrip_offset(str, start, start+olen, enc);
10429 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10431 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10436scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10439 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10445 end = pos + RSTRING_LEN(pat);
10459 if (RSTRING_LEN(str) > end)
10460 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10469 if (!regs || regs->num_regs == 1) {
10475 for (
int i = 1; i < regs->num_regs; i++) {
10506 long last = -1, prev = 0;
10507 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10509 pat = get_pat_quoted(pat, 1);
10510 mustnot_broken(str);
10514 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10519 if (last >= 0) rb_pat_search(pat, str, last, 1);
10524 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10528 str_mod_check(str, p,
len);
10530 if (last >= 0) rb_pat_search(pat, str, last, 1);
10582rb_str_hex(
VALUE str)
10584 return rb_str_to_inum(str, 16, FALSE);
10668rb_str_oct(
VALUE str)
10670 return rb_str_to_inum(str, -8, FALSE);
10673#ifndef HAVE_CRYPT_R
10678 rb_nativethread_lock_t lock;
10679} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10748# define CRYPT_END() ALLOCV_END(databuf)
10751 extern char *crypt(
const char *,
const char *);
10752# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10755 const char *s, *saltp;
10758 char salt_8bit_clean[3];
10762 mustnot_wchar(str);
10763 mustnot_wchar(salt);
10765 saltp = RSTRING_PTR(salt);
10766 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10767 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10771 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10772 salt_8bit_clean[0] = saltp[0] & 0x7f;
10773 salt_8bit_clean[1] = saltp[1] & 0x7f;
10774 salt_8bit_clean[2] =
'\0';
10775 saltp = salt_8bit_clean;
10780# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10781 data->initialized = 0;
10783 res = crypt_r(s, saltp, data);
10786 res = crypt(s, saltp);
10801 size_t res_size = strlen(res)+1;
10802 tmp_buf =
ALLOCA_N(
char, res_size);
10803 memcpy(tmp_buf, res, res_size);
10840 char *ptr, *p, *pend;
10843 unsigned long sum0 = 0;
10848 ptr = p = RSTRING_PTR(str);
10849 len = RSTRING_LEN(str);
10855 str_mod_check(str, ptr,
len);
10858 sum0 += (
unsigned char)*p;
10869 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10870 sum0 &= (((
unsigned long)1)<<bits)-1;
10890rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10894 long width,
len, flen = 1, fclen = 1;
10897 const char *f =
" ";
10898 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10900 int singlebyte = 1, cr;
10904 enc = STR_ENC_GET(str);
10905 termlen = rb_enc_mbminlen(enc);
10909 enc = rb_enc_check(str, pad);
10910 f = RSTRING_PTR(pad);
10911 flen = RSTRING_LEN(pad);
10912 fclen = str_strlen(pad, enc);
10913 singlebyte = single_byte_optimizable(pad);
10914 if (flen == 0 || fclen == 0) {
10915 rb_raise(rb_eArgError,
"zero width padding");
10918 len = str_strlen(str, enc);
10919 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10921 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10925 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10926 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10928 size = RSTRING_LEN(str);
10929 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10930 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10931 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10932 rb_raise(rb_eArgError,
"argument too big");
10936 p = RSTRING_PTR(res);
10938 memset(p, *f, llen);
10942 while (llen >= fclen) {
10948 memcpy(p, f, llen2);
10952 memcpy(p, RSTRING_PTR(str), size);
10955 memset(p, *f, rlen);
10959 while (rlen >= fclen) {
10965 memcpy(p, f, rlen2);
10969 TERM_FILL(p, termlen);
10970 STR_SET_LEN(res, p-RSTRING_PTR(res));
10991rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10993 return rb_str_justify(argc, argv, str,
'l');
11005rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11007 return rb_str_justify(argc, argv, str,
'r');
11020rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11022 return rb_str_justify(argc, argv, str,
'c');
11038 sep = get_pat_quoted(sep, 0);
11050 pos = rb_str_index(str, sep, 0);
11051 if (pos < 0)
goto failed;
11056 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11059 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11073 long pos = RSTRING_LEN(str);
11075 sep = get_pat_quoted(sep, 0);
11088 pos = rb_str_rindex(str, sep, pos);
11097 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11099 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11111rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11115 for (i=0; i<argc; i++) {
11116 VALUE tmp = argv[i];
11118 if (rb_reg_start_with_p(tmp, str))
11122 const char *p, *s, *e;
11127 enc = rb_enc_check(str, tmp);
11128 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11129 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11130 p = RSTRING_PTR(str);
11133 if (!at_char_right_boundary(p, s, e, enc))
11135 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11151rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11155 for (i=0; i<argc; i++) {
11156 VALUE tmp = argv[i];
11157 const char *p, *s, *e;
11162 enc = rb_enc_check(str, tmp);
11163 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11164 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11165 p = RSTRING_PTR(str);
11168 if (!at_char_boundary(p, s, e, enc))
11170 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11186deleted_prefix_length(
VALUE str,
VALUE prefix)
11188 const char *strptr, *prefixptr;
11189 long olen, prefixlen;
11194 if (!is_broken_string(prefix) ||
11195 !rb_enc_asciicompat(enc) ||
11196 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11197 enc = rb_enc_check(str, prefix);
11201 prefixlen = RSTRING_LEN(prefix);
11202 if (prefixlen <= 0)
return 0;
11203 olen = RSTRING_LEN(str);
11204 if (olen < prefixlen)
return 0;
11205 strptr = RSTRING_PTR(str);
11206 prefixptr = RSTRING_PTR(prefix);
11207 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11208 if (is_broken_string(prefix)) {
11209 if (!is_broken_string(str)) {
11213 const char *strend = strptr + olen;
11214 const char *after_prefix = strptr + prefixlen;
11215 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11236rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11239 str_modify_keep_cr(str);
11241 prefixlen = deleted_prefix_length(str, prefix);
11242 if (prefixlen <= 0)
return Qnil;
11256rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11260 prefixlen = deleted_prefix_length(str, prefix);
11261 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11263 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11276deleted_suffix_length(
VALUE str,
VALUE suffix)
11278 const char *strptr, *suffixptr;
11279 long olen, suffixlen;
11283 if (is_broken_string(suffix))
return 0;
11284 enc = rb_enc_check(str, suffix);
11287 suffixlen = RSTRING_LEN(suffix);
11288 if (suffixlen <= 0)
return 0;
11289 olen = RSTRING_LEN(str);
11290 if (olen < suffixlen)
return 0;
11291 strptr = RSTRING_PTR(str);
11292 suffixptr = RSTRING_PTR(suffix);
11293 const char *strend = strptr + olen;
11294 const char *before_suffix = strend - suffixlen;
11295 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11296 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11312rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11314 long olen, suffixlen,
len;
11315 str_modifiable(str);
11317 suffixlen = deleted_suffix_length(str, suffix);
11318 if (suffixlen <= 0)
return Qnil;
11320 olen = RSTRING_LEN(str);
11321 str_modify_keep_cr(str);
11322 len = olen - suffixlen;
11323 STR_SET_LEN(str,
len);
11324 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11340rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11344 suffixlen = deleted_suffix_length(str, suffix);
11345 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11347 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11354 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11360nil_setter_warning(
ID id)
11362 rb_warn_deprecated(
"non-nil '%"PRIsVALUE
"'", NULL, rb_id2str(
id));
11369 if (!
NIL_P(*var)) {
11370 nil_setter_warning(
id);
11377 val = rb_fs_check(val);
11380 "value of %"PRIsVALUE
" must be String or Regexp",
11384 nil_setter_warning(
id);
11401 str_modifiable(str);
11404 int idx = rb_enc_to_index(encoding);
11411 rb_enc_associate_index(str, idx);
11435 if (STR_EMBED_P(str)) {
11436 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11441 str_replace_shared_without_enc(str2, str);
11443 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11473rb_str_valid_encoding_p(
VALUE str)
11493rb_str_is_ascii_only_p(
VALUE str)
11503 static const char ellipsis[] =
"...";
11504 const long ellipsislen =
sizeof(ellipsis) - 1;
11506 const long blen = RSTRING_LEN(str);
11507 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11508 VALUE estr, ret = 0;
11511 if (
len * rb_enc_mbminlen(enc) >= blen ||
11515 else if (
len <= ellipsislen ||
11517 if (rb_enc_asciicompat(enc)) {
11519 rb_enc_associate(ret, enc);
11526 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11531 rb_enc_from_encoding(enc), 0,
Qnil);
11544 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11550 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11569 if (enc == STR_ENC_GET(str)) {
11574 return enc_str_scrub(enc, str, repl, cr);
11582 const char *rep, *p, *e, *p1, *sp;
11588 rb_raise(rb_eArgError,
"both of block and replacement given");
11595 if (!
NIL_P(repl)) {
11596 repl = str_compat_and_valid(repl, enc);
11599 if (rb_enc_dummy_p(enc)) {
11602 encidx = rb_enc_to_index(enc);
11604#define DEFAULT_REPLACE_CHAR(str) do { \
11605 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11606 rep = replace; replen = (int)sizeof(replace); \
11609 slen = RSTRING_LEN(str);
11610 p = RSTRING_PTR(str);
11615 if (rb_enc_asciicompat(enc)) {
11621 else if (!
NIL_P(repl)) {
11622 rep = RSTRING_PTR(repl);
11623 replen = RSTRING_LEN(repl);
11626 else if (encidx == rb_utf8_encindex()) {
11627 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11631 DEFAULT_REPLACE_CHAR(
"?");
11636 p = search_nonascii(p, e);
11641 int ret = rb_enc_precise_mbclen(p, e, enc);
11660 if (e - p < clen) clen = e - p;
11667 for (; clen > 1; clen--) {
11668 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11679 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11680 str_mod_check(str, sp, slen);
11681 repl = str_compat_and_valid(repl, enc);
11688 p = search_nonascii(p, e);
11714 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11715 str_mod_check(str, sp, slen);
11716 repl = str_compat_and_valid(repl, enc);
11725 long mbminlen = rb_enc_mbminlen(enc);
11729 else if (!
NIL_P(repl)) {
11730 rep = RSTRING_PTR(repl);
11731 replen = RSTRING_LEN(repl);
11733 else if (encidx == ENCINDEX_UTF_16BE) {
11734 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11736 else if (encidx == ENCINDEX_UTF_16LE) {
11737 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11739 else if (encidx == ENCINDEX_UTF_32BE) {
11740 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11742 else if (encidx == ENCINDEX_UTF_32LE) {
11743 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11746 DEFAULT_REPLACE_CHAR(
"?");
11750 int ret = rb_enc_precise_mbclen(p, e, enc);
11763 if (e - p < clen) clen = e - p;
11764 if (clen <= mbminlen * 2) {
11769 for (; clen > mbminlen; clen-=mbminlen) {
11770 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11780 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11781 str_mod_check(str, sp, slen);
11782 repl = str_compat_and_valid(repl, enc);
11807 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11808 str_mod_check(str, sp, slen);
11809 repl = str_compat_and_valid(repl, enc);
11849str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11857static ID id_normalize;
11858static ID id_normalized_p;
11859static VALUE mUnicodeNormalize;
11862unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11864 static int UnicodeNormalizeRequired = 0;
11867 if (!UnicodeNormalizeRequired) {
11868 rb_require(
"unicode_normalize/normalize.rb");
11869 UnicodeNormalizeRequired = 1;
11873 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11884rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11886 return unicode_normalize_common(argc, argv, str, id_normalize);
11900rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11902 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11929rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11931 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12063#define sym_equal rb_obj_equal
12066sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12070 int c = rb_enc_precise_mbclen(s, send, enc);
12074 c = rb_enc_mbc_to_codepoint(s, send, enc);
12082rb_str_symname_p(
VALUE sym)
12087 rb_encoding *resenc = rb_default_internal_encoding();
12089 if (resenc == NULL) resenc = rb_default_external_encoding();
12090 enc = STR_ENC_GET(sym);
12091 ptr = RSTRING_PTR(sym);
12092 len = RSTRING_LEN(sym);
12093 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12101rb_str_quote_unprintable(
VALUE str)
12109 resenc = rb_default_internal_encoding();
12110 if (resenc == NULL) resenc = rb_default_external_encoding();
12111 enc = STR_ENC_GET(str);
12112 ptr = RSTRING_PTR(str);
12113 len = RSTRING_LEN(str);
12114 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12115 !sym_printable(ptr, ptr +
len, enc)) {
12116 return rb_str_escape(str);
12122rb_id_quote_unprintable(
ID id)
12124 VALUE str = rb_id2str(
id);
12125 if (!rb_str_symname_p(str)) {
12126 return rb_str_escape(str);
12144sym_inspect(
VALUE sym)
12151 if (!rb_str_symname_p(str)) {
12153 len = RSTRING_LEN(str);
12154 rb_str_resize(str,
len + 1);
12155 dest = RSTRING_PTR(str);
12156 memmove(dest + 1, dest,
len);
12160 VALUE orig_str = str;
12162 len = RSTRING_LEN(orig_str);
12163 str = rb_enc_str_new(0,
len + 1, enc);
12166 ptr = RSTRING_PTR(orig_str);
12167 dest = RSTRING_PTR(str);
12168 memcpy(dest + 1, ptr,
len);
12188rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12193 rb_raise(rb_eArgError,
"no receiver given");
12290 return rb_str_match(
rb_sym2str(sym), other);
12305sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12307 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12320sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12322 return rb_str_match_m_p(argc, argv, sym);
12340 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12351sym_length(
VALUE sym)
12365sym_empty(
VALUE sym)
12399sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12415sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12431sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12445sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12447 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12460sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12462 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12474sym_encoding(
VALUE sym)
12480string_for_symbol(
VALUE name)
12485 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12499 name = string_for_symbol(name);
12500 return rb_intern_str(name);
12509 name = string_for_symbol(name);
12533 return rb_fstring(str);
12539 struct RString fake_str = {RBASIC_INIT};
12540 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12552 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12553 rb_enc_autoload(enc);
12556 struct RString fake_str = {RBASIC_INIT};
12557 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12563 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12564 rb_enc_autoload(enc);
12567 struct RString fake_str = {RBASIC_INIT};
12568 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12579#if USE_YJIT || USE_ZJIT
12581rb_jit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12586 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12587 rb_str_buf_cat_byte(str, (
char) code);
12597fstring_set_class_i(
VALUE *str,
void *data)
12601 return ST_CONTINUE;
12609 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12776 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Define a function-backended global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
union RString::@51::@52::@54 aux
Auxiliary info.
long len
Length of the string, not including terminating NUL character.
struct RString::@51::@53 embed
Embedded contents.
VALUE shared
Parent of the string.
union RString::@51 as
String's specific fields.
char * ptr
Pointer to the contents of the string.
struct RString::@51::@52 heap
Strings that use separated memory region for contents use this pattern.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.