14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
/* Shorthand for entry `no` of the `beg` / `end` arrays reached through a
 * pointer named `regs`; a variable of that name must be in scope wherever
 * these macros are expanded. */
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
/* NOTE(review): presumably the upper bound, in bytes, of one encoded
 * character -- confirm against its users, which are not visible here. */
131#define RUBY_MAX_CHAR_LEN 16
/* String-private flag bits; each one is an alias for a generic FL_USERn bit. */
/* Set by str_store_precomputed_hash() once a hash value has been copied in
 * right after the string's terminator. */
132#define STR_PRECOMPUTED_HASH FL_USER4
/* Set (by STR_SET_SHARED) on the string whose buffer other strings point into. */
133#define STR_SHARED_ROOT FL_USER5
/* Set (by STR_SET_SHARED) on a shared root whose RBASIC_CLASS is 0. */
134#define STR_BORROWED FL_USER6
/* Checked by rb_check_lockedtmp(); part of STR_UNMODIFIABLE_MASK. */
135#define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); the free path in this file skips ruby_sized_xfree()
 * for strings carrying this bit. */
136#define STR_NOFREE FL_USER18
/* STR_SET_SHARED does nothing for a string carrying this bit. */
137#define STR_FAKESTR FL_USER19
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
/* Terminator length for `str`: 1 when str_enc_fastpath(str) is true, otherwise
 * rb_enc_mbminlen() of the encoding looked up from ENCODING_GET(str).
 * Evaluates `str` more than once. */
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 FL_SET((shared_str), STR_SHARED_ROOT); \
209 if (RBASIC_CLASS((shared_str)) == 0) \
210 FL_SET_RAW((shared_str), STR_BORROWED); \
214#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr) /* heap-representation buffer pointer of str */
/* Size passed to the sized free/realloc calls in this file: the recorded
 * capacity (as.heap.aux.capa) plus the terminator length of str. */
215#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias for get_encoding(str). */
218#define STR_ENC_GET(str) get_encoding(str)
/* SHARABLE_MIDDLE_SUBSTRING defaults to 0 unless the build defines it. */
220#if !defined SHARABLE_MIDDLE_SUBSTRING
221# define SHARABLE_MIDDLE_SUBSTRING 0
/* With SHARABLE_MIDDLE_SUBSTRING off, SHARABLE_SUBSTRING_P is true only when
 * beg + len == end; the alternative definition below is unconditionally 1. */
223#if !SHARABLE_MIDDLE_SUBSTRING
224#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
226#define SHARABLE_SUBSTRING_P(beg, len, end) 1
231str_embed_capa(
VALUE str)
233 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
237rb_str_reembeddable_p(
VALUE str)
239 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
243rb_str_embed_size(
long capa)
249rb_str_size_as_embedded(
VALUE str)
252 if (STR_EMBED_P(str)) {
253 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
257 else if (rb_str_reembeddable_p(str)) {
258 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
261 real_size =
sizeof(
struct RString);
265 real_size +=
sizeof(st_index_t);
272STR_EMBEDDABLE_P(
long len,
long termlen)
274 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
/* Forward declarations for helpers defined further down in this file. */

/* Builds a new string from `orig`; when `copy_encoding` is zero the result
 * uses the ASCII-8BIT encoding and a terminator length of 1 instead of
 * orig's own encoding and terminator length. */
279static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
/* Creates a string around `ptr`/`len` with encoding index `encindex`; the
 * heap-allocated result is flagged STR_NOFREE. */
280static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
/* Gives `str` its own buffer holding `len` bytes plus a `termlen`-byte
 * terminator; str_make_independent() calls it with `expand` == 0. */
282static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
/* Checks the STR_UNMODIFIABLE_MASK bits of `str` and runs the chilled,
 * tmplock and frozen checks when any of them is set. */
283static inline void str_modifiable(
VALUE str);
288str_make_independent(
VALUE str)
290 long len = RSTRING_LEN(str);
291 int termlen = TERM_LEN(str);
292 str_make_independent_expand((str),
len, 0L, termlen);
295static inline int str_dependent_p(
VALUE str);
298rb_str_make_independent(
VALUE str)
300 if (str_dependent_p(str)) {
301 str_make_independent(str);
306rb_str_make_embedded(
VALUE str)
311 char *buf =
RSTRING(str)->as.heap.ptr;
315 STR_SET_LEN(str,
len);
318 memcpy(RSTRING_PTR(str), buf,
len);
322 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
326rb_debug_rstring_null_ptr(
const char *func)
328 fprintf(stderr,
"%s is returning NULL!! "
329 "SIGSEGV is highly expected to follow immediately.\n"
330 "If you could reproduce, attach your debugger here, "
331 "and look at the passed string.\n",
336static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
339get_encoding(
VALUE str)
345mustnot_broken(
VALUE str)
347 if (is_broken_string(str)) {
348 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
353mustnot_wchar(
VALUE str)
356 if (rb_enc_mbminlen(enc) > 1) {
357 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
/* Forward declaration. Looks `str` up in (or inserts it into) the fstring
 * table via rb_concurrent_set_find_or_insert(); `force_precompute_hash` is
 * forwarded to the table's create callback.
 * NOTE(review): the use of `copy` is not visible in this excerpt. */
361static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
/* Defined only when `long` and pointers have the same size. Code guarded by
 * this macro reads a string's hash back out of its as.heap.aux.capa field;
 * register_fstring() stores it there under the same size condition. */
363#if SIZEOF_LONG == SIZEOF_VOIDP
364#define PRECOMPUTED_FAKESTR_HASH 1
369BARE_STRING_P(
VALUE str)
374static inline st_index_t
375str_do_hash(
VALUE str)
377 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
379 if (e && !is_ascii_string(str)) {
386str_store_precomputed_hash(
VALUE str, st_index_t hash)
392 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
393 size_t free_bytes = str_embed_capa(str) - used_bytes;
397 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
399 FL_SET(str, STR_PRECOMPUTED_HASH);
412 if (
FL_TEST(str, RSTRING_FSTR))
415 bare = BARE_STRING_P(str);
417 if (STR_EMBED_P(str)) {
422 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
429 rb_str_resize(str, RSTRING_LEN(str));
431 fstr = register_fstring(str,
false,
false);
434 str_replace_shared_without_enc(str, fstr);
/* The fstring table: a concurrent set created by Init_fstring_table() and
 * registered there with rb_gc_register_address(). */
441static VALUE fstring_table_obj;
444fstring_concurrent_set_hash(
VALUE str)
446#ifdef PRECOMPUTED_FAKESTR_HASH
450 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
467 const char *aptr, *bptr;
474 return (alen == blen &&
476 memcmp(aptr, bptr, alen) == 0);
481 bool force_precompute_hash;
485fstring_concurrent_set_create(
VALUE str,
void *data)
495 long len = RSTRING_LEN(str);
496 long capa =
len +
sizeof(st_index_t);
497 int term_len = TERM_LEN(str);
499 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
501 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
502 STR_SET_LEN(new_str, RSTRING_LEN(str));
504 rb_enc_copy(new_str, str);
505 str_store_precomputed_hash(new_str, str_do_hash(str));
509 rb_enc_copy(new_str, str);
510#ifdef PRECOMPUTED_FAKESTR_HASH
511 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
512 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
526 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
529 if (STR_SHARED_P(str)) {
531 str_make_independent(str);
534 if (!BARE_STRING_P(str)) {
540 RBASIC(str)->flags |= RSTRING_FSTR;
542 RB_OBJ_SET_SHAREABLE(str);
556 .hash = fstring_concurrent_set_hash,
557 .cmp = fstring_concurrent_set_cmp,
558 .create = fstring_concurrent_set_create,
563Init_fstring_table(
void)
565 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
566 rb_gc_register_address(&fstring_table_obj);
570register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
574 .force_precompute_hash = force_precompute_hash
577#if SIZEOF_VOIDP == SIZEOF_LONG
581 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
585 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
587 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
599rb_obj_is_fstring_table(
VALUE obj)
603 return obj == fstring_table_obj;
607rb_gc_free_fstring(
VALUE obj)
609 ASSERT_vm_locking_with_barrier();
611 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
613 RB_DEBUG_COUNTER_INC(obj_str_fstr);
619rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
621 if (fstring_table_obj) {
622 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
627setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
630 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
643 return (
VALUE)fake_str;
652 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
661rb_fstring_new(
const char *ptr,
long len)
663 struct RString fake_str = {RBASIC_INIT};
664 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
670 struct RString fake_str = {RBASIC_INIT};
671 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
675rb_fstring_cstr(
const char *
ptr)
677 return rb_fstring_new(
ptr, strlen(
ptr));
681single_byte_optimizable(
VALUE str)
685 case ENCINDEX_ASCII_8BIT:
686 case ENCINDEX_US_ASCII:
708static inline const char *
709search_nonascii(
const char *p,
const char *e)
711 const uintptr_t *s, *t;
713#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
714# if SIZEOF_UINTPTR_T == 8
715# define NONASCII_MASK UINT64_C(0x8080808080808080)
716# elif SIZEOF_UINTPTR_T == 4
717# define NONASCII_MASK UINT32_C(0x80808080)
719# error "don't know what to do."
722# if SIZEOF_UINTPTR_T == 8
723# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
724# elif SIZEOF_UINTPTR_T == 4
725# define NONASCII_MASK 0x80808080UL
727# error "don't know what to do."
731 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
732#if !UNALIGNED_WORD_ACCESS
733 if ((uintptr_t)p % SIZEOF_VOIDP) {
734 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
739 case 7:
if (p[-7]&0x80)
return p-7;
740 case 6:
if (p[-6]&0x80)
return p-6;
741 case 5:
if (p[-5]&0x80)
return p-5;
742 case 4:
if (p[-4]&0x80)
return p-4;
744 case 3:
if (p[-3]&0x80)
return p-3;
745 case 2:
if (p[-2]&0x80)
return p-2;
746 case 1:
if (p[-1]&0x80)
return p-1;
751#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
752#define aligned_ptr(value) \
753 __builtin_assume_aligned((value), sizeof(uintptr_t))
755#define aligned_ptr(value) (uintptr_t *)(value)
758 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
761 if (*s & NONASCII_MASK) {
762#ifdef WORDS_BIGENDIAN
763 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
765 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
775 case 7:
if (e[-7]&0x80)
return e-7;
776 case 6:
if (e[-6]&0x80)
return e-6;
777 case 5:
if (e[-5]&0x80)
return e-5;
778 case 4:
if (e[-4]&0x80)
return e-4;
780 case 3:
if (e[-3]&0x80)
return e-3;
781 case 2:
if (e[-2]&0x80)
return e-2;
782 case 1:
if (e[-1]&0x80)
return e-1;
790 const char *e = p +
len;
792 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
794 p = search_nonascii(p, e);
798 if (rb_enc_asciicompat(enc)) {
799 p = search_nonascii(p, e);
802 int ret = rb_enc_precise_mbclen(p, e, enc);
806 p = search_nonascii(p, e);
812 int ret = rb_enc_precise_mbclen(p, e, enc);
828 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
831 p = search_nonascii(p, e);
835 else if (rb_enc_asciicompat(enc)) {
836 p = search_nonascii(p, e);
842 int ret = rb_enc_precise_mbclen(p, e, enc);
849 p = search_nonascii(p, e);
855 int ret = rb_enc_precise_mbclen(p, e, enc);
880 rb_enc_set_index(str1, rb_enc_get_index(str2));
888rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
893 str_enc_copy(dest, src);
894 if (RSTRING_LEN(dest) == 0) {
895 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
906 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
907 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
918rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
920 str_enc_copy(dest, src);
927 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
933 return enc_coderange_scan(str, enc);
942 cr = enc_coderange_scan(str, get_encoding(str));
949rb_enc_str_asciicompat(
VALUE str)
952 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
960 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
969str_mod_check(
VALUE s,
const char *p,
long len)
971 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
977str_capacity(
VALUE str,
const int termlen)
979 if (STR_EMBED_P(str)) {
980 return str_embed_capa(str) - termlen;
982 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
986 return RSTRING(str)->as.heap.aux.capa;
993 return str_capacity(str, TERM_LEN(str));
997must_not_null(
const char *
ptr)
1000 rb_raise(rb_eArgError,
"NULL pointer given");
1005str_alloc_embed(
VALUE klass,
size_t capa)
1007 size_t size = rb_str_embed_size(
capa);
1011 NEWOBJ_OF(str,
struct RString, klass,
1015 str->as.embed.ary[0] = 0;
1021str_alloc_heap(
VALUE klass)
1023 NEWOBJ_OF(str,
struct RString, klass,
1027 str->as.heap.aux.capa = 0;
1028 str->as.heap.ptr = NULL;
1034empty_str_alloc(
VALUE klass)
1036 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1037 VALUE str = str_alloc_embed(klass, 0);
1038 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1049 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1053 enc = rb_ascii8bit_encoding();
1056 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1058 int termlen = rb_enc_mbminlen(enc);
1060 if (STR_EMBEDDABLE_P(
len, termlen)) {
1061 str = str_alloc_embed(klass,
len + termlen);
1067 str = str_alloc_heap(klass);
1073 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1076 rb_enc_raw_set(str, enc);
1079 memcpy(RSTRING_PTR(str),
ptr,
len);
1082 memset(RSTRING_PTR(str), 0,
len);
1085 STR_SET_LEN(str,
len);
1086 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1093 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1128 __msan_unpoison_string(
ptr);
1148 if (rb_enc_mbminlen(enc) != 1) {
1149 rb_raise(rb_eArgError,
"wchar encoding given");
1151 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1155str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1160 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1164 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1167 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1168 str = str_alloc_heap(klass);
1172 RBASIC(str)->flags |= STR_NOFREE;
1173 rb_enc_associate_index(str, encindex);
1202static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1204 int ecflags,
VALUE ecopts);
1209 int encidx = rb_enc_to_index(enc);
1210 if (rb_enc_get_index(str) == encidx)
1211 return is_ascii_string(str);
1222 if (!to)
return str;
1223 if (!from) from = rb_enc_get(str);
1224 if (from == to)
return str;
1225 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1226 rb_is_ascii8bit_enc(to)) {
1227 if (STR_ENC_GET(str) != to) {
1229 rb_enc_associate(str, to);
1236 from, to, ecflags, ecopts);
1237 if (
NIL_P(newstr)) {
1245rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1250 olen = RSTRING_LEN(newstr);
1251 if (ofs < -olen || olen < ofs)
1253 if (ofs < 0) ofs += olen;
1255 STR_SET_LEN(newstr, ofs);
1259 rb_str_modify(newstr);
1260 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1268 STR_SET_LEN(str, 0);
1269 rb_enc_associate(str, enc);
1275str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1277 int ecflags,
VALUE ecopts)
1282 VALUE econv_wrapper;
1283 const unsigned char *start, *sp;
1284 unsigned char *dest, *dp;
1285 size_t converted_output = (size_t)ofs;
1290 RBASIC_CLEAR_CLASS(econv_wrapper);
1292 if (!ec)
return Qnil;
1295 sp = (
unsigned char*)
ptr;
1297 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1298 (dp = dest + converted_output),
1302 size_t converted_input = sp - start;
1303 size_t rest =
len - converted_input;
1304 converted_output = dp - dest;
1306 if (converted_input && converted_output &&
1307 rest < (LONG_MAX / converted_output)) {
1308 rest = (rest * converted_output) / converted_input;
1313 olen += rest < 2 ? 2 : rest;
1314 rb_str_resize(newstr, olen);
1321 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1323 rb_enc_associate(newstr, to);
1342 const int eidx = rb_enc_to_index(eenc);
1345 return rb_enc_str_new(
ptr,
len, eenc);
1349 if ((eidx == rb_ascii8bit_encindex()) ||
1350 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1354 ienc = rb_default_internal_encoding();
1355 if (!ienc || eenc == ienc) {
1356 return rb_enc_str_new(
ptr,
len, eenc);
1360 if ((eidx == rb_ascii8bit_encindex()) ||
1361 (eidx == rb_usascii_encindex()) ||
1362 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1363 return rb_enc_str_new(
ptr,
len, ienc);
1366 str = rb_enc_str_new(NULL, 0, ienc);
1369 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1370 rb_str_initialize(str,
ptr,
len, eenc);
1378 int eidx = rb_enc_to_index(eenc);
1379 if (eidx == rb_usascii_encindex() &&
1380 !is_ascii_string(str)) {
1381 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1384 rb_enc_associate_index(str, eidx);
1443str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1445 const int termlen = TERM_LEN(str);
1450 if (str_embed_capa(str2) >=
len + termlen) {
1451 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1452 STR_SET_EMBED(str2);
1453 memcpy(ptr2, RSTRING_PTR(str),
len);
1454 TERM_FILL(ptr2+
len, termlen);
1458 if (STR_SHARED_P(str)) {
1459 root =
RSTRING(str)->as.heap.aux.shared;
1468 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1470 rb_fatal(
"about to free a possible shared root");
1472 char *ptr2 = STR_HEAP_PTR(str2);
1474 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1477 FL_SET(str2, STR_NOEMBED);
1479 STR_SET_SHARED(str2, root);
1482 STR_SET_LEN(str2,
len);
1490 str_replace_shared_without_enc(str2, str);
1491 rb_enc_cr_str_exact_copy(str2, str);
1498 return str_replace_shared(str_alloc_heap(klass), str);
1515rb_str_new_frozen_String(
VALUE orig)
1523rb_str_frozen_bare_string(
VALUE orig)
1525 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1530rb_str_tmp_frozen_acquire(
VALUE orig)
1533 return str_new_frozen_buffer(0, orig, FALSE);
1537rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1539 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1540 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1542 VALUE str = str_alloc_heap(0);
1545 FL_SET(str, STR_SHARED_ROOT);
1547 size_t capa = str_capacity(orig, TERM_LEN(orig));
1553 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1554 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1561 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1562 RBASIC(orig)->flags &= ~STR_NOFREE;
1563 STR_SET_SHARED(orig, str);
1565 RB_OBJ_SET_SHAREABLE(str);
1577rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1582 if (STR_EMBED_P(tmp)) {
1585 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1591 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1595 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1596 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1601 STR_SET_LEN(tmp, 0);
1609 return str_new_frozen_buffer(klass, orig, TRUE);
1619 VALUE str = str_alloc_heap(klass);
1620 STR_SET_LEN(str, RSTRING_LEN(orig));
1621 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1622 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1623 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1624 RBASIC(orig)->flags &= ~STR_NOFREE;
1625 STR_SET_SHARED(orig, str);
1632str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1636 long len = RSTRING_LEN(orig);
1637 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1638 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1640 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1641 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1647 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1648 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1654 if ((ofs > 0) || (rest > 0) ||
1657 str = str_new_shared(klass,
shared);
1659 RSTRING(str)->as.heap.ptr += ofs;
1660 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1668 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1669 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1671 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1672 STR_SET_LEN(str, RSTRING_LEN(orig));
1678 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1681 str = heap_str_make_shared(klass, orig);
1686 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1698str_new_empty_String(
VALUE str)
1701 rb_enc_copy(v, str);
/* Minimum buffer capacity: the capacity-handling code later in this file
 * raises any smaller requested `capa` to this value. */
1705#define STR_BUF_MIN_SIZE 63
1710 if (STR_EMBEDDABLE_P(
capa, 1)) {
1718 RSTRING(str)->as.heap.ptr[0] =
'\0';
1738 return str_new(0, 0,
len);
1744 if (STR_EMBED_P(str)) {
1745 RB_DEBUG_COUNTER_INC(obj_str_embed);
1747 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1748 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1749 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1752 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1753 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1758rb_str_memsize(
VALUE str)
1760 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1761 return STR_HEAP_SIZE(str);
1771 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1774static inline void str_discard(
VALUE str);
1775static void str_shared_replace(
VALUE str,
VALUE str2);
1780 if (str != str2) str_shared_replace(str, str2);
1791 enc = STR_ENC_GET(str2);
1794 termlen = rb_enc_mbminlen(enc);
1796 STR_SET_LEN(str, RSTRING_LEN(str2));
1798 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1800 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1801 rb_enc_associate(str, enc);
1805 if (STR_EMBED_P(str2)) {
1807 long len = RSTRING_LEN(str2);
1810 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1811 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1812 RSTRING(str2)->as.heap.ptr = new_ptr;
1813 STR_SET_LEN(str2,
len);
1815 STR_SET_NOEMBED(str2);
1818 STR_SET_NOEMBED(str);
1820 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1822 if (
FL_TEST(str2, STR_SHARED)) {
1824 STR_SET_SHARED(str,
shared);
1827 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1831 STR_SET_EMBED(str2);
1832 RSTRING_PTR(str2)[0] = 0;
1833 STR_SET_LEN(str2, 0);
1834 rb_enc_associate(str, enc);
1848 return rb_obj_as_string_result(str, obj);
1864 len = RSTRING_LEN(str2);
1865 if (STR_SHARED_P(str2)) {
1868 STR_SET_NOEMBED(str);
1869 STR_SET_LEN(str,
len);
1870 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1871 STR_SET_SHARED(str,
shared);
1872 rb_enc_cr_str_exact_copy(str, str2);
1875 str_replace_shared(str, str2);
1884 size_t size = rb_str_embed_size(
capa);
1888 NEWOBJ_OF(str,
struct RString, klass,
1899 NEWOBJ_OF(str,
struct RString, klass,
1902 str->as.heap.aux.capa = 0;
1903 str->as.heap.ptr = NULL;
1913 encidx = rb_enc_get_index(str);
1914 flags &= ~ENCODING_MASK;
1917 if (encidx) rb_enc_associate_index(dup, encidx);
1927 long len = RSTRING_LEN(str);
1932 STR_SET_LEN(dup, RSTRING_LEN(str));
1933 return str_duplicate_setup_encoding(str, dup, flags);
1942 root =
RSTRING(str)->as.heap.aux.shared;
1945 root = str = str_new_frozen(klass, str);
1951 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1952 FL_SET(root, STR_SHARED_ROOT);
1954 flags |= RSTRING_NOEMBED | STR_SHARED;
1956 STR_SET_LEN(dup, RSTRING_LEN(str));
1957 return str_duplicate_setup_encoding(str, dup, flags);
1963 if (STR_EMBED_P(str)) {
1964 return str_duplicate_setup_embed(klass, str, dup);
1967 return str_duplicate_setup_heap(klass, str, dup);
1975 if (STR_EMBED_P(str)) {
1976 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1979 dup = str_alloc_heap(klass);
1982 return str_duplicate_setup(klass, str, dup);
1993rb_str_dup_m(
VALUE str)
1995 if (LIKELY(BARE_STRING_P(str))) {
2006 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2013 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2017 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2018 str_duplicate_setup_embed(klass, str, new_str);
2021 new_str = ec_str_alloc_heap(ec, klass);
2022 str_duplicate_setup_heap(klass, str, new_str);
2031rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2033 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2057 static ID keyword_ids[2];
2058 VALUE orig, opt, venc, vcapa;
2063 if (!keyword_ids[0]) {
2064 keyword_ids[0] = rb_id_encoding();
2065 CONST_ID(keyword_ids[1],
"capacity");
2073 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2074 enc = rb_to_encoding(venc);
2076 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2079 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2081 if (
capa < STR_BUF_MIN_SIZE) {
2082 capa = STR_BUF_MIN_SIZE;
2086 len = RSTRING_LEN(orig);
2090 if (orig == str) n = 0;
2092 str_modifiable(str);
2093 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2095 const size_t size = (size_t)
capa + termlen;
2096 const char *
const old_ptr = RSTRING_PTR(str);
2097 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2098 char *new_ptr =
ALLOC_N(
char, size);
2099 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2100 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2102 RSTRING(str)->as.heap.ptr = new_ptr;
2104 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2105 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2106 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2108 STR_SET_LEN(str,
len);
2111 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2112 rb_enc_cr_str_exact_copy(str, orig);
2114 FL_SET(str, STR_NOEMBED);
2121 rb_enc_associate(str, enc);
2133rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2139 static ID keyword_ids[2];
2149 keyword_ids[0] = rb_id_encoding();
2150 CONST_ID(keyword_ids[1],
"capacity");
2152 encoding = kwargs[0];
2153 capacity = kwargs[1];
2162 if (UNDEF_P(encoding)) {
2164 encoding = rb_obj_encoding(orig);
2168 if (!UNDEF_P(encoding)) {
2169 enc = rb_to_encoding(encoding);
2173 if (UNDEF_P(capacity)) {
2175 VALUE empty_str = str_new(klass,
"", 0);
2177 rb_enc_associate(empty_str, enc);
2181 VALUE copy = str_duplicate(klass, orig);
2182 rb_enc_associate(copy, enc);
2195 if (orig_capa >
capa) {
2200 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2201 STR_SET_LEN(str, 0);
/* True for every byte whose top two bits are not `10`, i.e. any byte that is
 * not a UTF-8 continuation byte (this includes plain ASCII bytes). */
2212#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2227static inline uintptr_t
2228count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2233 d = (d>>6) | (~d>>7);
2234 d &= NONASCII_MASK >> 7;
2237#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2239 return rb_popcount_intptr(d);
2243# if SIZEOF_VOIDP == 8
2252enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2258 long diff = (long)(e - p);
2259 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2264 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2265 const uintptr_t *s, *t;
2266 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2267 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2268 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2269 while (p < (
const char *)s) {
2270 if (is_utf8_lead_byte(*p))
len++;
2274 len += count_utf8_lead_bytes_with_word(s);
2277 p = (
const char *)s;
2280 if (is_utf8_lead_byte(*p))
len++;
2286 else if (rb_enc_asciicompat(enc)) {
2291 q = search_nonascii(p, e);
2297 p += rb_enc_fast_mbclen(p, e, enc);
2304 q = search_nonascii(p, e);
2310 p += rb_enc_mbclen(p, e, enc);
2317 for (c=0; p<e; c++) {
2318 p += rb_enc_mbclen(p, e, enc);
2333rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2341 long diff = (long)(e - p);
2342 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2344 else if (rb_enc_asciicompat(enc)) {
2348 q = search_nonascii(p, e);
2356 ret = rb_enc_precise_mbclen(p, e, enc);
2371 for (c=0; p<e; c++) {
2372 ret = rb_enc_precise_mbclen(p, e, enc);
2379 if (p + rb_enc_mbminlen(enc) <= e)
2380 p += rb_enc_mbminlen(enc);
2396 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2397 if (!enc) enc = STR_ENC_GET(str);
2398 p = RSTRING_PTR(str);
2403 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2408 return enc_strlen(p, e, enc, cr);
2415 return str_strlen(str, NULL);
2429 return LONG2NUM(str_strlen(str, NULL));
2441rb_str_bytesize(
VALUE str)
2460rb_str_empty(
VALUE str)
2462 return RBOOL(RSTRING_LEN(str) == 0);
2481 char *ptr1, *ptr2, *ptr3;
2486 enc = rb_enc_check_str(str1, str2);
2489 termlen = rb_enc_mbminlen(enc);
2490 if (len1 > LONG_MAX - len2) {
2491 rb_raise(rb_eArgError,
"string size too big");
2493 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2494 ptr3 = RSTRING_PTR(str3);
2495 memcpy(ptr3, ptr1, len1);
2496 memcpy(ptr3+len1, ptr2, len2);
2497 TERM_FILL(&ptr3[len1+len2], termlen);
2513 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2516 int enc1 = rb_enc_get_index(str1);
2517 int enc2 = rb_enc_get_index(str2);
2522 else if (enc2 < 0) {
2525 else if (enc1 != enc2) {
2528 else if (len1 > LONG_MAX - len2) {
2562 rb_enc_copy(str2, str);
2567 rb_raise(rb_eArgError,
"negative argument");
2569 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2570 if (STR_EMBEDDABLE_P(
len, 1)) {
2572 memset(RSTRING_PTR(str2), 0,
len + 1);
2579 STR_SET_LEN(str2,
len);
2580 rb_enc_copy(str2, str);
2583 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2584 rb_raise(rb_eArgError,
"argument too big");
2587 len *= RSTRING_LEN(str);
2588 termlen = TERM_LEN(str);
2590 ptr2 = RSTRING_PTR(str2);
2592 n = RSTRING_LEN(str);
2593 memcpy(ptr2, RSTRING_PTR(str), n);
2594 while (n <=
len/2) {
2595 memcpy(ptr2 + n, ptr2, n);
2598 memcpy(ptr2 + n, ptr2,
len-n);
2600 STR_SET_LEN(str2,
len);
2601 TERM_FILL(&ptr2[
len], termlen);
2602 rb_enc_cr_str_copy_for_substr(str2, str);
2639rb_check_lockedtmp(
VALUE str)
2641 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Flag bits that make str_modifiable() leave its fast path: frozen,
 * temporarily locked, or chilled. */
2648#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2650str_modifiable(
VALUE str)
2654 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2655 if (CHILLED_STRING_P(str)) {
2656 CHILLED_STRING_MUTATED(str);
2658 rb_check_lockedtmp(str);
2659 rb_check_frozen(str);
2664str_dependent_p(
VALUE str)
2666 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* Every bit of STR_UNMODIFIABLE_MASK plus STR_SHARED and STR_NOFREE; when any
 * of these is set, str_independent() runs str_modifiable() and then the
 * str_dependent_p() check. */
2676#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2678str_independent(
VALUE str)
2682 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2683 str_modifiable(str);
2684 return !str_dependent_p(str);
2690str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2700 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2705 STR_SET_LEN(str,
len);
2710 oldptr = RSTRING_PTR(str);
2712 memcpy(
ptr, oldptr,
len);
2714 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2717 STR_SET_NOEMBED(str);
2718 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2719 TERM_FILL(
ptr +
len, termlen);
2721 STR_SET_LEN(str,
len);
2728 if (!str_independent(str))
2729 str_make_independent(str);
2738 int termlen = TERM_LEN(str);
2739 long len = RSTRING_LEN(str);
2742 rb_raise(rb_eArgError,
"negative expanding string size");
2744 if (expand >= LONG_MAX -
len) {
2745 rb_raise(rb_eArgError,
"string size too big");
2748 if (!str_independent(str)) {
2749 str_make_independent_expand(str,
len, expand, termlen);
2751 else if (expand > 0) {
2752 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2759str_modify_keep_cr(
VALUE str)
2761 if (!str_independent(str))
2762 str_make_independent(str);
2769str_discard(
VALUE str)
2771 str_modifiable(str);
2772 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2773 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2774 RSTRING(str)->as.heap.ptr = 0;
2775 STR_SET_LEN(str, 0);
2782 int encindex = rb_enc_get_index(str);
2784 if (RB_UNLIKELY(encindex == -1)) {
2788 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2793 if (!rb_enc_asciicompat(enc)) {
2815 return RSTRING_PTR(str);
2819zero_filled(
const char *s,
int n)
2821 for (; n > 0; --n) {
2828str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2830 const char *e = s +
len;
2832 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2833 if (zero_filled(s, minlen))
return s;
2839str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2844 if (str_dependent_p(str)) {
2845 if (!zero_filled(s +
len, termlen))
2846 str_make_independent_expand(str,
len, 0L, termlen);
2849 TERM_FILL(s +
len, termlen);
2852 return RSTRING_PTR(str);
2856rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2858 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2859 long len = RSTRING_LEN(str);
2863 rb_check_lockedtmp(str);
2864 str_make_independent_expand(str,
len, 0L, termlen);
2866 else if (str_dependent_p(str)) {
2867 if (termlen > oldtermlen)
2868 str_make_independent_expand(str,
len, 0L, termlen);
2871 if (!STR_EMBED_P(str)) {
2876 if (termlen > oldtermlen) {
2877 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2885str_null_check(
VALUE str,
int *w)
2887 char *s = RSTRING_PTR(str);
2888 long len = RSTRING_LEN(str);
2890 const int minlen = rb_enc_mbminlen(enc);
2894 if (str_null_char(s,
len, minlen, enc)) {
2897 return str_fill_term(str, s,
len, minlen);
2900 if (!s || memchr(s, 0,
len)) {
2904 s = str_fill_term(str, s,
len, minlen);
2910rb_str_to_cstr(
VALUE str)
2913 return str_null_check(str, &w);
2921 char *s = str_null_check(str, &w);
2924 rb_raise(rb_eArgError,
"string contains null char");
2926 rb_raise(rb_eArgError,
"string contains null byte");
2932rb_str_fill_terminator(
VALUE str,
const int newminlen)
2934 char *s = RSTRING_PTR(str);
2935 long len = RSTRING_LEN(str);
2936 return str_fill_term(str, s,
len, newminlen);
2942 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2968str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2977 else if (rb_enc_asciicompat(enc)) {
2978 const char *p2, *e2;
2981 while (p < e && 0 < nth) {
2988 p2 = search_nonascii(p, e2);
2997 n = rb_enc_mbclen(p, e, enc);
3008 while (p < e && nth--) {
3009 p += rb_enc_mbclen(p, e, enc);
3020 return str_nth_len(p, e, &nth, enc);
3024str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3029 p = str_nth_len(p, e, &nth, enc);
3038str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3040 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3041 if (!pp)
return e - p;
3048 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3049 STR_ENC_GET(str), single_byte_optimizable(str));
3054str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3057 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3058 const uintptr_t *s, *t;
3059 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3060 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3061 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3062 while (p < (
const char *)s) {
3063 if (is_utf8_lead_byte(*p)) nth--;
3067 nth -= count_utf8_lead_bytes_with_word(s);
3069 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3073 if (is_utf8_lead_byte(*p)) {
3074 if (nth == 0)
break;
3084str_utf8_offset(
const char *p,
const char *e,
long nth)
3086 const char *pp = str_utf8_nth(p, e, &nth);
3095 if (single_byte_optimizable(str) || pos < 0)
3098 char *p = RSTRING_PTR(str);
3099 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3104str_subseq(
VALUE str,
long beg,
long len)
3112 const int termlen = TERM_LEN(str);
3113 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3120 if (str_embed_capa(str2) >=
len + termlen) {
3121 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3122 STR_SET_EMBED(str2);
3123 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3124 TERM_FILL(ptr2+
len, termlen);
3126 STR_SET_LEN(str2,
len);
3130 str_replace_shared(str2, str);
3133 RSTRING(str2)->as.heap.ptr += beg;
3134 if (RSTRING_LEN(str2) >
len) {
3135 STR_SET_LEN(str2,
len);
3145 VALUE str2 = str_subseq(str, beg,
len);
3146 rb_enc_cr_str_copy_for_substr(str2, str);
3155 const long blen = RSTRING_LEN(str);
3157 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3159 if (
len < 0)
return 0;
3160 if (beg < 0 && -beg < 0)
return 0;
3164 if (single_byte_optimizable(str)) {
3165 if (beg > blen)
return 0;
3168 if (beg < 0)
return 0;
3170 if (
len > blen - beg)
3172 if (
len < 0)
return 0;
3177 if (
len > -beg)
len = -beg;
3181 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3184 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3190 slen = str_strlen(str, enc);
3192 if (beg < 0)
return 0;
3194 if (
len == 0)
goto end;
3197 else if (beg > 0 && beg > blen) {
3201 if (beg > str_strlen(str, enc))
return 0;
3206 enc == rb_utf8_encoding()) {
3207 p = str_utf8_nth(s, e, &beg);
3208 if (beg > 0)
return 0;
3209 len = str_utf8_offset(p, e,
len);
3215 p = s + beg * char_sz;
3219 else if (
len * char_sz > e - p)
3224 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3225 if (beg > 0)
return 0;
3229 len = str_offset(p, e,
len, enc, 0);
3237static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3242 return str_substr(str, beg,
len, TRUE);
3252str_substr(
VALUE str,
long beg,
long len,
int empty)
3256 if (!p)
return Qnil;
3257 if (!
len && !empty)
return Qnil;
3259 beg = p - RSTRING_PTR(str);
3261 VALUE str2 = str_subseq(str, beg,
len);
3262 rb_enc_cr_str_copy_for_substr(str2, str);
3270 if (CHILLED_STRING_P(str)) {
3275 rb_str_resize(str, RSTRING_LEN(str));
3293 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3336str_uminus(
VALUE str)
3341 return rb_fstring(str);
3345#define rb_str_dup_frozen rb_str_new_frozen
3350 rb_check_frozen(str);
3351 if (
FL_TEST(str, STR_TMPLOCK)) {
3354 FL_SET(str, STR_TMPLOCK);
3361 rb_check_frozen(str);
3362 if (!
FL_TEST(str, STR_TMPLOCK)) {
3382 const int termlen = TERM_LEN(str);
3384 str_modifiable(str);
3385 if (STR_SHARED_P(str)) {
3388 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3389 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3400 else if (
len > RSTRING_LEN(str)) {
3404 const char *
const new_end = RSTRING_PTR(str) +
len;
3414 else if (
len < RSTRING_LEN(str)) {
3422 STR_SET_LEN(str,
len);
3423 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3430 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3433 int independent = str_independent(str);
3434 long slen = RSTRING_LEN(str);
3435 const int termlen = TERM_LEN(str);
3437 if (slen >
len || (termlen != 1 && slen <
len)) {
3443 if (STR_EMBED_P(str)) {
3444 if (
len == slen)
return str;
3445 if (str_embed_capa(str) >=
len + termlen) {
3446 STR_SET_LEN(str,
len);
3450 str_make_independent_expand(str, slen,
len - slen, termlen);
3452 else if (str_embed_capa(str) >=
len + termlen) {
3453 char *
ptr = STR_HEAP_PTR(str);
3455 if (slen >
len) slen =
len;
3458 STR_SET_LEN(str,
len);
3459 if (independent) ruby_xfree(
ptr);
3462 else if (!independent) {
3463 if (
len == slen)
return str;
3464 str_make_independent_expand(str, slen,
len - slen, termlen);
3468 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3469 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3472 else if (
len == slen)
return str;
3473 STR_SET_LEN(str,
len);
3480str_ensure_available_capa(
VALUE str,
long len)
3482 str_modify_keep_cr(str);
3484 const int termlen = TERM_LEN(str);
3485 long olen = RSTRING_LEN(str);
3487 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3488 rb_raise(rb_eArgError,
"string sizes too big");
3491 long total = olen +
len;
3492 long capa = str_capacity(str, termlen);
3495 if (total >= LONG_MAX / 2) {
3498 while (total >
capa) {
3501 RESIZE_CAPA_TERM(str,
capa, termlen);
3506str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3509 str_modify_keep_cr(str);
3514 if (
len == 0)
return 0;
3516 long total, olen,
off = -1;
3518 const int termlen = TERM_LEN(str);
3521 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3525 long capa = str_capacity(str, termlen);
3527 if (olen > LONG_MAX -
len) {
3528 rb_raise(rb_eArgError,
"string sizes too big");
3532 if (total >= LONG_MAX / 2) {
3535 while (total >
capa) {
3538 RESIZE_CAPA_TERM(str,
capa, termlen);
3539 sptr = RSTRING_PTR(str);
3544 memcpy(sptr + olen,
ptr,
len);
3545 STR_SET_LEN(str, total);
3546 TERM_FILL(sptr + total, termlen);
/* Convenience wrappers around str_buf_cat4() that always pass keep_cr = false.
 * str_buf_cat2 takes its length from rb_strlen_lit(ptr), so ptr is presumably
 * expected to be a string literal -- TODO confirm against rb_strlen_lit. */
3551#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3552#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3557 if (
len == 0)
return str;
3559 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3561 return str_buf_cat(str,
ptr,
len);
3572rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3577 if (UNLIKELY(!str_independent(str))) {
3578 str_make_independent(str);
3581 long string_length = -1;
3582 const int null_terminator_length = 1;
3587 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3588 rb_raise(rb_eArgError,
"string sizes too big");
3591 long string_capacity = str_capacity(str, null_terminator_length);
3597 if (LIKELY(string_capacity >= string_length + 1)) {
3599 sptr[string_length] = byte;
3600 STR_SET_LEN(str, string_length + 1);
3601 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3605 str_buf_cat(str, (
char *)&
byte, 1);
3621 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3632rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3633 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3642 if (str_encindex == ptr_encindex) {
3644 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3648 str_enc = rb_enc_from_index(str_encindex);
3649 ptr_enc = rb_enc_from_index(ptr_encindex);
3650 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3653 if (RSTRING_LEN(str) == 0) {
3656 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3662 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3671 *ptr_cr_ret = ptr_cr;
3673 if (str_encindex != ptr_encindex &&
3676 str_enc = rb_enc_from_index(str_encindex);
3677 ptr_enc = rb_enc_from_index(ptr_encindex);
3682 res_encindex = str_encindex;
3687 res_encindex = str_encindex;
3691 res_encindex = ptr_encindex;
3696 res_encindex = str_encindex;
3703 res_encindex = str_encindex;
3709 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3711 str_buf_cat(str,
ptr,
len);
3717 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3724 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3734 if (rb_enc_asciicompat(enc)) {
3735 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3741 unsigned int c = (
unsigned char)*
ptr;
3742 int len = rb_enc_codelen(c, enc);
3743 rb_enc_mbcput(c, buf, enc);
3744 rb_enc_cr_str_buf_cat(str, buf,
len,
3757 if (str_enc_fastpath(str)) {
3761 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3767 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3778 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3794rb_str_concat_literals(
size_t num,
const VALUE *strary)
3798 unsigned long len = 1;
3803 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3805 str_enc_copy_direct(str, strary[0]);
3807 for (i = s; i < num; ++i) {
3808 const VALUE v = strary[i];
3812 if (encidx != ENCINDEX_US_ASCII) {
3814 rb_enc_set_index(str, encidx);
3827rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3829 str_modifiable(str);
3834 else if (argc > 1) {
3837 rb_enc_copy(arg_str, str);
3838 for (i = 0; i < argc; i++) {
3873rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3875 long needed_capacity = 0;
3879 for (
int index = 0; index < argc; index++) {
3880 VALUE obj = argv[index];
3888 needed_capacity += RSTRING_LEN(obj);
3893 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3900 str_ensure_available_capa(str, needed_capacity);
3903 for (
int index = 0; index < argc; index++) {
3904 VALUE obj = argv[index];
3909 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3910 char byte = (char)(
NUM2INT(obj) & 0xFF);
3924 rb_bug(
"append_as_bytes arguments should have been validated");
3928 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3929 TERM_FILL(sptr, TERM_LEN(str));
3934 for (
int index = 0; index < argc; index++) {
3935 VALUE obj = argv[index];
3952 rb_bug(
"append_as_bytes arguments should have been validated");
4031 if (rb_num_to_uint(str2, &code) == 0) {
4044 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4047 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4050 long pos = RSTRING_LEN(str1);
4055 switch (
len = rb_enc_codelen(code, enc)) {
4056 case ONIGERR_INVALID_CODE_POINT_VALUE:
4057 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4059 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4065 rb_enc_mbcput(code, buf, enc);
4066 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4067 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4069 rb_str_resize(str1, pos+
len);
4070 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4083rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4085 int encidx = rb_enc_to_index(enc);
4087 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4092 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4093 return ENCINDEX_ASCII_8BIT;
4115rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4117 str_modifiable(str);
4122 else if (argc > 1) {
4125 rb_enc_copy(arg_str, str);
4126 for (i = 0; i < argc; i++) {
4139 st_index_t precomputed_hash;
4140 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4142 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4143 return precomputed_hash;
4146 return str_do_hash(str);
4153 const char *ptr1, *ptr2;
4156 return (len1 != len2 ||
4158 memcmp(ptr1, ptr2, len1) != 0);
4170rb_str_hash_m(
VALUE str)
/* Yields b when a > b, otherwise a (i.e. the smaller of the two).
 * The selected argument is evaluated a second time, so do not pass
 * expressions with side effects. */
4176#define lesser(a,b) (((a)>(b))?(b):(a))
4184 if (RSTRING_LEN(str1) == 0)
return TRUE;
4185 if (RSTRING_LEN(str2) == 0)
return TRUE;
4188 if (idx1 == idx2)
return TRUE;
4193 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4197 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4207 const char *ptr1, *ptr2;
4210 if (str1 == str2)
return 0;
4213 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4222 if (len1 > len2)
return 1;
4225 if (retval > 0)
return 1;
4259 if (str1 == str2)
return Qtrue;
4266 return rb_str_eql_internal(str1, str2);
4280 if (str1 == str2)
return Qtrue;
4282 return rb_str_eql_internal(str1, str2);
4314 return rb_invcmp(str1, str2);
4356 return str_casecmp(str1, s);
4364 const char *p1, *p1end, *p2, *p2end;
4366 enc = rb_enc_compatible(str1, str2);
4371 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4372 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4373 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4374 while (p1 < p1end && p2 < p2end) {
4376 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4377 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4379 return INT2FIX(c1 < c2 ? -1 : 1);
4386 while (p1 < p1end && p2 < p2end) {
4387 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4388 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4390 if (0 <= c1 && 0 <= c2) {
4394 return INT2FIX(c1 < c2 ? -1 : 1);
4398 l1 = rb_enc_mbclen(p1, p1end, enc);
4399 l2 = rb_enc_mbclen(p2, p2end, enc);
4400 len = l1 < l2 ? l1 : l2;
4401 r = memcmp(p1, p2,
len);
4403 return INT2FIX(r < 0 ? -1 : 1);
4405 return INT2FIX(l1 < l2 ? -1 : 1);
4411 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4412 if (p1 == p1end)
return INT2FIX(-1);
4445 return str_casecmp_p(str1, s);
4452 VALUE folded_str1, folded_str2;
4453 VALUE fold_opt = sym_fold;
4455 enc = rb_enc_compatible(str1, str2);
4460 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4461 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4463 return rb_str_eql(folded_str1, folded_str2);
4467strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4468 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4470 const char *search_start = str_ptr;
4471 long pos, search_len = str_len - offset;
4475 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4476 if (pos < 0)
return pos;
4478 if (t == search_start + pos)
break;
4479 search_len -= t - search_start;
4480 if (search_len <= 0)
return -1;
4481 offset += t - search_start;
4484 return pos + offset;
/* Shorthands for rb_strseq_index(str, sub, offset, in_byte):
 * rb_str_index passes in_byte = 0 and rb_str_byteindex passes in_byte = 1. */
4488#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4489#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4492rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4494 const char *str_ptr, *str_ptr_end, *sub_ptr;
4495 long str_len, sub_len;
4498 enc = rb_enc_check(str, sub);
4499 if (is_broken_string(sub))
return -1;
4501 str_ptr = RSTRING_PTR(str);
4503 str_len = RSTRING_LEN(str);
4504 sub_ptr = RSTRING_PTR(sub);
4505 sub_len = RSTRING_LEN(sub);
4507 if (str_len < sub_len)
return -1;
4510 long str_len_char, sub_len_char;
4511 int single_byte = single_byte_optimizable(str);
4512 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4513 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4515 offset += str_len_char;
4516 if (offset < 0)
return -1;
4518 if (str_len_char - offset < sub_len_char)
return -1;
4519 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4522 if (sub_len == 0)
return offset;
4525 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4538rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4545 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4546 long slen = str_strlen(str, enc);
4548 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4560 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4561 enc, single_byte_optimizable(str));
4572 pos = rb_str_index(str, sub, pos);
4586str_ensure_byte_pos(
VALUE str,
long pos)
4588 if (!single_byte_optimizable(str)) {
4589 const char *s = RSTRING_PTR(str);
4591 const char *p = s + pos;
4592 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4594 "offset %ld does not land on character boundary", pos);
4667rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4673 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4674 long slen = RSTRING_LEN(str);
4676 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4687 str_ensure_byte_pos(str, pos);
4699 pos = rb_str_byteindex(str, sub, pos);
4700 if (pos >= 0)
return LONG2NUM(pos);
4707memrchr(
const char *search_str,
int chr,
long search_len)
4709 const char *ptr = search_str + search_len;
4710 while (ptr > search_str) {
4711 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4721 char *hit, *adjusted;
4723 long slen, searchlen;
4726 sbeg = RSTRING_PTR(str);
4727 slen = RSTRING_LEN(sub);
4728 if (slen == 0)
return s - sbeg;
4730 t = RSTRING_PTR(sub);
4732 searchlen = s - sbeg + 1;
4734 if (memcmp(s, t, slen) == 0) {
4739 hit = memrchr(sbeg, c, searchlen);
4742 if (hit != adjusted) {
4743 searchlen = adjusted - sbeg;
4746 if (memcmp(hit, t, slen) == 0)
4748 searchlen = adjusted - sbeg;
4749 }
while (searchlen > 0);
4763 enc = rb_enc_check(str, sub);
4764 if (is_broken_string(sub))
return -1;
4765 singlebyte = single_byte_optimizable(str);
4766 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4767 slen = str_strlen(sub, enc);
4770 if (
len < slen)
return -1;
4771 if (
len - pos < slen) pos =
len - slen;
4772 if (
len == 0)
return pos;
4774 sbeg = RSTRING_PTR(str);
4777 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4783 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4784 return str_rindex(str, sub, s, enc);
4796rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4801 long pos,
len = str_strlen(str, enc);
4803 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4805 if (pos < 0 && (pos +=
len) < 0) {
4811 if (pos >
len) pos =
len;
4819 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4820 enc, single_byte_optimizable(str));
4831 pos = rb_str_rindex(str, sub, pos);
4841rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4847 enc = rb_enc_check(str, sub);
4848 if (is_broken_string(sub))
return -1;
4849 len = RSTRING_LEN(str);
4850 slen = RSTRING_LEN(sub);
4853 if (
len < slen)
return -1;
4854 if (
len - pos < slen) pos =
len - slen;
4855 if (
len == 0)
return pos;
4857 sbeg = RSTRING_PTR(str);
4860 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4867 return str_rindex(str, sub, s, enc);
4957rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4961 long pos,
len = RSTRING_LEN(str);
4963 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4965 if (pos < 0 && (pos +=
len) < 0) {
4971 if (pos >
len) pos =
len;
4977 str_ensure_byte_pos(str, pos);
4989 pos = rb_str_byterindex(str, sub, pos);
4990 if (pos >= 0)
return LONG2NUM(pos);
5029 switch (OBJ_BUILTIN_TYPE(y)) {
5083rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5090 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5121rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5125 re = get_pat(argv[0]);
5126 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5135static enum neighbor_char
5141 if (rb_enc_mbminlen(enc) > 1) {
5143 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5145 return NEIGHBOR_NOT_CHAR;
5147 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5149 if (!l)
return NEIGHBOR_NOT_CHAR;
5150 if (l !=
len)
return NEIGHBOR_WRAPPED;
5151 rb_enc_mbcput(c, p, enc);
5152 r = rb_enc_precise_mbclen(p, p +
len, enc);
5154 return NEIGHBOR_NOT_CHAR;
5156 return NEIGHBOR_FOUND;
5159 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5162 return NEIGHBOR_WRAPPED;
5163 ++((
unsigned char*)p)[i];
5164 l = rb_enc_precise_mbclen(p, p+
len, enc);
5168 return NEIGHBOR_FOUND;
5171 memset(p+l, 0xff,
len-l);
5177 for (len2 =
len-1; 0 < len2; len2--) {
5178 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5182 memset(p+len2+1, 0xff,
len-(len2+1));
5187static enum neighbor_char
5192 if (rb_enc_mbminlen(enc) > 1) {
5194 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5196 return NEIGHBOR_NOT_CHAR;
5198 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5199 if (!c)
return NEIGHBOR_NOT_CHAR;
5202 if (!l)
return NEIGHBOR_NOT_CHAR;
5203 if (l !=
len)
return NEIGHBOR_WRAPPED;
5204 rb_enc_mbcput(c, p, enc);
5205 r = rb_enc_precise_mbclen(p, p +
len, enc);
5207 return NEIGHBOR_NOT_CHAR;
5209 return NEIGHBOR_FOUND;
5212 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5215 return NEIGHBOR_WRAPPED;
5216 --((
unsigned char*)p)[i];
5217 l = rb_enc_precise_mbclen(p, p+
len, enc);
5221 return NEIGHBOR_FOUND;
5224 memset(p+l, 0,
len-l);
5230 for (len2 =
len-1; 0 < len2; len2--) {
5231 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5235 memset(p+len2+1, 0,
len-(len2+1));
5249static enum neighbor_char
5250enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5252 enum neighbor_char ret;
5256 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5260 const int max_gaps = 1;
5262 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5264 ctype = ONIGENC_CTYPE_DIGIT;
5266 ctype = ONIGENC_CTYPE_ALPHA;
5268 return NEIGHBOR_NOT_CHAR;
5271 for (
try = 0;
try <= max_gaps; ++
try) {
5272 ret = enc_succ_char(p,
len, enc);
5273 if (ret == NEIGHBOR_FOUND) {
5274 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5276 return NEIGHBOR_FOUND;
5283 ret = enc_pred_char(p,
len, enc);
5284 if (ret == NEIGHBOR_FOUND) {
5285 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5298 return NEIGHBOR_NOT_CHAR;
5301 if (ctype != ONIGENC_CTYPE_DIGIT) {
5303 return NEIGHBOR_WRAPPED;
5307 enc_succ_char(carry,
len, enc);
5308 return NEIGHBOR_WRAPPED;
5326 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5327 rb_enc_cr_str_copy_for_substr(str, orig);
5328 return str_succ(str);
5335 char *sbeg, *s, *e, *last_alnum = 0;
5336 int found_alnum = 0;
5338 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5339 long carry_pos = 0, carry_len = 1;
5340 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5342 slen = RSTRING_LEN(str);
5343 if (slen == 0)
return str;
5345 enc = STR_ENC_GET(str);
5346 sbeg = RSTRING_PTR(str);
5347 s = e = sbeg + slen;
5349 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5350 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5356 l = rb_enc_precise_mbclen(s, e, enc);
5357 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5358 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5359 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5361 case NEIGHBOR_NOT_CHAR:
5363 case NEIGHBOR_FOUND:
5365 case NEIGHBOR_WRAPPED:
5370 carry_pos = s - sbeg;
5375 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5376 enum neighbor_char neighbor;
5377 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5378 l = rb_enc_precise_mbclen(s, e, enc);
5379 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5380 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5382 neighbor = enc_succ_char(tmp, l, enc);
5384 case NEIGHBOR_FOUND:
5388 case NEIGHBOR_WRAPPED:
5391 case NEIGHBOR_NOT_CHAR:
5394 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5396 enc_succ_char(s, l, enc);
5398 if (!rb_enc_asciicompat(enc)) {
5399 MEMCPY(carry, s,
char, l);
5402 carry_pos = s - sbeg;
5406 RESIZE_CAPA(str, slen + carry_len);
5407 sbeg = RSTRING_PTR(str);
5408 s = sbeg + carry_pos;
5409 memmove(s + carry_len, s, slen - carry_pos);
5410 memmove(s, carry, carry_len);
5412 STR_SET_LEN(str, slen);
5413 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5429rb_str_succ_bang(
VALUE str)
5437all_digits_p(
const char *s,
long len)
5491 VALUE end, exclusive;
5495 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5501 VALUE current, after_end;
5508 enc = rb_enc_check(beg, end);
5509 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5511 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5512 char c = RSTRING_PTR(beg)[0];
5513 char e = RSTRING_PTR(end)[0];
5515 if (c > e || (excl && c == e))
return beg;
5517 VALUE str = rb_enc_str_new(&c, 1, enc);
5519 if ((*each)(str, arg))
break;
5520 if (!excl && c == e)
break;
5522 if (excl && c == e)
break;
5527 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5528 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5529 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5534 b = rb_str_to_inum(beg, 10, FALSE);
5535 e = rb_str_to_inum(end, 10, FALSE);
5542 if (excl && bi == ei)
break;
5543 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5548 ID op = excl ?
'<' : idLE;
5549 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5554 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5555 b = rb_funcallv(b, succ, 0, 0);
5562 if (n > 0 || (excl && n == 0))
return beg;
5564 after_end = rb_funcallv(end, succ, 0, 0);
5569 next = rb_funcallv(current, succ, 0, 0);
5570 if ((*each)(current, arg))
break;
5571 if (
NIL_P(next))
break;
5575 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5590 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5591 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5592 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5594 b = rb_str_to_inum(beg, 10, FALSE);
5600 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5608 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5609 b = rb_funcallv(b, succ, 0, 0);
5615 VALUE next = rb_funcallv(current, succ, 0, 0);
5616 if ((*each)(current, arg))
break;
5619 if (RSTRING_LEN(current) == 0)
5630 if (!
rb_equal(str, *argp))
return 0;
5644 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5645 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5646 rb_enc_asciicompat(STR_ENC_GET(val))) {
5647 const char *bp = RSTRING_PTR(beg);
5648 const char *ep = RSTRING_PTR(end);
5649 const char *vp = RSTRING_PTR(val);
5650 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5651 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5659 if (b <= v && v < e)
return Qtrue;
5660 return RBOOL(!
RTEST(exclusive) && v == e);
5667 all_digits_p(bp, RSTRING_LEN(beg)) &&
5668 all_digits_p(ep, RSTRING_LEN(end))) {
5673 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5675 return RBOOL(
NIL_P(val));
5698 return rb_str_subpat(str, indx,
INT2FIX(0));
5701 if (rb_str_index(str, indx, 0) != -1)
5707 long beg,
len = str_strlen(str, NULL);
5719 return str_substr(str, idx, 1, FALSE);
5736rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5740 return rb_str_subpat(str, argv[0], argv[1]);
5743 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5747 return rb_str_aref(str, argv[0]);
5753 char *ptr = RSTRING_PTR(str);
5754 long olen = RSTRING_LEN(str), nlen;
5756 str_modifiable(str);
5757 if (
len > olen)
len = olen;
5759 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5761 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5763 ptr =
RSTRING(str)->as.embed.ary;
5764 memmove(ptr, oldptr +
len, nlen);
5765 if (fl == STR_NOEMBED)
xfree(oldptr);
5768 if (!STR_SHARED_P(str)) {
5770 rb_enc_cr_str_exact_copy(shared, str);
5775 STR_SET_LEN(str, nlen);
5777 if (!SHARABLE_MIDDLE_SUBSTRING) {
5778 TERM_FILL(ptr + nlen, TERM_LEN(str));
5785rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5791 if (beg == 0 && vlen == 0) {
5796 str_modify_keep_cr(str);
5800 RESIZE_CAPA(str, slen + vlen -
len);
5801 sptr = RSTRING_PTR(str);
5810 memmove(sptr + beg + vlen,
5812 slen - (beg +
len));
5814 if (vlen < beg &&
len < 0) {
5818 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5821 STR_SET_LEN(str, slen);
5822 TERM_FILL(&sptr[slen], TERM_LEN(str));
5829 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5838 int singlebyte = single_byte_optimizable(str);
5844 enc = rb_enc_check(str, val);
5845 slen = str_strlen(str, enc);
5847 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5856 if (
len > slen - beg) {
5859 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5864 beg = p - RSTRING_PTR(str);
5866 rb_str_update_0(str, beg,
len, val);
5867 rb_enc_associate(str, enc);
5878 long start, end,
len;
5888 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5892 nth += regs->num_regs;
5902 enc = rb_enc_check_str(str, val);
5903 rb_str_update_0(str, start,
len, val);
5904 rb_enc_associate(str, enc);
5912 switch (
TYPE(indx)) {
5914 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5918 beg = rb_str_index(str, indx, 0);
5957rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5961 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5969 return rb_str_aset(str, argv[0], argv[1]);
/*
 * argc/argv entry point that removes a slice from str in place.
 * Visible behaviour: str is prepared for modification first; several paths
 * compute a byte offset beg and length len for the slice and return Qnil
 * when the selector does not apply; the closing lines store the new length
 * and rewrite the terminator.
 * NOTE(review): many lines of the body are missing from this listing.
 */
6021rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6029 str_modify_keep_cr(str);
/* Capture-group selector: nth is rebased by the register count; a rebased
 * value <= 0, or an nth >= the register count, yields Qnil. */
6037 if ((nth += regs->num_regs) <= 0)
return Qnil;
6039 else if (nth >= regs->num_regs)
return Qnil;
6041 len = END(nth) - beg;
6044 else if (argc == 2) {
6053 beg = p - RSTRING_PTR(str);
/* Substring selector: located with rb_str_index(); a miss yields Qnil and
 * the slice length is the byte length of the selector. */
6057 beg = rb_str_index(str, indx, 0);
6058 if (beg == -1)
return Qnil;
6059 len = RSTRING_LEN(indx);
6071 beg = p - RSTRING_PTR(str);
6080 beg = p - RSTRING_PTR(str);
6084 rb_enc_cr_str_copy_for_substr(result, str);
/* Removal step on the receiver buffer. */
6092 char *sptr = RSTRING_PTR(str);
6093 long slen = RSTRING_LEN(str);
6094 if (beg +
len > slen)
6098 slen - (beg +
len));
6100 STR_SET_LEN(str, slen);
6101 TERM_FILL(&sptr[slen], TERM_LEN(str));
6112 switch (OBJ_BUILTIN_TYPE(pat)) {
/*
 * Normalises a search pattern argument.
 * Visible behaviour: dispatches on the builtin type of pat; when check is
 * nonzero, a pattern for which is_broken_string() is true is singled out
 * (the action taken is outside this excerpt).
 */
6131get_pat_quoted(
VALUE pat,
int check)
6135 switch (OBJ_BUILTIN_TYPE(pat)) {
6149 if (check && is_broken_string(pat)) {
/*
 * Searches str for pat starting at pos.
 * Two paths are visible:
 *  - a byte search through rb_str_byteindex() (presumably taken when pat is
 *    a plain string; TODO confirm, the guarding condition is not shown).
 *    When set_backref_str is nonzero, a frozen copy of str is recorded as
 *    the backref with the match position and RSTRING_LEN(pat), and the
 *    resulting match data is stored through match;
 *  - otherwise the search is delegated to rb_reg_search0() and its result
 *    is returned.
 */
6156rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6159 pos = rb_str_byteindex(str, pat, pos);
6160 if (set_backref_str) {
6162 str = rb_str_new_frozen_String(str);
6163 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6165 *match = match_data;
6175 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
/*
 * Convenience wrapper around rb_pat_search0() for callers that do not need
 * the match data: forwards all arguments and passes NULL as the match
 * output pointer, returning whatever rb_pat_search0() returns.
 */
6180rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6182 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
/*
 * In-place substitution of the first match of a pattern in str.
 * Visible steps: argv[1] may be converted to a hash of replacements; the
 * pattern comes from get_pat_quoted(argv[0], 1); str must be modifiable;
 * the first match is searched from offset 0 with backref recording.
 * The matched byte range starting at beg0 (plen bytes) is then replaced by
 * repl (rlen bytes) directly inside the receiver buffer.
 * NOTE(review): many lines of the body are missing from this listing.
 */
6200rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6214 hash = rb_check_hash_type(argv[1]);
6220 pat = get_pat_quoted(argv[0], 1);
6222 str_modifiable(str);
6223 beg = rb_pat_search(pat, str, 0, 1);
6237 end0 = beg0 + RSTRING_LEN(pat);
/* Block or hash replacement: pointer and length are remembered so that
 * str_mod_check() can detect a receiver changed while the replacement was
 * being computed; frozenness is checked again afterwards. */
6246 if (iter || !
NIL_P(hash)) {
6247 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6253 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6256 str_mod_check(str, p,
len);
6257 rb_check_frozen(str);
/* Encoding reconciliation between receiver and replacement. */
6263 enc = rb_enc_compatible(str, repl);
6266 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6270 rb_enc_inspect_name(str_enc),
6271 rb_enc_inspect_name(STR_ENC_GET(repl)));
6273 enc = STR_ENC_GET(repl);
6276 rb_enc_associate(str, enc);
/* Splice: resize, shift the tail after the match, copy repl in, then store
 * the length and rewrite the terminator. */
6286 rlen = RSTRING_LEN(repl);
6287 len = RSTRING_LEN(str);
6289 RESIZE_CAPA(str,
len + rlen - plen);
6291 p = RSTRING_PTR(str);
6293 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6295 rp = RSTRING_PTR(repl);
6296 memmove(p + beg0, rp, rlen);
6298 STR_SET_LEN(str,
len);
6299 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6322 rb_str_sub_bang(argc, argv, str);
/*
 * Shared worker for global substitution; bang is nonzero for the in-place
 * variant (rb_str_gsub_bang passes 1).
 * Visible structure: the replacement source is classified by the mode enum
 * (STR, ITER, FAST_MAP, MAP); matches are found repeatedly with
 * rb_pat_search0(), advancing offset past each match; the result is built
 * in dest and finally installed into str with str_shared_replace().
 * NOTE(review): many lines of the body are missing from this listing.
 */
6327str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6330 long beg, beg0, end0;
6331 long offset, blen, slen,
len, last;
6332 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
/* -1 means not decided yet; it is settled on first use further down. */
6334 int need_backref_str = -1;
6344 hash = rb_check_hash_type(argv[1]);
6348 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6356 rb_error_arity(argc, 1, 2);
6359 pat = get_pat_quoted(argv[0], 1);
6360 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
/* Presumably the no-match exit for the in-place variant; the enclosing
 * condition is not shown. */
6363 if (bang)
return Qnil;
6368 blen = RSTRING_LEN(str) + 30;
/* Pointer and length are remembered for the str_mod_check() call below. */
6370 sp = RSTRING_PTR(str);
6371 slen = RSTRING_LEN(str);
6373 str_enc = STR_ENC_GET(str);
6374 rb_enc_associate(dest, str_enc);
6381 end0 = beg0 + RSTRING_LEN(pat);
6395 struct RString fake_str = {RBASIC_INIT};
6397 if (mode == FAST_MAP) {
6406 val = rb_hash_aref(hash, key);
6409 str_mod_check(str, sp, slen);
6414 else if (need_backref_str) {
6416 if (need_backref_str < 0) {
6417 need_backref_str = val != repl;
6424 len = beg0 - offset;
/* Advance: stop at end of string, otherwise step one character (its byte
 * length measured by rb_enc_fast_mbclen) beyond end0. */
6438 if (RSTRING_LEN(str) <= end0)
break;
6439 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6441 offset = end0 +
len;
6443 cp = RSTRING_PTR(str) + offset;
6444 if (offset > RSTRING_LEN(str))
break;
6447 if (mode != FAST_MAP && mode != STR) {
6450 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6455 if (RSTRING_LEN(str) > offset) {
6458 rb_pat_search0(pat, str, last, 1, &match);
6460 str_shared_replace(str, dest);
/*
 * In-place global substitution entry point: prepares str for modification
 * with str_modify_keep_cr() and then returns the result of str_gsub()
 * called with bang set to 1.
 */
6485rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6487 str_modify_keep_cr(str);
6488 return str_gsub(argc, argv, str, 1);
6538 return str_gsub(argc, argv, str, 0);
6558 str_modifiable(str);
6559 if (str == str2)
return str;
6563 return str_replace(str, str2);
/*
 * Empties str: the visible lines set its length to 0 and store a zero byte
 * at the start of its buffer, then branch on whether the encoding of str is
 * ASCII compatible (the branch bodies are not part of this excerpt).
 */
6580rb_str_clear(
VALUE str)
6584 STR_SET_LEN(str, 0);
6585 RSTRING_PTR(str)[0] = 0;
6586 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6602rb_str_chr(
VALUE str)
6620 pos += RSTRING_LEN(str);
6621 if (pos < 0 || RSTRING_LEN(str) <= pos)
6624 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6644 long len = RSTRING_LEN(str);
6645 char *
ptr, *head, *left = 0;
6649 if (pos < -
len ||
len <= pos)
6656 char byte = (char)(
NUM2INT(w) & 0xFF);
6658 if (!str_independent(str))
6659 str_make_independent(str);
6660 enc = STR_ENC_GET(str);
6661 head = RSTRING_PTR(str);
6663 if (!STR_EMBED_P(str)) {
6670 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6678 width = rb_enc_precise_mbclen(left, head+
len, enc);
6680 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
/*
 * Returns the byte-indexed substring of str described by beg and len, or
 * Qnil when the request is out of range.
 * Visible rejections: beg greater than the byte length, negative len, a beg
 * that is still negative at the second check, and a case gated by the empty
 * flag (nonzero appears to allow an empty result where zero returns Qnil;
 * TODO confirm, the enclosing condition is not shown).
 * The substring is produced by str_subseq() and receives the encoding of
 * str via str_enc_copy_direct().
 */
6696str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6698 long n = RSTRING_LEN(str);
6700 if (beg > n ||
len < 0)
return Qnil;
6703 if (beg < 0)
return Qnil;
6708 if (!empty)
return Qnil;
6712 VALUE str2 = str_subseq(str, beg,
len);
6714 str_enc_copy_direct(str2, str);
/* Extra handling for an empty result in a non ASCII compatible encoding
 * (details not shown). */
6716 if (RSTRING_LEN(str2) == 0) {
6717 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6751 long beg,
len = RSTRING_LEN(str);
6759 return str_byte_substr(str, beg,
len, TRUE);
6764 return str_byte_substr(str, idx, 1, FALSE);
/*
 * argc/argv entry point for byte slicing.
 * Visible behaviour: one path returns str_byte_substr(str, beg, len, TRUE),
 * the other returns str_byte_aref(str, argv[0]); the condition choosing
 * between them is not part of this excerpt.
 */
6776rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6781 return str_byte_substr(str, beg,
len, TRUE);
6784 return str_byte_aref(str, argv[0]);
/*
 * Validates a byte range given through the in/out pointers beg and len
 * against the byte length of str.
 * Visible checks: a start beyond the end, or a negative start that remains
 * negative after adding the length, is singled out; a len that exceeds the
 * bytes remaining after *beg is singled out; finally both range boundaries
 * are passed to str_ensure_byte_pos().
 * NOTE(review): the actions inside the two conditionals are not shown.
 */
6788str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6790 long end, slen = RSTRING_LEN(str);
6793 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6802 if (*
len > slen - *beg) {
6806 str_ensure_byte_pos(str, *beg);
6807 str_ensure_byte_pos(str, end);
/*
 * argc/argv entry point for byte-range replacement inside str.
 * Accepts 2, 3 or 5 arguments; any other count raises ArgumentError.
 * Arguments expected to be ranges raise TypeError when they are not
 * (argv[0] in one form, argv[2] in another).
 * After the ranges of both str and val are validated with
 * str_check_beg_len(), str is prepared for modification, its encoding is
 * set to the one returned by rb_enc_check(str, val), and the splice itself
 * is done by rb_str_update_1().
 */
6821rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6823 long beg,
len, vbeg, vlen;
6828 if (!(argc == 2 || argc == 3 || argc == 5)) {
6829 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6833 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6834 rb_builtin_class_name(argv[0]));
6841 vlen = RSTRING_LEN(val);
6846 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6847 rb_builtin_class_name(argv[2]));
6859 vlen = RSTRING_LEN(val);
/* Validate both byte ranges before the receiver is touched. */
6867 str_check_beg_len(str, &beg, &
len);
6868 str_check_beg_len(val, &vbeg, &vlen);
6869 str_modify_keep_cr(str);
6872 rb_enc_associate(str, rb_enc_check(str, val));
6875 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
/*
 * Builds a reversed copy of str.
 * Strings of at most one byte are simply duplicated. Longer strings take
 * either a single-byte path (single_byte_optimizable) or character-wise
 * paths that measure each character with rb_enc_fast_mbclen() or
 * rb_enc_mbclen(). The result rev gets the byte length and the encoding of
 * str.
 * NOTE(review): the copy loops themselves are not part of this excerpt.
 */
6897rb_str_reverse(
VALUE str)
6904 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6905 enc = STR_ENC_GET(str);
6911 if (RSTRING_LEN(str) > 1) {
6912 if (single_byte_optimizable(str)) {
6919 int clen = rb_enc_fast_mbclen(s, e, enc);
6927 cr = rb_enc_asciicompat(enc) ?
6930 int clen = rb_enc_mbclen(s, e, enc);
6939 STR_SET_LEN(rev, RSTRING_LEN(str));
6940 str_enc_copy_direct(rev, str);
/*
 * Reverses str in place.
 * For strings longer than one byte: the single-byte case prepares str for
 * modification and works directly on its buffer; the other case replaces
 * the contents of str with the result of rb_str_reverse(str).
 * The last visible line is a further str_modify_keep_cr(str) call whose
 * enclosing branch is not shown.
 */
6962rb_str_reverse_bang(
VALUE str)
6964 if (RSTRING_LEN(str) > 1) {
6965 if (single_byte_optimizable(str)) {
6968 str_modify_keep_cr(str);
6969 s = RSTRING_PTR(str);
6978 str_shared_replace(str, rb_str_reverse(str));
6982 str_modify_keep_cr(str);
7011 i = rb_str_index(str, arg, 0);
7013 return RBOOL(i != -1);
7055 rb_raise(rb_eArgError,
"invalid radix %d", base);
7057 return rb_str_to_inum(str, base, FALSE);
7081rb_str_to_f(
VALUE str)
7096rb_str_to_s(
VALUE str)
7108 char s[RUBY_MAX_CHAR_LEN];
7109 int n = rb_enc_codelen(c, enc);
7111 rb_enc_mbcput(c, s, enc);
7116#define CHAR_ESC_LEN 13
/*
 * Formats code point c as an escape sequence into a local buffer.
 * Formats visible here: a literal character, backslash-u with four hex
 * digits (c below 0x10000), backslash-u with braces, backslash-x with two
 * hex digits, and backslash-x with braces. unicode_p presumably selects
 * the u forms over the x forms (the branch conditions are mostly elided).
 * Every snprintf() is bounded by CHAR_ESC_LEN while buf holds
 * CHAR_ESC_LEN + 1 bytes; l is the length of the formatted text.
 */
7119rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7121 char buf[CHAR_ESC_LEN + 1];
7129 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7131 else if (c < 0x10000) {
7132 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7135 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7140 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7143 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7146 l = (int)strlen(buf);
/*
 * Maps a control character to the text of its backslash escape.
 * Covered values: NUL, newline, carriage return, tab, form feed, vertical
 * tab (013), backspace (010), bell (007), escape (033) and DEL (0x7f).
 * What is returned for any other value is not part of this excerpt.
 */
7152ruby_escaped_char(
int c)
7155 case '\0':
return "\\0";
7156 case '\n':
return "\\n";
7157 case '\r':
return "\\r";
7158 case '\t':
return "\\t";
7159 case '\f':
return "\\f";
7160 case '\013':
return "\\v";
7161 case '\010':
return "\\b";
7162 case '\007':
return "\\a";
7163 case '\033':
return "\\e";
7164 case '\x7f':
return "\\c?";
/*
 * Produces an escaped rendering of str in result.
 * Scanning scheme visible here: prev marks the start of the run of bytes
 * not yet copied; whenever a character needs escaping, the pending run
 * between prev and the character is appended first, then the escape text.
 * NOTE(review): loop headers and several branch conditions are missing from
 * this listing.
 */
7170rb_str_escape(
VALUE str)
7174 const char *p = RSTRING_PTR(str);
7176 const char *prev = p;
7177 char buf[CHAR_ESC_LEN + 1];
7179 int unicode_p = rb_enc_unicode_p(enc);
7180 int asciicompat = rb_enc_asciicompat(enc);
7185 int n = rb_enc_precise_mbclen(p, pend, enc);
/* Byte-wise fallback: flush the pending run, then emit the byte as a
 * backslash-x escape with two hex digits. */
7187 if (p > prev) str_buf_cat(result, prev, p - prev);
7188 n = rb_enc_mbminlen(enc);
7190 n = (int)(pend - p);
7192 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7193 str_buf_cat(result, buf, strlen(buf));
7199 c = rb_enc_mbc_to_codepoint(p, pend, enc);
/* Named escape available from ruby_escaped_char(). */
7201 cc = ruby_escaped_char(c);
7203 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7204 str_buf_cat(result, cc, strlen(cc));
7207 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
/* Generic numeric escape. */
7210 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7211 rb_str_buf_cat_escaped_char(result, c, unicode_p);
/* Flush whatever run is still pending. */
7215 if (p > prev) str_buf_cat(result, prev, p - prev);
7234 const char *p, *pend, *prev;
7235 char buf[CHAR_ESC_LEN + 1];
7237 rb_encoding *resenc = rb_default_internal_encoding();
7238 int unicode_p = rb_enc_unicode_p(enc);
7239 int asciicompat = rb_enc_asciicompat(enc);
7241 if (resenc == NULL) resenc = rb_default_external_encoding();
7242 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7243 rb_enc_associate(result, resenc);
7244 str_buf_cat2(result,
"\"");
7252 n = rb_enc_precise_mbclen(p, pend, enc);
7254 if (p > prev) str_buf_cat(result, prev, p - prev);
7255 n = rb_enc_mbminlen(enc);
7257 n = (int)(pend - p);
7259 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7260 str_buf_cat(result, buf, strlen(buf));
7266 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7268 if ((asciicompat || unicode_p) &&
7269 (c ==
'"'|| c ==
'\\' ||
7274 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7275 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7276 str_buf_cat2(result,
"\\");
7277 if (asciicompat || enc == resenc) {
7283 case '\n': cc =
'n';
break;
7284 case '\r': cc =
'r';
break;
7285 case '\t': cc =
't';
break;
7286 case '\f': cc =
'f';
break;
7287 case '\013': cc =
'v';
break;
7288 case '\010': cc =
'b';
break;
7289 case '\007': cc =
'a';
break;
7290 case 033: cc =
'e';
break;
7291 default: cc = 0;
break;
7294 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7297 str_buf_cat(result, buf, 2);
7310 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7314 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7315 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7320 if (p > prev) str_buf_cat(result, prev, p - prev);
7321 str_buf_cat2(result,
"\"");
7326#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7339 int encidx = rb_enc_get_index(str);
7342 const char *p, *pend;
7345 int u8 = (encidx == rb_utf8_encindex());
7346 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7349 if (!rb_enc_asciicompat(enc)) {
7351 len += strlen(enc->name);
7354 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7357 unsigned char c = *p++;
7360 case '"':
case '\\':
7361 case '\n':
case '\r':
7362 case '\t':
case '\f':
7363 case '\013':
case '\010':
case '\007':
case '\033':
7368 clen = IS_EVSTR(p, pend) ? 2 : 1;
7376 if (u8 && c > 0x7F) {
7377 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7379 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7382 else if (cc <= 0xFFFFF)
7395 if (clen > LONG_MAX -
len) {
7402 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7403 q = RSTRING_PTR(result); qend = q +
len + 1;
7407 unsigned char c = *p++;
7409 if (c ==
'"' || c ==
'\\') {
7413 else if (c ==
'#') {
7414 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7417 else if (c ==
'\n') {
7421 else if (c ==
'\r') {
7425 else if (c ==
'\t') {
7429 else if (c ==
'\f') {
7433 else if (c ==
'\013') {
7437 else if (c ==
'\010') {
7441 else if (c ==
'\007') {
7445 else if (c ==
'\033') {
7455 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7457 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7460 snprintf(q, qend-q,
"u%04X", cc);
7462 snprintf(q, qend-q,
"u{%X}", cc);
7467 snprintf(q, qend-q,
"x%02X", c);
7473 if (!rb_enc_asciicompat(enc)) {
7474 snprintf(q, qend-q, nonascii_suffix, enc->name);
7475 encidx = rb_ascii8bit_encindex();
7478 rb_enc_associate_index(result, encidx);
7484unescape_ascii(
unsigned int c)
/*
 * Decodes the escape sequence that follows a backslash in a dumped string
 * and works on undumped. *ss is the read position (copied into s on entry).
 * Visible behaviour: simple escapes are translated by unescape_ascii();
 * one path lazily obtains the UTF-8 encoding and associates it with
 * undumped when *penc differs; hex digit runs of length 0 or more than 6
 * are singled out; code points in the surrogate range 0xd800..0xdfff are
 * singled out; accepted code points are encoded into buf with
 * rb_enc_mbcput() using *penc.
 * NOTE(review): the actions taken in the singled-out cases and the roles of
 * utf8 and binary are not visible in this excerpt.
 */
7508undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7510 const char *s = *ss;
7514 unsigned char buf[6];
7532 *buf = unescape_ascii(*s);
7544 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7545 if (*penc != enc_utf8) {
7547 rb_enc_associate(undumped, enc_utf8);
7564 if (hexlen == 0 || hexlen > 6) {
7570 if (0xd800 <= c && c <= 0xdfff) {
7573 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7583 if (0xd800 <= c && c <= 0xdfff) {
7586 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7614static VALUE rb_str_is_ascii_only_p(
VALUE str);
/*
 * Parses the dumped form of a string and rebuilds the original in undumped.
 * Visible validation: the input is tested for being ASCII only and for
 * embedded NUL (str_null_check); it must be at least two bytes long and
 * start with a double quote. After the quoted part an optional .dup suffix
 * is skipped and a force_encoding suffix is parsed: the encoding name runs
 * up to the next double quote, which must be followed by exactly a closing
 * parenthesis at the end of input. Backslash sequences are handed to
 * undump_after_backslash(). Malformed input jumps to invalid_format, which
 * raises RuntimeError.
 * NOTE(review): many lines of the body are missing from this listing.
 */
7632str_undump(
VALUE str)
7634 const char *s = RSTRING_PTR(str);
7637 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7639 bool binary =
false;
7643 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7646 if (!str_null_check(str, &w)) {
7649 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7650 if (*s !=
'"')
goto invalid_format;
7668 static const char force_encoding_suffix[] =
".force_encoding(\"";
7669 static const char dup_suffix[] =
".dup";
7670 const char *encname;
/* Optional .dup suffix: skipped only when more input follows it. */
7675 size =
sizeof(dup_suffix) - 1;
7676 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
/* Mandatory force_encoding prefix of the suffix. */
7678 size =
sizeof(force_encoding_suffix) - 1;
7679 if (s_end - s <= size)
goto invalid_format;
7680 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7684 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
/* Locate the quote that ends the encoding name; exactly two bytes (quote
 * and closing parenthesis) must remain. */
7688 s = memchr(s,
'"', s_end-s);
7690 if (!s)
goto invalid_format;
7691 if (s_end - s != 2)
goto invalid_format;
7692 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7694 encidx = rb_enc_find_index2(encname, (
long)size);
7698 rb_enc_associate_index(undumped, encidx);
7708 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7719 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7725 if (rb_enc_dummy_p(enc)) {
/*
 * Obtains the encoding of str for the case-mapping callers in this file.
 * The only visible statement passes that encoding to
 * rb_str_check_dummy_enc(); how enc is obtained and returned is not part of
 * this excerpt.
 */
7732str_true_enc(
VALUE str)
7735 rb_str_check_dummy_enc(enc);
/*
 * Translates the option symbols passed to a case-mapping method into
 * OnigCaseFoldType bits added to flags, and returns the resulting flags.
 * Visible rules:
 *  - sym_turkic sets ONIGENC_CASE_FOLD_TURKISH_AZERI; its second option may
 *    only be sym_lithuanian;
 *  - sym_lithuanian sets ONIGENC_CASE_FOLD_LITHUANIAN; its second option
 *    may only be sym_turkic;
 *  - sym_ascii sets ONIGENC_CASE_ASCII_ONLY;
 *  - sym_fold is accepted only when flags request downcasing alone; the
 *    xor then swaps ONIGENC_CASE_DOWNCASE for ONIGENC_CASE_FOLD;
 *  - anything else raises ArgumentError.
 * NOTE(review): the argc tests guarding the two "too many options" raises
 * are not part of this excerpt.
 */
7739static OnigCaseFoldType
7740check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7745 rb_raise(rb_eArgError,
"too many options");
7746 if (argv[0]==sym_turkic) {
7747 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7749 if (argv[1]==sym_lithuanian)
7750 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7752 rb_raise(rb_eArgError,
"invalid second option");
7755 else if (argv[0]==sym_lithuanian) {
7756 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7758 if (argv[1]==sym_turkic)
7759 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7761 rb_raise(rb_eArgError,
"invalid second option");
7765 rb_raise(rb_eArgError,
"too many options");
7766 else if (argv[0]==sym_ascii)
7767 flags |= ONIGENC_CASE_ASCII_ONLY;
7768 else if (argv[0]==sym_fold) {
7769 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7770 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7772 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7775 rb_raise(rb_eArgError,
"invalid option");
7782 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
7788#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7789#ifndef CASEMAP_DEBUG
7790# define CASEMAP_DEBUG 0
7798 OnigUChar space[FLEX_ARY_LEN];
/*
 * Releases a singly linked chain of mapping buffers.
 * Each node is remembered, the cursor moves to its next link, and the
 * remembered node is released with ruby_sized_xfree() using its capa field
 * as the size argument. How the cursor is initialised from p is not part
 * of this excerpt.
 */
7802mapping_buffer_free(
void *p)
7806 while (current_buffer) {
7807 previous_buffer = current_buffer;
7808 current_buffer = current_buffer->next;
7809 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7815 {0, mapping_buffer_free,},
7816 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
/*
 * Body of the encoding-aware case mapping routine (the callers in this file
 * invoke it as rb_str_casemap(str, &flags, enc); its signature line is not
 * part of this listing).
 *
 * Visible algorithm: an empty source is simply duplicated. Otherwise the
 * source is converted chunk by chunk through enc->case_map() into a chain
 * of mapping buffers anchored in buffer_anchor; every additional buffer is
 * sized as (remaining source bytes) * (buffer ordinal) plus
 * CASE_MAPPING_ADDITIONAL_LENGTH. A negative result from case_map frees the
 * chain and raises ArgumentError. With a single buffer the target string is
 * created straight from it; otherwise the buffers are concatenated into
 * target with memcpy. The chain is then freed and target receives the
 * encoding of source.
 * NOTE(review): several lines (allocation of each buffer, creation of the
 * multi-buffer target, the return) are missing from this listing.
 */
7824 const OnigUChar *source_current, *source_end;
7825 int target_length = 0;
7826 VALUE buffer_anchor;
7829 size_t buffer_count = 0;
7830 int buffer_length_or_invalid;
7832 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7834 source_current = (OnigUChar*)RSTRING_PTR(source);
7839 while (source_current < source_end) {
7841 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7842 if (CASEMAP_DEBUG) {
7843 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
/* Append the new buffer to the chain: store it in the slot pre_buffer
 * points at, then make pre_buffer point at the next link of the new
 * buffer so the following buffer is appended after it. */
7846 *pre_buffer = current_buffer;
7847 pre_buffer = &current_buffer->next;
7848 current_buffer->next = NULL;
7849 current_buffer->capa =
capa;
7850 buffer_length_or_invalid = enc->case_map(flags,
7851 &source_current, source_end,
7852 current_buffer->space,
7853 current_buffer->space+current_buffer->capa,
/* Invalid input: release the whole chain before raising. */
7855 if (buffer_length_or_invalid < 0) {
7856 current_buffer =
DATA_PTR(buffer_anchor);
7858 mapping_buffer_free(current_buffer);
7859 rb_raise(rb_eArgError,
"input string invalid");
7861 target_length += current_buffer->used = buffer_length_or_invalid;
7863 if (CASEMAP_DEBUG) {
7864 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
/* Single buffer: build the result directly from it. */
7867 if (buffer_count==1) {
7868 target =
rb_str_new((
const char*)current_buffer->space, target_length);
/* Several buffers: concatenate their used parts in chain order. */
7871 char *target_current;
7874 target_current = RSTRING_PTR(target);
7875 current_buffer =
DATA_PTR(buffer_anchor);
7876 while (current_buffer) {
7877 memcpy(target_current, current_buffer->space, current_buffer->used);
7878 target_current += current_buffer->used;
7879 current_buffer = current_buffer->next;
7882 current_buffer =
DATA_PTR(buffer_anchor);
7884 mapping_buffer_free(current_buffer);
7889 str_enc_copy_direct(target, source);
/*
 * Body of the ASCII-only case mapping routine (named rb_str_ascii_casemap
 * in its own diagnostic messages; the signature line is not part of this
 * listing).
 * Visible behaviour: an empty source returns Qnil. When source and target
 * are the same object the mapping is done in place over the source bytes,
 * otherwise it writes into the buffer of target. The work is done by
 * onigenc_ascii_only_case_map(); a negative result raises ArgumentError.
 * With CASEMAP_DEBUG enabled, a result length different from the input
 * length is reported on stderr and raised as an internal problem. Finally
 * target receives the encoding of source through str_enc_copy().
 */
7898 const OnigUChar *source_current, *source_end;
7899 OnigUChar *target_current, *target_end;
7900 long old_length = RSTRING_LEN(source);
7901 int length_or_invalid;
7903 if (old_length == 0)
return Qnil;
7905 source_current = (OnigUChar*)RSTRING_PTR(source);
7907 if (source == target) {
7908 target_current = (OnigUChar*)source_current;
7909 target_end = (OnigUChar*)source_end;
7912 target_current = (OnigUChar*)RSTRING_PTR(target);
7916 length_or_invalid = onigenc_ascii_only_case_map(flags,
7917 &source_current, source_end,
7918 target_current, target_end, enc);
7919 if (length_or_invalid < 0)
7920 rb_raise(rb_eArgError,
"input string invalid");
7921 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7922 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7923 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7924 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7925 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7928 str_enc_copy(target, source);
/*
 * Single-byte upcasing helper working directly on the bytes of str.
 * Each byte in the ASCII range a..z is overwritten with the matching byte
 * in A..Z. A modified flag starts out false; callers in this file test the
 * return value as a boolean (the lines that set and return the flag are not
 * part of this excerpt).
 */
7934upcase_single(
VALUE str)
7936 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7937 bool modified =
false;
7940 unsigned int c = *(
unsigned char*)s;
7942 if (
'a' <= c && c <=
'z') {
7943 *s =
'A' + (c -
'a');
/*
 * In-place upcase entry point.
 * Options are parsed with check_case_options() starting from
 * ONIGENC_CASE_UPCASE, and str is prepared for modification. Three
 * strategies follow: the single-byte helper (which sets
 * ONIGENC_CASE_MODIFIED when it changed something), the ASCII-only mapper
 * run in place, or the general mapper whose result replaces the contents of
 * str. str is returned when ONIGENC_CASE_MODIFIED ended up set; the value
 * returned otherwise is not part of this excerpt.
 */
7971rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7974 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7976 flags = check_case_options(argc, argv, flags);
7977 str_modify_keep_cr(str);
7978 enc = str_true_enc(str);
7979 if (case_option_single_p(flags, enc, str)) {
7980 if (upcase_single(str))
7981 flags |= ONIGENC_CASE_MODIFIED;
7983 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7984 rb_str_ascii_casemap(str, str, &flags, enc);
7986 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7988 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive upcase entry point; the result is built in ret.
 * Options are parsed with check_case_options() starting from
 * ONIGENC_CASE_UPCASE. In the single-byte case ret starts as a byte copy of
 * str carrying the same encoding; in the ASCII-only case the mapper writes
 * from str into ret; otherwise ret is whatever rb_str_casemap() returns.
 * NOTE(review): some statements of each branch are missing from this
 * listing.
 */
8010rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
8013 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8016 flags = check_case_options(argc, argv, flags);
8017 enc = str_true_enc(str);
8018 if (case_option_single_p(flags, enc, str)) {
8019 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8020 str_enc_copy_direct(ret, str);
8023 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8025 rb_str_ascii_casemap(str, ret, &flags, enc);
8028 ret = rb_str_casemap(str, &flags, enc);
/*
 * Single-byte downcasing helper working directly on the bytes of str.
 * Each byte in the ASCII range A..Z is overwritten with the matching byte
 * in a..z. A modified flag starts out false; one caller in this file tests
 * the return value as a boolean (the lines that set and return the flag are
 * not part of this excerpt).
 */
8035downcase_single(
VALUE str)
8037 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8038 bool modified =
false;
8041 unsigned int c = *(
unsigned char*)s;
8043 if (
'A' <= c && c <=
'Z') {
8044 *s =
'a' + (c -
'A');
/*
 * In-place downcase entry point.
 * Options are parsed with check_case_options() starting from
 * ONIGENC_CASE_DOWNCASE, and str is prepared for modification. Three
 * strategies follow: the single-byte helper (which sets
 * ONIGENC_CASE_MODIFIED when it changed something), the ASCII-only mapper
 * run in place, or the general mapper whose result replaces the contents of
 * str. str is returned when ONIGENC_CASE_MODIFIED ended up set; the value
 * returned otherwise is not part of this excerpt.
 */
8066rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8069 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8071 flags = check_case_options(argc, argv, flags);
8072 str_modify_keep_cr(str);
8073 enc = str_true_enc(str);
8074 if (case_option_single_p(flags, enc, str)) {
8075 if (downcase_single(str))
8076 flags |= ONIGENC_CASE_MODIFIED;
8078 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8079 rb_str_ascii_casemap(str, str, &flags, enc);
8081 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8083 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive downcase entry point; the result is built in ret.
 * Options are parsed with check_case_options() starting from
 * ONIGENC_CASE_DOWNCASE. In the single-byte case ret is a byte copy of str
 * with the same encoding that is then downcased by downcase_single(); in
 * the ASCII-only case the mapper writes from str into ret; otherwise ret is
 * whatever rb_str_casemap() returns.
 */
8097rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8100 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8103 flags = check_case_options(argc, argv, flags);
8104 enc = str_true_enc(str);
8105 if (case_option_single_p(flags, enc, str)) {
8106 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8107 str_enc_copy_direct(ret, str);
8108 downcase_single(ret);
8110 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8112 rb_str_ascii_casemap(str, ret, &flags, enc);
8115 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place capitalize entry point.
 * Options are parsed starting from ONIGENC_CASE_UPCASE combined with
 * ONIGENC_CASE_TITLECASE, and str is prepared for modification. An empty
 * string (or one without a buffer) returns Qnil. The ASCII-only mapper runs
 * in place when requested; otherwise the general mapper result replaces the
 * contents of str. str is returned when ONIGENC_CASE_MODIFIED ended up set;
 * the value returned otherwise is not part of this excerpt.
 */
8135rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8138 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8140 flags = check_case_options(argc, argv, flags);
8141 str_modify_keep_cr(str);
8142 enc = str_true_enc(str);
8143 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8144 if (flags&ONIGENC_CASE_ASCII_ONLY)
8145 rb_str_ascii_casemap(str, str, &flags, enc);
8147 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8149 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive capitalize entry point; the result is built in ret.
 * Options are parsed starting from ONIGENC_CASE_UPCASE combined with
 * ONIGENC_CASE_TITLECASE. An empty string (or one without a buffer) is
 * returned as is. In the ASCII-only case the mapper writes from str into
 * ret; otherwise ret is whatever rb_str_casemap() returns.
 */
8182rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8185 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8188 flags = check_case_options(argc, argv, flags);
8189 enc = str_true_enc(str);
8190 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8191 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8193 rb_str_ascii_casemap(str, ret, &flags, enc);
8196 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place swapcase entry point.
 * Options are parsed starting from ONIGENC_CASE_UPCASE combined with
 * ONIGENC_CASE_DOWNCASE, and str is prepared for modification. The
 * ASCII-only mapper runs in place when requested; otherwise the general
 * mapper result replaces the contents of str. str is returned when
 * ONIGENC_CASE_MODIFIED ended up set; the value returned otherwise is not
 * part of this excerpt.
 */
8215rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8218 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8220 flags = check_case_options(argc, argv, flags);
8221 str_modify_keep_cr(str);
8222 enc = str_true_enc(str);
8223 if (flags&ONIGENC_CASE_ASCII_ONLY)
8224 rb_str_ascii_casemap(str, str, &flags, enc);
8226 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8228 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive swapcase entry point; the result is built in ret.
 * Options are parsed starting from ONIGENC_CASE_UPCASE combined with
 * ONIGENC_CASE_DOWNCASE. An empty string (or one without a buffer) yields a
 * duplicate of str. In the ASCII-only case the mapper writes from str into
 * ret; otherwise ret is whatever rb_str_casemap() returns.
 */
8242rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8245 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8248 flags = check_case_options(argc, argv, flags);
8249 enc = str_true_enc(str);
8250 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8251 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8253 rb_str_ascii_casemap(str, ret, &flags, enc);
8256 ret = rb_str_casemap(str, &flags, enc);
8261typedef unsigned char *USTR;
8265 unsigned int now, max;
8277 if (t->p == t->pend)
return -1;
8278 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8281 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8283 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8285 if (t->p < t->pend) {
8286 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8289 if (t->now < 0x80 && c < 0x80) {
8290 rb_raise(rb_eArgError,
8291 "invalid range \"%c-%c\" in string transliteration",
8295 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8299 else if (t->now < c) {
8308 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8309 if (t->now == t->max) {
8314 if (t->now < t->max) {
8330 const unsigned int errc = -1;
8331 unsigned int trans[256];
8333 struct tr trsrc, trrepl;
8335 unsigned int c, c0, last = 0;
8336 int modify = 0, i, l;
8337 unsigned char *s, *send;
8339 int singlebyte = single_byte_optimizable(str);
8343#define CHECK_IF_ASCII(c) \
8344 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8345 (cr = ENC_CODERANGE_VALID) : 0)
8349 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8350 if (RSTRING_LEN(repl) == 0) {
8351 return rb_str_delete_bang(1, &src, str);
8355 e1 = rb_enc_check(str, src);
8356 e2 = rb_enc_check(str, repl);
8361 enc = rb_enc_check(src, repl);
8363 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8364 if (RSTRING_LEN(src) > 1 &&
8365 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8366 trsrc.p + l < trsrc.pend) {
8370 trrepl.p = RSTRING_PTR(repl);
8371 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8372 trsrc.gen = trrepl.gen = 0;
8373 trsrc.now = trrepl.now = 0;
8374 trsrc.max = trrepl.max = 0;
8377 for (i=0; i<256; i++) {
8380 while ((c = trnext(&trsrc, enc)) != errc) {
8385 if (!hash) hash = rb_hash_new();
8389 while ((c = trnext(&trrepl, enc)) != errc)
8392 for (i=0; i<256; i++) {
8393 if (trans[i] != errc) {
8401 for (i=0; i<256; i++) {
8404 while ((c = trnext(&trsrc, enc)) != errc) {
8405 r = trnext(&trrepl, enc);
8406 if (r == errc) r = trrepl.now;
8409 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8412 if (!hash) hash = rb_hash_new();
8420 str_modify_keep_cr(str);
8421 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8422 termlen = rb_enc_mbminlen(enc);
8425 long offset, max = RSTRING_LEN(str);
8426 unsigned int save = -1;
8427 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8432 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8435 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8438 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8440 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8449 if (cflag) c = last;
8452 else if (cflag) c = errc;
8458 if (c != (
unsigned int)-1) {
8464 tlen = rb_enc_codelen(c, enc);
8470 if (enc != e1) may_modify = 1;
8472 if ((offset = t - buf) + tlen > max) {
8473 size_t MAYBE_UNUSED(old) = max + termlen;
8474 max = offset + tlen + (send - s);
8475 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8478 rb_enc_mbcput(c, t, enc);
8479 if (may_modify && memcmp(s, t, tlen) != 0) {
8485 if (!STR_EMBED_P(str)) {
8486 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8488 TERM_FILL((
char *)t, termlen);
8489 RSTRING(str)->as.heap.ptr = (
char *)buf;
8490 STR_SET_LEN(str, t - buf);
8491 STR_SET_NOEMBED(str);
8492 RSTRING(str)->as.heap.aux.capa = max;
8496 c = (
unsigned char)*s;
8497 if (trans[c] != errc) {
8514 long offset, max = (long)((send - s) * 1.2);
8515 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8520 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8523 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8526 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8528 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8536 if (cflag) c = last;
8539 else if (cflag) c = errc;
8543 c = cflag ? last : errc;
8546 tlen = rb_enc_codelen(c, enc);
8551 if (enc != e1) may_modify = 1;
8553 if ((offset = t - buf) + tlen > max) {
8554 size_t MAYBE_UNUSED(old) = max + termlen;
8555 max = offset + tlen + (long)((send - s) * 1.2);
8556 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8560 rb_enc_mbcput(c, t, enc);
8561 if (may_modify && memcmp(s, t, tlen) != 0) {
8569 if (!STR_EMBED_P(str)) {
8570 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8572 TERM_FILL((
char *)t, termlen);
8573 RSTRING(str)->as.heap.ptr = (
char *)buf;
8574 STR_SET_LEN(str, t - buf);
8575 STR_SET_NOEMBED(str);
8576 RSTRING(str)->as.heap.aux.capa = max;
8582 rb_enc_associate(str, enc);
8601 return tr_trans(str, src, repl, 0);
8648 tr_trans(str, src, repl, 0);
8652#define TR_TABLE_MAX (UCHAR_MAX+1)
8653#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
/*
 * Folds the character specification str into the membership table stable.
 * stable has TR_TABLE_MAX byte slots plus one extra slot at index
 * TR_TABLE_MAX that carries a flag. A specification longer than one byte
 * that starts with a caret is detected (cflag; presumably negation, the
 * assignment is not shown). Code points produced by trnext() that are
 * below TR_TABLE_MAX are recorded in the local buf as !cflag; larger ones
 * are recorded as keys of a hash table. At the end every byte slot of
 * stable is ANDed with the corresponding slot of buf.
 * NOTE(review): the parameter list is truncated in this listing (callers
 * pass three further arguments) and many body lines are missing.
 */
8655tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8658 const unsigned int errc = -1;
8659 char buf[TR_TABLE_MAX];
8662 VALUE table = 0, ptable = 0;
8663 int i, l, cflag = 0;
8665 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8666 tr.gen =
tr.now =
tr.max = 0;
8668 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8673 for (i=0; i<TR_TABLE_MAX; i++) {
8676 stable[TR_TABLE_MAX] = cflag;
8678 else if (stable[TR_TABLE_MAX] && !cflag) {
8679 stable[TR_TABLE_MAX] = 0;
8681 for (i=0; i<TR_TABLE_MAX; i++) {
/* Walk the expanded specification until trnext() reports the end. */
8685 while ((c = trnext(&
tr, enc)) != errc) {
8686 if (c < TR_TABLE_MAX) {
8687 buf[(
unsigned char)c] = !cflag;
8692 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8695 table = ptable ? ptable : rb_hash_new();
8699 table = rb_hash_new();
8704 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8705 rb_hash_aset(table, key,
Qtrue);
/* Intersect this specification with what earlier ones left in stable. */
8709 for (i=0; i<TR_TABLE_MAX; i++) {
8710 stable[i] = stable[i] && buf[i];
8712 if (!table && !cflag) {
/*
 * Tests whether code point c is selected by the tables built by
 * tr_setup_table().
 * Code points below TR_TABLE_MAX are answered from the byte table alone.
 * For larger code points the del and nodel hashes are consulted: one branch
 * applies when c is present in del and nodel is unset or does not contain
 * it, another when nodel is set and contains c (the values returned inside
 * those branches are not shown). The final fallback is the flag stored in
 * the extra slot table[TR_TABLE_MAX].
 */
8719tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8721 if (c < TR_TABLE_MAX) {
8722 return table[c] != 0;
8728 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8729 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8733 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8736 return table[TR_TABLE_MAX] ? TRUE : FALSE;
/*
 * In-place deletion of the characters selected by the argument
 * specifications.
 * An empty receiver returns Qnil. Every argument is encoding-checked
 * against str and folded into the selection tables by tr_setup_table()
 * (the first one with first set). The string is then compacted in place
 * with a read cursor s and a write cursor t: bytes below 0x80 take a fast
 * path in ASCII compatible encodings, other characters are decoded with
 * rb_enc_codepoint_len() and tested with tr_find(); kept characters are
 * written back at t when the cursors differ. Finally the terminator is
 * written at t and the length is set to the distance of t from the start.
 * str is returned when something was modified; the value returned otherwise
 * is not part of this excerpt.
 */
8751rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8753 char squeez[TR_TABLE_SIZE];
8756 VALUE del = 0, nodel = 0;
8758 int i, ascompat, cr;
8760 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8762 for (i=0; i<argc; i++) {
8766 enc = rb_enc_check(str, s);
8767 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8770 str_modify_keep_cr(str);
8771 ascompat = rb_enc_asciicompat(enc);
8772 s = t = RSTRING_PTR(str);
8779 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8790 c = rb_enc_codepoint_len(s, send, &clen, enc);
8792 if (tr_find(c, squeez, del, nodel)) {
8796 if (t != s) rb_enc_mbcput(c, t, enc);
8803 TERM_FILL(t, TERM_LEN(str));
8804 STR_SET_LEN(str, t - RSTRING_PTR(str));
8807 if (modify)
return str;
/*
 * Deletion entry point that delegates to rb_str_delete_bang() with the same
 * argc and argv. Whether str at that point is the receiver or a copy of
 * it, and what is returned, cannot be told from this excerpt.
 */
8821rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8824 rb_str_delete_bang(argc, argv, str);
/*
 * In-place squeeze: runs of the same character are collapsed.
 * Every argument is encoding-checked against str and folded into the
 * selection tables by tr_setup_table(). After str is prepared for
 * modification, an empty receiver returns Qnil. A character is kept when
 * it differs from the previously seen one (save) or, if arguments were
 * given, when it is not selected by them. A single-byte path indexes the
 * byte table directly; the multibyte path has an ASCII fast path for bytes
 * below 0x80 and otherwise decodes with rb_enc_codepoint_len() and tests
 * with tr_find(). Finally the terminator is written at the write cursor
 * and the length is updated when it changed. str is returned when
 * something was modified; the value returned otherwise is not part of this
 * excerpt.
 */
8842rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8844 char squeez[TR_TABLE_SIZE];
8846 VALUE del = 0, nodel = 0;
8847 unsigned char *s, *send, *t;
8849 int ascompat, singlebyte = single_byte_optimizable(str);
8853 enc = STR_ENC_GET(str);
8856 for (i=0; i<argc; i++) {
8860 enc = rb_enc_check(str, s);
8861 if (singlebyte && !single_byte_optimizable(s))
8863 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8867 str_modify_keep_cr(str);
8868 s = t = (
unsigned char *)RSTRING_PTR(str);
8869 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8872 ascompat = rb_enc_asciicompat(enc);
8876 unsigned int c = *s++;
8877 if (c != save || (argc > 0 && !squeez[c])) {
8887 if (ascompat && (c = *s) < 0x80) {
8888 if (c != save || (argc > 0 && !squeez[c])) {
8894 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8896 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8897 if (t != s) rb_enc_mbcput(c, t, enc);
8906 TERM_FILL((
char *)t, TERM_LEN(str));
8907 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8908 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8912 if (modify)
return str;
/*
 * Squeeze entry point that delegates to rb_str_squeeze_bang() with the same
 * argc and argv. Whether str at that point is the receiver or a copy of
 * it, and what is returned, cannot be told from this excerpt.
 */
8926rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8929 rb_str_squeeze_bang(argc, argv, str);
8947 return tr_trans(str, src, repl, 1);
8970 tr_trans(str, src, repl, 1);
/*
 * Counts the characters of str selected by the argument specifications.
 * Fast path: a one-byte specification in an ASCII compatible encoding that
 * passes ONIGENC_IS_ALLOWED_REVERSE_MATCH, on a receiver that is not a
 * broken string, is counted by comparing raw bytes. General path: the
 * first specification initialises the tables (first = TRUE), later ones
 * refine them (first = FALSE); each character is then tested, with an ASCII
 * fast path for bytes below 0x80 and tr_find() for decoded code points.
 * An empty receiver yields INT2FIX(0) on both paths.
 * NOTE(review): loop headers and the final return are missing from this
 * listing.
 */
8983rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8985 char table[TR_TABLE_SIZE];
8987 VALUE del = 0, nodel = 0, tstr;
8997 enc = rb_enc_check(str, tstr);
9000 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9001 (ptstr = RSTRING_PTR(tstr),
9002 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9003 !is_broken_string(str)) {
9005 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9007 s = RSTRING_PTR(str);
9008 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9011 if (*(
unsigned char*)s++ == c) n++;
9017 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9018 for (i=1; i<argc; i++) {
9021 enc = rb_enc_check(str, tstr);
9022 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9025 s = RSTRING_PTR(str);
9026 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9028 ascompat = rb_enc_asciicompat(enc);
9032 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9040 c = rb_enc_codepoint_len(s, send, &clen, enc);
9041 if (tr_find(c, table, del, nodel)) {
9052rb_fs_check(
VALUE val)
9056 if (
NIL_P(val))
return 0;
9061static const char isspacetable[256] = {
9062 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9063 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9064 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9065 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9066 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9067 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9068 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9069 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9070 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9071 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9072 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9073 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9074 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9075 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9076 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9077 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9080#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9083split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9085 if (empty_count >= 0 &&
len == 0) {
9086 return empty_count + 1;
9088 if (empty_count > 0) {
9093 }
while (--empty_count > 0);
9097 rb_yield(str_new_empty_String(str));
9098 }
while (--empty_count > 0);
9112 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9116literal_split_pattern(
VALUE spat, split_type_t default_type)
9124 return SPLIT_TYPE_CHARS;
9126 else if (rb_enc_asciicompat(enc)) {
9127 if (
len == 1 && ptr[0] ==
' ') {
9128 return SPLIT_TYPE_AWK;
9133 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9134 return SPLIT_TYPE_AWK;
9137 return default_type;
9150rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9155 split_type_t split_type;
9156 long beg, end, i = 0, empty_count = -1;
9161 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9163 if (lim <= 0) limit =
Qnil;
9164 else if (lim == 1) {
9165 if (RSTRING_LEN(str) == 0)
9176 if (
NIL_P(limit) && !lim) empty_count = 0;
9178 enc = STR_ENC_GET(str);
9179 split_type = SPLIT_TYPE_REGEXP;
9181 spat = get_pat_quoted(spat, 0);
9183 else if (
NIL_P(spat = rb_fs)) {
9184 split_type = SPLIT_TYPE_AWK;
9186 else if (!(spat = rb_fs_check(spat))) {
9187 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9192 if (split_type != SPLIT_TYPE_AWK) {
9197 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9198 if (split_type == SPLIT_TYPE_AWK) {
9200 split_type = SPLIT_TYPE_STRING;
9205 mustnot_broken(spat);
9206 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9214#define SPLIT_STR(beg, len) ( \
9215 empty_count = split_string(result, str, beg, len, empty_count), \
9216 str_mod_check(str, str_start, str_len))
9219 char *ptr = RSTRING_PTR(str);
9220 char *
const str_start = ptr;
9221 const long str_len = RSTRING_LEN(str);
9222 char *
const eptr = str_start + str_len;
9223 if (split_type == SPLIT_TYPE_AWK) {
9230 if (is_ascii_string(str)) {
9231 while (ptr < eptr) {
9232 c = (
unsigned char)*ptr++;
9234 if (ascii_isspace(c)) {
9240 if (!
NIL_P(limit) && lim <= i)
break;
9243 else if (ascii_isspace(c)) {
9244 SPLIT_STR(beg, end-beg);
9247 if (!
NIL_P(limit)) ++i;
9255 while (ptr < eptr) {
9258 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9267 if (!
NIL_P(limit) && lim <= i)
break;
9271 SPLIT_STR(beg, end-beg);
9274 if (!
NIL_P(limit)) ++i;
9282 else if (split_type == SPLIT_TYPE_STRING) {
9283 char *substr_start = ptr;
9284 char *sptr = RSTRING_PTR(spat);
9285 long slen = RSTRING_LEN(spat);
9288 mustnot_broken(str);
9289 enc = rb_enc_check(str, spat);
9290 while (ptr < eptr &&
9291 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9294 if (t != ptr + end) {
9298 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9299 str_mod_check(spat, sptr, slen);
9302 if (!
NIL_P(limit) && lim <= ++i)
break;
9304 beg = ptr - str_start;
9306 else if (split_type == SPLIT_TYPE_CHARS) {
9310 mustnot_broken(str);
9311 enc = rb_enc_get(str);
9312 while (ptr < eptr &&
9313 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9314 SPLIT_STR(ptr - str_start, n);
9316 if (!
NIL_P(limit) && lim <= ++i)
break;
9318 beg = ptr - str_start;
9322 long len = RSTRING_LEN(str);
9330 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9335 if (start == end && BEG(0) == END(0)) {
9340 else if (last_null == 1) {
9341 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9348 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9354 SPLIT_STR(beg, end-beg);
9355 beg = start = END(0);
9359 for (idx=1; idx < regs->num_regs; idx++) {
9360 if (BEG(idx) == -1)
continue;
9361 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9363 if (!
NIL_P(limit) && lim <= ++i)
break;
9365 if (match) rb_match_unbusy(match);
9367 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9368 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9371 return result ? result : str;
9381 return rb_str_split_m(1, &sep, str);
9384#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9399#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9402chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9404 const char *prev = rb_enc_prev_char(p, e, e, enc);
9407 prev = rb_enc_prev_char(p, e, e, enc);
9408 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9420 RSTRING_LEN(rs) != 1 ||
9421 RSTRING_PTR(rs)[0] !=
'\n')) {
9427#define rb_rs get_rs()
9434 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9435 long pos,
len, rslen;
9441 static ID keywords[1];
9446 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9450 if (!ENUM_ELEM(ary, str)) {
9458 if (!RSTRING_LEN(str))
goto end;
9460 ptr = subptr = RSTRING_PTR(str);
9462 len = RSTRING_LEN(str);
9464 rslen = RSTRING_LEN(rs);
9467 enc = rb_enc_get(str);
9469 enc = rb_enc_check(str, rs);
9474 const char *eol = NULL;
9476 while (subend < pend) {
9477 long chomp_rslen = 0;
9479 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9481 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9483 if (eol == subend)
break;
9487 chomp_rslen = -rslen;
9491 if (!subptr) subptr = subend;
9495 }
while (subend < pend);
9497 if (rslen == 0) chomp_rslen = 0;
9499 subend - subptr + (chomp ? chomp_rslen : rslen));
9500 if (ENUM_ELEM(ary, line)) {
9501 str_mod_check(str, ptr,
len);
9503 subptr = eol = NULL;
9508 rsptr = RSTRING_PTR(rs);
9509 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9518 rsptr = RSTRING_PTR(rs);
9519 rslen = RSTRING_LEN(rs);
9522 while (subptr < pend) {
9523 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9527 if (hit != adjusted) {
9531 subend = hit += rslen;
9534 subend = chomp_newline(subptr, subend, enc);
9541 if (ENUM_ELEM(ary, line)) {
9542 str_mod_check(str, ptr,
len);
9547 if (subptr != pend) {
9550 pend = chomp_newline(subptr, pend, enc);
9552 else if (pend - subptr >= rslen &&
9553 memcmp(pend - rslen, rsptr, rslen) == 0) {
9558 ENUM_ELEM(ary, line);
9579rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9582 return rb_str_enumerate_lines(argc, argv, str, 0);
9637rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9639 VALUE ary = WANTARRAY(
"lines", 0);
9640 return rb_str_enumerate_lines(argc, argv, str, ary);
9654 for (i=0; i<RSTRING_LEN(str); i++) {
9655 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9673rb_str_each_byte(
VALUE str)
9676 return rb_str_enumerate_bytes(str, 0);
9688rb_str_bytes(
VALUE str)
9690 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9691 return rb_str_enumerate_bytes(str, ary);
9709 ptr = RSTRING_PTR(str);
9710 len = RSTRING_LEN(str);
9711 enc = rb_enc_get(str);
9714 for (i = 0; i <
len; i += n) {
9715 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9720 for (i = 0; i <
len; i += n) {
9721 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9742rb_str_each_char(
VALUE str)
9745 return rb_str_enumerate_chars(str, 0);
9757rb_str_chars(
VALUE str)
9760 return rb_str_enumerate_chars(str, ary);
9764rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9769 const char *ptr, *end;
9772 if (single_byte_optimizable(str))
9773 return rb_str_enumerate_bytes(str, ary);
9776 ptr = RSTRING_PTR(str);
9778 enc = STR_ENC_GET(str);
9781 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9802rb_str_each_codepoint(
VALUE str)
9805 return rb_str_enumerate_codepoints(str, 0);
9817rb_str_codepoints(
VALUE str)
9820 return rb_str_enumerate_codepoints(str, ary);
9826 int encidx = rb_enc_to_index(enc);
9828 const OnigUChar source_ascii[] =
"\\X";
9829 const OnigUChar *source = source_ascii;
9830 size_t source_len =
sizeof(source_ascii) - 1;
9833#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9834#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9835#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9836#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9837#define CASE_UTF(e) \
9838 case ENCINDEX_UTF_##e: { \
9839 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9840 source = source_UTF_##e; \
9841 source_len = sizeof(source_UTF_##e); \
9844 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9852 regex_t *reg_grapheme_cluster;
9854 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9855 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9857 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9858 onig_error_code_to_str(message, r, &einfo);
9859 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9862 return reg_grapheme_cluster;
9868 int encidx = rb_enc_to_index(enc);
9869 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9871 if (encidx == rb_utf8_encindex()) {
9872 if (!reg_grapheme_cluster_utf8) {
9873 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9876 return reg_grapheme_cluster_utf8;
9885 size_t grapheme_cluster_count = 0;
9887 const char *ptr, *end;
9889 if (!rb_enc_unicode_p(enc)) {
9893 bool cached_reg_grapheme_cluster =
true;
9894 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9895 if (!reg_grapheme_cluster) {
9896 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9897 cached_reg_grapheme_cluster =
false;
9900 ptr = RSTRING_PTR(str);
9904 OnigPosition
len = onig_match(reg_grapheme_cluster,
9905 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9906 (
const OnigUChar *)ptr, NULL, 0);
9907 if (
len <= 0)
break;
9908 grapheme_cluster_count++;
9912 if (!cached_reg_grapheme_cluster) {
9913 onig_free(reg_grapheme_cluster);
9916 return SIZET2NUM(grapheme_cluster_count);
9920rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9924 const char *ptr0, *ptr, *end;
9926 if (!rb_enc_unicode_p(enc)) {
9927 return rb_str_enumerate_chars(str, ary);
9932 bool cached_reg_grapheme_cluster =
true;
9933 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9934 if (!reg_grapheme_cluster) {
9935 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9936 cached_reg_grapheme_cluster =
false;
9939 ptr0 = ptr = RSTRING_PTR(str);
9943 OnigPosition
len = onig_match(reg_grapheme_cluster,
9944 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9945 (
const OnigUChar *)ptr, NULL, 0);
9946 if (
len <= 0)
break;
9951 if (!cached_reg_grapheme_cluster) {
9952 onig_free(reg_grapheme_cluster);
9972rb_str_each_grapheme_cluster(
VALUE str)
9975 return rb_str_enumerate_grapheme_clusters(str, 0);
9987rb_str_grapheme_clusters(
VALUE str)
9990 return rb_str_enumerate_grapheme_clusters(str, ary);
9994chopped_length(
VALUE str)
9997 const char *p, *p2, *beg, *end;
9999 beg = RSTRING_PTR(str);
10000 end = beg + RSTRING_LEN(str);
10001 if (beg >= end)
return 0;
10002 p = rb_enc_prev_char(beg, end, end, enc);
10004 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10005 p2 = rb_enc_prev_char(beg, p, end, enc);
10006 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10024rb_str_chop_bang(
VALUE str)
10026 str_modify_keep_cr(str);
10027 if (RSTRING_LEN(str) > 0) {
10029 len = chopped_length(str);
10030 STR_SET_LEN(str,
len);
10031 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10050rb_str_chop(
VALUE str)
10056smart_chomp(
VALUE str,
const char *e,
const char *p)
10059 if (rb_enc_mbminlen(enc) > 1) {
10064 pp = e - rb_enc_mbminlen(enc);
10067 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10075 if (--e > p && *(e-1) ==
'\r') {
10092 char *pp, *e, *rsptr;
10094 char *
const p = RSTRING_PTR(str);
10095 long len = RSTRING_LEN(str);
10097 if (
len == 0)
return 0;
10100 return smart_chomp(str, e, p);
10103 enc = rb_enc_get(str);
10106 if (rb_enc_mbminlen(enc) > 1) {
10111 pp -= rb_enc_mbminlen(enc);
10114 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10121 while (e > p && *(e-1) ==
'\n') {
10123 if (e > p && *(e-1) ==
'\r')
10129 if (rslen >
len)
return len;
10131 enc = rb_enc_get(rs);
10132 newline = rsptr[rslen-1];
10133 if (rslen == rb_enc_mbminlen(enc)) {
10135 if (newline ==
'\n')
10136 return smart_chomp(str, e, p);
10140 return smart_chomp(str, e, p);
10144 enc = rb_enc_check(str, rs);
10145 if (is_broken_string(rs)) {
10149 if (p[
len-1] == newline &&
10151 memcmp(rsptr, pp, rslen) == 0)) {
10152 if (at_char_boundary(p, pp, e, enc))
10153 return len - rslen;
10165chomp_rs(
int argc,
const VALUE *argv)
10169 VALUE rs = argv[0];
10181 long olen = RSTRING_LEN(str);
10182 long len = chompped_length(str, rs);
10183 if (
len >= olen)
return Qnil;
10184 str_modify_keep_cr(str);
10185 STR_SET_LEN(str,
len);
10186 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10206rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10209 str_modifiable(str);
10210 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10211 rs = chomp_rs(argc, argv);
10213 return rb_str_chomp_string(str, rs);
10226rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10228 VALUE rs = chomp_rs(argc, argv);
10236 const char *
const start = s;
10238 if (!s || s >= e)
return 0;
10241 if (single_byte_optimizable(str)) {
10242 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10247 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10269rb_str_lstrip_bang(
VALUE str)
10273 long olen, loffset;
10275 str_modify_keep_cr(str);
10276 enc = STR_ENC_GET(str);
10278 loffset = lstrip_offset(str, start, start+olen, enc);
10280 long len = olen-loffset;
10281 s = start + loffset;
10282 memmove(start, s,
len);
10283 STR_SET_LEN(str,
len);
10284 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10308rb_str_lstrip(
VALUE str)
10313 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10314 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10323 rb_str_check_dummy_enc(enc);
10327 if (!s || s >= e)
return 0;
10331 if (single_byte_optimizable(str)) {
10333 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10338 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10360rb_str_rstrip_bang(
VALUE str)
10364 long olen, roffset;
10366 str_modify_keep_cr(str);
10367 enc = STR_ENC_GET(str);
10369 roffset = rstrip_offset(str, start, start+olen, enc);
10371 long len = olen - roffset;
10373 STR_SET_LEN(str,
len);
10374 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10397rb_str_rstrip(
VALUE str)
10401 long olen, roffset;
10403 enc = STR_ENC_GET(str);
10405 roffset = rstrip_offset(str, start, start+olen, enc);
10407 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10425rb_str_strip_bang(
VALUE str)
10428 long olen, loffset, roffset;
10431 str_modify_keep_cr(str);
10432 enc = STR_ENC_GET(str);
10434 loffset = lstrip_offset(str, start, start+olen, enc);
10435 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10437 if (loffset > 0 || roffset > 0) {
10438 long len = olen-roffset;
10441 memmove(start, start + loffset,
len);
10443 STR_SET_LEN(str,
len);
10444 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10467rb_str_strip(
VALUE str)
10470 long olen, loffset, roffset;
10474 loffset = lstrip_offset(str, start, start+olen, enc);
10475 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10477 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10482scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10485 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10491 end = pos + RSTRING_LEN(pat);
10505 if (RSTRING_LEN(str) > end)
10506 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10515 if (!regs || regs->num_regs == 1) {
10521 for (
int i = 1; i < regs->num_regs; i++) {
10552 long last = -1, prev = 0;
10553 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10555 pat = get_pat_quoted(pat, 1);
10556 mustnot_broken(str);
10560 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10565 if (last >= 0) rb_pat_search(pat, str, last, 1);
10570 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10574 str_mod_check(str, p,
len);
10576 if (last >= 0) rb_pat_search(pat, str, last, 1);
10628rb_str_hex(
VALUE str)
10630 return rb_str_to_inum(str, 16, FALSE);
10714rb_str_oct(
VALUE str)
10716 return rb_str_to_inum(str, -8, FALSE);
10719#ifndef HAVE_CRYPT_R
10724 rb_nativethread_lock_t lock;
10725} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10794# define CRYPT_END() ALLOCV_END(databuf)
10797 extern char *crypt(
const char *,
const char *);
10798# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10801 const char *s, *saltp;
10804 char salt_8bit_clean[3];
10808 mustnot_wchar(str);
10809 mustnot_wchar(salt);
10811 saltp = RSTRING_PTR(salt);
10812 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10813 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10817 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10818 salt_8bit_clean[0] = saltp[0] & 0x7f;
10819 salt_8bit_clean[1] = saltp[1] & 0x7f;
10820 salt_8bit_clean[2] =
'\0';
10821 saltp = salt_8bit_clean;
10826# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10827 data->initialized = 0;
10829 res = crypt_r(s, saltp, data);
10832 res = crypt(s, saltp);
10847 size_t res_size = strlen(res)+1;
10848 tmp_buf =
ALLOCA_N(
char, res_size);
10849 memcpy(tmp_buf, res, res_size);
10886 char *ptr, *p, *pend;
10889 unsigned long sum0 = 0;
10894 ptr = p = RSTRING_PTR(str);
10895 len = RSTRING_LEN(str);
10901 str_mod_check(str, ptr,
len);
10904 sum0 += (
unsigned char)*p;
10915 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10916 sum0 &= (((
unsigned long)1)<<bits)-1;
10936rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10940 long width,
len, flen = 1, fclen = 1;
10943 const char *f =
" ";
10944 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10946 int singlebyte = 1, cr;
10950 enc = STR_ENC_GET(str);
10951 termlen = rb_enc_mbminlen(enc);
10955 enc = rb_enc_check(str, pad);
10956 f = RSTRING_PTR(pad);
10957 flen = RSTRING_LEN(pad);
10958 fclen = str_strlen(pad, enc);
10959 singlebyte = single_byte_optimizable(pad);
10960 if (flen == 0 || fclen == 0) {
10961 rb_raise(rb_eArgError,
"zero width padding");
10964 len = str_strlen(str, enc);
10965 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10967 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10971 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10972 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10974 size = RSTRING_LEN(str);
10975 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10976 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10977 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10978 rb_raise(rb_eArgError,
"argument too big");
10982 p = RSTRING_PTR(res);
10984 memset(p, *f, llen);
10988 while (llen >= fclen) {
10994 memcpy(p, f, llen2);
10998 memcpy(p, RSTRING_PTR(str), size);
11001 memset(p, *f, rlen);
11005 while (rlen >= fclen) {
11011 memcpy(p, f, rlen2);
11015 TERM_FILL(p, termlen);
11016 STR_SET_LEN(res, p-RSTRING_PTR(res));
11037rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11039 return rb_str_justify(argc, argv, str,
'l');
11051rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11053 return rb_str_justify(argc, argv, str,
'r');
11066rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11068 return rb_str_justify(argc, argv, str,
'c');
11084 sep = get_pat_quoted(sep, 0);
11096 pos = rb_str_index(str, sep, 0);
11097 if (pos < 0)
goto failed;
11102 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11105 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11119 long pos = RSTRING_LEN(str);
11121 sep = get_pat_quoted(sep, 0);
11134 pos = rb_str_rindex(str, sep, pos);
11143 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11145 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11157rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11161 for (i=0; i<argc; i++) {
11162 VALUE tmp = argv[i];
11164 if (rb_reg_start_with_p(tmp, str))
11168 const char *p, *s, *e;
11173 enc = rb_enc_check(str, tmp);
11174 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11175 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11176 p = RSTRING_PTR(str);
11179 if (!at_char_right_boundary(p, s, e, enc))
11181 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11197rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11201 for (i=0; i<argc; i++) {
11202 VALUE tmp = argv[i];
11203 const char *p, *s, *e;
11208 enc = rb_enc_check(str, tmp);
11209 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11210 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11211 p = RSTRING_PTR(str);
11214 if (!at_char_boundary(p, s, e, enc))
11216 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11232deleted_prefix_length(
VALUE str,
VALUE prefix)
11234 const char *strptr, *prefixptr;
11235 long olen, prefixlen;
11240 if (!is_broken_string(prefix) ||
11241 !rb_enc_asciicompat(enc) ||
11242 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11243 enc = rb_enc_check(str, prefix);
11247 prefixlen = RSTRING_LEN(prefix);
11248 if (prefixlen <= 0)
return 0;
11249 olen = RSTRING_LEN(str);
11250 if (olen < prefixlen)
return 0;
11251 strptr = RSTRING_PTR(str);
11252 prefixptr = RSTRING_PTR(prefix);
11253 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11254 if (is_broken_string(prefix)) {
11255 if (!is_broken_string(str)) {
11259 const char *strend = strptr + olen;
11260 const char *after_prefix = strptr + prefixlen;
11261 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11282rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11285 str_modify_keep_cr(str);
11287 prefixlen = deleted_prefix_length(str, prefix);
11288 if (prefixlen <= 0)
return Qnil;
11302rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11306 prefixlen = deleted_prefix_length(str, prefix);
11307 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11309 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11322deleted_suffix_length(
VALUE str,
VALUE suffix)
11324 const char *strptr, *suffixptr;
11325 long olen, suffixlen;
11329 if (is_broken_string(suffix))
return 0;
11330 enc = rb_enc_check(str, suffix);
11333 suffixlen = RSTRING_LEN(suffix);
11334 if (suffixlen <= 0)
return 0;
11335 olen = RSTRING_LEN(str);
11336 if (olen < suffixlen)
return 0;
11337 strptr = RSTRING_PTR(str);
11338 suffixptr = RSTRING_PTR(suffix);
11339 const char *strend = strptr + olen;
11340 const char *before_suffix = strend - suffixlen;
11341 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11342 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11358rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11360 long olen, suffixlen,
len;
11361 str_modifiable(str);
11363 suffixlen = deleted_suffix_length(str, suffix);
11364 if (suffixlen <= 0)
return Qnil;
11366 olen = RSTRING_LEN(str);
11367 str_modify_keep_cr(str);
11368 len = olen - suffixlen;
11369 STR_SET_LEN(str,
len);
11370 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11386rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11390 suffixlen = deleted_suffix_length(str, suffix);
11391 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11393 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11400 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11408 val = rb_fs_check(val);
11411 "value of %"PRIsVALUE
" must be String or Regexp",
11415 rb_warn_deprecated(
"'$;'", NULL);
11432 str_modifiable(str);
11435 int idx = rb_enc_to_index(encoding);
11442 rb_enc_associate_index(str, idx);
11466 if (STR_EMBED_P(str)) {
11467 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11472 str_replace_shared_without_enc(str2, str);
11474 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11507rb_str_valid_encoding_p(
VALUE str)
11527rb_str_is_ascii_only_p(
VALUE str)
11537 static const char ellipsis[] =
"...";
11538 const long ellipsislen =
sizeof(ellipsis) - 1;
11540 const long blen = RSTRING_LEN(str);
11541 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11542 VALUE estr, ret = 0;
11545 if (
len * rb_enc_mbminlen(enc) >= blen ||
11549 else if (
len <= ellipsislen ||
11551 if (rb_enc_asciicompat(enc)) {
11553 rb_enc_associate(ret, enc);
11560 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11565 rb_enc_from_encoding(enc), 0,
Qnil);
11578 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11584 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11603 if (enc == STR_ENC_GET(str)) {
11608 return enc_str_scrub(enc, str, repl, cr);
11616 const char *rep, *p, *e, *p1, *sp;
11622 rb_raise(rb_eArgError,
"both of block and replacement given");
11629 if (!
NIL_P(repl)) {
11630 repl = str_compat_and_valid(repl, enc);
11633 if (rb_enc_dummy_p(enc)) {
11636 encidx = rb_enc_to_index(enc);
11638#define DEFAULT_REPLACE_CHAR(str) do { \
11639 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11640 rep = replace; replen = (int)sizeof(replace); \
11643 slen = RSTRING_LEN(str);
11644 p = RSTRING_PTR(str);
11649 if (rb_enc_asciicompat(enc)) {
11655 else if (!
NIL_P(repl)) {
11656 rep = RSTRING_PTR(repl);
11657 replen = RSTRING_LEN(repl);
11660 else if (encidx == rb_utf8_encindex()) {
11661 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11665 DEFAULT_REPLACE_CHAR(
"?");
11670 p = search_nonascii(p, e);
11675 int ret = rb_enc_precise_mbclen(p, e, enc);
11694 if (e - p < clen) clen = e - p;
11701 for (; clen > 1; clen--) {
11702 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11713 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11714 str_mod_check(str, sp, slen);
11715 repl = str_compat_and_valid(repl, enc);
11722 p = search_nonascii(p, e);
11748 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11749 str_mod_check(str, sp, slen);
11750 repl = str_compat_and_valid(repl, enc);
11759 long mbminlen = rb_enc_mbminlen(enc);
11763 else if (!
NIL_P(repl)) {
11764 rep = RSTRING_PTR(repl);
11765 replen = RSTRING_LEN(repl);
11767 else if (encidx == ENCINDEX_UTF_16BE) {
11768 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11770 else if (encidx == ENCINDEX_UTF_16LE) {
11771 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11773 else if (encidx == ENCINDEX_UTF_32BE) {
11774 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11776 else if (encidx == ENCINDEX_UTF_32LE) {
11777 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11780 DEFAULT_REPLACE_CHAR(
"?");
11784 int ret = rb_enc_precise_mbclen(p, e, enc);
11797 if (e - p < clen) clen = e - p;
11798 if (clen <= mbminlen * 2) {
11803 for (; clen > mbminlen; clen-=mbminlen) {
11804 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11814 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11815 str_mod_check(str, sp, slen);
11816 repl = str_compat_and_valid(repl, enc);
11841 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11842 str_mod_check(str, sp, slen);
11843 repl = str_compat_and_valid(repl, enc);
11883str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11891static ID id_normalize;
11892static ID id_normalized_p;
11893static VALUE mUnicodeNormalize;
11896unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11898 static int UnicodeNormalizeRequired = 0;
11901 if (!UnicodeNormalizeRequired) {
11902 rb_require(
"unicode_normalize/normalize.rb");
11903 UnicodeNormalizeRequired = 1;
11907 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11944rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11946 return unicode_normalize_common(argc, argv, str, id_normalize);
11960rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11962 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11989rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11991 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12123#define sym_equal rb_obj_equal
12126sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12130 int c = rb_enc_precise_mbclen(s, send, enc);
12134 c = rb_enc_mbc_to_codepoint(s, send, enc);
12142rb_str_symname_p(
VALUE sym)
12147 rb_encoding *resenc = rb_default_internal_encoding();
12149 if (resenc == NULL) resenc = rb_default_external_encoding();
12150 enc = STR_ENC_GET(sym);
12151 ptr = RSTRING_PTR(sym);
12152 len = RSTRING_LEN(sym);
12153 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12161rb_str_quote_unprintable(
VALUE str)
12169 resenc = rb_default_internal_encoding();
12170 if (resenc == NULL) resenc = rb_default_external_encoding();
12171 enc = STR_ENC_GET(str);
12172 ptr = RSTRING_PTR(str);
12173 len = RSTRING_LEN(str);
12174 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12175 !sym_printable(ptr, ptr +
len, enc)) {
12176 return rb_str_escape(str);
12182rb_id_quote_unprintable(
ID id)
12184 VALUE str = rb_id2str(
id);
12185 if (!rb_str_symname_p(str)) {
12186 return rb_str_escape(str);
12204sym_inspect(
VALUE sym)
12211 if (!rb_str_symname_p(str)) {
12213 len = RSTRING_LEN(str);
12214 rb_str_resize(str,
len + 1);
12215 dest = RSTRING_PTR(str);
12216 memmove(dest + 1, dest,
len);
12220 VALUE orig_str = str;
12222 len = RSTRING_LEN(orig_str);
12223 str = rb_enc_str_new(0,
len + 1, enc);
12226 ptr = RSTRING_PTR(orig_str);
12227 dest = RSTRING_PTR(str);
12228 memcpy(dest + 1, ptr,
len);
12248rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12253 rb_raise(rb_eArgError,
"no receiver given");
12350 return rb_str_match(
rb_sym2str(sym), other);
12365sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12367 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12380sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12382 return rb_str_match_m_p(argc, argv, sym);
12400 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12411sym_length(
VALUE sym)
12425sym_empty(
VALUE sym)
12459sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12475sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12491sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12505sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12507 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12520sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12522 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12534sym_encoding(
VALUE sym)
12540string_for_symbol(
VALUE name)
12545 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12559 name = string_for_symbol(name);
12560 return rb_intern_str(name);
12569 name = string_for_symbol(name);
12593 return rb_fstring(str);
12599 struct RString fake_str = {RBASIC_INIT};
12600 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12612 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12613 rb_enc_autoload(enc);
12616 struct RString fake_str = {RBASIC_INIT};
12617 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12623 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12624 rb_enc_autoload(enc);
12627 struct RString fake_str = {RBASIC_INIT};
12628 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12641rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12646 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12647 rb_str_buf_cat_byte(str, (
char) code);
12657fstring_set_class_i(
VALUE *str,
void *data)
12661 return ST_CONTINUE;
12669 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12836 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single bit of the symbols that have ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes a Ruby string instead of a C string.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes a C string instead of a Ruby string.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@50 as
String's specific fields.
struct RString::@50::@51 heap
Strings that use separated memory region for contents use this pattern.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@50::@52 embed
Embedded contents.
long len
Length of the string, not including terminating NUL character.
union RString::@50::@51::@53 aux
Auxiliary info.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises an exception when the predicate fails.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.