14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
131#define RUBY_MAX_CHAR_LEN 16
/*
 * String-specific flag bits.
 *
 * STR_PRECOMPUTED_HASH: a hash value has been stored right after the
 *     terminator (set by str_store_precomputed_hash).
 * STR_SHARED_ROOT: set by STR_SET_SHARED on the string whose buffer is
 *     being shared by another string.
 * STR_BORROWED: set by STR_SET_SHARED on a shared root whose class is 0.
 * STR_TMPLOCK: tested by rb_check_lockedtmp; part of STR_UNMODIFIABLE_MASK.
 * STR_NOFREE: set by str_new_static; str_discard does not free the heap
 *     buffer of a string carrying this flag.
 * STR_FAKESTR: STR_SET_SHARED does nothing for strings carrying this flag.
 */
132#define STR_PRECOMPUTED_HASH FL_USER4
133#define STR_SHARED_ROOT FL_USER5
134#define STR_BORROWED FL_USER6
135#define STR_TMPLOCK FL_USER7
136#define STR_NOFREE FL_USER18
137#define STR_FAKESTR FL_USER19
/*
 * Flags str as non-embedded: sets STR_NOEMBED and clears the sharing-related
 * flags STR_SHARED, STR_SHARED_ROOT and STR_BORROWED.
 */
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Flags str as embedded by clearing STR_NOEMBED, STR_SHARED and STR_NOFREE. */
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
/* Stores n as the byte length of str (RSTRING(str)->len). */
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
/*
 * TERM_LEN(str): terminator length in bytes for str.  It is 1 when
 * str_enc_fastpath(str) holds, otherwise the minimum character length
 * (rb_enc_mbminlen) of the string's encoding.
 *
 * TERM_FILL(ptr, termlen): writes a terminator at ptr.  The first byte is
 * always set to '\0'; when termlen is greater than 1, termlen bytes starting
 * at ptr are zeroed.  Both arguments are captured in locals, so each is
 * evaluated once.
 */
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
/*
 * RESIZE_CAPA(str, capacity): resizes str's buffer to `capacity` bytes plus
 * the terminator length of its encoding (TERM_LEN), by delegating to
 * RESIZE_CAPA_TERM.
 *
 * RESIZE_CAPA_TERM(str, capacity, termlen): same, with an explicit
 * terminator length.  For an embedded string whose embed capacity is smaller
 * than capacity + termlen, a buffer of capacity + termlen bytes is allocated,
 * the current bytes are copied into it, and the string is switched to
 * non-embedded form with aux.capa = capacity.  The remaining visible lines
 * assert that the string is not shared and reallocate the heap buffer to
 * capacity + termlen bytes, again recording aux.capa = capacity.
 *
 * Note: `capacity` and `termlen` are expanded more than once, so arguments
 * with side effects must not be passed.
 */
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
/*
 * Makes str share the buffer of shared_str.  Nothing is done when str carries
 * STR_FAKESTR.  Otherwise:
 *   - asserts that str's pointer lies within shared_str's bytes
 *     (from its start up to and including its end);
 *   - stores shared_str in str's aux.shared slot through RB_OBJ_WRITE;
 *   - sets STR_SHARED on str and STR_SHARED_ROOT on shared_str;
 *   - additionally sets STR_BORROWED on shared_str when its class is 0.
 */
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 FL_SET((shared_str), STR_SHARED_ROOT); \
209 if (RBASIC_CLASS((shared_str)) == 0) \
210 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Heap buffer pointer of a non-embedded string. */
214#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Heap buffer size in bytes: the recorded capacity plus the terminator length. */
215#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Encoding lookup for str; forwards to get_encoding(). */
218#define STR_ENC_GET(str) get_encoding(str)
/*
 * SHARABLE_MIDDLE_SUBSTRING defaults to 0 when not defined by the build.
 * With the value 0, SHARABLE_SUBSTRING_P(beg, len, end) is true only when the
 * substring ends exactly at `end` (beg + len == end); the alternative
 * definition below makes every substring sharable.
 */
220#if !defined SHARABLE_MIDDLE_SUBSTRING
221# define SHARABLE_MIDDLE_SUBSTRING 0
223#if !SHARABLE_MIDDLE_SUBSTRING
224#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
226#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/*
 * Returns the number of bytes available for embedded string data in str's
 * GC slot: the slot size reported by rb_gc_obj_slot_size() minus the offset
 * of the embedded byte array inside struct RString.
 */
231str_embed_capa(
VALUE str)
233 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
/*
 * Returns true when str carries none of STR_NOFREE, STR_SHARED_ROOT and
 * STR_SHARED.
 */
237rb_str_reembeddable_p(
VALUE str)
239 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
243rb_str_embed_size(
long capa,
long termlen)
251rb_str_size_as_embedded(
VALUE str)
254 if (STR_EMBED_P(str)) {
256 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
258 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
262 else if (rb_str_reembeddable_p(str)) {
264 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
266 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
269 real_size =
sizeof(
struct RString);
/*
 * Returns whether a string of `len` bytes plus a `termlen`-byte terminator
 * can be embedded: the embedded size computed by rb_str_embed_size() must be
 * a size the GC can allocate (rb_gc_size_allocatable_p).
 */
276STR_EMBEDDABLE_P(
long len,
long termlen)
278 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
283static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
284static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
286static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
287static inline void str_modifiable(
VALUE str);
/*
 * Gives str its own buffer without changing its size: calls
 * str_make_independent_expand() with the current length, an expansion of 0
 * and the string's terminator length.
 */
292str_make_independent(
VALUE str)
294 long len = RSTRING_LEN(str);
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
299static inline int str_dependent_p(
VALUE str);
/*
 * Makes str independent, but only when str_dependent_p() reports that it
 * currently depends on another buffer.
 */
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_LEN(str,
len);
322 memcpy(RSTRING_PTR(str), buf,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
/*
 * Debugging aid: prints a diagnostic to stderr saying that the function named
 * by `func` is about to return NULL and that a crash is expected, asking the
 * developer to attach a debugger.
 */
330rb_debug_rstring_null_ptr(
const char *func)
332 fprintf(stderr,
"%s is returning NULL!! "
333 "SIGSEGV is highly expected to follow immediately.\n"
334 "If you could reproduce, attach your debugger here, "
335 "and look at the passed string.\n",
340static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
343get_encoding(
VALUE str)
/*
 * Raises ArgumentError naming the string's encoding when str is a broken
 * string (is_broken_string).
 */
349mustnot_broken(
VALUE str)
351 if (is_broken_string(str)) {
352 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
/*
 * Raises ArgumentError when the encoding `enc` has a minimum character
 * length greater than one byte (a "wide char" encoding).
 * NOTE(review): the line that obtains `enc` is not visible in this excerpt;
 * presumably it is the encoding of str -- confirm.
 */
357mustnot_wchar(
VALUE str)
360 if (rb_enc_mbminlen(enc) > 1) {
361 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
365static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
367#if SIZEOF_LONG == SIZEOF_VOIDP
368#define PRECOMPUTED_FAKESTR_HASH 1
373BARE_STRING_P(
VALUE str)
378static inline st_index_t
379str_do_hash(
VALUE str)
381 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
383 if (e && !is_ascii_string(str)) {
/*
 * Stores `hash` inside str's embedded slot, immediately after the string's
 * terminator, and sets STR_PRECOMPUTED_HASH.
 *
 * used_bytes is the content length plus the terminator length; free_bytes is
 * what remains of the embed capacity after that.
 * NOTE(review): the check that uses free_bytes is not visible in this
 * excerpt; presumably it verifies there is room for sizeof(hash) -- confirm.
 */
390str_store_precomputed_hash(
VALUE str, st_index_t hash)
396 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
397 size_t free_bytes = str_embed_capa(str) - used_bytes;
/* The hash lives directly behind the terminator bytes. */
401 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
403 FL_SET(str, STR_PRECOMPUTED_HASH);
416 if (
FL_TEST(str, RSTRING_FSTR))
419 bare = BARE_STRING_P(str);
421 if (STR_EMBED_P(str)) {
426 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
433 rb_str_resize(str, RSTRING_LEN(str));
435 fstr = register_fstring(str,
false,
false);
438 str_replace_shared_without_enc(str, fstr);
445static VALUE fstring_table_obj;
448fstring_concurrent_set_hash(
VALUE str)
450#ifdef PRECOMPUTED_FAKESTR_HASH
454 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
471 const char *aptr, *bptr;
478 return (alen == blen &&
480 memcmp(aptr, bptr, alen) == 0);
485 bool force_precompute_hash;
489fstring_concurrent_set_create(
VALUE str,
void *data)
499 long len = RSTRING_LEN(str);
500 long capa =
len +
sizeof(st_index_t);
501 int term_len = TERM_LEN(str);
503 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
505 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
506 STR_SET_LEN(new_str, RSTRING_LEN(str));
508 rb_enc_copy(new_str, str);
509 str_store_precomputed_hash(new_str, str_do_hash(str));
513 rb_enc_copy(new_str, str);
514#ifdef PRECOMPUTED_FAKESTR_HASH
515 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
516 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
530 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
533 if (STR_SHARED_P(str)) {
535 str_make_independent(str);
538 if (!BARE_STRING_P(str)) {
544 RBASIC(str)->flags |= RSTRING_FSTR;
546 RB_OBJ_SET_SHAREABLE(str);
560 .hash = fstring_concurrent_set_hash,
561 .cmp = fstring_concurrent_set_cmp,
562 .create = fstring_concurrent_set_create,
567Init_fstring_table(
void)
569 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
570 rb_gc_register_address(&fstring_table_obj);
574register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
578 .force_precompute_hash = force_precompute_hash
581#if SIZEOF_VOIDP == SIZEOF_LONG
585 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
589 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
591 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
603rb_obj_is_fstring_table(
VALUE obj)
607 return obj == fstring_table_obj;
/*
 * Removes obj from the fstring table by identity and bumps the obj_str_fstr
 * debug counter.  Asserts ASSERT_vm_locking_with_barrier() before touching
 * the table.
 */
611rb_gc_free_fstring(
VALUE obj)
613 ASSERT_vm_locking_with_barrier();
615 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
617 RB_DEBUG_COUNTER_INC(obj_str_fstr);
/*
 * Passes `callback` and `data` on to
 * rb_concurrent_set_foreach_with_replace() for the fstring table.  Does
 * nothing when the table has not been created (fstring_table_obj is 0).
 */
623rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
625 if (fstring_table_obj) {
626 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
631setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
634 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
647 return (
VALUE)fake_str;
656 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
/*
 * Returns the fstring for the `len` bytes at `ptr`, treated as US-ASCII.
 * A temporary fake string is set up on the stack around the caller's bytes
 * and handed to register_fstring() with copy = false and
 * force_precompute_hash = false.
 */
665rb_fstring_new(
const char *ptr,
long len)
667 struct RString fake_str = {RBASIC_INIT};
668 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
674 struct RString fake_str = {RBASIC_INIT};
675 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
/*
 * Convenience wrapper around rb_fstring_new() for a NUL-terminated C string;
 * the length is taken with strlen(), so ptr must not be NULL.
 */
679rb_fstring_cstr(
const char *
ptr)
681 return rb_fstring_new(
ptr, strlen(
ptr));
685single_byte_optimizable(
VALUE str)
689 case ENCINDEX_ASCII_8BIT:
690 case ENCINDEX_US_ASCII:
/*
 * Scans the bytes in [p, e) for one with the high bit (0x80) set and returns
 * a pointer to it.
 *
 * The visible code works in three stages: a byte-wise check of the bytes in
 * front of the first word boundary, a word-at-a-time check against
 * NONASCII_MASK, and a byte-wise check of the tail.
 *
 * NOTE(review): the return for the "no such byte" case is not visible in this
 * excerpt; callers test the result for truthiness, so presumably NULL --
 * confirm.
 */
712static inline const char *
713search_nonascii(
const char *p,
const char *e)
715 const uintptr_t *s, *t;
/* NONASCII_MASK has 0x80 in every byte of a uintptr_t (4 or 8 bytes wide). */
717#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
718# if SIZEOF_UINTPTR_T == 8
719# define NONASCII_MASK UINT64_C(0x8080808080808080)
720# elif SIZEOF_UINTPTR_T == 4
721# define NONASCII_MASK UINT32_C(0x80808080)
723# error "don't know what to do."
726# if SIZEOF_UINTPTR_T == 8
727# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
728# elif SIZEOF_UINTPTR_T == 4
729# define NONASCII_MASK 0x80808080UL
731# error "don't know what to do."
/*
 * Word-wise scanning is used when unaligned word access is permitted or at
 * least one pointer-sized run of bytes is available.
 */
735 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
736#if !UNALIGNED_WORD_ACCESS
737 if ((uintptr_t)p % SIZEOF_VOIDP) {
/* l = number of bytes from p up to the next pointer-aligned address. */
738 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
/*
 * The cases index backwards from p, so p has presumably been advanced by l
 * on an elided line; each leading byte is tested for the high bit.
 */
743 case 7:
if (p[-7]&0x80)
return p-7;
744 case 6:
if (p[-6]&0x80)
return p-6;
745 case 5:
if (p[-5]&0x80)
return p-5;
746 case 4:
if (p[-4]&0x80)
return p-4;
748 case 3:
if (p[-3]&0x80)
return p-3;
749 case 2:
if (p[-2]&0x80)
return p-2;
750 case 1:
if (p[-1]&0x80)
return p-1;
755#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
756#define aligned_ptr(value) \
757 __builtin_assume_aligned((value), sizeof(uintptr_t))
759#define aligned_ptr(value) (uintptr_t *)(value)
/* t marks the last position from which a whole word can still be read. */
762 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
/*
 * A word with any high bit set contains a non-ASCII byte.  Its byte offset is
 * the bit index of the first set mask bit divided by 8: counted from the
 * most significant end on big-endian targets, from the least significant end
 * otherwise.
 */
765 if (*s & NONASCII_MASK) {
766#ifdef WORDS_BIGENDIAN
767 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
769 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
/* Tail: the last few bytes before e are tested one at a time. */
779 case 7:
if (e[-7]&0x80)
return e-7;
780 case 6:
if (e[-6]&0x80)
return e-6;
781 case 5:
if (e[-5]&0x80)
return e-5;
782 case 4:
if (e[-4]&0x80)
return e-4;
784 case 3:
if (e[-3]&0x80)
return e-3;
785 case 2:
if (e[-2]&0x80)
return e-2;
786 case 1:
if (e[-1]&0x80)
return e-1;
794 const char *e = p +
len;
796 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
798 p = search_nonascii(p, e);
802 if (rb_enc_asciicompat(enc)) {
803 p = search_nonascii(p, e);
806 int ret = rb_enc_precise_mbclen(p, e, enc);
810 p = search_nonascii(p, e);
816 int ret = rb_enc_precise_mbclen(p, e, enc);
832 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
835 p = search_nonascii(p, e);
839 else if (rb_enc_asciicompat(enc)) {
840 p = search_nonascii(p, e);
846 int ret = rb_enc_precise_mbclen(p, e, enc);
853 p = search_nonascii(p, e);
859 int ret = rb_enc_precise_mbclen(p, e, enc);
884 rb_enc_set_index(str1, rb_enc_get_index(str2));
892rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
897 str_enc_copy(dest, src);
898 if (RSTRING_LEN(dest) == 0) {
899 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
910 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
911 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
922rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
924 str_enc_copy(dest, src);
931 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
937 return enc_coderange_scan(str, enc);
946 cr = enc_coderange_scan(str, get_encoding(str));
953rb_enc_str_asciicompat(
VALUE str)
956 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
964 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
973str_mod_check(
VALUE s,
const char *p,
long len)
975 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
/*
 * Returns the number of content bytes str can hold, excluding a terminator
 * of `termlen` bytes.
 *   - embedded string: the embed capacity minus termlen;
 *   - shared or STR_NOFREE string: handled by a branch whose body is not
 *     visible in this excerpt;
 *   - otherwise: the recorded heap capacity (aux.capa).
 */
981str_capacity(
VALUE str,
const int termlen)
983 if (STR_EMBED_P(str)) {
984 return str_embed_capa(str) - termlen;
986 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
990 return RSTRING(str)->as.heap.aux.capa;
997 return str_capacity(str, TERM_LEN(str));
1001must_not_null(
const char *
ptr)
1004 rb_raise(rb_eArgError,
"NULL pointer given");
1009str_alloc_embed(
VALUE klass,
size_t capa)
1011 size_t size = rb_str_embed_size(
capa, 0);
1015 NEWOBJ_OF(str,
struct RString, klass,
1019 str->as.embed.ary[0] = 0;
1025str_alloc_heap(
VALUE klass)
1027 NEWOBJ_OF(str,
struct RString, klass,
1031 str->as.heap.aux.capa = 0;
1032 str->as.heap.ptr = NULL;
/*
 * Allocates an empty embedded string of class klass and zero-fills its whole
 * embedded byte area (str_embed_capa bytes).  Fires the STRING creation
 * DTrace hook with length 0.
 */
1038empty_str_alloc(
VALUE klass)
1040 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1041 VALUE str = str_alloc_embed(klass, 0);
1042 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1053 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1057 enc = rb_ascii8bit_encoding();
1060 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1062 int termlen = rb_enc_mbminlen(enc);
1064 if (STR_EMBEDDABLE_P(
len, termlen)) {
1065 str = str_alloc_embed(klass,
len + termlen);
1071 str = str_alloc_heap(klass);
1077 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1080 rb_enc_raw_set(str, enc);
1083 memcpy(RSTRING_PTR(str),
ptr,
len);
1086 memset(RSTRING_PTR(str), 0,
len);
1089 STR_SET_LEN(str,
len);
1090 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1097 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1132 __msan_unpoison_string(
ptr);
1152 if (rb_enc_mbminlen(enc) != 1) {
1153 rb_raise(rb_eArgError,
"wchar encoding given");
1155 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1159str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1164 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1168 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1171 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1172 str = str_alloc_heap(klass);
1176 RBASIC(str)->flags |= STR_NOFREE;
1177 rb_enc_associate_index(str, encindex);
1206static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1208 int ecflags,
VALUE ecopts);
1213 int encidx = rb_enc_to_index(enc);
1214 if (rb_enc_get_index(str) == encidx)
1215 return is_ascii_string(str);
1226 if (!to)
return str;
1227 if (!from) from = rb_enc_get(str);
1228 if (from == to)
return str;
1229 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1230 rb_is_ascii8bit_enc(to)) {
1231 if (STR_ENC_GET(str) != to) {
1233 rb_enc_associate(str, to);
1240 from, to, ecflags, ecopts);
1241 if (
NIL_P(newstr)) {
1249rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1254 olen = RSTRING_LEN(newstr);
1255 if (ofs < -olen || olen < ofs)
1257 if (ofs < 0) ofs += olen;
1259 STR_SET_LEN(newstr, ofs);
1263 rb_str_modify(newstr);
1264 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1272 STR_SET_LEN(str, 0);
1273 rb_enc_associate(str, enc);
1279str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1281 int ecflags,
VALUE ecopts)
1286 VALUE econv_wrapper;
1287 const unsigned char *start, *sp;
1288 unsigned char *dest, *dp;
1289 size_t converted_output = (size_t)ofs;
1294 RBASIC_CLEAR_CLASS(econv_wrapper);
1296 if (!ec)
return Qnil;
1299 sp = (
unsigned char*)
ptr;
1301 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1302 (dp = dest + converted_output),
1306 size_t converted_input = sp - start;
1307 size_t rest =
len - converted_input;
1308 converted_output = dp - dest;
1310 if (converted_input && converted_output &&
1311 rest < (LONG_MAX / converted_output)) {
1312 rest = (rest * converted_output) / converted_input;
1317 olen += rest < 2 ? 2 : rest;
1318 rb_str_resize(newstr, olen);
1325 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1327 rb_enc_associate(newstr, to);
1346 const int eidx = rb_enc_to_index(eenc);
1349 return rb_enc_str_new(
ptr,
len, eenc);
1353 if ((eidx == rb_ascii8bit_encindex()) ||
1354 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1358 ienc = rb_default_internal_encoding();
1359 if (!ienc || eenc == ienc) {
1360 return rb_enc_str_new(
ptr,
len, eenc);
1364 if ((eidx == rb_ascii8bit_encindex()) ||
1365 (eidx == rb_usascii_encindex()) ||
1366 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1367 return rb_enc_str_new(
ptr,
len, ienc);
1370 str = rb_enc_str_new(NULL, 0, ienc);
1373 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1374 rb_str_initialize(str,
ptr,
len, eenc);
1382 int eidx = rb_enc_to_index(eenc);
1383 if (eidx == rb_usascii_encindex() &&
1384 !is_ascii_string(str)) {
1385 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1388 rb_enc_associate_index(str, eidx);
1447str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1449 const int termlen = TERM_LEN(str);
1454 if (str_embed_capa(str2) >=
len + termlen) {
1455 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1456 STR_SET_EMBED(str2);
1457 memcpy(ptr2, RSTRING_PTR(str),
len);
1458 TERM_FILL(ptr2+
len, termlen);
1462 if (STR_SHARED_P(str)) {
1463 root =
RSTRING(str)->as.heap.aux.shared;
1472 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1474 rb_fatal(
"about to free a possible shared root");
1476 char *ptr2 = STR_HEAP_PTR(str2);
1478 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1481 FL_SET(str2, STR_NOEMBED);
1483 STR_SET_SHARED(str2, root);
1486 STR_SET_LEN(str2,
len);
1494 str_replace_shared_without_enc(str2, str);
1495 rb_enc_cr_str_exact_copy(str2, str);
1502 return str_replace_shared(str_alloc_heap(klass), str);
1519rb_str_new_frozen_String(
VALUE orig)
1527rb_str_frozen_bare_string(
VALUE orig)
1529 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1534rb_str_tmp_frozen_acquire(
VALUE orig)
1537 return str_new_frozen_buffer(0, orig, FALSE);
1541rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1543 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1544 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1546 VALUE str = str_alloc_heap(0);
1549 FL_SET(str, STR_SHARED_ROOT);
1551 size_t capa = str_capacity(orig, TERM_LEN(orig));
1557 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1558 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1565 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1566 RBASIC(orig)->flags &= ~STR_NOFREE;
1567 STR_SET_SHARED(orig, str);
1569 RB_OBJ_SET_SHAREABLE(str);
1581rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1586 if (STR_EMBED_P(tmp)) {
1589 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1595 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1599 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1600 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1605 STR_SET_LEN(tmp, 0);
1613 return str_new_frozen_buffer(klass, orig, TRUE);
1623 VALUE str = str_alloc_heap(klass);
1624 STR_SET_LEN(str, RSTRING_LEN(orig));
1625 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1626 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1627 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1628 RBASIC(orig)->flags &= ~STR_NOFREE;
1629 STR_SET_SHARED(orig, str);
1636str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1640 long len = RSTRING_LEN(orig);
1641 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1642 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1644 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1645 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1651 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1652 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1658 if ((ofs > 0) || (rest > 0) ||
1661 str = str_new_shared(klass,
shared);
1663 RSTRING(str)->as.heap.ptr += ofs;
1664 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1672 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1673 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1675 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1676 STR_SET_LEN(str, RSTRING_LEN(orig));
1682 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1685 str = heap_str_make_shared(klass, orig);
1690 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1702str_new_empty_String(
VALUE str)
1705 rb_enc_copy(v, str);
1709#define STR_BUF_MIN_SIZE 63
1714 if (STR_EMBEDDABLE_P(
capa, 1)) {
1722 RSTRING(str)->as.heap.ptr[0] =
'\0';
1742 return str_new(0, 0,
len);
1748 if (STR_EMBED_P(str)) {
1749 RB_DEBUG_COUNTER_INC(obj_str_embed);
1751 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1752 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1753 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1756 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1757 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/*
 * Reports the memory used by str's buffer.  For a string that is non-embedded
 * and neither shared nor STR_NOFREE (only STR_NOEMBED set among the three
 * flags), this is STR_HEAP_SIZE(str).  The result for the other cases is not
 * visible in this excerpt.
 */
1762rb_str_memsize(
VALUE str)
1764 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1765 return STR_HEAP_SIZE(str);
1775 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1778static inline void str_discard(
VALUE str);
1779static void str_shared_replace(
VALUE str,
VALUE str2);
1784 if (str != str2) str_shared_replace(str, str2);
1795 enc = STR_ENC_GET(str2);
1798 termlen = rb_enc_mbminlen(enc);
1800 STR_SET_LEN(str, RSTRING_LEN(str2));
1802 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1804 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1805 rb_enc_associate(str, enc);
1809 if (STR_EMBED_P(str2)) {
1811 long len = RSTRING_LEN(str2);
1814 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1815 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1816 RSTRING(str2)->as.heap.ptr = new_ptr;
1817 STR_SET_LEN(str2,
len);
1819 STR_SET_NOEMBED(str2);
1822 STR_SET_NOEMBED(str);
1824 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1826 if (
FL_TEST(str2, STR_SHARED)) {
1828 STR_SET_SHARED(str,
shared);
1831 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1835 STR_SET_EMBED(str2);
1836 RSTRING_PTR(str2)[0] = 0;
1837 STR_SET_LEN(str2, 0);
1838 rb_enc_associate(str, enc);
1852 return rb_obj_as_string_result(str, obj);
1868 len = RSTRING_LEN(str2);
1869 if (STR_SHARED_P(str2)) {
1872 STR_SET_NOEMBED(str);
1873 STR_SET_LEN(str,
len);
1874 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1875 STR_SET_SHARED(str,
shared);
1876 rb_enc_cr_str_exact_copy(str, str2);
1879 str_replace_shared(str, str2);
1888 size_t size = rb_str_embed_size(
capa, 0);
1892 NEWOBJ_OF(str,
struct RString, klass,
1903 NEWOBJ_OF(str,
struct RString, klass,
1906 str->as.heap.aux.capa = 0;
1907 str->as.heap.ptr = NULL;
1917 encidx = rb_enc_get_index(str);
1918 flags &= ~ENCODING_MASK;
1921 if (encidx) rb_enc_associate_index(dup, encidx);
1931 long len = RSTRING_LEN(str);
1936 STR_SET_LEN(dup, RSTRING_LEN(str));
1937 return str_duplicate_setup_encoding(str, dup, flags);
1946 root =
RSTRING(str)->as.heap.aux.shared;
1949 root = str = str_new_frozen(klass, str);
1955 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1956 FL_SET(root, STR_SHARED_ROOT);
1958 flags |= RSTRING_NOEMBED | STR_SHARED;
1960 STR_SET_LEN(dup, RSTRING_LEN(str));
1961 return str_duplicate_setup_encoding(str, dup, flags);
1967 if (STR_EMBED_P(str)) {
1968 return str_duplicate_setup_embed(klass, str, dup);
1971 return str_duplicate_setup_heap(klass, str, dup);
1979 if (STR_EMBED_P(str)) {
1980 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1983 dup = str_alloc_heap(klass);
1986 return str_duplicate_setup(klass, str, dup);
1997rb_str_dup_m(
VALUE str)
1999 if (LIKELY(BARE_STRING_P(str))) {
2010 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2017 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2021 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2022 str_duplicate_setup_embed(klass, str, new_str);
2025 new_str = ec_str_alloc_heap(ec, klass);
2026 str_duplicate_setup_heap(klass, str, new_str);
2035rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2037 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2061 static ID keyword_ids[2];
2062 VALUE orig, opt, venc, vcapa;
2067 if (!keyword_ids[0]) {
2068 keyword_ids[0] = rb_id_encoding();
2069 CONST_ID(keyword_ids[1],
"capacity");
2077 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2078 enc = rb_to_encoding(venc);
2080 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2083 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2085 if (
capa < STR_BUF_MIN_SIZE) {
2086 capa = STR_BUF_MIN_SIZE;
2090 len = RSTRING_LEN(orig);
2094 if (orig == str) n = 0;
2096 str_modifiable(str);
2097 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2099 const size_t size = (size_t)
capa + termlen;
2100 const char *
const old_ptr = RSTRING_PTR(str);
2101 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2102 char *new_ptr =
ALLOC_N(
char, size);
2103 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2104 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2106 RSTRING(str)->as.heap.ptr = new_ptr;
2108 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2109 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2110 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2112 STR_SET_LEN(str,
len);
2115 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2116 rb_enc_cr_str_exact_copy(str, orig);
2118 FL_SET(str, STR_NOEMBED);
2125 rb_enc_associate(str, enc);
2137rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2143 static ID keyword_ids[2];
2153 keyword_ids[0] = rb_id_encoding();
2154 CONST_ID(keyword_ids[1],
"capacity");
2156 encoding = kwargs[0];
2157 capacity = kwargs[1];
2166 if (UNDEF_P(encoding)) {
2168 encoding = rb_obj_encoding(orig);
2172 if (!UNDEF_P(encoding)) {
2173 enc = rb_to_encoding(encoding);
2177 if (UNDEF_P(capacity)) {
2179 VALUE empty_str = str_new(klass,
"", 0);
2181 rb_enc_associate(empty_str, enc);
2185 VALUE copy = str_duplicate(klass, orig);
2186 rb_enc_associate(copy, enc);
2199 if (orig_capa >
capa) {
2204 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2205 STR_SET_LEN(str, 0);
/*
 * True when byte c is not of the form 10xxxxxx, i.e. when its top two bits
 * are anything other than binary 10.
 */
2216#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2231static inline uintptr_t
2232count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2237 d = (d>>6) | (~d>>7);
2238 d &= NONASCII_MASK >> 7;
2241#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2243 return rb_popcount_intptr(d);
2247# if SIZEOF_VOIDP == 8
2256enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2262 long diff = (long)(e - p);
2263 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2268 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2269 const uintptr_t *s, *t;
2270 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2271 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2272 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2273 while (p < (
const char *)s) {
2274 if (is_utf8_lead_byte(*p))
len++;
2278 len += count_utf8_lead_bytes_with_word(s);
2281 p = (
const char *)s;
2284 if (is_utf8_lead_byte(*p))
len++;
2290 else if (rb_enc_asciicompat(enc)) {
2295 q = search_nonascii(p, e);
2301 p += rb_enc_fast_mbclen(p, e, enc);
2308 q = search_nonascii(p, e);
2314 p += rb_enc_mbclen(p, e, enc);
2321 for (c=0; p<e; c++) {
2322 p += rb_enc_mbclen(p, e, enc);
2337rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2345 long diff = (long)(e - p);
2346 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2348 else if (rb_enc_asciicompat(enc)) {
2352 q = search_nonascii(p, e);
2360 ret = rb_enc_precise_mbclen(p, e, enc);
2375 for (c=0; p<e; c++) {
2376 ret = rb_enc_precise_mbclen(p, e, enc);
2383 if (p + rb_enc_mbminlen(enc) <= e)
2384 p += rb_enc_mbminlen(enc);
2400 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2401 if (!enc) enc = STR_ENC_GET(str);
2402 p = RSTRING_PTR(str);
2407 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2412 return enc_strlen(p, e, enc, cr);
2419 return str_strlen(str, NULL);
2433 return LONG2NUM(str_strlen(str, NULL));
2445rb_str_bytesize(
VALUE str)
/* Returns a Ruby boolean (RBOOL) that is true when str has a byte length of 0. */
2464rb_str_empty(
VALUE str)
2466 return RBOOL(RSTRING_LEN(str) == 0);
2485 char *ptr1, *ptr2, *ptr3;
2490 enc = rb_enc_check_str(str1, str2);
2493 termlen = rb_enc_mbminlen(enc);
2494 if (len1 > LONG_MAX - len2) {
2495 rb_raise(rb_eArgError,
"string size too big");
2497 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2498 ptr3 = RSTRING_PTR(str3);
2499 memcpy(ptr3, ptr1, len1);
2500 memcpy(ptr3+len1, ptr2, len2);
2501 TERM_FILL(&ptr3[len1+len2], termlen);
2517 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2520 int enc1 = rb_enc_get_index(str1);
2521 int enc2 = rb_enc_get_index(str2);
2526 else if (enc2 < 0) {
2529 else if (enc1 != enc2) {
2532 else if (len1 > LONG_MAX - len2) {
2566 rb_enc_copy(str2, str);
2571 rb_raise(rb_eArgError,
"negative argument");
2573 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2574 if (STR_EMBEDDABLE_P(
len, 1)) {
2576 memset(RSTRING_PTR(str2), 0,
len + 1);
2583 STR_SET_LEN(str2,
len);
2584 rb_enc_copy(str2, str);
2587 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2588 rb_raise(rb_eArgError,
"argument too big");
2591 len *= RSTRING_LEN(str);
2592 termlen = TERM_LEN(str);
2594 ptr2 = RSTRING_PTR(str2);
2596 n = RSTRING_LEN(str);
2597 memcpy(ptr2, RSTRING_PTR(str), n);
2598 while (n <=
len/2) {
2599 memcpy(ptr2 + n, ptr2, n);
2602 memcpy(ptr2 + n, ptr2,
len-n);
2604 STR_SET_LEN(str2,
len);
2605 TERM_FILL(&ptr2[
len], termlen);
2606 rb_enc_cr_str_copy_for_substr(str2, str);
2643rb_check_lockedtmp(
VALUE str)
2645 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Flags that each make a string unmodifiable: frozen, temp-locked or chilled. */
2652#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
/*
 * Verifies that str may be modified.  Only when one of the
 * STR_UNMODIFIABLE_MASK flags is set does it take the slow path, which
 * reports mutation of a chilled string via CHILLED_STRING_MUTATED and then
 * runs the locked-tmp and frozen checks.
 */
2654str_modifiable(
VALUE str)
2658 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2659 if (CHILLED_STRING_P(str)) {
2660 CHILLED_STRING_MUTATED(str);
2662 rb_check_lockedtmp(str);
2663 rb_check_frozen(str);
2668str_dependent_p(
VALUE str)
2670 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK plus the flags that indicate a shared or non-freeable buffer. */
2680#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
/*
 * When any STR_DEPENDANT_MASK flag is set, runs the modifiability checks and
 * returns whether str is NOT dependent on another buffer.  The return value
 * for the case where no such flag is set is not visible in this excerpt.
 */
2682str_independent(
VALUE str)
2686 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2687 str_modifiable(str);
2688 return !str_dependent_p(str);
/*
 * Helper called when a string does not pass str_independent() (see
 * rb_str_resize, str_fill_term).  `len` is the content length to keep and
 * `termlen` the terminator width; `capa` and `ptr` are computed on lines that
 * are not part of this excerpt (presumably from len + expand -- confirm).
 */
2694str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
/* Non-embedded string whose content plus terminator fits the embed capacity. */
2704 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2709 STR_SET_LEN(str,
len);
/* Otherwise the first `len` bytes are copied from the old buffer into `ptr`. */
2714 oldptr = RSTRING_PTR(str);
2716 memcpy(
ptr, oldptr,
len);
/* True only for a heap string that is neither STR_NOFREE nor STR_SHARED. */
2718 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2721 STR_SET_NOEMBED(str);
2722 FL_UNSET(str, STR_SHARED|STR_NOFREE);
/* Terminate the new buffer right after the copied content. */
2723 TERM_FILL(
ptr +
len, termlen);
2725 STR_SET_LEN(str,
len);
2732 if (!str_independent(str))
2733 str_make_independent(str);
2742 int termlen = TERM_LEN(str);
2743 long len = RSTRING_LEN(str);
2746 rb_raise(rb_eArgError,
"negative expanding string size");
2748 if (expand >= LONG_MAX -
len) {
2749 rb_raise(rb_eArgError,
"string size too big");
2752 if (!str_independent(str)) {
2753 str_make_independent_expand(str,
len, expand, termlen);
2755 else if (expand > 0) {
2756 RESIZE_CAPA_TERM(str,
len + expand, termlen);
/*
 * Makes `str` independent when str_independent() reports that it is not.
 * Lines after the call are not part of this excerpt.
 */
2763str_modify_keep_cr(
VALUE str)
2765 if (!str_independent(str))
2766 str_make_independent(str);
/*
 * Drops the heap buffer of `str`.  After the str_modifiable() check, a string
 * that is not embedded and has neither STR_SHARED nor STR_NOFREE set has its
 * heap pointer freed with its recorded size, then the pointer is cleared and
 * the length set to 0.
 */
2773str_discard(
VALUE str)
2775 str_modifiable(str);
2776 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2777 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2778 RSTRING(str)->as.heap.ptr = 0;
2779 STR_SET_LEN(str, 0);
2786 int encindex = rb_enc_get_index(str);
2788 if (RB_UNLIKELY(encindex == -1)) {
2792 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2797 if (!rb_enc_asciicompat(enc)) {
2819 return RSTRING_PTR(str);
/*
 * Loops `n` times, counting down.  The loop body is not in this excerpt;
 * presumably it tests whether the `n` bytes at `s` are all zero -- confirm.
 * Callers visible in this file pass a terminator or minimum-character width
 * as `n`.
 */
2823zero_filled(
const char *s,
int n)
2825 for (; n > 0; --n) {
/*
 * Walks [s, s + len) one character at a time (step size from rb_enc_mbclen)
 * for as long as at least `minlen` bytes remain, and returns the position of
 * the first character for which zero_filled(s, minlen) holds.  The result
 * when no such character exists is outside this excerpt.
 */
2832str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2834 const char *e = s +
len;
2836 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2837 if (zero_filled(s, minlen))
return s;
/*
 * Ensures a `termlen`-byte terminator follows the `len` content bytes at `s`.
 * Returns RSTRING_PTR(str) re-read from the string rather than `s`
 * (presumably because the buffer may have been replaced -- confirm).
 */
2843str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
/* Dependent string: only re-home it if the terminator bytes are not already zero. */
2848 if (str_dependent_p(str)) {
2849 if (!zero_filled(s +
len, termlen))
2850 str_make_independent_expand(str,
len, 0L, termlen);
/* The branch structure around this write is not fully visible here. */
2853 TERM_FILL(s +
len, termlen);
2856 return RSTRING_PTR(str);
/*
 * Adjusts `str` for a terminator width changing from `oldtermlen` to
 * `termlen`.  `capa` is the current capacity plus the old terminator width.
 * The condition guarding the first branch is not part of this excerpt.
 */
2860rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2862 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2863 long len = RSTRING_LEN(str);
2867 rb_check_lockedtmp(str);
2868 str_make_independent_expand(str,
len, 0L, termlen);
/* A dependent string is only re-homed when the terminator grows. */
2870 else if (str_dependent_p(str)) {
2871 if (termlen > oldtermlen)
2872 str_make_independent_expand(str,
len, 0L, termlen);
2875 if (!STR_EMBED_P(str)) {
/* A wider terminator is written out after the content. */
2880 if (termlen > oldtermlen) {
2881 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
/*
 * Looks for an embedded NUL in `str` and makes sure the string is terminated.
 * `minlen` is the encoding's minimum character width.  How `*w` is set and
 * what is returned when a NUL is found are not visible in this excerpt.
 */
2889str_null_check(
VALUE str,
int *w)
2891 char *s = RSTRING_PTR(str);
2892 long len = RSTRING_LEN(str);
2894 const int minlen = rb_enc_mbminlen(enc);
/* Character-wise search via str_null_char(); the guarding condition is not shown. */
2898 if (str_null_char(s,
len, minlen, enc)) {
2901 return str_fill_term(str, s,
len, minlen);
/* Byte-wise search; a NULL buffer is treated like a hit. */
2904 if (!s || memchr(s, 0,
len)) {
2908 s = str_fill_term(str, s,
len, minlen);
/*
 * Returns the result of str_null_check() for `str`; the width written to the
 * local `w` is not used in the lines visible here.
 */
2914rb_str_to_cstr(
VALUE str)
2917 return str_null_check(str, &w);
2925 char *s = str_null_check(str, &w);
2928 rb_raise(rb_eArgError,
"string contains null char");
2930 rb_raise(rb_eArgError,
"string contains null byte");
/*
 * Wrapper over str_fill_term(): passes the string's current pointer and
 * length together with `newminlen` as the terminator width, and returns the
 * resulting pointer.
 */
2936rb_str_fill_terminator(
VALUE str,
const int newminlen)
2938 char *s = RSTRING_PTR(str);
2939 long len = RSTRING_LEN(str);
2940 return str_fill_term(str, s,
len, newminlen);
2946 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/*
 * Advances `p` towards `e` by a character count taken from `*nthp` (the local
 * `nth` is set on a line outside this excerpt).  Two strategies are visible:
 * an ASCII-compatible path that uses search_nonascii(), and a generic path
 * that steps with rb_enc_mbclen().  An earlier branch is not shown.
 */
2972str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2981 else if (rb_enc_asciicompat(enc)) {
2982 const char *p2, *e2;
2985 while (p < e && 0 < nth) {
2992 p2 = search_nonascii(p, e2);
3001 n = rb_enc_mbclen(p, e, enc);
/* Generic path: one character per iteration until `nth` is used up or `e` is reached. */
3012 while (p < e && nth--) {
3013 p += rb_enc_mbclen(p, e, enc);
3024 return str_nth_len(p, e, &nth, enc);
/*
 * Locates the `nth` character in [p, e).  The visible line delegates to
 * str_nth_len(); handling of `singlebyte` and of the final result is not part
 * of this excerpt.  str_offset() treats a NULL result as "past the end".
 */
3028str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3033 p = str_nth_len(p, e, &nth, enc);
/*
 * Converts a character index into an offset relative to `p`.  When str_nth()
 * yields NULL the whole span length `e - p` is returned; the normal-case
 * return is outside this excerpt.
 */
3042str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3044 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3045 if (!pp)
return e - p;
3052 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3053 STR_ENC_GET(str), single_byte_optimizable(str));
/*
 * UTF-8 specific character skipping based on counting lead bytes.  The local
 * `nth` is initialised on a line that is not part of this excerpt (presumably
 * from `*nthp` -- confirm).
 */
3058str_utf8_nth(
const char *p,
const char *e,
long *nthp)
/* Word-at-a-time path, taken only when both the span and nth exceed two pointer widths. */
3061 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3062 const uintptr_t *s, *t;
3063 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
/* s: p rounded up to a word boundary; t: e rounded down to a word boundary. */
3064 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3065 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
/* Byte-wise until p reaches the aligned start. */
3066 while (p < (
const char *)s) {
3067 if (is_utf8_lead_byte(*p)) nth--;
/* Then whole words, while at least a word's worth of characters is still wanted. */
3071 nth -= count_utf8_lead_bytes_with_word(s);
3073 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
/* Byte-wise tail: stop on the lead byte reached when nth hits zero. */
3077 if (is_utf8_lead_byte(*p)) {
3078 if (nth == 0)
break;
/*
 * Offset counterpart of str_utf8_nth(): obtains the target pointer `pp` for
 * character index `nth`.  How `pp` is turned into the return value is not
 * part of this excerpt.
 */
3088str_utf8_offset(
const char *p,
const char *e,
long nth)
3090 const char *pp = str_utf8_nth(p, e, &nth);
3099 if (single_byte_optimizable(str) || pos < 0)
3102 char *p = RSTRING_PTR(str);
3103 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
/*
 * Builds a string `str2` covering `len` bytes of `str` starting at byte
 * offset `beg`.  Two strategies are visible: copying into str2's embedded
 * buffer, and sharing str's buffer.  The creation of `str2` and the exact
 * branch nesting are not part of this excerpt.
 */
3108str_subseq(
VALUE str,
long beg,
long len)
3116 const int termlen = TERM_LEN(str);
3117 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
/* Copy path: content plus terminator fits into str2's embedded storage. */
3124 if (str_embed_capa(str2) >=
len + termlen) {
3125 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3126 STR_SET_EMBED(str2);
3127 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3128 TERM_FILL(ptr2+
len, termlen);
3130 STR_SET_LEN(str2,
len);
/* Sharing path: point into str's buffer at `beg` and clamp the length to `len`. */
3134 str_replace_shared(str2, str);
3137 RSTRING(str2)->as.heap.ptr += beg;
3138 if (RSTRING_LEN(str2) >
len) {
3139 STR_SET_LEN(str2,
len);
3149 VALUE str2 = str_subseq(str, beg,
len);
3150 rb_enc_cr_str_copy_for_substr(str2, str);
3159 const long blen = RSTRING_LEN(str);
3161 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3163 if (
len < 0)
return 0;
3164 if (beg < 0 && -beg < 0)
return 0;
3168 if (single_byte_optimizable(str)) {
3169 if (beg > blen)
return 0;
3172 if (beg < 0)
return 0;
3174 if (
len > blen - beg)
3176 if (
len < 0)
return 0;
3181 if (
len > -beg)
len = -beg;
3185 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3188 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3194 slen = str_strlen(str, enc);
3196 if (beg < 0)
return 0;
3198 if (
len == 0)
goto end;
3201 else if (beg > 0 && beg > blen) {
3205 if (beg > str_strlen(str, enc))
return 0;
3210 enc == rb_utf8_encoding()) {
3211 p = str_utf8_nth(s, e, &beg);
3212 if (beg > 0)
return 0;
3213 len = str_utf8_offset(p, e,
len);
3219 p = s + beg * char_sz;
3223 else if (
len * char_sz > e - p)
3228 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3229 if (beg > 0)
return 0;
3233 len = str_offset(p, e,
len, enc, 0);
/* Forward declaration so str_substr() can be called before its definition below. */
3241static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3246 return str_substr(str, beg,
len, TRUE);
/*
 * Substring helper.  `p` is computed on a line outside this excerpt
 * (presumably the start pointer from rb_str_subpos -- confirm).  Returns Qnil
 * when `p` is NULL, and also when the resulting length is zero while `empty`
 * is false; otherwise builds the result with str_subseq() and copies
 * encoding/coderange information from `str`.
 */
3256str_substr(
VALUE str,
long beg,
long len,
int empty)
3260 if (!p)
return Qnil;
3261 if (!
len && !empty)
return Qnil;
/* Turn the pointer back into a byte offset for str_subseq(). */
3263 beg = p - RSTRING_PTR(str);
3265 VALUE str2 = str_subseq(str, beg,
len);
3266 rb_enc_cr_str_copy_for_substr(str2, str);
3274 if (CHILLED_STRING_P(str)) {
3279 rb_str_resize(str, RSTRING_LEN(str));
3297 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
/*
 * The visible return path hands `str` to rb_fstring() and returns its result.
 * Any preceding checks are not part of this excerpt.
 */
3340str_uminus(
VALUE str)
3345 return rb_fstring(str);
/* From this point on, rb_str_dup_frozen expands to rb_str_new_frozen. */
3349#define rb_str_dup_frozen rb_str_new_frozen
3354 rb_check_frozen(str);
3355 if (
FL_TEST(str, STR_TMPLOCK)) {
3358 FL_SET(str, STR_TMPLOCK);
3365 rb_check_frozen(str);
3366 if (!
FL_TEST(str, STR_TMPLOCK)) {
3386 const int termlen = TERM_LEN(str);
3388 str_modifiable(str);
3389 if (STR_SHARED_P(str)) {
3392 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3393 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3404 else if (
len > RSTRING_LEN(str)) {
3408 const char *
const new_end = RSTRING_PTR(str) +
len;
3418 else if (
len < RSTRING_LEN(str)) {
3426 STR_SET_LEN(str,
len);
3427 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3434 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3437 int independent = str_independent(str);
3438 long slen = RSTRING_LEN(str);
3439 const int termlen = TERM_LEN(str);
3441 if (slen >
len || (termlen != 1 && slen <
len)) {
3447 if (STR_EMBED_P(str)) {
3448 if (
len == slen)
return str;
3449 if (str_embed_capa(str) >=
len + termlen) {
3450 STR_SET_LEN(str,
len);
3454 str_make_independent_expand(str, slen,
len - slen, termlen);
3456 else if (str_embed_capa(str) >=
len + termlen) {
3457 char *
ptr = STR_HEAP_PTR(str);
3459 if (slen >
len) slen =
len;
3462 STR_SET_LEN(str,
len);
3463 if (independent) ruby_xfree(
ptr);
3466 else if (!independent) {
3467 if (
len == slen)
return str;
3468 str_make_independent_expand(str, slen,
len - slen, termlen);
3472 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3473 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3476 else if (
len == slen)
return str;
3477 STR_SET_LEN(str,
len);
/*
 * Makes sure `str` has room for `len` more bytes beyond its current length.
 * Raises ArgumentError ("string sizes too big") when the sum would exceed
 * LONG_MAX.  How `capa` is grown inside the branches is not part of this
 * excerpt.
 */
3484str_ensure_available_capa(
VALUE str,
long len)
3486 str_modify_keep_cr(str);
3488 const int termlen = TERM_LEN(str);
3489 long olen = RSTRING_LEN(str);
/* Overflow guard evaluated before the addition below. */
3491 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3492 rb_raise(rb_eArgError,
"string sizes too big");
3495 long total = olen +
len;
3496 long capa = str_capacity(str, termlen);
/* Very large totals are handled separately from the growth loop. */
3499 if (total >= LONG_MAX / 2) {
3502 while (total >
capa) {
3505 RESIZE_CAPA_TERM(str,
capa, termlen);
/*
 * Appends `len` bytes from `ptr` to `str`, growing the buffer when needed.
 * Returns 0 immediately for len == 0.  Raises ArgumentError ("string sizes
 * too big") when the combined length would exceed LONG_MAX.  The use of
 * `keep_cr` and `off` is not visible in this excerpt.
 */
3510str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3513 str_modify_keep_cr(str);
3518 if (
len == 0)
return 0;
3520 long total, olen,
off = -1;
3522 const int termlen = TERM_LEN(str);
/* True when `ptr` points into str's own buffer (presumably remembered in `off` -- confirm). */
3525 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3529 long capa = str_capacity(str, termlen);
/* Overflow guard for olen + len. */
3531 if (olen > LONG_MAX -
len) {
3532 rb_raise(rb_eArgError,
"string sizes too big");
3536 if (total >= LONG_MAX / 2) {
3539 while (total >
capa) {
3542 RESIZE_CAPA_TERM(str,
capa, termlen);
/* Re-read the buffer pointer after the resize. */
3543 sptr = RSTRING_PTR(str);
/* Copy the new bytes after the old content, then fix length and terminator. */
3548 memcpy(sptr + olen,
ptr,
len);
3549 STR_SET_LEN(str, total);
3550 TERM_FILL(sptr + total, termlen);
/* str_buf_cat4() with keep_cr fixed to false. */
3555#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
/* Same, with the length taken from rb_strlen_lit(ptr). */
3556#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3561 if (
len == 0)
return str;
3563 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3565 return str_buf_cat(str,
ptr,
len);
/*
 * Appends one byte to `str`, using a terminator width fixed at 1.
 * `string_length` starts at -1 and is assigned on a line outside this
 * excerpt.  Raises ArgumentError ("string sizes too big") if adding one byte
 * would overflow.
 */
3576rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3581 if (UNLIKELY(!str_independent(str))) {
3582 str_make_independent(str);
3585 long string_length = -1;
3586 const int null_terminator_length = 1;
3591 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3592 rb_raise(rb_eArgError,
"string sizes too big");
3595 long string_capacity = str_capacity(str, null_terminator_length);
/* Fast path: capacity already suffices, so store the byte in place. */
3601 if (LIKELY(string_capacity >= string_length + 1)) {
3603 sptr[string_length] = byte;
3604 STR_SET_LEN(str, string_length + 1);
3605 TERM_FILL(sptr + string_length + 1, null_terminator_length);
/* Otherwise go through the general append routine. */
3609 str_buf_cat(str, (
char *)&
byte, 1);
3625 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
/*
 * Appends `len` bytes at `ptr`, tagged with encoding index `ptr_encindex` and
 * coderange `ptr_cr`, to `str`.  The visible lines show: a same-encoding
 * case, a case where either encoding is not ASCII-compatible, selection of
 * the result encoding index (`res_encindex`) between the two inputs, an
 * ArgumentError for a bad length, the actual append through str_buf_cat(),
 * and an error path that formats both encoding names.  The conditions
 * selecting each branch are mostly outside this excerpt.
 */
3636rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3637 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3646 if (str_encindex == ptr_encindex) {
3648 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3652 str_enc = rb_enc_from_index(str_encindex);
3653 ptr_enc = rb_enc_from_index(ptr_encindex);
3654 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
/* An empty receiver gets its terminator width switched to that of ptr's encoding. */
3657 if (RSTRING_LEN(str) == 0) {
3660 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3666 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
/* Report the (possibly freshly scanned) coderange of the appended bytes. */
3675 *ptr_cr_ret = ptr_cr;
3677 if (str_encindex != ptr_encindex &&
3680 str_enc = rb_enc_from_index(str_encindex);
3681 ptr_enc = rb_enc_from_index(ptr_encindex);
3686 res_encindex = str_encindex;
3691 res_encindex = str_encindex;
3695 res_encindex = ptr_encindex;
3700 res_encindex = str_encindex;
3707 res_encindex = str_encindex;
3713 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3715 str_buf_cat(str,
ptr,
len);
3721 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3728 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3738 if (rb_enc_asciicompat(enc)) {
3739 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3745 unsigned int c = (
unsigned char)*
ptr;
3746 int len = rb_enc_codelen(c, enc);
3747 rb_enc_mbcput(c, buf, enc);
3748 rb_enc_cr_str_buf_cat(str, buf,
len,
3761 if (str_enc_fastpath(str)) {
3765 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3771 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3782 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
/*
 * Concatenates the `num` strings in `strary`.  The total byte length is
 * summed first (starting from 1), the encoding of the first element is copied
 * onto the result, and the elements are then processed from index `s`.
 * Creation of `str` and the per-element append are not part of this excerpt.
 */
3798rb_str_concat_literals(
size_t num,
const VALUE *strary)
3802 unsigned long len = 1;
3807 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3809 str_enc_copy_direct(str, strary[0]);
3811 for (i = s; i < num; ++i) {
3812 const VALUE v = strary[i];
/* A non-US-ASCII encoding index is applied to the result. */
3816 if (encidx != ENCINDEX_US_ASCII) {
3818 rb_enc_set_index(str, encidx);
/*
 * Method-style entry point taking `argc` arguments in `argv` for receiver
 * `str`.  The receiver is checked with str_modifiable() first.  With more
 * than one argument, a temporary `arg_str` receives str's encoding and the
 * arguments are looped over; the loop body and the other argc cases are not
 * part of this excerpt.
 */
3831rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3833 str_modifiable(str);
3838 else if (argc > 1) {
3841 rb_enc_copy(arg_str, str);
3842 for (i = 0; i < argc; i++) {
/*
 * Appends each argument to `str` as raw bytes.  Visible structure:
 *   1. a validation pass that sums RSTRING_LEN of string arguments into
 *      `needed_capacity` and raises for anything that is neither String nor
 *      Integer (see the message below);
 *   2. one capacity reservation via str_ensure_available_capa();
 *   3. a pass that masks Integer arguments to 0xff before extracting a byte;
 *   4. a length/terminator update, followed by a further pass over the
 *      arguments whose body is not part of this excerpt.
 * Unexpected argument types in the later passes are treated as internal
 * errors (rb_bug).
 */
3877rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3879 long needed_capacity = 0;
3883 for (
int index = 0; index < argc; index++) {
3884 VALUE obj = argv[index];
3892 needed_capacity += RSTRING_LEN(obj);
3897 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3904 str_ensure_available_capa(str, needed_capacity);
3907 for (
int index = 0; index < argc; index++) {
3908 VALUE obj = argv[index];
/* The masked value is written back into argv as well. */
3913 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3914 char byte = (char)(
NUM2INT(obj) & 0xFF);
3928 rb_bug(
"append_as_bytes arguments should have been validated");
3932 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3933 TERM_FILL(sptr, TERM_LEN(str));
3938 for (
int index = 0; index < argc; index++) {
3939 VALUE obj = argv[index];
3956 rb_bug(
"append_as_bytes arguments should have been validated");
4035 if (rb_num_to_uint(str2, &code) == 0) {
4048 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4051 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4054 long pos = RSTRING_LEN(str1);
4059 switch (
len = rb_enc_codelen(code, enc)) {
4060 case ONIGERR_INVALID_CODE_POINT_VALUE:
4061 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4063 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4069 rb_enc_mbcput(code, buf, enc);
4070 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4071 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4073 rb_str_resize(str1, pos+
len);
4074 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
/*
 * Decides which encoding index applies when byte/code `code` is appended to
 * a string of encoding `enc`.  Visible rule: for a US-ASCII string and a code
 * above 127 the answer is ENCINDEX_ASCII_8BIT.  The remaining return values
 * are not part of this excerpt.
 */
4087rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4089 int encidx = rb_enc_to_index(enc);
4091 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4096 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4097 return ENCINDEX_ASCII_8BIT;
/*
 * Method-style entry point with the same visible shape as
 * rb_str_concat_multi(): the receiver is checked with str_modifiable(), and
 * with more than one argument a temporary `arg_str` receives str's encoding
 * before the arguments are looped over.  The loop body and the other argc
 * cases are not part of this excerpt.
 */
4119rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4121 str_modifiable(str);
4126 else if (argc > 1) {
4129 rb_enc_copy(arg_str, str);
4130 for (i = 0; i < argc; i++) {
4143 st_index_t precomputed_hash;
4144 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4146 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4147 return precomputed_hash;
4150 return str_do_hash(str);
4157 const char *ptr1, *ptr2;
4160 return (len1 != len2 ||
4162 memcmp(ptr1, ptr2, len1) != 0);
4174rb_str_hash_m(
VALUE str)
/* Minimum of a and b; yields a when they compare equal.  Each argument may be evaluated twice. */
4180#define lesser(a,b) (((a)>(b))?(b):(a))
4188 if (RSTRING_LEN(str1) == 0)
return TRUE;
4189 if (RSTRING_LEN(str2) == 0)
return TRUE;
4192 if (idx1 == idx2)
return TRUE;
4197 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4201 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4211 const char *ptr1, *ptr2;
4214 if (str1 == str2)
return 0;
4217 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4226 if (len1 > len2)
return 1;
4229 if (retval > 0)
return 1;
4263 if (str1 == str2)
return Qtrue;
4270 return rb_str_eql_internal(str1, str2);
4284 if (str1 == str2)
return Qtrue;
4286 return rb_str_eql_internal(str1, str2);
4318 return rb_invcmp(str1, str2);
4360 return str_casecmp(str1, s);
4368 const char *p1, *p1end, *p2, *p2end;
4370 enc = rb_enc_compatible(str1, str2);
4375 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4376 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4377 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4378 while (p1 < p1end && p2 < p2end) {
4380 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4381 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4383 return INT2FIX(c1 < c2 ? -1 : 1);
4390 while (p1 < p1end && p2 < p2end) {
4391 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4392 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4394 if (0 <= c1 && 0 <= c2) {
4398 return INT2FIX(c1 < c2 ? -1 : 1);
4402 l1 = rb_enc_mbclen(p1, p1end, enc);
4403 l2 = rb_enc_mbclen(p2, p2end, enc);
4404 len = l1 < l2 ? l1 : l2;
4405 r = memcmp(p1, p2,
len);
4407 return INT2FIX(r < 0 ? -1 : 1);
4409 return INT2FIX(l1 < l2 ? -1 : 1);
4415 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4416 if (p1 == p1end)
return INT2FIX(-1);
4449 return str_casecmp_p(str1, s);
4456 VALUE folded_str1, folded_str2;
4457 VALUE fold_opt = sym_fold;
4459 enc = rb_enc_compatible(str1, str2);
4464 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4465 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4467 return rb_str_eql(folded_str1, folded_str2);
/*
 * Searches for the byte sequence `sub_ptr`/`sub_len` inside the string
 * buffer, starting `offset` bytes in, using rb_memsearch().  A negative
 * rb_memsearch() result is returned as-is.  A hit is accepted when the
 * pointer `t` equals the hit position (`t` is computed on a line outside this
 * excerpt; presumably it is the hit adjusted to a character boundary --
 * confirm); otherwise the search window is shrunk and the search repeated.
 * Returns the hit position plus the accumulated offset, or -1 when the window
 * is exhausted.
 */
4471strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4472 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4474 const char *search_start = str_ptr;
4475 long pos, search_len = str_len - offset;
4479 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4480 if (pos < 0)
return pos;
4482 if (t == search_start + pos)
break;
4483 search_len -= t - search_start;
4484 if (search_len <= 0)
return -1;
4485 offset += t - search_start;
4488 return pos + offset;
/* rb_strseq_index() with in_byte = 0. */
4492#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
/* rb_strseq_index() with in_byte = 1. */
4493#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
/*
 * Finds `sub` in `str` starting at `offset`.  When `in_byte` is non-zero,
 * lengths and the offset are taken as byte counts; otherwise lengths come
 * from str_strlen() (or the byte length for single-byte-optimizable strings)
 * and the offset is converted with str_offset().  Returns -1 when `sub` is a
 * broken string, is longer than `str`, or cannot fit after `offset`; returns
 * `offset` itself for an empty `sub`; otherwise the strseq_core() result.
 * The guards around some of the lines below are not part of this excerpt.
 */
4496rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4498 const char *str_ptr, *str_ptr_end, *sub_ptr;
4499 long str_len, sub_len;
4502 enc = rb_enc_check(str, sub);
4503 if (is_broken_string(sub))
return -1;
4505 str_ptr = RSTRING_PTR(str);
4507 str_len = RSTRING_LEN(str);
4508 sub_ptr = RSTRING_PTR(sub);
4509 sub_len = RSTRING_LEN(sub);
4511 if (str_len < sub_len)
return -1;
4514 long str_len_char, sub_len_char;
4515 int single_byte = single_byte_optimizable(str);
4516 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4517 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
/* The offset is rebased by the string length here (guard not shown). */
4519 offset += str_len_char;
4520 if (offset < 0)
return -1;
4522 if (str_len_char - offset < sub_len_char)
return -1;
4523 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4526 if (sub_len == 0)
return offset;
4529 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
/*
 * Method-style entry point: one required argument (`sub`) and one optional
 * (`initpos`), per the "11" scan spec.  When a position is given, a negative
 * value is rebased by the character length and an out-of-range value takes
 * the branch below (its body is not part of this excerpt).  The position is
 * converted with str_offset() before the search via rb_str_index().
 */
4542rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4549 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4550 long slen = str_strlen(str, enc);
4552 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4564 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4565 enc, single_byte_optimizable(str));
4576 pos = rb_str_index(str, sub, pos);
/*
 * For strings that are not single-byte-optimizable, checks with
 * at_char_boundary() that byte offset `pos` falls on a character boundary and
 * reports an error with the message below otherwise.  The raising call itself
 * and the definition of `e` are not part of this excerpt.
 */
4590str_ensure_byte_pos(
VALUE str,
long pos)
4592 if (!single_byte_optimizable(str)) {
4593 const char *s = RSTRING_PTR(str);
4595 const char *p = s + pos;
4596 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4598 "offset %ld does not land on character boundary", pos);
/*
 * Byte-oriented counterpart of rb_str_index_m(): positions are measured
 * against RSTRING_LEN, the position is validated with str_ensure_byte_pos(),
 * and the search goes through rb_str_byteindex().  A non-negative result is
 * returned as a Ruby integer; the not-found return is not part of this
 * excerpt.
 */
4671rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4677 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4678 long slen = RSTRING_LEN(str);
4680 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4691 str_ensure_byte_pos(str, pos);
4703 pos = rb_str_byteindex(str, sub, pos);
4704 if (pos >= 0)
return LONG2NUM(pos);
/*
 * Backward byte search: scans from the end of the `search_len`-byte region at
 * `search_str` towards its start and returns a pointer to the last byte equal
 * to `chr` (bytes are compared as unsigned char).  The not-found return is
 * not part of this excerpt.
 */
4711memrchr(
const char *search_str,
int chr,
long search_len)
4713 const char *ptr = search_str + search_len;
4714 while (ptr > search_str) {
4715 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4725 char *hit, *adjusted;
4727 long slen, searchlen;
4730 sbeg = RSTRING_PTR(str);
4731 slen = RSTRING_LEN(sub);
4732 if (slen == 0)
return s - sbeg;
4734 t = RSTRING_PTR(sub);
4736 searchlen = s - sbeg + 1;
4738 if (memcmp(s, t, slen) == 0) {
4743 hit = memrchr(sbeg, c, searchlen);
4746 if (hit != adjusted) {
4747 searchlen = adjusted - sbeg;
4750 if (memcmp(hit, t, slen) == 0)
4752 searchlen = adjusted - sbeg;
4753 }
while (searchlen > 0);
4767 enc = rb_enc_check(str, sub);
4768 if (is_broken_string(sub))
return -1;
4769 singlebyte = single_byte_optimizable(str);
4770 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4771 slen = str_strlen(sub, enc);
4774 if (
len < slen)
return -1;
4775 if (
len - pos < slen) pos =
len - slen;
4776 if (
len == 0)
return pos;
4778 sbeg = RSTRING_PTR(str);
4781 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4787 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4788 return str_rindex(str, sub, s, enc);
/*
 * Method-style entry point for a reverse search: one required argument
 * (`sub`) and one optional (`initpos`).  `len` is the character length of the
 * receiver.  A negative position is rebased by `len`; a position beyond the
 * end is clamped to `len`.  The position is converted with str_offset()
 * before calling rb_str_rindex().  Branch bodies and the return are not part
 * of this excerpt.
 */
4800rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4805 long pos,
len = str_strlen(str, enc);
4807 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4809 if (pos < 0 && (pos +=
len) < 0) {
4815 if (pos >
len) pos =
len;
4823 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4824 enc, single_byte_optimizable(str));
4835 pos = rb_str_rindex(str, sub, pos);
/*
 * Reverse search for `sub` in `str` with `pos` measured in bytes.  Returns -1
 * when `sub` is a broken string or longer than `str`.  `pos` is pulled back
 * so that `sub` still fits before the end; for an empty receiver `pos` is
 * returned directly.  The general case is delegated to str_rindex(); the
 * guard and result of the memcmp shortcut are not part of this excerpt.
 */
4845rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4851 enc = rb_enc_check(str, sub);
4852 if (is_broken_string(sub))
return -1;
4853 len = RSTRING_LEN(str);
4854 slen = RSTRING_LEN(sub);
4857 if (
len < slen)
return -1;
4858 if (
len - pos < slen) pos =
len - slen;
4859 if (
len == 0)
return pos;
4861 sbeg = RSTRING_PTR(str);
4864 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4871 return str_rindex(str, sub, s, enc);
/*
 * Byte-oriented counterpart of rb_str_rindex_m(): `len` is the byte length, a
 * negative position is rebased by `len`, a position beyond the end is clamped
 * to `len`, and the position is validated with str_ensure_byte_pos() before
 * calling rb_str_byterindex().  A non-negative result is returned as a Ruby
 * integer; the not-found return is not part of this excerpt.
 */
4961rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4965 long pos,
len = RSTRING_LEN(str);
4967 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4969 if (pos < 0 && (pos +=
len) < 0) {
4975 if (pos >
len) pos =
len;
4981 str_ensure_byte_pos(str, pos);
4993 pos = rb_str_byterindex(str, sub, pos);
4994 if (pos >= 0)
return LONG2NUM(pos);
5033 switch (OBJ_BUILTIN_TYPE(y)) {
/*
 * Converts `re` to a pattern with get_pat() and calls its "match" method with
 * the given argc/argv.  How `re` is taken from the arguments and what is done
 * with `result` are not part of this excerpt.
 */
5087rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5094 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
/*
 * Converts argv[0] to a pattern and returns rb_reg_match_p() for it against
 * `str`.  The start position is argv[1] converted to long when more than one
 * argument is given, and 0 otherwise.
 */
5125rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5129 re = get_pat(argv[0]);
5130 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5139static enum neighbor_char
5145 if (rb_enc_mbminlen(enc) > 1) {
5147 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5149 return NEIGHBOR_NOT_CHAR;
5151 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5153 if (!l)
return NEIGHBOR_NOT_CHAR;
5154 if (l !=
len)
return NEIGHBOR_WRAPPED;
5155 rb_enc_mbcput(c, p, enc);
5156 r = rb_enc_precise_mbclen(p, p +
len, enc);
5158 return NEIGHBOR_NOT_CHAR;
5160 return NEIGHBOR_FOUND;
5163 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5166 return NEIGHBOR_WRAPPED;
5167 ++((
unsigned char*)p)[i];
5168 l = rb_enc_precise_mbclen(p, p+
len, enc);
5172 return NEIGHBOR_FOUND;
5175 memset(p+l, 0xff,
len-l);
5181 for (len2 =
len-1; 0 < len2; len2--) {
5182 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5186 memset(p+len2+1, 0xff,
len-(len2+1));
5191static enum neighbor_char
5196 if (rb_enc_mbminlen(enc) > 1) {
5198 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5200 return NEIGHBOR_NOT_CHAR;
5202 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5203 if (!c)
return NEIGHBOR_NOT_CHAR;
5206 if (!l)
return NEIGHBOR_NOT_CHAR;
5207 if (l !=
len)
return NEIGHBOR_WRAPPED;
5208 rb_enc_mbcput(c, p, enc);
5209 r = rb_enc_precise_mbclen(p, p +
len, enc);
5211 return NEIGHBOR_NOT_CHAR;
5213 return NEIGHBOR_FOUND;
5216 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5219 return NEIGHBOR_WRAPPED;
5220 --((
unsigned char*)p)[i];
5221 l = rb_enc_precise_mbclen(p, p+
len, enc);
5225 return NEIGHBOR_FOUND;
5228 memset(p+l, 0,
len-l);
5234 for (len2 =
len-1; 0 < len2; len2--) {
5235 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5239 memset(p+len2+1, 0,
len-(len2+1));
/*
 * Advances the `len`-byte character at `p` to a following character of the
 * same class (digit or alpha).  The class is determined from the character's
 * code point; a character in neither class yields NEIGHBOR_NOT_CHAR.  Up to
 * max_gaps + 1 attempts are made with enc_succ_char().  The later part uses
 * enc_pred_char() and writes into `carry` before returning NEIGHBOR_WRAPPED.
 * The tests that pick each branch, and the role of `save`, are not part of
 * this excerpt.
 */
5253static enum neighbor_char
5254enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5256 enum neighbor_char ret;
5260 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5264 const int max_gaps = 1;
5266 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5268 ctype = ONIGENC_CTYPE_DIGIT;
5270 ctype = ONIGENC_CTYPE_ALPHA;
5272 return NEIGHBOR_NOT_CHAR;
5275 for (
try = 0;
try <= max_gaps; ++
try) {
5276 ret = enc_succ_char(p,
len, enc);
5277 if (ret == NEIGHBOR_FOUND) {
5278 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5280 return NEIGHBOR_FOUND;
5287 ret = enc_pred_char(p,
len, enc);
5288 if (ret == NEIGHBOR_FOUND) {
5289 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5302 return NEIGHBOR_NOT_CHAR;
5305 if (ctype != ONIGENC_CTYPE_DIGIT) {
5307 return NEIGHBOR_WRAPPED;
5311 enc_succ_char(carry,
len, enc);
5312 return NEIGHBOR_WRAPPED;
5330 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5331 rb_enc_cr_str_copy_for_substr(str, orig);
5332 return str_succ(str);
5339 char *sbeg, *s, *e, *last_alnum = 0;
5340 int found_alnum = 0;
5342 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5343 long carry_pos = 0, carry_len = 1;
5344 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5346 slen = RSTRING_LEN(str);
5347 if (slen == 0)
return str;
5349 enc = STR_ENC_GET(str);
5350 sbeg = RSTRING_PTR(str);
5351 s = e = sbeg + slen;
5353 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5354 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5360 l = rb_enc_precise_mbclen(s, e, enc);
5361 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5362 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5363 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5365 case NEIGHBOR_NOT_CHAR:
5367 case NEIGHBOR_FOUND:
5369 case NEIGHBOR_WRAPPED:
5374 carry_pos = s - sbeg;
5379 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5380 enum neighbor_char neighbor;
5381 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5382 l = rb_enc_precise_mbclen(s, e, enc);
5383 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5384 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5386 neighbor = enc_succ_char(tmp, l, enc);
5388 case NEIGHBOR_FOUND:
5392 case NEIGHBOR_WRAPPED:
5395 case NEIGHBOR_NOT_CHAR:
5398 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5400 enc_succ_char(s, l, enc);
5402 if (!rb_enc_asciicompat(enc)) {
5403 MEMCPY(carry, s,
char, l);
5406 carry_pos = s - sbeg;
5410 RESIZE_CAPA(str, slen + carry_len);
5411 sbeg = RSTRING_PTR(str);
5412 s = sbeg + carry_pos;
5413 memmove(s + carry_len, s, slen - carry_pos);
5414 memmove(s, carry, carry_len);
5416 STR_SET_LEN(str, slen);
5417 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5433rb_str_succ_bang(
VALUE str)
5441all_digits_p(
const char *s,
long len)
5495 VALUE end, exclusive;
5499 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5505 VALUE current, after_end;
5512 enc = rb_enc_check(beg, end);
5513 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5515 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5516 char c = RSTRING_PTR(beg)[0];
5517 char e = RSTRING_PTR(end)[0];
5519 if (c > e || (excl && c == e))
return beg;
5521 VALUE str = rb_enc_str_new(&c, 1, enc);
5523 if ((*each)(str, arg))
break;
5524 if (!excl && c == e)
break;
5526 if (excl && c == e)
break;
5531 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5532 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5533 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5538 b = rb_str_to_inum(beg, 10, FALSE);
5539 e = rb_str_to_inum(end, 10, FALSE);
5546 if (excl && bi == ei)
break;
5547 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5552 ID op = excl ?
'<' : idLE;
5553 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5558 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5559 b = rb_funcallv(b, succ, 0, 0);
5566 if (n > 0 || (excl && n == 0))
return beg;
5568 after_end = rb_funcallv(end, succ, 0, 0);
5573 next = rb_funcallv(current, succ, 0, 0);
5574 if ((*each)(current, arg))
break;
5575 if (
NIL_P(next))
break;
5579 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5594 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5595 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5596 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5598 b = rb_str_to_inum(beg, 10, FALSE);
5604 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5612 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5613 b = rb_funcallv(b, succ, 0, 0);
5619 VALUE next = rb_funcallv(current, succ, 0, 0);
5620 if ((*each)(current, arg))
break;
5623 if (RSTRING_LEN(current) == 0)
5634 if (!
rb_equal(str, *argp))
return 0;
5648 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5649 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5650 rb_enc_asciicompat(STR_ENC_GET(val))) {
5651 const char *bp = RSTRING_PTR(beg);
5652 const char *ep = RSTRING_PTR(end);
5653 const char *vp = RSTRING_PTR(val);
5654 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5655 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5663 if (b <= v && v < e)
return Qtrue;
5664 return RBOOL(!
RTEST(exclusive) && v == e);
5671 all_digits_p(bp, RSTRING_LEN(beg)) &&
5672 all_digits_p(ep, RSTRING_LEN(end))) {
5677 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5679 return RBOOL(
NIL_P(val));
5702 return rb_str_subpat(str, indx,
INT2FIX(0));
5705 if (rb_str_index(str, indx, 0) != -1)
5711 long beg,
len = str_strlen(str, NULL);
5723 return str_substr(str, idx, 1, FALSE);
/*
 * Method-style entry point for element reference.  Three dispatch targets
 * are visible: rb_str_subpat() and rb_str_substr_two_fixnums() for two
 * arguments, and rb_str_aref() for a single argument.  The conditions that
 * choose between them are not part of this excerpt.
 */
5740rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5744 return rb_str_subpat(str, argv[0], argv[1]);
5747 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5751 return rb_str_aref(str, argv[0]);
5757 char *ptr = RSTRING_PTR(str);
5758 long olen = RSTRING_LEN(str), nlen;
5760 str_modifiable(str);
5761 if (
len > olen)
len = olen;
5763 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5765 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5767 ptr =
RSTRING(str)->as.embed.ary;
5768 memmove(ptr, oldptr +
len, nlen);
5769 if (fl == STR_NOEMBED)
xfree(oldptr);
5772 if (!STR_SHARED_P(str)) {
5774 rb_enc_cr_str_exact_copy(shared, str);
5779 STR_SET_LEN(str, nlen);
5781 if (!SHARABLE_MIDDLE_SUBSTRING) {
5782 TERM_FILL(ptr + nlen, TERM_LEN(str));
/*
 * Replaces the `len` bytes of `str` at byte offset `beg` with `vlen` bytes
 * taken from `val` starting at `vbeg`.  Visible steps: a special case for
 * beg == 0 with nothing to insert, making the string writable, resizing to
 * slen + vlen - len, shifting the tail with memmove, copying the new bytes
 * in, and updating length and terminator.  The guards around the resize and
 * the tail move, and the update of `slen`, are not part of this excerpt.
 */
5789rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5795 if (beg == 0 && vlen == 0) {
5800 str_modify_keep_cr(str);
5804 RESIZE_CAPA(str, slen + vlen -
len);
/* Re-read the buffer pointer after the resize. */
5805 sptr = RSTRING_PTR(str);
/* Move the bytes that follow the replaced range to their new position. */
5814 memmove(sptr + beg + vlen,
5816 slen - (beg +
len));
5818 if (vlen < beg &&
len < 0) {
/* memmove rather than memcpy (presumably because val may alias str -- confirm). */
5822 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5825 STR_SET_LEN(str, slen);
5826 TERM_FILL(&sptr[slen], TERM_LEN(str));
5833 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5842 int singlebyte = single_byte_optimizable(str);
5848 enc = rb_enc_check(str, val);
5849 slen = str_strlen(str, enc);
5851 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5860 if (
len > slen - beg) {
5863 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5868 beg = p - RSTRING_PTR(str);
5870 rb_str_update_0(str, beg,
len, val);
5871 rb_enc_associate(str, enc);
5882 long start, end,
len;
5892 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5896 nth += regs->num_regs;
5906 enc = rb_enc_check_str(str, val);
5907 rb_str_update_0(str, start,
len, val);
5908 rb_enc_associate(str, enc);
5916 switch (
TYPE(indx)) {
5918 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5922 beg = rb_str_index(str, indx, 0);
/*
 * Method-style entry point for element assignment.  Two dispatch targets are
 * visible: rb_str_subpat_set() with three arguments, and rb_str_aset() with
 * two.  The conditions that choose between them are not part of this excerpt.
 */
5961rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5965 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5973 return rb_str_aset(str, argv[0], argv[1]);
/*
 * Method-style entry point that removes a portion of `str` in place.  The
 * receiver is made writable first.  The visible lines derive a byte range
 * (`beg`, `len`) in several ways: from a match-register index `nth` (negative
 * values are rebased by the register count, out-of-range values return nil),
 * from a substring search via rb_str_index() (nil when not found), and from
 * pointers converted back to offsets.  The result string receives
 * encoding/coderange information from `str`, and the receiver's length and
 * terminator are updated at the end.  Branch conditions and the construction
 * of `result` are not part of this excerpt.
 */
6025rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6033 str_modify_keep_cr(str);
6041 if ((nth += regs->num_regs) <= 0)
return Qnil;
6043 else if (nth >= regs->num_regs)
return Qnil;
6045 len = END(nth) - beg;
6048 else if (argc == 2) {
6057 beg = p - RSTRING_PTR(str);
6061 beg = rb_str_index(str, indx, 0);
6062 if (beg == -1)
return Qnil;
6063 len = RSTRING_LEN(indx);
6075 beg = p - RSTRING_PTR(str);
6084 beg = p - RSTRING_PTR(str);
6088 rb_enc_cr_str_copy_for_substr(result, str);
6096 char *sptr = RSTRING_PTR(str);
6097 long slen = RSTRING_LEN(str);
6098 if (beg +
len > slen)
6102 slen - (beg +
len));
6104 STR_SET_LEN(str, slen);
6105 TERM_FILL(&sptr[slen], TERM_LEN(str));
/* Line belonging to the preceding definition (header not shown): dispatch on the builtin type of pat. */
6116 switch (OBJ_BUILTIN_TYPE(pat)) {
/*
 * get_pat_quoted (partial listing): takes a pattern object and a check flag.
 * Dispatches on the builtin type of pat; with check set, the pattern is tested
 * with is_broken_string (the consequence of that test is not shown here).
 */
6135get_pat_quoted(
VALUE pat,
int check)
6139 switch (OBJ_BUILTIN_TYPE(pat)) {
6153 if (check && is_broken_string(pat)) {
/*
 * rb_pat_search0 (partial listing): search str for pat starting at pos.
 *
 * Two search paths are visible: a byte-index search via rb_str_byteindex and a
 * regexp search via rb_reg_search0. The test that picks between them is not part
 * of this listing (NOTE(review): presumably whether pat is a String -- confirm).
 * When set_backref_str is non-zero on the byte-index path, backref data is
 * recorded and stored through match.
 */
6160rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6163 pos = rb_str_byteindex(str, pat, pos);
6164 if (set_backref_str) {
/* The backref is built from a frozen version of str, the found position and the pattern length. */
6166 str = rb_str_new_frozen_String(str);
6167 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6169 *match = match_data;
6179 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
/*
 * rb_pat_search: convenience wrapper around rb_pat_search0 that passes NULL as
 * the match output argument and returns rb_pat_search0's result.
 */
6184rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6186 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
/*
 * rb_str_sub_bang (partial listing): in-place replacement of the first match of a
 * pattern in str.
 *
 * Visible flow: read an optional hash replacement from argv[1], normalise the
 * pattern, search from offset 0, obtain the replacement string, reconcile
 * encodings, then splice the replacement into the buffer.
 */
6204rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6218 hash = rb_check_hash_type(argv[1]);
6224 pat = get_pat_quoted(argv[0], 1);
6226 str_modifiable(str);
/* Search from the start, asking for the backref string to be set. */
6227 beg = rb_pat_search(pat, str, 0, 1);
/* End of the match computed from the pattern's byte length. */
6241 end0 = beg0 + RSTRING_LEN(pat);
6250 if (iter || !
NIL_P(hash)) {
/* Snapshot pointer and length so a later str_mod_check can compare against them. */
6251 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
/* Hash form: the replacement is looked up by the matched substring. */
6257 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6260 str_mod_check(str, p,
len);
6261 rb_check_frozen(str);
6267 enc = rb_enc_compatible(str, repl);
6270 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
/* Arguments of an error report naming both encodings (the raise call itself is not shown). */
6274 rb_enc_inspect_name(str_enc),
6275 rb_enc_inspect_name(STR_ENC_GET(repl)));
6277 enc = STR_ENC_GET(repl);
6280 rb_enc_associate(str, enc);
6290 rlen = RSTRING_LEN(repl);
6291 len = RSTRING_LEN(str);
/* Resize to the old length plus the difference between replacement and match lengths. */
6293 RESIZE_CAPA(str,
len + rlen - plen);
6295 p = RSTRING_PTR(str);
/* Move the text after the match so it follows the replacement. */
6297 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6299 rp = RSTRING_PTR(repl);
/* Copy the replacement over the match position. */
6300 memmove(p + beg0, rp, rlen);
6302 STR_SET_LEN(str,
len);
6303 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
/* Line from a later caller whose header is not shown (presumably the non-bang variant). */
6326 rb_str_sub_bang(argc, argv, str);
/*
 * str_gsub (partial listing): shared implementation of global substitution.
 * bang is non-zero for the in-place variant (rb_str_gsub_bang passes 1, the
 * other visible caller passes 0).
 *
 * Visible flow: choose a replacement mode, find the first match, then loop:
 * obtain the replacement, advance offset past the match, and search again.
 * In the visible tail the accumulated dest replaces the contents of str.
 */
6331str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6334 long beg, beg0, end0;
6335 long offset, blen, slen,
len, last;
/* Replacement mode; STR is the initial value. */
6336 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
/* Starts at -1; later set to 0 or 1 (see the need_backref_str < 0 test). */
6338 int need_backref_str = -1;
6348 hash = rb_check_hash_type(argv[1]);
6352 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6360 rb_error_arity(argc, 1, 2);
6363 pat = get_pat_quoted(argv[0], 1);
6364 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
/* In-place variant returns Qnil here (the guarding no-match test is not shown). */
6367 if (bang)
return Qnil;
6372 blen = RSTRING_LEN(str) + 30;
/* Snapshot of pointer and length, used by str_mod_check below. */
6374 sp = RSTRING_PTR(str);
6375 slen = RSTRING_LEN(str);
6377 str_enc = STR_ENC_GET(str);
6378 rb_enc_associate(dest, str_enc);
6385 end0 = beg0 + RSTRING_LEN(pat);
6399 struct RString fake_str = {RBASIC_INIT};
6401 if (mode == FAST_MAP) {
6410 val = rb_hash_aref(hash, key);
6413 str_mod_check(str, sp, slen);
6418 else if (need_backref_str) {
/* First time through: decide whether the backref string is needed, based on val differing from repl. */
6420 if (need_backref_str < 0) {
6421 need_backref_str = val != repl;
/* Length of the unmatched text between the previous offset and this match. */
6428 len = beg0 - offset;
6442 if (RSTRING_LEN(str) <= end0)
break;
/* Byte length of the character at end0, used to step forward. */
6443 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6445 offset = end0 +
len;
6447 cp = RSTRING_PTR(str) + offset;
6448 if (offset > RSTRING_LEN(str))
break;
6451 if (mode != FAST_MAP && mode != STR) {
6454 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6459 if (RSTRING_LEN(str) > offset) {
/* Search again at the position 'last' with set_backref_str = 1. */
6462 rb_pat_search0(pat, str, last, 1, &match);
6464 str_shared_replace(str, dest);
/*
 * rb_str_gsub_bang: in-place global substitution. Calls str_modify_keep_cr on
 * str and then delegates to str_gsub with bang = 1.
 */
6489rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6491 str_modify_keep_cr(str);
6492 return str_gsub(argc, argv, str, 1);
/* Line from the following definition (header not shown): same helper with bang = 0. */
6542 return str_gsub(argc, argv, str, 0);
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably rb_str_replace -- confirm).
 * Checks that str may be modified, returns str unchanged when both arguments
 * are the same object, and otherwise returns the result of str_replace.
 */
6562 str_modifiable(str);
6563 if (str == str2)
return str;
6567 return str_replace(str, str2);
/*
 * rb_str_clear (partial listing): empties str.
 * Visible steps: set the length to 0, write a zero byte at the start of the
 * buffer, then branch on whether the encoding is ASCII-compatible (the branch
 * bodies are not shown).
 */
6584rb_str_clear(
VALUE str)
6588 STR_SET_LEN(str, 0);
6589 RSTRING_PTR(str)[0] = 0;
6590 if (rb_enc_asciicompat(STR_ENC_GET(str)))
/* rb_str_chr: only the signature appears in this listing; its body is not shown. */
6606rb_str_chr(
VALUE str)
/*
 * The following lines belong to a later definition whose header is not shown
 * (NOTE(review): presumably the byte-read accessor -- confirm).
 * pos has the string length added (the guarding condition is not shown), is
 * range-checked, and the byte at pos is returned as an unsigned value wrapped
 * with INT2FIX.
 */
6624 pos += RSTRING_LEN(str);
6625 if (pos < 0 || RSTRING_LEN(str) <= pos)
6628 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably the byte-write accessor, rb_str_setbyte -- confirm).
 * Visible steps: range-check pos against the length, reduce the value w to one
 * byte, make str independent, then measure characters around the written byte
 * with rb_enc_precise_mbclen.
 */
6648 long len = RSTRING_LEN(str);
6649 char *
ptr, *head, *left = 0;
/* pos must lie in [-len, len). */
6653 if (pos < -
len ||
len <= pos)
/* Only the low 8 bits of w are kept. */
6660 char byte = (char)(
NUM2INT(w) & 0xFF);
6662 if (!str_independent(str))
6663 str_make_independent(str);
6664 enc = STR_ENC_GET(str);
6665 head = RSTRING_PTR(str);
6667 if (!STR_EMBED_P(str)) {
6674 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6682 width = rb_enc_precise_mbclen(left, head+
len, enc);
6684 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
/*
 * str_byte_substr (partial listing): byte-indexed substring of str.
 *
 * beg and len are in bytes. Qnil is returned when beg is past the end or len is
 * negative, when beg is (still) negative, and -- unless the empty flag is set --
 * in a third case whose condition is not shown. Otherwise a subsequence is taken
 * with str_subseq and given str's encoding.
 */
6700str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6702 long n = RSTRING_LEN(str);
6704 if (beg > n ||
len < 0)
return Qnil;
6707 if (beg < 0)
return Qnil;
6712 if (!empty)
return Qnil;
6716 VALUE str2 = str_subseq(str, beg,
len);
6718 str_enc_copy_direct(str2, str);
/* Extra handling for an empty result when the encoding is not ASCII-compatible (body not shown). */
6720 if (RSTRING_LEN(str2) == 0) {
6721 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably str_byte_aref, which rb_str_byteslice returns -- confirm).
 * Two results are visible: a (beg, len) slice with empty = TRUE, and a single
 * byte at idx with empty = FALSE.
 */
6755 long beg,
len = RSTRING_LEN(str);
6763 return str_byte_substr(str, beg,
len, TRUE);
6768 return str_byte_substr(str, idx, 1, FALSE);
/*
 * rb_str_byteslice (partial listing): argc/argv entry point for byte slicing.
 * One visible path slices by (beg, len) through str_byte_substr with
 * empty = TRUE; the other hands argv[0] to str_byte_aref. The condition
 * selecting between them is not shown (NOTE(review): presumably argc == 2 -- confirm).
 */
6780rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6785 return str_byte_substr(str, beg,
len, TRUE);
6788 return str_byte_aref(str, argv[0]);
/*
 * str_check_beg_len (partial listing): validates a byte range against str.
 * beg and len are in/out parameters (both are read through pointers here).
 * Both ends of the range are passed to str_ensure_byte_pos.
 */
6792str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6794 long end, slen = RSTRING_LEN(str);
/* Out of range: past the end, or negative and still negative after adding slen. */
6797 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
/* Test for *len reaching beyond what remains after *beg. */
6806 if (*
len > slen - *beg) {
6810 str_ensure_byte_pos(str, *beg);
6811 str_ensure_byte_pos(str, end);
/*
 * rb_str_bytesplice (partial listing): replaces a byte range of str with (a byte
 * range of) another string.
 *
 * Accepts 2, 3 or 5 arguments; any other count raises ArgumentError. A
 * TypeError is raised for argv[0] or argv[2] with an "expected Range" message
 * (the triggering conditions are not shown). Both ranges are validated with
 * str_check_beg_len before str is modified through rb_str_update_1.
 */
6825rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6827 long beg,
len, vbeg, vlen;
6832 if (!(argc == 2 || argc == 3 || argc == 5)) {
6833 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6837 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6838 rb_builtin_class_name(argv[0]));
/* Source length defaults to the whole of val. */
6845 vlen = RSTRING_LEN(val);
6850 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6851 rb_builtin_class_name(argv[2]));
6863 vlen = RSTRING_LEN(val);
/* Validate the destination range, then the source range. */
6871 str_check_beg_len(str, &beg, &
len);
6872 str_check_beg_len(val, &vbeg, &vlen);
6873 str_modify_keep_cr(str);
/* str takes the encoding returned by rb_enc_check(str, val). */
6876 rb_enc_associate(str, rb_enc_check(str, val));
6879 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
/*
 * rb_str_reverse (partial listing): builds a reversed string rev from str.
 *
 * Strings of length <= 1 are returned as a duplicate. Longer strings take one
 * of several visible paths: single-byte optimizable, a path using
 * rb_enc_fast_mbclen, and a path using rb_enc_mbclen. rev is given str's length
 * and encoding.
 */
6901rb_str_reverse(
VALUE str)
6908 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6909 enc = STR_ENC_GET(str);
6915 if (RSTRING_LEN(str) > 1) {
6916 if (single_byte_optimizable(str)) {
/* Character width obtained with the fast length routine. */
6923 int clen = rb_enc_fast_mbclen(s, e, enc);
6931 cr = rb_enc_asciicompat(enc) ?
/* Character width obtained with rb_enc_mbclen on this path. */
6934 int clen = rb_enc_mbclen(s, e, enc);
6943 STR_SET_LEN(rev, RSTRING_LEN(str));
6944 str_enc_copy_direct(rev, str);
/*
 * rb_str_reverse_bang (partial listing): reverses str in place.
 *
 * For length > 1: the single-byte optimizable case works directly on the
 * buffer after str_modify_keep_cr; another path replaces str's contents with
 * the result of rb_str_reverse. A further str_modify_keep_cr call is visible at
 * the end (its guarding branch is not shown).
 */
6966rb_str_reverse_bang(
VALUE str)
6968 if (RSTRING_LEN(str) > 1) {
6969 if (single_byte_optimizable(str)) {
6972 str_modify_keep_cr(str);
6973 s = RSTRING_PTR(str);
6982 str_shared_replace(str, rb_str_reverse(str));
6986 str_modify_keep_cr(str);
/*
 * Run of short fragments from several consecutive definitions; most headers are
 * not part of this listing.
 */
/* Substring test: search from offset 0 and return a boolean for "found" (index != -1). */
7015 i = rb_str_index(str, arg, 0);
7017 return RBOOL(i != -1);
/* Integer conversion: an ArgumentError reports an invalid radix; otherwise rb_str_to_inum is called with FALSE as its last argument. */
7059 rb_raise(rb_eArgError,
"invalid radix %d", base);
7061 return rb_str_to_inum(str, base, FALSE);
/* rb_str_to_f: only the signature appears in this listing. */
7086rb_str_to_f(
VALUE str)
/* rb_str_to_s: only the signature appears in this listing. */
7101rb_str_to_s(
VALUE str)
/* Fragment of a helper that encodes code point c into a local buffer of RUBY_MAX_CHAR_LEN bytes; n is the encoded length reported by rb_enc_codelen. */
7113 char s[RUBY_MAX_CHAR_LEN];
7114 int n = rb_enc_codelen(c, enc);
7116 rb_enc_mbcput(c, s, enc);
/* Size passed to snprintf for one escaped character; the local buffers are declared one byte larger. */
7121#define CHAR_ESC_LEN 13
/*
 * rb_str_buf_cat_escaped_char (partial listing): formats code point c as an
 * escape sequence into a local buffer.
 *
 * Formats visible here: the character itself ("%c"), \uXXXX for values below
 * 0x10000 and \u{X} otherwise on one group of branches; \xXX and \x{X} on the
 * other group (NOTE(review): presumably selected by unicode_p -- the test is
 * not shown). l is the length of the formatted text.
 */
7124rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7126 char buf[CHAR_ESC_LEN + 1];
7134 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7136 else if (c < 0x10000) {
7137 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7140 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7145 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7148 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7151 l = (int)strlen(buf);
/*
 * ruby_escaped_char (partial listing): maps a control character to its
 * backslash escape as a C string literal.
 *
 * Visible mappings: NUL, newline, carriage return, tab, form feed, vertical tab
 * (013), backspace (010), bell (007), escape (033) and DEL (0x7f, rendered as
 * backslash-c-question-mark). The result for other characters is not shown.
 */
7157ruby_escaped_char(
int c)
7160 case '\0':
return "\\0";
7161 case '\n':
return "\\n";
7162 case '\r':
return "\\r";
7163 case '\t':
return "\\t";
7164 case '\f':
return "\\f";
7165 case '\013':
return "\\v";
7166 case '\010':
return "\\b";
7167 case '\007':
return "\\a";
7168 case '\033':
return "\\e";
7169 case '\x7f':
return "\\c?";
/*
 * rb_str_escape (partial listing): builds an escaped copy of str in result.
 *
 * The scan keeps two cursors: p (current position) and prev (start of the text
 * not yet copied). Pending text between prev and the current character is
 * flushed with str_buf_cat before an escape is appended.
 */
7175rb_str_escape(
VALUE str)
7179 const char *p = RSTRING_PTR(str);
7181 const char *prev = p;
7182 char buf[CHAR_ESC_LEN + 1];
7184 int unicode_p = rb_enc_unicode_p(enc);
7185 int asciicompat = rb_enc_asciicompat(enc);
/* Length of the character at p; the lines below handle the case where no valid length is obtained. */
7190 int n = rb_enc_precise_mbclen(p, pend, enc);
7192 if (p > prev) str_buf_cat(result, prev, p - prev);
/* Fall back to the encoding's minimum character length, or to the bytes remaining. */
7193 n = rb_enc_mbminlen(enc);
7195 n = (int)(pend - p);
/* Emit the byte at p as a two-digit hex escape. */
7197 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7198 str_buf_cat(result, buf, strlen(buf));
7204 c = rb_enc_mbc_to_codepoint(p, pend, enc);
/* Named escape, if ruby_escaped_char provides one. */
7206 cc = ruby_escaped_char(c);
/* Here p has already advanced past the character, so the pending text ends at p - n. */
7208 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7209 str_buf_cat(result, cc, strlen(cc));
/* Printable ASCII in an ASCII-compatible encoding. */
7212 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7215 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7216 rb_str_buf_cat_escaped_char(result, c, unicode_p);
/* Flush whatever is still pending. */
7220 if (p > prev) str_buf_cat(result, prev, p - prev);
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably rb_str_inspect -- confirm).
 *
 * Builds a double-quoted, escaped rendering of a string in result. The result
 * encoding is the default internal encoding, falling back to the default
 * external one, and to US-ASCII when that is not ASCII-compatible.
 */
7239 const char *p, *pend, *prev;
7240 char buf[CHAR_ESC_LEN + 1];
7242 rb_encoding *resenc = rb_default_internal_encoding();
7243 int unicode_p = rb_enc_unicode_p(enc);
7244 int asciicompat = rb_enc_asciicompat(enc);
7246 if (resenc == NULL) resenc = rb_default_external_encoding();
7247 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7248 rb_enc_associate(result, resenc);
/* Opening quote. */
7249 str_buf_cat2(result,
"\"");
/* Length of the character at p; the lines below handle the case where no valid length is obtained. */
7257 n = rb_enc_precise_mbclen(p, pend, enc);
7259 if (p > prev) str_buf_cat(result, prev, p - prev);
7260 n = rb_enc_mbminlen(enc);
7262 n = (int)(pend - p);
/* Emit the byte at p as a two-digit hex escape. */
7264 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7265 str_buf_cat(result, buf, strlen(buf));
7271 c = rb_enc_mbc_to_codepoint(p, pend, enc);
/* Characters that get a backslash: double quote, backslash, and (per the last visible clause) a following '$', '@' or '{'. */
7273 if ((asciicompat || unicode_p) &&
7274 (c ==
'"'|| c ==
'\\' ||
7279 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7280 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7281 str_buf_cat2(result,
"\\");
7282 if (asciicompat || enc == resenc) {
/* Control characters map to their one-letter escape; 0 means "no short escape". */
7288 case '\n': cc =
'n';
break;
7289 case '\r': cc =
'r';
break;
7290 case '\t': cc =
't';
break;
7291 case '\f': cc =
'f';
break;
7292 case '\013': cc =
'v';
break;
7293 case '\010': cc =
'b';
break;
7294 case '\007': cc =
'a';
break;
7295 case 033: cc =
'e';
break;
7296 default: cc = 0;
break;
7299 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
/* Two bytes of buf are appended (its contents are set on lines not shown). */
7302 str_buf_cat(result, buf, 2);
7315 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7319 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7320 rb_str_buf_cat_escaped_char(result, c, unicode_p);
/* Flush remaining text, then the closing quote. */
7325 if (p > prev) str_buf_cat(result, prev, p - prev);
7326 str_buf_cat2(result,
"\"");
/* True when p is before e and points at '$', '@' or '{'. */
7331#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably rb_str_dump -- confirm).
 *
 * Two passes over the bytes of str are visible: the first accumulates the
 * output length (len) with an overflow guard, the second writes the escaped
 * text through the cursor q. For encodings that are not ASCII-compatible a
 * force_encoding suffix naming the encoding is appended and the result is
 * tagged with the ASCII-8BIT encoding index.
 */
7344 int encidx = rb_enc_get_index(str);
7347 const char *p, *pend;
7350 int u8 = (encidx == rb_utf8_encindex());
7351 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7354 if (!rb_enc_asciicompat(enc)) {
/* Reserve room for the encoding name inside the suffix. */
7356 len += strlen(enc->name);
/* Pass 1: measure. */
7359 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7362 unsigned char c = *p++;
7365 case '"':
case '\\':
7366 case '\n':
case '\r':
7367 case '\t':
case '\f':
7368 case '\013':
case '\010':
case '\007':
case '\033':
/* Two output bytes when the next byte satisfies IS_EVSTR, else one. */
7373 clen = IS_EVSTR(p, pend) ? 2 : 1;
/* UTF-8 only: bytes above 0x7F are examined as whole characters. */
7381 if (u8 && c > 0x7F) {
7382 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7384 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7387 else if (cc <= 0xFFFFF)
/* Guard against len + clen exceeding LONG_MAX. */
7400 if (clen > LONG_MAX -
len) {
/* Pass 2: write. qend is one past the measured length. */
7407 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7408 q = RSTRING_PTR(result); qend = q +
len + 1;
7412 unsigned char c = *p++;
7414 if (c ==
'"' || c ==
'\\') {
7418 else if (c ==
'#') {
/* '#' is escaped only when followed by '$', '@' or '{'. */
7419 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7422 else if (c ==
'\n') {
7426 else if (c ==
'\r') {
7430 else if (c ==
'\t') {
7434 else if (c ==
'\f') {
7438 else if (c ==
'\013') {
7442 else if (c ==
'\010') {
7446 else if (c ==
'\007') {
7450 else if (c ==
'\033') {
7460 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7462 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
/* Unicode escapes: four hex digits or braced form. */
7465 snprintf(q, qend-q,
"u%04X", cc);
7467 snprintf(q, qend-q,
"u{%X}", cc);
/* Any other byte: two-digit hex escape. */
7472 snprintf(q, qend-q,
"x%02X", c);
7478 if (!rb_enc_asciicompat(enc)) {
7479 snprintf(q, qend-q, nonascii_suffix, enc->name);
7480 encidx = rb_ascii8bit_encindex();
7483 rb_enc_associate_index(result, encidx);
/* unescape_ascii: only the signature appears in this listing; the call site below stores its result as one byte. */
7489unescape_ascii(
unsigned int c)
/*
 * undump_after_backslash (partial listing): decodes the escape that follows a
 * backslash in a dumped string and appends to undumped.
 *
 * ss is an in/out cursor (read into s here), s_end bounds the input, penc is
 * the in/out current encoding, utf8 and binary are in/out flags.
 * Visible rules: a Unicode escape switches the encoding to UTF-8 when it is
 * not already; a braced hex run must be 1..6 digits; values in 0xd800..0xdfff
 * get special handling on both Unicode paths (the handling itself is not shown).
 */
7513undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7515 const char *s = *ss;
7519 unsigned char buf[6];
/* Simple one-character escape. */
7537 *buf = unescape_ascii(*s);
7549 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7550 if (*penc != enc_utf8) {
7552 rb_enc_associate(undumped, enc_utf8);
7569 if (hexlen == 0 || hexlen > 6) {
7575 if (0xd800 <= c && c <= 0xdfff) {
/* Encode the code point into buf using the current encoding. */
7578 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7588 if (0xd800 <= c && c <= 0xdfff) {
7591 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
/* Forward declaration; used by str_undump below. */
7619static VALUE rb_str_is_ascii_only_p(
VALUE str);
/*
 * str_undump (partial listing): reverses the dumped form of a string.
 *
 * Visible validation: the input is tested for being ASCII-only and for
 * embedded NULs, must be at least 2 bytes long and must start with a double
 * quote. After the quoted body an optional ".dup" and a
 * .force_encoding("NAME") suffix are recognised; the suffix must end exactly
 * with the two characters quote and closing parenthesis. Format violations
 * jump to invalid_format, which raises RuntimeError.
 */
7637str_undump(
VALUE str)
7639 const char *s = RSTRING_PTR(str);
/* Start from an empty string in the source encoding. */
7642 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7644 bool binary =
false;
7648 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7651 if (!str_null_check(str, &w)) {
7654 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7655 if (*s !=
'"')
goto invalid_format;
7673 static const char force_encoding_suffix[] =
".force_encoding(\"";
7674 static const char dup_suffix[] =
".dup";
7675 const char *encname;
/* Optional ".dup": skipped only when more input follows it. */
7680 size =
sizeof(dup_suffix) - 1;
7681 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
/* Mandatory force_encoding prefix, with at least one more byte after it. */
7683 size =
sizeof(force_encoding_suffix) - 1;
7684 if (s_end - s <= size)
goto invalid_format;
7685 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7689 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
/* Find the quote that closes the encoding name. */
7693 s = memchr(s,
'"', s_end-s);
7695 if (!s)
goto invalid_format;
7696 if (s_end - s != 2)
goto invalid_format;
7697 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7699 encidx = rb_enc_find_index2(encname, (
long)size);
7703 rb_enc_associate_index(undumped, encidx);
/* Escape sequences in the body are delegated to undump_after_backslash. */
7713 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7724 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
/* Line from the following definition (header not shown): test for a dummy encoding. */
7730 if (rb_enc_dummy_p(enc)) {
/*
 * str_true_enc (partial listing): obtains the encoding of str (on a line not
 * shown) and passes it through rb_str_check_dummy_enc. The case-mapping entry
 * points below use its result as enc.
 */
7737str_true_enc(
VALUE str)
7740 rb_str_check_dummy_enc(enc);
/*
 * check_case_options (partial listing): folds the option symbols in argv into
 * flags.
 *
 * Visible rules:
 *   - turkic, optionally followed by lithuanian (and the reverse order), sets
 *     the TURKISH_AZERI / LITHUANIAN fold flags; any other second option raises
 *     "invalid second option".
 *   - ascii sets ONIGENC_CASE_ASCII_ONLY.
 *   - fold is accepted only when flags request downcasing alone; it then swaps
 *     DOWNCASE for FOLD via XOR. Otherwise ArgumentError is raised.
 *   - "too many options" and "invalid option" are raised on the remaining
 *     paths (their exact conditions are not shown).
 */
7744static OnigCaseFoldType
7745check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7750 rb_raise(rb_eArgError,
"too many options");
7751 if (argv[0]==sym_turkic) {
7752 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7754 if (argv[1]==sym_lithuanian)
7755 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7757 rb_raise(rb_eArgError,
"invalid second option");
7760 else if (argv[0]==sym_lithuanian) {
7761 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7763 if (argv[1]==sym_turkic)
7764 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7766 rb_raise(rb_eArgError,
"invalid second option");
7770 rb_raise(rb_eArgError,
"too many options");
7771 else if (argv[0]==sym_ascii)
7772 flags |= ONIGENC_CASE_ASCII_ONLY;
7773 else if (argv[0]==sym_fold) {
/* Exactly DOWNCASE set (no UPCASE) is required for :fold. */
7774 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7775 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7777 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7780 rb_raise(rb_eArgError,
"invalid option");
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably case_option_single_p, called by the upcase/downcase
 * entry points as (flags, enc, str) -- confirm).
 * Condition shown: ASCII-only mapping was requested and the encoding is either
 * UTF-8 or has a maximum character length of one byte.
 */
7787 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/* Extra bytes added to every case-mapping buffer's capacity. */
7793#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/* Debug tracing for case mapping is off unless CASEMAP_DEBUG is predefined. */
7794#ifndef CASEMAP_DEBUG
7795# define CASEMAP_DEBUG 0
/* Trailing flexible storage of a mapping buffer node (the rest of the struct is not shown). */
7803 OnigUChar space[FLEX_ARY_LEN];
/*
 * mapping_buffer_free (partial listing): walks the linked list of mapping
 * buffers through their next pointers and releases each node with
 * ruby_sized_xfree, passing the node's capa field as the size.
 */
7807mapping_buffer_free(
void *p)
7811 while (current_buffer) {
/* Advance before freeing so the next pointer is read from a live node. */
7812 previous_buffer = current_buffer;
7813 current_buffer = current_buffer->next;
7814 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
/* Initializer fragment of a data-type descriptor that installs mapping_buffer_free as a callback. */
7820 {0, mapping_buffer_free,},
7821 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably rb_str_casemap, which the case-mapping entry points
 * call as (source, &flags, enc) -- confirm).
 *
 * Case-maps source into a chain of temporary buffers and then assembles the
 * result string target:
 *   - an empty source is returned as a duplicate;
 *   - while input remains, a new buffer is linked in and filled through the
 *     encoding's case_map callback;
 *   - a negative callback result frees the chain and raises ArgumentError;
 *   - with a single buffer the result is created directly from it, otherwise
 *     all buffers are concatenated with memcpy;
 *   - target receives the encoding of source.
 */
7829 const OnigUChar *source_current, *source_end;
7830 int target_length = 0;
7831 VALUE buffer_anchor;
7834 size_t buffer_count = 0;
7835 int buffer_length_or_invalid;
7837 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7839 source_current = (OnigUChar*)RSTRING_PTR(source);
7844 while (source_current < source_end) {
/* Capacity grows with each additional buffer: remaining input times the buffer ordinal, plus slack. */
7846 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7847 if (CASEMAP_DEBUG) {
7848 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
/* Append the new node: store it in the current link slot ... */
7851 *pre_buffer = current_buffer;
/* ... then make the link slot the new node's own next field (address-of, so the next append lands there). */
7852 pre_buffer = &current_buffer->next;
7853 current_buffer->next = NULL;
7854 current_buffer->capa =
capa;
/* source_current is advanced by the callback; the return value is bytes written or a negative error. */
7855 buffer_length_or_invalid = enc->case_map(flags,
7856 &source_current, source_end,
7857 current_buffer->space,
7858 current_buffer->space+current_buffer->capa,
7860 if (buffer_length_or_invalid < 0) {
/* Free the whole chain, starting from the head kept in buffer_anchor, before raising. */
7861 current_buffer =
DATA_PTR(buffer_anchor);
7863 mapping_buffer_free(current_buffer);
7864 rb_raise(rb_eArgError,
"input string invalid");
/* Record the bytes used in this buffer and add them to the running total. */
7866 target_length += current_buffer->used = buffer_length_or_invalid;
7868 if (CASEMAP_DEBUG) {
7869 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
/* Single buffer: build the result straight from it. */
7872 if (buffer_count==1) {
7873 target =
rb_str_new((
const char*)current_buffer->space, target_length);
/* Several buffers: copy each one's used bytes in order. */
7876 char *target_current;
7879 target_current = RSTRING_PTR(target);
7880 current_buffer =
DATA_PTR(buffer_anchor);
7881 while (current_buffer) {
7882 memcpy(target_current, current_buffer->space, current_buffer->used);
7883 target_current += current_buffer->used;
7884 current_buffer = current_buffer->next;
/* Release the chain once the result has been assembled. */
7887 current_buffer =
DATA_PTR(buffer_anchor);
7889 mapping_buffer_free(current_buffer);
7894 str_enc_copy_direct(target, source);
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably rb_str_ascii_casemap, called elsewhere as
 * (source, target, &flags, enc) -- confirm).
 *
 * ASCII-only case mapping from source into target; when both are the same
 * object the mapping is done over the source buffer itself. Returns Qnil for
 * an empty source. A negative result from the mapper raises ArgumentError.
 */
7903 const OnigUChar *source_current, *source_end;
7904 OnigUChar *target_current, *target_end;
7905 long old_length = RSTRING_LEN(source);
7906 int length_or_invalid;
7908 if (old_length == 0)
return Qnil;
7910 source_current = (OnigUChar*)RSTRING_PTR(source);
/* In-place case: output range aliases the input range. */
7912 if (source == target) {
7913 target_current = (OnigUChar*)source_current;
7914 target_end = (OnigUChar*)source_end;
7917 target_current = (OnigUChar*)RSTRING_PTR(target);
7921 length_or_invalid = onigenc_ascii_only_case_map(flags,
7922 &source_current, source_end,
7923 target_current, target_end, enc);
7924 if (length_or_invalid < 0)
7925 rb_raise(rb_eArgError,
"input string invalid");
/* Debug builds only: the mapped length is expected to equal the original length. */
7926 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7927 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7928 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7929 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7930 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7933 str_enc_copy(target, source);
/*
 * upcase_single (partial listing): byte-wise ASCII upcasing of str in place.
 * Each byte in 'a'..'z' is replaced by the corresponding 'A'..'Z' byte.
 * A local flag 'modified' starts as false; callers test the return value to
 * decide whether to set ONIGENC_CASE_MODIFIED.
 */
7939upcase_single(
VALUE str)
7941 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7942 bool modified =
false;
/* Read the byte as unsigned so the range comparison is well defined. */
7945 unsigned int c = *(
unsigned char*)s;
7947 if (
'a' <= c && c <=
'z') {
7948 *s =
'A' + (c -
'a');
/*
 * rb_str_upcase_bang (partial listing): upcases str in place.
 *
 * Three strategies are visible: the byte-wise upcase_single when
 * case_option_single_p holds, rb_str_ascii_casemap when ASCII-only mapping was
 * requested, and otherwise replacing the contents with rb_str_casemap's result.
 * Returns str when ONIGENC_CASE_MODIFIED ended up set in flags (the other
 * return is not shown).
 */
7976rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7979 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7981 flags = check_case_options(argc, argv, flags);
7982 str_modify_keep_cr(str);
7983 enc = str_true_enc(str);
7984 if (case_option_single_p(flags, enc, str)) {
7985 if (upcase_single(str))
7986 flags |= ONIGENC_CASE_MODIFIED;
7988 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7989 rb_str_ascii_casemap(str, str, &flags, enc);
7991 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7993 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * rb_str_upcase (partial listing): produces an upcased string ret from str.
 *
 * On the single-byte path ret starts as a byte copy of str with str's
 * encoding; the ASCII-only path maps from str into ret with
 * rb_str_ascii_casemap; otherwise ret is rb_str_casemap's result.
 */
8015rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
8018 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8021 flags = check_case_options(argc, argv, flags);
8022 enc = str_true_enc(str);
8023 if (case_option_single_p(flags, enc, str)) {
8024 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8025 str_enc_copy_direct(ret, str);
8028 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8030 rb_str_ascii_casemap(str, ret, &flags, enc);
8033 ret = rb_str_casemap(str, &flags, enc);
/*
 * downcase_single (partial listing): byte-wise ASCII downcasing of str in
 * place. Each byte in 'A'..'Z' is replaced by the corresponding 'a'..'z' byte.
 * A local flag 'modified' starts as false; rb_str_downcase_bang tests the
 * return value to decide whether to set ONIGENC_CASE_MODIFIED.
 */
8040downcase_single(
VALUE str)
8042 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8043 bool modified =
false;
/* Read the byte as unsigned so the range comparison is well defined. */
8046 unsigned int c = *(
unsigned char*)s;
8048 if (
'A' <= c && c <=
'Z') {
8049 *s =
'a' + (c -
'A');
/*
 * rb_str_downcase_bang (partial listing): downcases str in place.
 *
 * Three strategies are visible: the byte-wise downcase_single when
 * case_option_single_p holds, rb_str_ascii_casemap when ASCII-only mapping was
 * requested, and otherwise replacing the contents with rb_str_casemap's result.
 * Returns str when ONIGENC_CASE_MODIFIED ended up set in flags (the other
 * return is not shown).
 */
8071rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8074 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8076 flags = check_case_options(argc, argv, flags);
8077 str_modify_keep_cr(str);
8078 enc = str_true_enc(str);
8079 if (case_option_single_p(flags, enc, str)) {
8080 if (downcase_single(str))
8081 flags |= ONIGENC_CASE_MODIFIED;
8083 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8084 rb_str_ascii_casemap(str, str, &flags, enc);
8086 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8088 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * rb_str_downcase (partial listing): produces a downcased string ret from str.
 *
 * On the single-byte path ret is a byte copy of str with str's encoding that
 * is then downcased by downcase_single; the ASCII-only path maps from str into
 * ret with rb_str_ascii_casemap; otherwise ret is rb_str_casemap's result.
 */
8102rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8105 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8108 flags = check_case_options(argc, argv, flags);
8109 enc = str_true_enc(str);
8110 if (case_option_single_p(flags, enc, str)) {
8111 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8112 str_enc_copy_direct(ret, str);
8113 downcase_single(ret);
8115 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8117 rb_str_ascii_casemap(str, ret, &flags, enc);
8120 ret = rb_str_casemap(str, &flags, enc);
/*
 * rb_str_capitalize_bang (partial listing): capitalizes str in place using the
 * UPCASE | TITLECASE flag combination.
 *
 * Returns Qnil for an empty string or a NULL buffer. Uses
 * rb_str_ascii_casemap when ASCII-only mapping was requested, otherwise
 * replaces the contents with rb_str_casemap's result. Returns str when
 * ONIGENC_CASE_MODIFIED ended up set (the other return is not shown).
 */
8140rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8143 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8145 flags = check_case_options(argc, argv, flags);
8146 str_modify_keep_cr(str);
8147 enc = str_true_enc(str);
8148 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8149 if (flags&ONIGENC_CASE_ASCII_ONLY)
8150 rb_str_ascii_casemap(str, str, &flags, enc);
8152 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8154 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * rb_str_capitalize (partial listing): produces a capitalized string ret from
 * str using the UPCASE | TITLECASE flag combination.
 *
 * NOTE(review): for an empty string or NULL buffer this returns str itself,
 * whereas the swapcase counterpart in this file returns a duplicate --
 * confirm whether the difference is intended.
 */
8187rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8190 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8193 flags = check_case_options(argc, argv, flags);
8194 enc = str_true_enc(str);
8195 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8196 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8198 rb_str_ascii_casemap(str, ret, &flags, enc);
8201 ret = rb_str_casemap(str, &flags, enc);
/*
 * rb_str_swapcase_bang (partial listing): swaps letter case of str in place;
 * both UPCASE and DOWNCASE are requested in flags.
 *
 * Uses rb_str_ascii_casemap when ASCII-only mapping was requested, otherwise
 * replaces the contents with rb_str_casemap's result. Returns str when
 * ONIGENC_CASE_MODIFIED ended up set (the other return is not shown).
 */
8220rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8223 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8225 flags = check_case_options(argc, argv, flags);
8226 str_modify_keep_cr(str);
8227 enc = str_true_enc(str);
8228 if (flags&ONIGENC_CASE_ASCII_ONLY)
8229 rb_str_ascii_casemap(str, str, &flags, enc);
8231 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8233 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * rb_str_swapcase (partial listing): produces a case-swapped string ret from
 * str; both UPCASE and DOWNCASE are requested in flags.
 *
 * An empty string or NULL buffer yields a duplicate of str. The ASCII-only
 * path maps from str into ret with rb_str_ascii_casemap; otherwise ret is
 * rb_str_casemap's result.
 */
8247rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8250 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8253 flags = check_case_options(argc, argv, flags);
8254 enc = str_true_enc(str);
8255 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8256 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8258 rb_str_ascii_casemap(str, ret, &flags, enc);
8261 ret = rb_str_casemap(str, &flags, enc);
/* Pointer to unsigned bytes. */
8266typedef unsigned char *USTR;
/* Fields of the transliteration cursor: current and last code point of the range being expanded (the other fields are not shown). */
8270 unsigned int now, max;
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably trnext, called elsewhere as trnext(&tr, enc) and
 * compared against an error value of -1 -- confirm).
 *
 * Yields the next code point of a transliteration specification: handles a
 * backslash escape, a "a-z" style range (raising ArgumentError when a range
 * is invalid), and stepping through a range while skipping code points the
 * encoding cannot represent.
 */
/* End of specification. */
8282 if (t->p == t->pend)
return -1;
/* A backslash counts as an escape only when something follows it. */
8283 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8286 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
/* A '-' starts a range only when something follows it. */
8288 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8290 if (t->p < t->pend) {
8291 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
/* The message quotes both endpoints only when both are below 0x80. */
8294 if (t->now < 0x80 && c < 0x80) {
8295 rb_raise(rb_eArgError,
8296 "invalid range \"%c-%c\" in string transliteration",
8300 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8304 else if (t->now < c) {
/* Advance within the range, skipping values with no valid encoded length. */
8313 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8314 if (t->now == t->max) {
8319 if (t->now < t->max) {
/*
 * Fragment whose function header is not part of this listing
 * (NOTE(review): presumably tr_trans, called at the end of this block as
 * tr_trans(str, src, repl, flag) -- confirm).
 *
 * Transliterates str in place: characters described by src are replaced by the
 * corresponding characters of repl. A 256-entry table (trans) covers code
 * points below 256 and a hash covers the rest. Two rewriting loops that build
 * a fresh buffer are visible, plus a byte-wise loop indexed directly by trans.
 */
/* Sentinel for "no mapping" and "end of specification". */
8335 const unsigned int errc = -1;
8336 unsigned int trans[256];
8338 struct tr trsrc, trrepl;
8340 unsigned int c, c0, last = 0;
8341 int modify = 0, i, l;
8342 unsigned char *s, *send;
8344 int singlebyte = single_byte_optimizable(str);
/* Downgrades a 7-bit code range to VALID as soon as a non-ASCII code point is produced. */
8348#define CHECK_IF_ASCII(c) \
8349 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8350 (cr = ENC_CODERANGE_VALID) : 0)
8354 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
/* An empty replacement turns the operation into a deletion of src's characters. */
8355 if (RSTRING_LEN(repl) == 0) {
8356 return rb_str_delete_bang(1, &src, str);
8360 e1 = rb_enc_check(str, src);
8361 e2 = rb_enc_check(str, repl);
8366 enc = rb_enc_check(src, repl);
8368 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
/* A leading '^' is special only when src is longer than one byte and something follows it. */
8369 if (RSTRING_LEN(src) > 1 &&
8370 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8371 trsrc.p + l < trsrc.pend) {
8375 trrepl.p = RSTRING_PTR(repl);
8376 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8377 trsrc.gen = trrepl.gen = 0;
8378 trsrc.now = trrepl.now = 0;
8379 trsrc.max = trrepl.max = 0;
/* First table-building variant (its guarding condition is not shown). */
8382 for (i=0; i<256; i++) {
8385 while ((c = trnext(&trsrc, enc)) != errc) {
8390 if (!hash) hash = rb_hash_new();
/* Drain repl; the loop body is not shown. */
8394 while ((c = trnext(&trrepl, enc)) != errc)
8397 for (i=0; i<256; i++) {
8398 if (trans[i] != errc) {
/* Second table-building variant: pair each src character with the next repl character. */
8406 for (i=0; i<256; i++) {
8409 while ((c = trnext(&trsrc, enc)) != errc) {
8410 r = trnext(&trrepl, enc);
/* When repl is exhausted, reuse its current (last) code point. */
8411 if (r == errc) r = trrepl.now;
/* A multi-byte replacement disables the single-byte fast path. */
8414 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8417 if (!hash) hash = rb_hash_new();
8425 str_modify_keep_cr(str);
8426 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8427 termlen = rb_enc_mbminlen(enc);
/* Rewriting loop that remembers the previously written character in 'save'. */
8430 long offset, max = RSTRING_LEN(str);
8431 unsigned int save = -1;
8432 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8437 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8440 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8443 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
/* Output width equals input width when the encodings match. */
8445 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8454 if (cflag) c = last;
8457 else if (cflag) c = errc;
8463 if (c != (
unsigned int)-1) {
8469 tlen = rb_enc_codelen(c, enc);
8475 if (enc != e1) may_modify = 1;
/* Grow the output buffer when the next character would not fit. */
8477 if ((offset = t - buf) + tlen > max) {
8478 size_t MAYBE_UNUSED(old) = max + termlen;
8479 max = offset + tlen + (send - s);
8480 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8483 rb_enc_mbcput(c, t, enc);
8484 if (may_modify && memcmp(s, t, tlen) != 0) {
/* Install the new buffer: free the old heap buffer (if any), terminate, and switch str to it. */
8490 if (!STR_EMBED_P(str)) {
8491 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8493 TERM_FILL((
char *)t, termlen);
8494 RSTRING(str)->as.heap.ptr = (
char *)buf;
8495 STR_SET_LEN(str, t - buf);
8496 STR_SET_NOEMBED(str);
8497 RSTRING(str)->as.heap.aux.capa = max;
/* Byte-wise loop: each byte indexes trans directly. */
8501 c = (
unsigned char)*s;
8502 if (trans[c] != errc) {
/* Second rewriting loop; the initial buffer size is 1.2 times the input size. */
8519 long offset, max = (long)((send - s) * 1.2);
8520 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8525 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8528 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8531 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8533 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8541 if (cflag) c = last;
8544 else if (cflag) c = errc;
8548 c = cflag ? last : errc;
8551 tlen = rb_enc_codelen(c, enc);
8556 if (enc != e1) may_modify = 1;
8558 if ((offset = t - buf) + tlen > max) {
8559 size_t MAYBE_UNUSED(old) = max + termlen;
8560 max = offset + tlen + (long)((send - s) * 1.2);
8561 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8565 rb_enc_mbcput(c, t, enc);
8566 if (may_modify && memcmp(s, t, tlen) != 0) {
/* Install the new buffer, as in the first rewriting loop. */
8574 if (!STR_EMBED_P(str)) {
8575 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8577 TERM_FILL((
char *)t, termlen);
8578 RSTRING(str)->as.heap.ptr = (
char *)buf;
8579 STR_SET_LEN(str, t - buf);
8580 STR_SET_NOEMBED(str);
8581 RSTRING(str)->as.heap.aux.capa = max;
8587 rb_enc_associate(str, enc);
/* Lines from two later callers (headers not shown): both pass 0 as the last argument. */
8606 return tr_trans(str, src, repl, 0);
8653 tr_trans(str, src, repl, 0);
/* Number of code points covered by the byte table (0..UCHAR_MAX). */
8657#define TR_TABLE_MAX (UCHAR_MAX+1)
/* Table size: one extra slot at index TR_TABLE_MAX stores a flag (see tr_setup_table). */
8658#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
/*
 * tr_setup_table (partial listing): merges the character set described by str
 * into stable.
 *
 * stable[i] for i < TR_TABLE_MAX ends up as the AND of its previous value and
 * this set's membership (buf[i]); stable[TR_TABLE_MAX] carries the negation
 * flag. Code points at or above TR_TABLE_MAX are tracked in hash tables. A
 * leading '^' (when str is longer than one byte) is detected here; the
 * visible "!cflag" store suggests it negates the set.
 * first is non-zero for the first set being merged (callers pass i==0 / TRUE).
 */
8660tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8663 const unsigned int errc = -1;
8664 char buf[TR_TABLE_MAX];
8667 VALUE table = 0, ptable = 0;
8668 int i, l, cflag = 0;
8670 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8671 tr.gen =
tr.now =
tr.max = 0;
8673 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8678 for (i=0; i<TR_TABLE_MAX; i++) {
8681 stable[TR_TABLE_MAX] = cflag;
/* A previously negated table combined with a non-negated set clears the flag. */
8683 else if (stable[TR_TABLE_MAX] && !cflag) {
8684 stable[TR_TABLE_MAX] = 0;
8686 for (i=0; i<TR_TABLE_MAX; i++) {
8690 while ((c = trnext(&
tr, enc)) != errc) {
/* Small code points go into the byte table; membership is inverted when negated. */
8691 if (c < TR_TABLE_MAX) {
8692 buf[(
unsigned char)c] = !cflag;
8697 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8700 table = ptable ? ptable : rb_hash_new();
8704 table = rb_hash_new();
8709 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8710 rb_hash_aset(table, key,
Qtrue);
/* Intersect with what earlier sets allowed. */
8714 for (i=0; i<TR_TABLE_MAX; i++) {
8715 stable[i] = stable[i] && buf[i];
8717 if (!table && !cflag) {
/*
 * tr_find (partial listing): membership test of code point c against the
 * tables built by tr_setup_table.
 *
 * Code points below TR_TABLE_MAX are answered directly from table. Larger
 * ones consult the del and nodel hashes: present in del and not in nodel is
 * one outcome, present in nodel another (the returned values are not shown);
 * otherwise the flag stored at table[TR_TABLE_MAX] decides.
 */
8724tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8726 if (c < TR_TABLE_MAX) {
8727 return table[c] != 0;
8733 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8734 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8738 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8741 return table[TR_TABLE_MAX] ? TRUE : FALSE;
/*
 * rb_str_delete_bang (partial listing): removes from str, in place, the
 * characters selected by the sets given in argv.
 *
 * Returns Qnil for an empty string or NULL buffer. Every argument is merged
 * into the lookup tables with tr_setup_table. The buffer is then compacted
 * with a read cursor s and a write cursor t, re-terminated, and its length set
 * to the write position. Returns str when something was modified (the other
 * return is not shown).
 */
8756rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8758 char squeez[TR_TABLE_SIZE];
8761 VALUE del = 0, nodel = 0;
8763 int i, ascompat, cr;
8765 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8767 for (i=0; i<argc; i++) {
8771 enc = rb_enc_check(str, s);
8772 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8775 str_modify_keep_cr(str);
8776 ascompat = rb_enc_asciicompat(enc);
8777 s = t = RSTRING_PTR(str);
/* Bytes below 0x80 in an ASCII-compatible encoding are handled without decoding. */
8784 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8795 c = rb_enc_codepoint_len(s, send, &clen, enc);
8797 if (tr_find(c, squeez, del, nodel)) {
/* Kept character: re-encode it at the write position only when the cursors have diverged. */
8801 if (t != s) rb_enc_mbcput(c, t, enc);
8808 TERM_FILL(t, TERM_LEN(str));
8809 STR_SET_LEN(str, t - RSTRING_PTR(str));
8812 if (modify)
return str;
/*
 * rb_str_delete (partial listing): applies rb_str_delete_bang with the same
 * arguments. NOTE(review): the visible call passes str; whether str has been
 * duplicated beforehand is on a line not shown -- confirm.
 */
8826rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8829 rb_str_delete_bang(argc, argv, str);
/*
 * rb_str_squeeze_bang (partial listing): collapses runs of the same character
 * in str, in place.
 *
 * With arguments, only characters selected by the sets in argv are squeezed
 * (the "argc > 0 && !squeez[c]" / "!tr_find(...)" clauses keep unselected
 * characters). Returns Qnil for a NULL buffer or empty string. A character is
 * written when it differs from the previously seen one (save) or is not
 * selected. Returns str when something was modified (the other return is not
 * shown).
 */
8847rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8849 char squeez[TR_TABLE_SIZE];
8851 VALUE del = 0, nodel = 0;
8852 unsigned char *s, *send, *t;
8854 int ascompat, singlebyte = single_byte_optimizable(str);
8858 enc = STR_ENC_GET(str);
8861 for (i=0; i<argc; i++) {
8865 enc = rb_enc_check(str, s);
/* The single-byte fast path requires every set argument to be single-byte optimizable too. */
8866 if (singlebyte && !single_byte_optimizable(s))
8868 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8872 str_modify_keep_cr(str);
8873 s = t = (
unsigned char *)RSTRING_PTR(str);
8874 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8877 ascompat = rb_enc_asciicompat(enc);
/* Single-byte loop. */
8881 unsigned int c = *s++;
8882 if (c != save || (argc > 0 && !squeez[c])) {
/* Multi-byte loop: ASCII bytes are tested without decoding. */
8892 if (ascompat && (c = *s) < 0x80) {
8893 if (c != save || (argc > 0 && !squeez[c])) {
8899 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8901 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8902 if (t != s) rb_enc_mbcput(c, t, enc);
8911 TERM_FILL((
char *)t, TERM_LEN(str));
/* The length is rewritten only when it actually changed. */
8912 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8913 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8917 if (modify)
return str;
/*
 * rb_str_squeeze (partial listing): applies rb_str_squeeze_bang with the same
 * arguments. NOTE(review): the visible call passes str; whether str has been
 * duplicated beforehand is on a line not shown -- confirm.
 */
8931rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8934 rb_str_squeeze_bang(argc, argv, str);
/* Lines from two later callers (headers not shown): tr_trans with 1 as the last argument. */
8952 return tr_trans(str, src, repl, 1);
8975 tr_trans(str, src, repl, 1);
/*
 * rb_str_count (partial listing): counts the characters of str selected by the
 * sets given in argv.
 *
 * Fast path: when the first set is a single byte in an ASCII-compatible
 * encoding, reverse matching is allowed for it, and str is not broken, the
 * bytes of str are compared directly against that byte.
 * General path: all sets are merged with tr_setup_table and each character of
 * str is tested with tr_find. An empty string or NULL buffer yields 0 on both
 * paths.
 */
8988rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8990 char table[TR_TABLE_SIZE];
8992 VALUE del = 0, nodel = 0, tstr;
9002 enc = rb_enc_check(str, tstr);
9005 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9006 (ptstr = RSTRING_PTR(tstr),
9007 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9008 !is_broken_string(str)) {
/* The code point is narrowed to one byte for the comparison below. */
9010 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9012 s = RSTRING_PTR(str);
9013 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9016 if (*(
unsigned char*)s++ == c) n++;
/* General path: the first set initialises the table, the rest are merged into it. */
9022 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9023 for (i=1; i<argc; i++) {
9026 enc = rb_enc_check(str, tstr);
9027 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9030 s = RSTRING_PTR(str);
9031 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9033 ascompat = rb_enc_asciicompat(enc);
9037 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9045 c = rb_enc_codepoint_len(s, send, &clen, enc);
9046 if (tr_find(c, table, del, nodel)) {
9057rb_fs_check(
VALUE val)
9061 if (
NIL_P(val))
return 0;
9066static const char isspacetable[256] = {
9067 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9068 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9069 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9070 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9071 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9072 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9073 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9074 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9075 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9076 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9077 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9078 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9079 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9080 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9081 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9082 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9085#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9088split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9090 if (empty_count >= 0 &&
len == 0) {
9091 return empty_count + 1;
9093 if (empty_count > 0) {
9098 }
while (--empty_count > 0);
9102 rb_yield(str_new_empty_String(str));
9103 }
while (--empty_count > 0);
9117 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9121literal_split_pattern(
VALUE spat, split_type_t default_type)
9129 return SPLIT_TYPE_CHARS;
9131 else if (rb_enc_asciicompat(enc)) {
9132 if (
len == 1 && ptr[0] ==
' ') {
9133 return SPLIT_TYPE_AWK;
9138 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9139 return SPLIT_TYPE_AWK;
9142 return default_type;
9155rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9160 split_type_t split_type;
9161 long beg, end, i = 0, empty_count = -1;
9166 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9168 if (lim <= 0) limit =
Qnil;
9169 else if (lim == 1) {
9170 if (RSTRING_LEN(str) == 0)
9181 if (
NIL_P(limit) && !lim) empty_count = 0;
9183 enc = STR_ENC_GET(str);
9184 split_type = SPLIT_TYPE_REGEXP;
9186 spat = get_pat_quoted(spat, 0);
9188 else if (
NIL_P(spat = rb_fs)) {
9189 split_type = SPLIT_TYPE_AWK;
9191 else if (!(spat = rb_fs_check(spat))) {
9192 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9197 if (split_type != SPLIT_TYPE_AWK) {
9202 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9203 if (split_type == SPLIT_TYPE_AWK) {
9205 split_type = SPLIT_TYPE_STRING;
9210 mustnot_broken(spat);
9211 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9219#define SPLIT_STR(beg, len) ( \
9220 empty_count = split_string(result, str, beg, len, empty_count), \
9221 str_mod_check(str, str_start, str_len))
9224 char *ptr = RSTRING_PTR(str);
9225 char *
const str_start = ptr;
9226 const long str_len = RSTRING_LEN(str);
9227 char *
const eptr = str_start + str_len;
9228 if (split_type == SPLIT_TYPE_AWK) {
9235 if (is_ascii_string(str)) {
9236 while (ptr < eptr) {
9237 c = (
unsigned char)*ptr++;
9239 if (ascii_isspace(c)) {
9245 if (!
NIL_P(limit) && lim <= i)
break;
9248 else if (ascii_isspace(c)) {
9249 SPLIT_STR(beg, end-beg);
9252 if (!
NIL_P(limit)) ++i;
9260 while (ptr < eptr) {
9263 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9272 if (!
NIL_P(limit) && lim <= i)
break;
9276 SPLIT_STR(beg, end-beg);
9279 if (!
NIL_P(limit)) ++i;
9287 else if (split_type == SPLIT_TYPE_STRING) {
9288 char *substr_start = ptr;
9289 char *sptr = RSTRING_PTR(spat);
9290 long slen = RSTRING_LEN(spat);
9293 mustnot_broken(str);
9294 enc = rb_enc_check(str, spat);
9295 while (ptr < eptr &&
9296 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9299 if (t != ptr + end) {
9303 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9304 str_mod_check(spat, sptr, slen);
9307 if (!
NIL_P(limit) && lim <= ++i)
break;
9309 beg = ptr - str_start;
9311 else if (split_type == SPLIT_TYPE_CHARS) {
9315 mustnot_broken(str);
9316 enc = rb_enc_get(str);
9317 while (ptr < eptr &&
9318 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9319 SPLIT_STR(ptr - str_start, n);
9321 if (!
NIL_P(limit) && lim <= ++i)
break;
9323 beg = ptr - str_start;
9327 long len = RSTRING_LEN(str);
9335 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9340 if (start == end && BEG(0) == END(0)) {
9345 else if (last_null == 1) {
9346 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9353 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9359 SPLIT_STR(beg, end-beg);
9360 beg = start = END(0);
9364 for (idx=1; idx < regs->num_regs; idx++) {
9365 if (BEG(idx) == -1)
continue;
9366 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9368 if (!
NIL_P(limit) && lim <= ++i)
break;
9370 if (match) rb_match_unbusy(match);
9372 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9373 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9376 return result ? result : str;
9386 return rb_str_split_m(1, &sep, str);
9389#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9404#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9407chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9409 const char *prev = rb_enc_prev_char(p, e, e, enc);
9412 prev = rb_enc_prev_char(p, e, e, enc);
9413 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9425 RSTRING_LEN(rs) != 1 ||
9426 RSTRING_PTR(rs)[0] !=
'\n')) {
9432#define rb_rs get_rs()
9439 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9440 long pos,
len, rslen;
9446 static ID keywords[1];
9451 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9455 if (!ENUM_ELEM(ary, str)) {
9463 if (!RSTRING_LEN(str))
goto end;
9465 ptr = subptr = RSTRING_PTR(str);
9467 len = RSTRING_LEN(str);
9469 rslen = RSTRING_LEN(rs);
9472 enc = rb_enc_get(str);
9474 enc = rb_enc_check(str, rs);
9479 const char *eol = NULL;
9481 while (subend < pend) {
9482 long chomp_rslen = 0;
9484 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9486 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9488 if (eol == subend)
break;
9492 chomp_rslen = -rslen;
9496 if (!subptr) subptr = subend;
9500 }
while (subend < pend);
9502 if (rslen == 0) chomp_rslen = 0;
9504 subend - subptr + (chomp ? chomp_rslen : rslen));
9505 if (ENUM_ELEM(ary, line)) {
9506 str_mod_check(str, ptr,
len);
9508 subptr = eol = NULL;
9513 rsptr = RSTRING_PTR(rs);
9514 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9523 rsptr = RSTRING_PTR(rs);
9524 rslen = RSTRING_LEN(rs);
9527 while (subptr < pend) {
9528 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9532 if (hit != adjusted) {
9536 subend = hit += rslen;
9539 subend = chomp_newline(subptr, subend, enc);
9546 if (ENUM_ELEM(ary, line)) {
9547 str_mod_check(str, ptr,
len);
9552 if (subptr != pend) {
9555 pend = chomp_newline(subptr, pend, enc);
9557 else if (pend - subptr >= rslen &&
9558 memcmp(pend - rslen, rsptr, rslen) == 0) {
9563 ENUM_ELEM(ary, line);
9584rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9587 return rb_str_enumerate_lines(argc, argv, str, 0);
9642rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9644 VALUE ary = WANTARRAY(
"lines", 0);
9645 return rb_str_enumerate_lines(argc, argv, str, ary);
9659 for (i=0; i<RSTRING_LEN(str); i++) {
9660 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9678rb_str_each_byte(
VALUE str)
9681 return rb_str_enumerate_bytes(str, 0);
9693rb_str_bytes(
VALUE str)
9695 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9696 return rb_str_enumerate_bytes(str, ary);
9714 ptr = RSTRING_PTR(str);
9715 len = RSTRING_LEN(str);
9716 enc = rb_enc_get(str);
9719 for (i = 0; i <
len; i += n) {
9720 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9725 for (i = 0; i <
len; i += n) {
9726 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9747rb_str_each_char(
VALUE str)
9750 return rb_str_enumerate_chars(str, 0);
9762rb_str_chars(
VALUE str)
9765 return rb_str_enumerate_chars(str, ary);
9769rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9774 const char *ptr, *end;
9777 if (single_byte_optimizable(str))
9778 return rb_str_enumerate_bytes(str, ary);
9781 ptr = RSTRING_PTR(str);
9783 enc = STR_ENC_GET(str);
9786 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9807rb_str_each_codepoint(
VALUE str)
9810 return rb_str_enumerate_codepoints(str, 0);
9822rb_str_codepoints(
VALUE str)
9825 return rb_str_enumerate_codepoints(str, ary);
9831 int encidx = rb_enc_to_index(enc);
9833 const OnigUChar source_ascii[] =
"\\X";
9834 const OnigUChar *source = source_ascii;
9835 size_t source_len =
sizeof(source_ascii) - 1;
9838#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9839#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9840#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9841#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9842#define CASE_UTF(e) \
9843 case ENCINDEX_UTF_##e: { \
9844 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9845 source = source_UTF_##e; \
9846 source_len = sizeof(source_UTF_##e); \
9849 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9857 regex_t *reg_grapheme_cluster;
9859 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9860 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9862 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9863 onig_error_code_to_str(message, r, &einfo);
9864 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9867 return reg_grapheme_cluster;
9873 int encidx = rb_enc_to_index(enc);
9874 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9876 if (encidx == rb_utf8_encindex()) {
9877 if (!reg_grapheme_cluster_utf8) {
9878 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9881 return reg_grapheme_cluster_utf8;
9890 size_t grapheme_cluster_count = 0;
9892 const char *ptr, *end;
9894 if (!rb_enc_unicode_p(enc)) {
9898 bool cached_reg_grapheme_cluster =
true;
9899 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9900 if (!reg_grapheme_cluster) {
9901 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9902 cached_reg_grapheme_cluster =
false;
9905 ptr = RSTRING_PTR(str);
9909 OnigPosition
len = onig_match(reg_grapheme_cluster,
9910 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9911 (
const OnigUChar *)ptr, NULL, 0);
9912 if (
len <= 0)
break;
9913 grapheme_cluster_count++;
9917 if (!cached_reg_grapheme_cluster) {
9918 onig_free(reg_grapheme_cluster);
9921 return SIZET2NUM(grapheme_cluster_count);
9925rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9929 const char *ptr0, *ptr, *end;
9931 if (!rb_enc_unicode_p(enc)) {
9932 return rb_str_enumerate_chars(str, ary);
9937 bool cached_reg_grapheme_cluster =
true;
9938 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9939 if (!reg_grapheme_cluster) {
9940 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9941 cached_reg_grapheme_cluster =
false;
9944 ptr0 = ptr = RSTRING_PTR(str);
9948 OnigPosition
len = onig_match(reg_grapheme_cluster,
9949 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9950 (
const OnigUChar *)ptr, NULL, 0);
9951 if (
len <= 0)
break;
9956 if (!cached_reg_grapheme_cluster) {
9957 onig_free(reg_grapheme_cluster);
9977rb_str_each_grapheme_cluster(
VALUE str)
9980 return rb_str_enumerate_grapheme_clusters(str, 0);
9992rb_str_grapheme_clusters(
VALUE str)
9995 return rb_str_enumerate_grapheme_clusters(str, ary);
9999chopped_length(
VALUE str)
10002 const char *p, *p2, *beg, *end;
10004 beg = RSTRING_PTR(str);
10005 end = beg + RSTRING_LEN(str);
10006 if (beg >= end)
return 0;
10007 p = rb_enc_prev_char(beg, end, end, enc);
10009 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10010 p2 = rb_enc_prev_char(beg, p, end, enc);
10011 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10029rb_str_chop_bang(
VALUE str)
10031 str_modify_keep_cr(str);
10032 if (RSTRING_LEN(str) > 0) {
10034 len = chopped_length(str);
10035 STR_SET_LEN(str,
len);
10036 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10055rb_str_chop(
VALUE str)
10061smart_chomp(
VALUE str,
const char *e,
const char *p)
10064 if (rb_enc_mbminlen(enc) > 1) {
10069 pp = e - rb_enc_mbminlen(enc);
10072 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10080 if (--e > p && *(e-1) ==
'\r') {
10097 char *pp, *e, *rsptr;
10099 char *
const p = RSTRING_PTR(str);
10100 long len = RSTRING_LEN(str);
10102 if (
len == 0)
return 0;
10105 return smart_chomp(str, e, p);
10108 enc = rb_enc_get(str);
10111 if (rb_enc_mbminlen(enc) > 1) {
10116 pp -= rb_enc_mbminlen(enc);
10119 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10126 while (e > p && *(e-1) ==
'\n') {
10128 if (e > p && *(e-1) ==
'\r')
10134 if (rslen >
len)
return len;
10136 enc = rb_enc_get(rs);
10137 newline = rsptr[rslen-1];
10138 if (rslen == rb_enc_mbminlen(enc)) {
10140 if (newline ==
'\n')
10141 return smart_chomp(str, e, p);
10145 return smart_chomp(str, e, p);
10149 enc = rb_enc_check(str, rs);
10150 if (is_broken_string(rs)) {
10154 if (p[
len-1] == newline &&
10156 memcmp(rsptr, pp, rslen) == 0)) {
10157 if (at_char_boundary(p, pp, e, enc))
10158 return len - rslen;
10170chomp_rs(
int argc,
const VALUE *argv)
10174 VALUE rs = argv[0];
10186 long olen = RSTRING_LEN(str);
10187 long len = chompped_length(str, rs);
10188 if (
len >= olen)
return Qnil;
10189 str_modify_keep_cr(str);
10190 STR_SET_LEN(str,
len);
10191 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10211rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10214 str_modifiable(str);
10215 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10216 rs = chomp_rs(argc, argv);
10218 return rb_str_chomp_string(str, rs);
10231rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10233 VALUE rs = chomp_rs(argc, argv);
10241 const char *
const start = s;
10243 if (!s || s >= e)
return 0;
10246 if (single_byte_optimizable(str)) {
10247 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10252 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10274rb_str_lstrip_bang(
VALUE str)
10278 long olen, loffset;
10280 str_modify_keep_cr(str);
10281 enc = STR_ENC_GET(str);
10283 loffset = lstrip_offset(str, start, start+olen, enc);
10285 long len = olen-loffset;
10286 s = start + loffset;
10287 memmove(start, s,
len);
10288 STR_SET_LEN(str,
len);
10289 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10313rb_str_lstrip(
VALUE str)
10318 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10319 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10328 rb_str_check_dummy_enc(enc);
10332 if (!s || s >= e)
return 0;
10336 if (single_byte_optimizable(str)) {
10338 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10343 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10365rb_str_rstrip_bang(
VALUE str)
10369 long olen, roffset;
10371 str_modify_keep_cr(str);
10372 enc = STR_ENC_GET(str);
10374 roffset = rstrip_offset(str, start, start+olen, enc);
10376 long len = olen - roffset;
10378 STR_SET_LEN(str,
len);
10379 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10402rb_str_rstrip(
VALUE str)
10406 long olen, roffset;
10408 enc = STR_ENC_GET(str);
10410 roffset = rstrip_offset(str, start, start+olen, enc);
10412 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10430rb_str_strip_bang(
VALUE str)
10433 long olen, loffset, roffset;
10436 str_modify_keep_cr(str);
10437 enc = STR_ENC_GET(str);
10439 loffset = lstrip_offset(str, start, start+olen, enc);
10440 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10442 if (loffset > 0 || roffset > 0) {
10443 long len = olen-roffset;
10446 memmove(start, start + loffset,
len);
10448 STR_SET_LEN(str,
len);
10449 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10472rb_str_strip(
VALUE str)
10475 long olen, loffset, roffset;
10479 loffset = lstrip_offset(str, start, start+olen, enc);
10480 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10482 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10487scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10490 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10496 end = pos + RSTRING_LEN(pat);
10510 if (RSTRING_LEN(str) > end)
10511 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10520 if (!regs || regs->num_regs == 1) {
10526 for (
int i = 1; i < regs->num_regs; i++) {
10557 long last = -1, prev = 0;
10558 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10560 pat = get_pat_quoted(pat, 1);
10561 mustnot_broken(str);
10565 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10570 if (last >= 0) rb_pat_search(pat, str, last, 1);
10575 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10579 str_mod_check(str, p,
len);
10581 if (last >= 0) rb_pat_search(pat, str, last, 1);
10633rb_str_hex(
VALUE str)
10635 return rb_str_to_inum(str, 16, FALSE);
10719rb_str_oct(
VALUE str)
10721 return rb_str_to_inum(str, -8, FALSE);
10724#ifndef HAVE_CRYPT_R
10729 rb_nativethread_lock_t lock;
10730} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10799# define CRYPT_END() ALLOCV_END(databuf)
10802 extern char *crypt(
const char *,
const char *);
10803# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10806 const char *s, *saltp;
10809 char salt_8bit_clean[3];
10813 mustnot_wchar(str);
10814 mustnot_wchar(salt);
10816 saltp = RSTRING_PTR(salt);
10817 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10818 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10822 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10823 salt_8bit_clean[0] = saltp[0] & 0x7f;
10824 salt_8bit_clean[1] = saltp[1] & 0x7f;
10825 salt_8bit_clean[2] =
'\0';
10826 saltp = salt_8bit_clean;
10831# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10832 data->initialized = 0;
10834 res = crypt_r(s, saltp, data);
10837 res = crypt(s, saltp);
10852 size_t res_size = strlen(res)+1;
10853 tmp_buf =
ALLOCA_N(
char, res_size);
10854 memcpy(tmp_buf, res, res_size);
10891 char *ptr, *p, *pend;
10894 unsigned long sum0 = 0;
10899 ptr = p = RSTRING_PTR(str);
10900 len = RSTRING_LEN(str);
10906 str_mod_check(str, ptr,
len);
10909 sum0 += (
unsigned char)*p;
10920 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10921 sum0 &= (((
unsigned long)1)<<bits)-1;
10941rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10945 long width,
len, flen = 1, fclen = 1;
10948 const char *f =
" ";
10949 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10951 int singlebyte = 1, cr;
10955 enc = STR_ENC_GET(str);
10956 termlen = rb_enc_mbminlen(enc);
10960 enc = rb_enc_check(str, pad);
10961 f = RSTRING_PTR(pad);
10962 flen = RSTRING_LEN(pad);
10963 fclen = str_strlen(pad, enc);
10964 singlebyte = single_byte_optimizable(pad);
10965 if (flen == 0 || fclen == 0) {
10966 rb_raise(rb_eArgError,
"zero width padding");
10969 len = str_strlen(str, enc);
10970 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10972 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10976 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10977 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10979 size = RSTRING_LEN(str);
10980 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10981 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10982 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10983 rb_raise(rb_eArgError,
"argument too big");
10987 p = RSTRING_PTR(res);
10989 memset(p, *f, llen);
10993 while (llen >= fclen) {
10999 memcpy(p, f, llen2);
11003 memcpy(p, RSTRING_PTR(str), size);
11006 memset(p, *f, rlen);
11010 while (rlen >= fclen) {
11016 memcpy(p, f, rlen2);
11020 TERM_FILL(p, termlen);
11021 STR_SET_LEN(res, p-RSTRING_PTR(res));
11042rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11044 return rb_str_justify(argc, argv, str,
'l');
11056rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11058 return rb_str_justify(argc, argv, str,
'r');
11071rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11073 return rb_str_justify(argc, argv, str,
'c');
11089 sep = get_pat_quoted(sep, 0);
11101 pos = rb_str_index(str, sep, 0);
11102 if (pos < 0)
goto failed;
11107 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11110 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11124 long pos = RSTRING_LEN(str);
11126 sep = get_pat_quoted(sep, 0);
11139 pos = rb_str_rindex(str, sep, pos);
11148 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11150 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11162rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11166 for (i=0; i<argc; i++) {
11167 VALUE tmp = argv[i];
11169 if (rb_reg_start_with_p(tmp, str))
11173 const char *p, *s, *e;
11178 enc = rb_enc_check(str, tmp);
11179 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11180 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11181 p = RSTRING_PTR(str);
11184 if (!at_char_right_boundary(p, s, e, enc))
11186 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11202rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11206 for (i=0; i<argc; i++) {
11207 VALUE tmp = argv[i];
11208 const char *p, *s, *e;
11213 enc = rb_enc_check(str, tmp);
11214 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11215 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11216 p = RSTRING_PTR(str);
11219 if (!at_char_boundary(p, s, e, enc))
11221 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11237deleted_prefix_length(
VALUE str,
VALUE prefix)
11239 const char *strptr, *prefixptr;
11240 long olen, prefixlen;
11245 if (!is_broken_string(prefix) ||
11246 !rb_enc_asciicompat(enc) ||
11247 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11248 enc = rb_enc_check(str, prefix);
11252 prefixlen = RSTRING_LEN(prefix);
11253 if (prefixlen <= 0)
return 0;
11254 olen = RSTRING_LEN(str);
11255 if (olen < prefixlen)
return 0;
11256 strptr = RSTRING_PTR(str);
11257 prefixptr = RSTRING_PTR(prefix);
11258 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11259 if (is_broken_string(prefix)) {
11260 if (!is_broken_string(str)) {
11264 const char *strend = strptr + olen;
11265 const char *after_prefix = strptr + prefixlen;
11266 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11287rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11290 str_modify_keep_cr(str);
11292 prefixlen = deleted_prefix_length(str, prefix);
11293 if (prefixlen <= 0)
return Qnil;
11307rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11311 prefixlen = deleted_prefix_length(str, prefix);
11312 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11314 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11327deleted_suffix_length(
VALUE str,
VALUE suffix)
11329 const char *strptr, *suffixptr;
11330 long olen, suffixlen;
11334 if (is_broken_string(suffix))
return 0;
11335 enc = rb_enc_check(str, suffix);
11338 suffixlen = RSTRING_LEN(suffix);
11339 if (suffixlen <= 0)
return 0;
11340 olen = RSTRING_LEN(str);
11341 if (olen < suffixlen)
return 0;
11342 strptr = RSTRING_PTR(str);
11343 suffixptr = RSTRING_PTR(suffix);
11344 const char *strend = strptr + olen;
11345 const char *before_suffix = strend - suffixlen;
11346 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11347 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11363rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11365 long olen, suffixlen,
len;
11366 str_modifiable(str);
11368 suffixlen = deleted_suffix_length(str, suffix);
11369 if (suffixlen <= 0)
return Qnil;
11371 olen = RSTRING_LEN(str);
11372 str_modify_keep_cr(str);
11373 len = olen - suffixlen;
11374 STR_SET_LEN(str,
len);
11375 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11391rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11395 suffixlen = deleted_suffix_length(str, suffix);
11396 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11398 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11405 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11413 val = rb_fs_check(val);
11416 "value of %"PRIsVALUE
" must be String or Regexp",
11420 rb_warn_deprecated(
"'$;'", NULL);
11437 str_modifiable(str);
11440 int idx = rb_enc_to_index(encoding);
11447 rb_enc_associate_index(str, idx);
11471 if (STR_EMBED_P(str)) {
11472 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11477 str_replace_shared_without_enc(str2, str);
11479 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11512rb_str_valid_encoding_p(
VALUE str)
11532rb_str_is_ascii_only_p(
VALUE str)
11542 static const char ellipsis[] =
"...";
11543 const long ellipsislen =
sizeof(ellipsis) - 1;
11545 const long blen = RSTRING_LEN(str);
11546 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11547 VALUE estr, ret = 0;
11550 if (
len * rb_enc_mbminlen(enc) >= blen ||
11554 else if (
len <= ellipsislen ||
11556 if (rb_enc_asciicompat(enc)) {
11558 rb_enc_associate(ret, enc);
11565 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11570 rb_enc_from_encoding(enc), 0,
Qnil);
11583 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11589 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11608 if (enc == STR_ENC_GET(str)) {
11613 return enc_str_scrub(enc, str, repl, cr);
11621 const char *rep, *p, *e, *p1, *sp;
11627 rb_raise(rb_eArgError,
"both of block and replacement given");
11634 if (!
NIL_P(repl)) {
11635 repl = str_compat_and_valid(repl, enc);
11638 if (rb_enc_dummy_p(enc)) {
11641 encidx = rb_enc_to_index(enc);
11643#define DEFAULT_REPLACE_CHAR(str) do { \
11644 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11645 rep = replace; replen = (int)sizeof(replace); \
11648 slen = RSTRING_LEN(str);
11649 p = RSTRING_PTR(str);
11654 if (rb_enc_asciicompat(enc)) {
11660 else if (!
NIL_P(repl)) {
11661 rep = RSTRING_PTR(repl);
11662 replen = RSTRING_LEN(repl);
11665 else if (encidx == rb_utf8_encindex()) {
11666 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11670 DEFAULT_REPLACE_CHAR(
"?");
11675 p = search_nonascii(p, e);
11680 int ret = rb_enc_precise_mbclen(p, e, enc);
11699 if (e - p < clen) clen = e - p;
11706 for (; clen > 1; clen--) {
11707 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11718 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11719 str_mod_check(str, sp, slen);
11720 repl = str_compat_and_valid(repl, enc);
11727 p = search_nonascii(p, e);
11753 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11754 str_mod_check(str, sp, slen);
11755 repl = str_compat_and_valid(repl, enc);
11764 long mbminlen = rb_enc_mbminlen(enc);
11768 else if (!
NIL_P(repl)) {
11769 rep = RSTRING_PTR(repl);
11770 replen = RSTRING_LEN(repl);
11772 else if (encidx == ENCINDEX_UTF_16BE) {
11773 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11775 else if (encidx == ENCINDEX_UTF_16LE) {
11776 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11778 else if (encidx == ENCINDEX_UTF_32BE) {
11779 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11781 else if (encidx == ENCINDEX_UTF_32LE) {
11782 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11785 DEFAULT_REPLACE_CHAR(
"?");
11789 int ret = rb_enc_precise_mbclen(p, e, enc);
11802 if (e - p < clen) clen = e - p;
11803 if (clen <= mbminlen * 2) {
11808 for (; clen > mbminlen; clen-=mbminlen) {
11809 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11819 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11820 str_mod_check(str, sp, slen);
11821 repl = str_compat_and_valid(repl, enc);
11846 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11847 str_mod_check(str, sp, slen);
11848 repl = str_compat_and_valid(repl, enc);
11888str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11896static ID id_normalize;
11897static ID id_normalized_p;
11898static VALUE mUnicodeNormalize;
11901unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11903 static int UnicodeNormalizeRequired = 0;
11906 if (!UnicodeNormalizeRequired) {
11907 rb_require(
"unicode_normalize/normalize.rb");
11908 UnicodeNormalizeRequired = 1;
11912 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11949rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11951 return unicode_normalize_common(argc, argv, str, id_normalize);
11965rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11967 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11994rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11996 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12128#define sym_equal rb_obj_equal
12131sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12135 int c = rb_enc_precise_mbclen(s, send, enc);
12139 c = rb_enc_mbc_to_codepoint(s, send, enc);
12147rb_str_symname_p(
VALUE sym)
12152 rb_encoding *resenc = rb_default_internal_encoding();
12154 if (resenc == NULL) resenc = rb_default_external_encoding();
12155 enc = STR_ENC_GET(sym);
12156 ptr = RSTRING_PTR(sym);
12157 len = RSTRING_LEN(sym);
12158 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12166rb_str_quote_unprintable(
VALUE str)
12174 resenc = rb_default_internal_encoding();
12175 if (resenc == NULL) resenc = rb_default_external_encoding();
12176 enc = STR_ENC_GET(str);
12177 ptr = RSTRING_PTR(str);
12178 len = RSTRING_LEN(str);
12179 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12180 !sym_printable(ptr, ptr +
len, enc)) {
12181 return rb_str_escape(str);
12187rb_id_quote_unprintable(
ID id)
12189 VALUE str = rb_id2str(
id);
12190 if (!rb_str_symname_p(str)) {
12191 return rb_str_escape(str);
12209sym_inspect(
VALUE sym)
12216 if (!rb_str_symname_p(str)) {
12218 len = RSTRING_LEN(str);
12219 rb_str_resize(str,
len + 1);
12220 dest = RSTRING_PTR(str);
12221 memmove(dest + 1, dest,
len);
12225 VALUE orig_str = str;
12227 len = RSTRING_LEN(orig_str);
12228 str = rb_enc_str_new(0,
len + 1, enc);
12231 ptr = RSTRING_PTR(orig_str);
12232 dest = RSTRING_PTR(str);
12233 memcpy(dest + 1, ptr,
len);
12253rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12258 rb_raise(rb_eArgError,
"no receiver given");
12355 return rb_str_match(
rb_sym2str(sym), other);
12370sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12372 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12385sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12387 return rb_str_match_m_p(argc, argv, sym);
12405 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12416sym_length(
VALUE sym)
12430sym_empty(
VALUE sym)
12464sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12480sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12496sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12510sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12512 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12525sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12527 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12539sym_encoding(
VALUE sym)
12545string_for_symbol(
VALUE name)
12550 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12564 name = string_for_symbol(name);
12565 return rb_intern_str(name);
12574 name = string_for_symbol(name);
12598 return rb_fstring(str);
12604 struct RString fake_str = {RBASIC_INIT};
12605 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12617 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12618 rb_enc_autoload(enc);
12621 struct RString fake_str = {RBASIC_INIT};
12622 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12628 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12629 rb_enc_autoload(enc);
12632 struct RString fake_str = {RBASIC_INIT};
12633 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12646rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12651 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12652 rb_str_buf_cat_byte(str, (
char) code);
12662fstring_set_class_i(
VALUE *str,
void *data)
12666 return ST_CONTINUE;
12674 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12841 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously been classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@50 as
String's specific fields.
struct RString::@50::@51 heap
Strings that use separated memory region for contents use this pattern.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@50::@52 embed
Embedded contents.
long len
Length of the string, not including terminating NUL character.
union RString::@50::@51::@53 aux
Auxiliary info.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.