14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
/* Shorthands for entry `no` of the beg[] / end[] arrays reached through a
 * variable named `regs`, which must be in scope wherever these are expanded. */
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
/* NOTE(review): presumably an upper bound on the byte length of a single
 * character; no use is visible in this excerpt -- confirm. */
131#define RUBY_MAX_CHAR_LEN 16
/* String-specific meanings given to FL_USER* flag bits. */
/* Set by str_store_precomputed_hash() after a hash value has been copied in
 * directly behind the terminator of the string. */
132#define STR_PRECOMPUTED_HASH FL_USER4
/* Set by STR_SET_SHARED() on the string whose buffer is being shared. */
133#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED() on a shared root whose RBASIC_CLASS() is 0. */
134#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(); also part of STR_UNMODIFIABLE_MASK. */
135#define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); str_discard() does not free the heap buffer of a
 * string carrying this flag. */
136#define STR_NOFREE FL_USER18
/* STR_SET_SHARED() guards its work with a test that this flag is not set. */
137#define STR_FAKESTR FL_USER19
/* Marks str as non-embedded: sets STR_NOEMBED and clears STR_SHARED,
 * STR_SHARED_ROOT and STR_BORROWED. */
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Marks str as embedded by clearing STR_NOEMBED, STR_SHARED and STR_NOFREE. */
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
/* Terminator length for str: 1 when str_enc_fastpath(str) holds, otherwise
 * rb_enc_mbminlen() of the encoding selected by ENCODING_GET(str). */
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
/* Writes the terminator at ptr: always stores one NUL byte, and when termlen
 * is greater than 1 additionally zero-fills termlen bytes with memset().
 * Both arguments are captured in locals first, so each is evaluated once. */
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
/* Convenience form of RESIZE_CAPA_TERM that uses TERM_LEN(str) as the
 * terminator length. */
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
/* Gives str room for `capacity` bytes plus a `termlen`-byte terminator.
 *
 * Embedded string whose embed capacity is too small: a buffer of
 * capacity + termlen bytes is allocated, the current contents are copied
 * into it, and str is switched to non-embedded with the new capacity.
 * Heap path (visible lines): asserts that str is not STR_SHARED, reallocates
 * the heap buffer to capacity + termlen bytes and records the new capacity.
 *
 * NOTE(review): `capacity` and `termlen` are used unparenthesised in the
 * comparison against str_embed_capa(str), so callers should pass simple
 * expressions.  The lines joining the two paths are not visible in this
 * excerpt. */
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
/* Records shared_str as the shared root of str, unless str carries
 * STR_FAKESTR.  Asserts that the pointer of str lies within
 * [RSTRING_PTR(shared_str), RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)],
 * stores shared_str into as.heap.aux.shared through RB_OBJ_WRITE, sets
 * STR_SHARED on str and STR_SHARED_ROOT on shared_str, and also sets
 * STR_BORROWED on shared_str when its RBASIC_CLASS() is 0. */
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 FL_SET((shared_str), STR_SHARED_ROOT); \
209 if (RBASIC_CLASS((shared_str)) == 0) \
210 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Reads the as.heap.ptr field of str. */
214#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* as.heap.aux.capa plus TERM_LEN(str), computed as size_t. */
215#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias for get_encoding(str). */
218#define STR_ENC_GET(str) get_encoding(str)
/* SHARABLE_MIDDLE_SUBSTRING defaults to 0 when the build does not define it.
 * When it is 0, SHARABLE_SUBSTRING_P(beg, len, end) holds only if
 * beg + len == end; the alternative definition is the constant 1.
 * NOTE(review): the directives separating the two definitions and closing
 * the conditionals are not visible in this excerpt. */
220#if !defined SHARABLE_MIDDLE_SUBSTRING
221# define SHARABLE_MIDDLE_SUBSTRING 0
223#if !SHARABLE_MIDDLE_SUBSTRING
224#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
226#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/* Bytes available for embedded content in the object slot of str: the GC
 * slot size minus the offset of as.embed.ary within struct RString. */
231str_embed_capa(
VALUE str)
233 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
/* True when none of STR_NOFREE, STR_SHARED_ROOT and STR_SHARED is set on
 * str. */
237rb_str_reembeddable_p(
VALUE str)
239 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
243rb_str_embed_size(
long capa,
long termlen)
/* Computes real_size for str.  For an embedded string, and likewise for a
 * string accepted by rb_str_reembeddable_p(), the size is
 * rb_str_embed_size(capa, TERM_LEN(str)), where capa is first increased by
 * sizeof(st_index_t) if STR_PRECOMPUTED_HASH is set.  The remaining visible
 * assignment uses sizeof(struct RString).
 * NOTE(review): the initial value of capa in each branch and the return
 * statement are not visible in this excerpt. */
251rb_str_size_as_embedded(
VALUE str)
254 if (STR_EMBED_P(str)) {
256 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
258 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
262 else if (rb_str_reembeddable_p(str)) {
264 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
266 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
269 real_size =
sizeof(
struct RString);
/* Asks the GC (rb_gc_size_allocatable_p) whether a slot of
 * rb_str_embed_size(len, termlen) bytes can be allocated. */
276STR_EMBEDDABLE_P(
long len,
long termlen)
278 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
/* Forward declarations of static helpers that are defined further down in
 * this file. */
283static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
284static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
286static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
287static inline void str_modifiable(
VALUE str);
/* Calls str_make_independent_expand() with the current length of str, an
 * expansion of 0 and TERM_LEN(str) as the terminator length. */
292str_make_independent(
VALUE str)
294 long len = RSTRING_LEN(str);
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
299static inline int str_dependent_p(
VALUE str);
/* Calls str_make_independent(str) only when str_dependent_p(str) reports
 * that str is dependent. */
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
/* Moves the contents of str out of its heap buffer: remembers the heap
 * pointer in buf, sets the length, copies len bytes from buf to
 * RSTRING_PTR(str) and writes a terminator at as.embed.ary + len.
 * NOTE(review): the computation of len, the switch to the embedded
 * representation and the release of buf are not visible in this excerpt. */
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_LEN(str,
len);
322 memcpy(RSTRING_PTR(str), buf,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
/* Debugging aid: prints a diagnostic to stderr whose leading %s conversion
 * is the first item of the message, warning that a NULL is being returned.
 * NOTE(review): the argument list is cut off here; presumably func is the
 * value printed -- confirm. */
330rb_debug_rstring_null_ptr(
const char *func)
332 fprintf(stderr,
"%s is returning NULL!! "
333 "SIGSEGV is highly expected to follow immediately.\n"
334 "If you could reproduce, attach your debugger here, "
335 "and look at the passed string.\n",
340static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
343get_encoding(
VALUE str)
/* Raises ArgumentError, naming the encoding of str, when
 * is_broken_string(str) is true. */
349mustnot_broken(
VALUE str)
351 if (is_broken_string(str)) {
352 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
/* Raises ArgumentError, naming the encoding, when rb_enc_mbminlen(enc) is
 * greater than 1.  The assignment of enc is on a line not visible in this
 * excerpt (presumably the encoding of str -- confirm). */
357mustnot_wchar(
VALUE str)
360 if (rb_enc_mbminlen(enc) > 1) {
361 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
365static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
367#if SIZEOF_LONG == SIZEOF_VOIDP
368#define PRECOMPUTED_FAKESTR_HASH 1
373BARE_STRING_P(
VALUE str)
/* Hashes the bytes of str with rb_memhash().  A follow-up step is guarded by
 * `e && !is_ascii_string(str)`.
 * NOTE(review): the definition of e, the guarded statement and the return
 * are not visible in this excerpt. */
378static inline st_index_t
379str_do_hash(
VALUE str)
381 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
383 if (e && !is_ascii_string(str)) {
/* Copies hash into the bytes directly after the terminator of str
 * (RSTRING_END(str) + TERM_LEN(str)) and sets STR_PRECOMPUTED_HASH.
 * used_bytes / free_bytes measure how much of the embed capacity is taken;
 * NOTE(review): the checks that consume them are not visible in this
 * excerpt. */
390str_store_precomputed_hash(
VALUE str, st_index_t hash)
396 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
397 size_t free_bytes = str_embed_capa(str) - used_bytes;
401 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
403 FL_SET(str, STR_PRECOMPUTED_HASH);
416 if (
FL_TEST(str, RSTRING_FSTR))
419 bare = BARE_STRING_P(str);
421 if (STR_EMBED_P(str)) {
426 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
433 rb_str_resize(str, RSTRING_LEN(str));
435 fstr = register_fstring(str,
false,
false);
438 str_replace_shared_without_enc(str, fstr);
445static VALUE fstring_table_obj;
448fstring_concurrent_set_hash(
VALUE str)
450#ifdef PRECOMPUTED_FAKESTR_HASH
454 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
471 const char *aptr, *bptr;
478 return (alen == blen &&
480 memcmp(aptr, bptr, alen) == 0);
485 bool force_precompute_hash;
489fstring_concurrent_set_create(
VALUE str,
void *data)
499 long len = RSTRING_LEN(str);
500 long capa =
len +
sizeof(st_index_t);
501 int term_len = TERM_LEN(str);
503 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
505 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
506 STR_SET_LEN(new_str, RSTRING_LEN(str));
508 rb_enc_copy(new_str, str);
509 str_store_precomputed_hash(new_str, str_do_hash(str));
513 rb_enc_copy(new_str, str);
514#ifdef PRECOMPUTED_FAKESTR_HASH
515 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
516 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
530 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
533 if (STR_SHARED_P(str)) {
535 str_make_independent(str);
538 if (!BARE_STRING_P(str)) {
544 RBASIC(str)->flags |= RSTRING_FSTR;
546 RB_OBJ_SET_SHAREABLE(str);
560 .hash = fstring_concurrent_set_hash,
561 .cmp = fstring_concurrent_set_cmp,
562 .create = fstring_concurrent_set_create,
/* Creates the fstring table as a concurrent set using
 * fstring_concurrent_set_funcs with a size argument of 8192, and registers
 * the address of fstring_table_obj with the GC. */
567Init_fstring_table(
void)
569 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
570 rb_gc_register_address(&fstring_table_obj);
/* Looks str up in the fstring table, inserting it when absent
 * (rb_concurrent_set_find_or_insert), and asserts that the result is not a
 * garbage object.  force_precompute_hash is forwarded through the argument
 * struct.  When void* and long have the same size, the hash of str is
 * stashed in as.heap.aux.capa before the lookup.
 * NOTE(review): the conditions around that store, the use of `copy`, and
 * the return are not visible in this excerpt. */
574register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
578 .force_precompute_hash = force_precompute_hash
581#if SIZEOF_VOIDP == SIZEOF_LONG
585 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
589 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
591 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
/* Identity test: true when obj is the fstring table object itself. */
603rb_obj_is_fstring_table(
VALUE obj)
607 return obj == fstring_table_obj;
/* Removes obj from the fstring table by identity and bumps the obj_str_fstr
 * debug counter.  Asserts on entry via ASSERT_vm_locking_with_barrier(). */
611rb_gc_free_fstring(
VALUE obj)
613 ASSERT_vm_locking_with_barrier();
619 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
621 RB_DEBUG_COUNTER_INC(obj_str_fstr);
/* Runs callback(str_slot, data) over the fstring table through
 * rb_concurrent_set_foreach_with_replace().  Does nothing while
 * fstring_table_obj is still zero. */
627rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
629 if (fstring_table_obj) {
630 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
635setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
638 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
651 return (
VALUE)fake_str;
660 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
/* Returns the fstring for the len bytes at ptr, tagged US-ASCII.  A
 * stack-allocated fake string is set up around ptr and handed to
 * register_fstring() with copy and force_precompute_hash both false. */
669rb_fstring_new(
const char *ptr,
long len)
671 struct RString fake_str = {RBASIC_INIT};
672 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
678 struct RString fake_str = {RBASIC_INIT};
679 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
/* rb_fstring_new() for a NUL-terminated C string; the length is taken with
 * strlen(), so ptr must not be NULL. */
683rb_fstring_cstr(
const char *
ptr)
685 return rb_fstring_new(
ptr, strlen(
ptr));
689single_byte_optimizable(
VALUE str)
693 case ENCINDEX_ASCII_8BIT:
694 case ENCINDEX_US_ASCII:
/* Scans the byte range [p, e) for a byte with the high bit (0x80) set and
 * returns a pointer to it.
 *
 * Word-sized chunks are tested against NONASCII_MASK (0x80 repeated in every
 * byte of a uintptr_t); on a hit the byte within the word is located from
 * the leading or trailing zero count, depending on WORDS_BIGENDIAN.  Bytes
 * in front of the first aligned word (only when UNALIGNED_WORD_ACCESS is
 * off) and bytes after the last full word are tested one at a time by the
 * two fall-through case ladders.
 *
 * NOTE(review): the switch headers, the word loop and the value returned
 * when nothing is found are not visible in this excerpt; callers in this
 * file use the result as a truth value. */
716static inline const char *
717search_nonascii(
const char *p,
const char *e)
719 const uintptr_t *s, *t;
721#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
722# if SIZEOF_UINTPTR_T == 8
723# define NONASCII_MASK UINT64_C(0x8080808080808080)
724# elif SIZEOF_UINTPTR_T == 4
725# define NONASCII_MASK UINT32_C(0x80808080)
727# error "don't know what to do."
730# if SIZEOF_UINTPTR_T == 8
731# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
732# elif SIZEOF_UINTPTR_T == 4
733# define NONASCII_MASK 0x80808080UL
735# error "don't know what to do."
739 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
740#if !UNALIGNED_WORD_ACCESS
741 if ((uintptr_t)p % SIZEOF_VOIDP) {
742 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
747 case 7:
if (p[-7]&0x80)
return p-7;
748 case 6:
if (p[-6]&0x80)
return p-6;
749 case 5:
if (p[-5]&0x80)
return p-5;
750 case 4:
if (p[-4]&0x80)
return p-4;
752 case 3:
if (p[-3]&0x80)
return p-3;
753 case 2:
if (p[-2]&0x80)
return p-2;
754 case 1:
if (p[-1]&0x80)
return p-1;
759#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
760#define aligned_ptr(value) \
761 __builtin_assume_aligned((value), sizeof(uintptr_t))
763#define aligned_ptr(value) (uintptr_t *)(value)
766 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
769 if (*s & NONASCII_MASK) {
770#ifdef WORDS_BIGENDIAN
771 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
773 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
783 case 7:
if (e[-7]&0x80)
return e-7;
784 case 6:
if (e[-6]&0x80)
return e-6;
785 case 5:
if (e[-5]&0x80)
return e-5;
786 case 4:
if (e[-4]&0x80)
return e-4;
788 case 3:
if (e[-3]&0x80)
return e-3;
789 case 2:
if (e[-2]&0x80)
return e-2;
790 case 1:
if (e[-1]&0x80)
return e-1;
798 const char *e = p +
len;
800 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
802 p = search_nonascii(p, e);
806 if (rb_enc_asciicompat(enc)) {
807 p = search_nonascii(p, e);
810 int ret = rb_enc_precise_mbclen(p, e, enc);
814 p = search_nonascii(p, e);
820 int ret = rb_enc_precise_mbclen(p, e, enc);
836 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
839 p = search_nonascii(p, e);
843 else if (rb_enc_asciicompat(enc)) {
844 p = search_nonascii(p, e);
850 int ret = rb_enc_precise_mbclen(p, e, enc);
857 p = search_nonascii(p, e);
863 int ret = rb_enc_precise_mbclen(p, e, enc);
888 rb_enc_set_index(str1, rb_enc_get_index(str2));
/* Copies the encoding of src onto dest with str_enc_copy(), then examines
 * dest.  Both the empty case and the later test check whether the encoding
 * of src is ASCII-compatible; the later test also scans dest for non-ASCII
 * bytes with search_nonascii().
 * NOTE(review): the statements selected by these tests are not visible in
 * this excerpt. */
896rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
901 str_enc_copy(dest, src);
902 if (RSTRING_LEN(dest) == 0) {
903 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
914 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
915 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
926rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
928 str_enc_copy(dest, src);
935 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
941 return enc_coderange_scan(str, enc);
950 cr = enc_coderange_scan(str, get_encoding(str));
/* True when str_encindex_fastpath(encindex) holds or the encoding for that
 * index is ASCII-compatible.  The assignment of encindex is on a line not
 * visible in this excerpt (presumably the encoding index of str). */
957rb_enc_str_asciicompat(
VALUE str)
960 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
968 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
/* Detects that s no longer has the pointer p or the length len that the
 * caller captured earlier.
 * NOTE(review): the action taken on a mismatch is not visible in this
 * excerpt. */
977str_mod_check(
VALUE s,
const char *p,
long len)
979 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
/* Capacity of str, not counting termlen terminator bytes.  Embedded strings
 * report their embed capacity minus termlen.  Strings with STR_SHARED or
 * STR_NOFREE take a separate branch whose result is not visible in this
 * excerpt.  The last visible return yields as.heap.aux.capa. */
985str_capacity(
VALUE str,
const int termlen)
987 if (STR_EMBED_P(str)) {
988 return str_embed_capa(str) - termlen;
990 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
994 return RSTRING(str)->as.heap.aux.capa;
1001 return str_capacity(str, TERM_LEN(str));
/* Raises ArgumentError for a NULL pointer argument.
 * NOTE(review): the test on ptr guarding the raise is not visible in this
 * excerpt. */
1005must_not_null(
const char *
ptr)
1008 rb_raise(rb_eArgError,
"NULL pointer given");
/* Allocates an RString of class klass sized by rb_str_embed_size(capa, 0)
 * and sets the first embedded byte to 0.
 * NOTE(review): the remaining NEWOBJ_OF arguments and the return are not
 * visible in this excerpt. */
1013str_alloc_embed(
VALUE klass,
size_t capa)
1015 size_t size = rb_str_embed_size(
capa, 0);
1019 NEWOBJ_OF(str,
struct RString, klass,
1023 str->as.embed.ary[0] = 0;
/* Allocates an RString of class klass and initialises its heap fields to an
 * empty state: capacity 0 and a NULL buffer pointer.
 * NOTE(review): the remaining NEWOBJ_OF arguments and the return are not
 * visible in this excerpt. */
1029str_alloc_heap(
VALUE klass)
1031 NEWOBJ_OF(str,
struct RString, klass,
1035 str->as.heap.aux.capa = 0;
1036 str->as.heap.ptr = NULL;
/* Allocates an embedded string through str_alloc_embed(klass, 0) and
 * zero-fills its entire embed area.  Fires the string creation DTrace hook
 * with length 0 first. */
1042empty_str_alloc(
VALUE klass)
1044 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1045 VALUE str = str_alloc_embed(klass, 0);
1046 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1057 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1061 enc = rb_ascii8bit_encoding();
1064 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1066 int termlen = rb_enc_mbminlen(enc);
1068 if (STR_EMBEDDABLE_P(
len, termlen)) {
1069 str = str_alloc_embed(klass,
len + termlen);
1075 str = str_alloc_heap(klass);
1081 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1084 rb_enc_raw_set(str, enc);
1087 memcpy(RSTRING_PTR(str),
ptr,
len);
1090 memset(RSTRING_PTR(str), 0,
len);
1093 STR_SET_LEN(str,
len);
1094 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1101 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1136 __msan_unpoison_string(
ptr);
1156 if (rb_enc_mbminlen(enc) != 1) {
1157 rb_raise(rb_eArgError,
"wchar encoding given");
1159 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
/* Creates a string of class klass for the len bytes at ptr, associated with
 * encoding index encindex.  Two construction paths are visible: one builds
 * the string through str_enc_new(), the other allocates a heap string and
 * flags it STR_NOFREE.  Raises ArgumentError for an invalid size.
 * NOTE(review): the conditions choosing between the paths and the
 * assignments of pointer and length on the STR_NOFREE path are not visible
 * in this excerpt. */
1163str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1168 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1172 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1175 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1176 str = str_alloc_heap(klass);
1180 RBASIC(str)->flags |= STR_NOFREE;
1181 rb_enc_associate_index(str, encindex);
1210static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1212 int ecflags,
VALUE ecopts);
1217 int encidx = rb_enc_to_index(enc);
1218 if (rb_enc_get_index(str) == encidx)
1219 return is_ascii_string(str);
1230 if (!to)
return str;
1231 if (!from) from = rb_enc_get(str);
1232 if (from == to)
return str;
1233 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1234 rb_is_ascii8bit_enc(to)) {
1235 if (STR_ENC_GET(str) != to) {
1237 rb_enc_associate(str, to);
1244 from, to, ecflags, ecopts);
1245 if (
NIL_P(newstr)) {
1253rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1258 olen = RSTRING_LEN(newstr);
1259 if (ofs < -olen || olen < ofs)
1261 if (ofs < 0) ofs += olen;
1263 STR_SET_LEN(newstr, ofs);
1267 rb_str_modify(newstr);
1268 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1276 STR_SET_LEN(str, 0);
1277 rb_enc_associate(str, enc);
1283str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1285 int ecflags,
VALUE ecopts)
1290 VALUE econv_wrapper;
1291 const unsigned char *start, *sp;
1292 unsigned char *dest, *dp;
1293 size_t converted_output = (size_t)ofs;
1298 RBASIC_CLEAR_CLASS(econv_wrapper);
1300 if (!ec)
return Qnil;
1303 sp = (
unsigned char*)
ptr;
1305 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1306 (dp = dest + converted_output),
1310 size_t converted_input = sp - start;
1311 size_t rest =
len - converted_input;
1312 converted_output = dp - dest;
1314 if (converted_input && converted_output &&
1315 rest < (LONG_MAX / converted_output)) {
1316 rest = (rest * converted_output) / converted_input;
1321 olen += rest < 2 ? 2 : rest;
1322 rb_str_resize(newstr, olen);
1329 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1331 rb_enc_associate(newstr, to);
1350 const int eidx = rb_enc_to_index(eenc);
1353 return rb_enc_str_new(
ptr,
len, eenc);
1357 if ((eidx == rb_ascii8bit_encindex()) ||
1358 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1362 ienc = rb_default_internal_encoding();
1363 if (!ienc || eenc == ienc) {
1364 return rb_enc_str_new(
ptr,
len, eenc);
1368 if ((eidx == rb_ascii8bit_encindex()) ||
1369 (eidx == rb_usascii_encindex()) ||
1370 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1371 return rb_enc_str_new(
ptr,
len, ienc);
1374 str = rb_enc_str_new(NULL, 0, ienc);
1377 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1378 rb_str_initialize(str,
ptr,
len, eenc);
1386 int eidx = rb_enc_to_index(eenc);
1387 if (eidx == rb_usascii_encindex() &&
1388 !is_ascii_string(str)) {
1389 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1392 rb_enc_associate_index(str, eidx);
/* Makes str2 hold the contents of str; no encoding call is visible here.
 *
 * If the content plus terminator fits the embed capacity of str2, the bytes
 * are copied in and str2 is marked embedded.  Otherwise str2 is attached to
 * a shared root (the root of str when str is itself shared): a heap buffer
 * that str2 owns (not embedded, not shared, not no-free) is released first,
 * then STR_NOEMBED and the shared link are set.  The length of str2 is set
 * to len on the visible tail.
 *
 * NOTE(review): the computation of len, the root for a non-shared str, the
 * condition guarding rb_fatal() and the pointer assignment are not visible
 * in this excerpt. */
1451str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1453 const int termlen = TERM_LEN(str);
1458 if (str_embed_capa(str2) >=
len + termlen) {
1459 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1460 STR_SET_EMBED(str2);
1461 memcpy(ptr2, RSTRING_PTR(str),
len);
1462 TERM_FILL(ptr2+
len, termlen);
1466 if (STR_SHARED_P(str)) {
1467 root =
RSTRING(str)->as.heap.aux.shared;
1476 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1478 rb_fatal(
"about to free a possible shared root");
1480 char *ptr2 = STR_HEAP_PTR(str2);
1482 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1485 FL_SET(str2, STR_NOEMBED);
1487 STR_SET_SHARED(str2, root);
1490 STR_SET_LEN(str2,
len);
1498 str_replace_shared_without_enc(str2, str);
1499 rb_enc_cr_str_exact_copy(str2, str);
1506 return str_replace_shared(str_alloc_heap(klass), str);
1523rb_str_new_frozen_String(
VALUE orig)
/* Fast path: returns orig itself when it satisfies BARE_STRING_P() and is
 * already frozen.  The result for other inputs is not visible in this
 * excerpt. */
1531rb_str_frozen_bare_string(
VALUE orig)
1533 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
/* The visible return calls str_new_frozen_buffer() with a class argument of
 * 0 and copy_encoding disabled.
 * NOTE(review): any earlier return in this function is not visible in this
 * excerpt. */
1538rb_str_tmp_frozen_acquire(
VALUE orig)
1541 return str_new_frozen_buffer(0, orig, FALSE);
1545rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1547 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1548 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1550 VALUE str = str_alloc_heap(0);
1553 FL_SET(str, STR_SHARED_ROOT);
1555 size_t capa = str_capacity(orig, TERM_LEN(orig));
1561 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1562 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1569 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1570 RBASIC(orig)->flags &= ~STR_NOFREE;
1571 STR_SET_SHARED(orig, str);
1573 RB_OBJ_SET_SHAREABLE(str);
1585rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1590 if (STR_EMBED_P(tmp)) {
1593 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1599 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1603 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1604 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1609 STR_SET_LEN(tmp, 0);
1617 return str_new_frozen_buffer(klass, orig, TRUE);
1627 VALUE str = str_alloc_heap(klass);
1628 STR_SET_LEN(str, RSTRING_LEN(orig));
1629 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1630 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1631 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1632 RBASIC(orig)->flags &= ~STR_NOFREE;
1633 STR_SET_SHARED(orig, str);
/* Builds a string of class klass carrying the bytes of orig.
 *
 * When copy_encoding is zero, ASCII-8BIT and a terminator length of 1 are
 * used in place of the encoding and TERM_LEN() of orig, and the final
 * rb_enc_cr_str_exact_copy() is skipped.
 *
 * Embedded or embeddable content is copied through str_enc_new().  The
 * other visible paths either derive the result from a shared string
 * (advancing the pointer by ofs and shortening the length by ofs + rest),
 * copy the bytes into a fresh string, or call heap_str_make_shared().
 *
 * NOTE(review): the tests selecting among the later paths, the definition
 * of `shared`, and the return are only partly visible in this excerpt. */
1640str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1644 long len = RSTRING_LEN(orig);
1645 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1646 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1648 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1649 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1655 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1656 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1662 if ((ofs > 0) || (rest > 0) ||
1665 str = str_new_shared(klass,
shared);
1667 RSTRING(str)->as.heap.ptr += ofs;
1668 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1676 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1677 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1679 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1680 STR_SET_LEN(str, RSTRING_LEN(orig));
1686 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1689 str = heap_str_make_shared(klass, orig);
1694 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1706str_new_empty_String(
VALUE str)
1709 rb_enc_copy(v, str);
/* Lower bound applied to a requested buffer capacity: a later check in this
 * file raises capa to this value when it is smaller. */
1713#define STR_BUF_MIN_SIZE 63
1718 if (STR_EMBEDDABLE_P(
capa, 1)) {
1726 RSTRING(str)->as.heap.ptr[0] =
'\0';
1746 return str_new(0, 0,
len);
1752 if (STR_EMBED_P(str)) {
1753 RB_DEBUG_COUNTER_INC(obj_str_embed);
1755 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1756 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1757 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1760 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1761 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/* Returns STR_HEAP_SIZE(str) for a string that is non-embedded and neither
 * shared nor no-free.  The result for every other string is not visible in
 * this excerpt. */
1766rb_str_memsize(
VALUE str)
1768 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1769 return STR_HEAP_SIZE(str);
1779 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1782static inline void str_discard(
VALUE str);
1783static void str_shared_replace(
VALUE str,
VALUE str2);
1788 if (str != str2) str_shared_replace(str, str2);
1799 enc = STR_ENC_GET(str2);
1802 termlen = rb_enc_mbminlen(enc);
1804 STR_SET_LEN(str, RSTRING_LEN(str2));
1806 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1808 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1809 rb_enc_associate(str, enc);
1813 if (STR_EMBED_P(str2)) {
1815 long len = RSTRING_LEN(str2);
1818 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1819 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1820 RSTRING(str2)->as.heap.ptr = new_ptr;
1821 STR_SET_LEN(str2,
len);
1823 STR_SET_NOEMBED(str2);
1826 STR_SET_NOEMBED(str);
1828 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1830 if (
FL_TEST(str2, STR_SHARED)) {
1832 STR_SET_SHARED(str,
shared);
1835 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1839 STR_SET_EMBED(str2);
1840 RSTRING_PTR(str2)[0] = 0;
1841 STR_SET_LEN(str2, 0);
1842 rb_enc_associate(str, enc);
1856 return rb_obj_as_string_result(str, obj);
1872 len = RSTRING_LEN(str2);
1873 if (STR_SHARED_P(str2)) {
1876 STR_SET_NOEMBED(str);
1877 STR_SET_LEN(str,
len);
1878 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1879 STR_SET_SHARED(str,
shared);
1880 rb_enc_cr_str_exact_copy(str, str2);
1883 str_replace_shared(str, str2);
1892 size_t size = rb_str_embed_size(
capa, 0);
1896 NEWOBJ_OF(str,
struct RString, klass,
1907 NEWOBJ_OF(str,
struct RString, klass,
1910 str->as.heap.aux.capa = 0;
1911 str->as.heap.ptr = NULL;
1921 encidx = rb_enc_get_index(str);
1922 flags &= ~ENCODING_MASK;
1925 if (encidx) rb_enc_associate_index(dup, encidx);
1935 long len = RSTRING_LEN(str);
1940 STR_SET_LEN(dup, RSTRING_LEN(str));
1941 return str_duplicate_setup_encoding(str, dup, flags);
1950 root =
RSTRING(str)->as.heap.aux.shared;
1953 root = str = str_new_frozen(klass, str);
1959 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1960 FL_SET(root, STR_SHARED_ROOT);
1962 flags |= RSTRING_NOEMBED | STR_SHARED;
1964 STR_SET_LEN(dup, RSTRING_LEN(str));
1965 return str_duplicate_setup_encoding(str, dup, flags);
1971 if (STR_EMBED_P(str)) {
1972 return str_duplicate_setup_embed(klass, str, dup);
1975 return str_duplicate_setup_heap(klass, str, dup);
1983 if (STR_EMBED_P(str)) {
1984 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1987 dup = str_alloc_heap(klass);
1990 return str_duplicate_setup(klass, str, dup);
2001rb_str_dup_m(
VALUE str)
2003 if (LIKELY(BARE_STRING_P(str))) {
2014 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2021 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2025 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2026 str_duplicate_setup_embed(klass, str, new_str);
2029 new_str = ec_str_alloc_heap(ec, klass);
2030 str_duplicate_setup_heap(klass, str, new_str);
2039rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2041 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2065 static ID keyword_ids[2];
2066 VALUE orig, opt, venc, vcapa;
2071 if (!keyword_ids[0]) {
2072 keyword_ids[0] = rb_id_encoding();
2073 CONST_ID(keyword_ids[1],
"capacity");
2081 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2082 enc = rb_to_encoding(venc);
2084 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2087 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2089 if (
capa < STR_BUF_MIN_SIZE) {
2090 capa = STR_BUF_MIN_SIZE;
2094 len = RSTRING_LEN(orig);
2098 if (orig == str) n = 0;
2100 str_modifiable(str);
2101 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2103 const size_t size = (size_t)
capa + termlen;
2104 const char *
const old_ptr = RSTRING_PTR(str);
2105 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2106 char *new_ptr =
ALLOC_N(
char, size);
2107 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2108 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2110 RSTRING(str)->as.heap.ptr = new_ptr;
2112 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2113 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2114 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2116 STR_SET_LEN(str,
len);
2119 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2120 rb_enc_cr_str_exact_copy(str, orig);
2122 FL_SET(str, STR_NOEMBED);
2129 rb_enc_associate(str, enc);
2141rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2147 static ID keyword_ids[2];
2157 keyword_ids[0] = rb_id_encoding();
2158 CONST_ID(keyword_ids[1],
"capacity");
2160 encoding = kwargs[0];
2161 capacity = kwargs[1];
2170 if (UNDEF_P(encoding)) {
2172 encoding = rb_obj_encoding(orig);
2176 if (!UNDEF_P(encoding)) {
2177 enc = rb_to_encoding(encoding);
2181 if (UNDEF_P(capacity)) {
2183 VALUE empty_str = str_new(klass,
"", 0);
2185 rb_enc_associate(empty_str, enc);
2189 VALUE copy = str_duplicate(klass, orig);
2190 rb_enc_associate(copy, enc);
2203 if (orig_capa >
capa) {
2208 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2209 STR_SET_LEN(str, 0);
/* True for any byte whose top two bits are not 10, that is, any byte that
 * is not of the continuation form 10xxxxxx. */
2220#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
/* Word-at-a-time counterpart of is_utf8_lead_byte().  After the two bit
 * steps, the lowest bit of each byte of d is (bit 6) OR NOT (bit 7) of the
 * corresponding input byte, which is 1 exactly when that byte is not of the
 * form 10xxxxxx; the mask NONASCII_MASK >> 7 keeps only those bits.  The set
 * bits are then counted, using rb_popcount_intptr() when the popcount
 * builtin and __POPCNT__ are available.
 * NOTE(review): the load of *s into d and the fallback summation are not
 * visible in this excerpt. */
2235static inline uintptr_t
2236count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2241 d = (d>>6) | (~d>>7);
2242 d &= NONASCII_MASK >> 7;
2245#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2247 return rb_popcount_intptr(d);
2251# if SIZEOF_VOIDP == 8
/* Counts the characters in the byte range [p, e) for encoding enc.
 *
 * Strategies visible in this excerpt:
 *  - byte length divided by rb_enc_mbminlen(enc), rounded up;
 *  - lead-byte counting with is_utf8_lead_byte(), switching to
 *    count_utf8_lead_bytes_with_word() over the aligned middle part when
 *    the range is longer than two words;
 *  - for ASCII-compatible encodings, locating non-ASCII bytes with
 *    search_nonascii() and stepping with rb_enc_fast_mbclen() or
 *    rb_enc_mbclen();
 *  - otherwise one rb_enc_mbclen() step per counted character.
 *
 * NOTE(review): the conditions that select each strategy (including how cr
 * is consulted) and the returns are not visible in this excerpt. */
2260enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2266 long diff = (long)(e - p);
2267 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2272 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2273 const uintptr_t *s, *t;
2274 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2275 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2276 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2277 while (p < (
const char *)s) {
2278 if (is_utf8_lead_byte(*p))
len++;
2282 len += count_utf8_lead_bytes_with_word(s);
2285 p = (
const char *)s;
2288 if (is_utf8_lead_byte(*p))
len++;
2294 else if (rb_enc_asciicompat(enc)) {
2299 q = search_nonascii(p, e);
2305 p += rb_enc_fast_mbclen(p, e, enc);
2312 q = search_nonascii(p, e);
2318 p += rb_enc_mbclen(p, e, enc);
2325 for (c=0; p<e; c++) {
2326 p += rb_enc_mbclen(p, e, enc);
/* Counts the characters in [p, e) for encoding enc, validating each
 * multibyte character with rb_enc_precise_mbclen().  cr is an out
 * parameter (presumably receiving the detected code range -- confirm).
 *
 * Visible paths: byte length divided by rb_enc_mbminlen(enc), rounded up;
 * an ASCII-compatible path that skips ahead with search_nonascii(); and a
 * generic loop that advances by rb_enc_mbminlen(enc) where a full minimal
 * unit still fits before e.
 *
 * NOTE(review): the selecting conditions, the handling of ret and the
 * writes to *cr are not visible in this excerpt. */
2341rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2349 long diff = (long)(e - p);
2350 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2352 else if (rb_enc_asciicompat(enc)) {
2356 q = search_nonascii(p, e);
2364 ret = rb_enc_precise_mbclen(p, e, enc);
2379 for (c=0; p<e; c++) {
2380 ret = rb_enc_precise_mbclen(p, e, enc);
2387 if (p + rb_enc_mbminlen(enc) <= e)
2388 p += rb_enc_mbminlen(enc);
2404 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2405 if (!enc) enc = STR_ENC_GET(str);
2406 p = RSTRING_PTR(str);
2411 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2416 return enc_strlen(p, e, enc, cr);
2423 return str_strlen(str, NULL);
2437 return LONG2NUM(str_strlen(str, NULL));
2449rb_str_bytesize(
VALUE str)
/* Returns the RBOOL() of whether the byte length of str is 0. */
2468rb_str_empty(
VALUE str)
2470 return RBOOL(RSTRING_LEN(str) == 0);
2489 char *ptr1, *ptr2, *ptr3;
2494 enc = rb_enc_check_str(str1, str2);
2497 termlen = rb_enc_mbminlen(enc);
2498 if (len1 > LONG_MAX - len2) {
2499 rb_raise(rb_eArgError,
"string size too big");
2501 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2502 ptr3 = RSTRING_PTR(str3);
2503 memcpy(ptr3, ptr1, len1);
2504 memcpy(ptr3+len1, ptr2, len2);
2505 TERM_FILL(&ptr3[len1+len2], termlen);
2521 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2524 int enc1 = rb_enc_get_index(str1);
2525 int enc2 = rb_enc_get_index(str2);
2530 else if (enc2 < 0) {
2533 else if (enc1 != enc2) {
2536 else if (len1 > LONG_MAX - len2) {
2570 rb_enc_copy(str2, str);
2575 rb_raise(rb_eArgError,
"negative argument");
2577 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2578 if (STR_EMBEDDABLE_P(
len, 1)) {
2580 memset(RSTRING_PTR(str2), 0,
len + 1);
2587 STR_SET_LEN(str2,
len);
2588 rb_enc_copy(str2, str);
2591 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2592 rb_raise(rb_eArgError,
"argument too big");
2595 len *= RSTRING_LEN(str);
2596 termlen = TERM_LEN(str);
2598 ptr2 = RSTRING_PTR(str2);
2600 n = RSTRING_LEN(str);
2601 memcpy(ptr2, RSTRING_PTR(str), n);
2602 while (n <=
len/2) {
2603 memcpy(ptr2 + n, ptr2, n);
2606 memcpy(ptr2 + n, ptr2,
len-n);
2608 STR_SET_LEN(str2,
len);
2609 TERM_FILL(&ptr2[
len], termlen);
2610 rb_enc_cr_str_copy_for_substr(str2, str);
/* Tests whether str carries STR_TMPLOCK.
 * NOTE(review): the action taken when the flag is set is not visible in
 * this excerpt. */
2647rb_check_lockedtmp(
VALUE str)
2649 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Flags that make str_modifiable() leave its fast path: frozen, temporarily
 * locked, or chilled. */
2656#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
/* Checks that str may be modified.  Nothing visible happens unless one of
 * the STR_UNMODIFIABLE_MASK flags is set.  In that case a chilled string is
 * reported through CHILLED_STRING_MUTATED(), and the tmplock and frozen
 * checks are run.
 * NOTE(review): whether those two checks also run for a chilled string is
 * not determinable from this excerpt. */
2658str_modifiable(
VALUE str)
2662 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2663 if (CHILLED_STRING_P(str)) {
2664 CHILLED_STRING_MUTATED(str);
2666 rb_check_lockedtmp(str);
2667 rb_check_frozen(str);
/* Classifies str by buffer ownership.  The visible test singles out strings
 * that are embedded or carry neither STR_SHARED nor STR_NOFREE.
 * NOTE(review): the values returned on each side of the test are not
 * visible in this excerpt. */
2672str_dependent_p(
VALUE str)
2674 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK plus the STR_SHARED and STR_NOFREE flags; used by
 * str_independent() to decide whether its slow path is needed. */
2684#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
/* When any STR_DEPENDANT_MASK flag is set, runs the str_modifiable() checks
 * and returns the negation of str_dependent_p(str).
 * NOTE(review): the return for a string with none of those flags is not
 * visible in this excerpt. */
2686str_independent(
VALUE str)
2690 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2691 str_modifiable(str);
2692 return !str_dependent_p(str);
/* Gives str a buffer of its own holding its first len bytes.
 *
 * Visible steps: a test whether a non-embedded str would fit its embed
 * capacity (capa + termlen); a copy of len bytes from the old pointer into
 * ptr; a test identifying a plain owned heap buffer (STR_NOEMBED set,
 * STR_NOFREE and STR_SHARED clear); then the string is marked non-embedded
 * with STR_SHARED and STR_NOFREE cleared, terminated at ptr + len, and its
 * length set to len.
 *
 * NOTE(review): the computation of capa from len and expand, the allocation
 * of ptr, the handling of the old buffer and the pointer store are not
 * visible in this excerpt. */
2698str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2708 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2713 STR_SET_LEN(str,
len);
2718 oldptr = RSTRING_PTR(str);
2720 memcpy(
ptr, oldptr,
len);
2722 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2725 STR_SET_NOEMBED(str);
2726 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2727 TERM_FILL(
ptr +
len, termlen);
2729 STR_SET_LEN(str,
len);
2736 if (!str_independent(str))
2737 str_make_independent(str);
2746 int termlen = TERM_LEN(str);
2747 long len = RSTRING_LEN(str);
2750 rb_raise(rb_eArgError,
"negative expanding string size");
2752 if (expand >= LONG_MAX -
len) {
2753 rb_raise(rb_eArgError,
"string size too big");
2756 if (!str_independent(str)) {
2757 str_make_independent_expand(str,
len, expand, termlen);
2759 else if (expand > 0) {
2760 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2767str_modify_keep_cr(
VALUE str)
2769 if (!str_independent(str))
2770 str_make_independent(str);
2777str_discard(
VALUE str)
2779 str_modifiable(str);
2780 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2781 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2782 RSTRING(str)->as.heap.ptr = 0;
2783 STR_SET_LEN(str, 0);
2790 int encindex = rb_enc_get_index(str);
2792 if (RB_UNLIKELY(encindex == -1)) {
2796 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2801 if (!rb_enc_asciicompat(enc)) {
2823 return RSTRING_PTR(str);
2827zero_filled(
const char *s,
int n)
2829 for (; n > 0; --n) {
2836str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2838 const char *e = s +
len;
2840 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2841 if (zero_filled(s, minlen))
return s;
2847str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2852 if (str_dependent_p(str)) {
2853 if (!zero_filled(s +
len, termlen))
2854 str_make_independent_expand(str,
len, 0L, termlen);
2857 TERM_FILL(s +
len, termlen);
2860 return RSTRING_PTR(str);
2864rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2866 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2867 long len = RSTRING_LEN(str);
2871 rb_check_lockedtmp(str);
2872 str_make_independent_expand(str,
len, 0L, termlen);
2874 else if (str_dependent_p(str)) {
2875 if (termlen > oldtermlen)
2876 str_make_independent_expand(str,
len, 0L, termlen);
2879 if (!STR_EMBED_P(str)) {
2884 if (termlen > oldtermlen) {
2885 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2893str_null_check(
VALUE str,
int *w)
2895 char *s = RSTRING_PTR(str);
2896 long len = RSTRING_LEN(str);
2898 const int minlen = rb_enc_mbminlen(enc);
2902 if (str_null_char(s,
len, minlen, enc)) {
2905 return str_fill_term(str, s,
len, minlen);
2908 if (!s || memchr(s, 0,
len)) {
2912 s = str_fill_term(str, s,
len, minlen);
2918rb_str_to_cstr(
VALUE str)
2921 return str_null_check(str, &w);
2929 char *s = str_null_check(str, &w);
2932 rb_raise(rb_eArgError,
"string contains null char");
2934 rb_raise(rb_eArgError,
"string contains null byte");
2940rb_str_fill_terminator(
VALUE str,
const int newminlen)
2942 char *s = RSTRING_PTR(str);
2943 long len = RSTRING_LEN(str);
2944 return str_fill_term(str, s,
len, newminlen);
2950 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2976str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2985 else if (rb_enc_asciicompat(enc)) {
2986 const char *p2, *e2;
2989 while (p < e && 0 < nth) {
2996 p2 = search_nonascii(p, e2);
3005 n = rb_enc_mbclen(p, e, enc);
3016 while (p < e && nth--) {
3017 p += rb_enc_mbclen(p, e, enc);
3028 return str_nth_len(p, e, &nth, enc);
3032str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3037 p = str_nth_len(p, e, &nth, enc);
3046str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3048 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3049 if (!pp)
return e - p;
3056 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3057 STR_ENC_GET(str), single_byte_optimizable(str));
3062str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3065 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3066 const uintptr_t *s, *t;
3067 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3068 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3069 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3070 while (p < (
const char *)s) {
3071 if (is_utf8_lead_byte(*p)) nth--;
3075 nth -= count_utf8_lead_bytes_with_word(s);
3077 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3081 if (is_utf8_lead_byte(*p)) {
3082 if (nth == 0)
break;
3092str_utf8_offset(
const char *p,
const char *e,
long nth)
3094 const char *pp = str_utf8_nth(p, e, &nth);
3103 if (single_byte_optimizable(str) || pos < 0)
3106 char *p = RSTRING_PTR(str);
3107 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3112str_subseq(
VALUE str,
long beg,
long len)
3120 const int termlen = TERM_LEN(str);
3121 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3128 if (str_embed_capa(str2) >=
len + termlen) {
3129 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3130 STR_SET_EMBED(str2);
3131 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3132 TERM_FILL(ptr2+
len, termlen);
3134 STR_SET_LEN(str2,
len);
3138 str_replace_shared(str2, str);
3141 RSTRING(str2)->as.heap.ptr += beg;
3142 if (RSTRING_LEN(str2) >
len) {
3143 STR_SET_LEN(str2,
len);
3153 VALUE str2 = str_subseq(str, beg,
len);
3154 rb_enc_cr_str_copy_for_substr(str2, str);
3163 const long blen = RSTRING_LEN(str);
3165 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3167 if (
len < 0)
return 0;
3168 if (beg < 0 && -beg < 0)
return 0;
3172 if (single_byte_optimizable(str)) {
3173 if (beg > blen)
return 0;
3176 if (beg < 0)
return 0;
3178 if (
len > blen - beg)
3180 if (
len < 0)
return 0;
3185 if (
len > -beg)
len = -beg;
3189 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3192 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3198 slen = str_strlen(str, enc);
3200 if (beg < 0)
return 0;
3202 if (
len == 0)
goto end;
3205 else if (beg > 0 && beg > blen) {
3209 if (beg > str_strlen(str, enc))
return 0;
3214 enc == rb_utf8_encoding()) {
3215 p = str_utf8_nth(s, e, &beg);
3216 if (beg > 0)
return 0;
3217 len = str_utf8_offset(p, e,
len);
3223 p = s + beg * char_sz;
3227 else if (
len * char_sz > e - p)
3232 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3233 if (beg > 0)
return 0;
3237 len = str_offset(p, e,
len, enc, 0);
3245static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3250 return str_substr(str, beg,
len, TRUE);
3260str_substr(
VALUE str,
long beg,
long len,
int empty)
3264 if (!p)
return Qnil;
3265 if (!
len && !empty)
return Qnil;
3267 beg = p - RSTRING_PTR(str);
3269 VALUE str2 = str_subseq(str, beg,
len);
3270 rb_enc_cr_str_copy_for_substr(str2, str);
3278 if (CHILLED_STRING_P(str)) {
3283 rb_str_resize(str, RSTRING_LEN(str));
3301 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3344str_uminus(
VALUE str)
3349 return rb_fstring(str);
3353#define rb_str_dup_frozen rb_str_new_frozen
3358 rb_check_frozen(str);
3359 if (
FL_TEST(str, STR_TMPLOCK)) {
3362 FL_SET(str, STR_TMPLOCK);
3369 rb_check_frozen(str);
3370 if (!
FL_TEST(str, STR_TMPLOCK)) {
3390 const int termlen = TERM_LEN(str);
3392 str_modifiable(str);
3393 if (STR_SHARED_P(str)) {
3396 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3397 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3408 else if (
len > RSTRING_LEN(str)) {
3412 const char *
const new_end = RSTRING_PTR(str) +
len;
3422 else if (
len < RSTRING_LEN(str)) {
3430 STR_SET_LEN(str,
len);
3431 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3438 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3441 int independent = str_independent(str);
3442 long slen = RSTRING_LEN(str);
3443 const int termlen = TERM_LEN(str);
3445 if (slen >
len || (termlen != 1 && slen <
len)) {
3451 if (STR_EMBED_P(str)) {
3452 if (
len == slen)
return str;
3453 if (str_embed_capa(str) >=
len + termlen) {
3454 STR_SET_LEN(str,
len);
3458 str_make_independent_expand(str, slen,
len - slen, termlen);
3460 else if (str_embed_capa(str) >=
len + termlen) {
3461 char *
ptr = STR_HEAP_PTR(str);
3463 if (slen >
len) slen =
len;
3466 STR_SET_LEN(str,
len);
3467 if (independent) ruby_xfree(
ptr);
3470 else if (!independent) {
3471 if (
len == slen)
return str;
3472 str_make_independent_expand(str, slen,
len - slen, termlen);
3476 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3477 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3480 else if (
len == slen)
return str;
3481 STR_SET_LEN(str,
len);
3488str_ensure_available_capa(
VALUE str,
long len)
3490 str_modify_keep_cr(str);
3492 const int termlen = TERM_LEN(str);
3493 long olen = RSTRING_LEN(str);
3495 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3496 rb_raise(rb_eArgError,
"string sizes too big");
3499 long total = olen +
len;
3500 long capa = str_capacity(str, termlen);
3503 if (total >= LONG_MAX / 2) {
3506 while (total >
capa) {
3509 RESIZE_CAPA_TERM(str,
capa, termlen);
3514str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3517 str_modify_keep_cr(str);
3522 if (
len == 0)
return 0;
3524 long total, olen,
off = -1;
3526 const int termlen = TERM_LEN(str);
3529 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3533 long capa = str_capacity(str, termlen);
3535 if (olen > LONG_MAX -
len) {
3536 rb_raise(rb_eArgError,
"string sizes too big");
3540 if (total >= LONG_MAX / 2) {
3543 while (total >
capa) {
3546 RESIZE_CAPA_TERM(str,
capa, termlen);
3547 sptr = RSTRING_PTR(str);
3552 memcpy(sptr + olen,
ptr,
len);
3553 STR_SET_LEN(str, total);
3554 TERM_FILL(sptr + total, termlen);
3559#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3560#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3565 if (
len == 0)
return str;
3567 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3569 return str_buf_cat(str,
ptr,
len);
3580rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3585 if (UNLIKELY(!str_independent(str))) {
3586 str_make_independent(str);
3589 long string_length = -1;
3590 const int null_terminator_length = 1;
3595 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3596 rb_raise(rb_eArgError,
"string sizes too big");
3599 long string_capacity = str_capacity(str, null_terminator_length);
3605 if (LIKELY(string_capacity >= string_length + 1)) {
3607 sptr[string_length] = byte;
3608 STR_SET_LEN(str, string_length + 1);
3609 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3613 str_buf_cat(str, (
char *)&
byte, 1);
3629 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3640rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3641 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3650 if (str_encindex == ptr_encindex) {
3652 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3656 str_enc = rb_enc_from_index(str_encindex);
3657 ptr_enc = rb_enc_from_index(ptr_encindex);
3658 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3661 if (RSTRING_LEN(str) == 0) {
3664 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3670 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3679 *ptr_cr_ret = ptr_cr;
3681 if (str_encindex != ptr_encindex &&
3684 str_enc = rb_enc_from_index(str_encindex);
3685 ptr_enc = rb_enc_from_index(ptr_encindex);
3690 res_encindex = str_encindex;
3695 res_encindex = str_encindex;
3699 res_encindex = ptr_encindex;
3704 res_encindex = str_encindex;
3711 res_encindex = str_encindex;
3717 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3719 str_buf_cat(str,
ptr,
len);
3725 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3732 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3742 if (rb_enc_asciicompat(enc)) {
3743 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3749 unsigned int c = (
unsigned char)*
ptr;
3750 int len = rb_enc_codelen(c, enc);
3751 rb_enc_mbcput(c, buf, enc);
3752 rb_enc_cr_str_buf_cat(str, buf,
len,
3765 if (str_enc_fastpath(str)) {
3769 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3775 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3786 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3802rb_str_concat_literals(
size_t num,
const VALUE *strary)
3806 unsigned long len = 1;
3811 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3813 str_enc_copy_direct(str, strary[0]);
3815 for (i = s; i < num; ++i) {
3816 const VALUE v = strary[i];
3820 if (encidx != ENCINDEX_US_ASCII) {
3822 rb_enc_set_index(str, encidx);
3835rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3837 str_modifiable(str);
3842 else if (argc > 1) {
3845 rb_enc_copy(arg_str, str);
3846 for (i = 0; i < argc; i++) {
3881rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3883 long needed_capacity = 0;
3887 for (
int index = 0; index < argc; index++) {
3888 VALUE obj = argv[index];
3896 needed_capacity += RSTRING_LEN(obj);
3901 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3908 str_ensure_available_capa(str, needed_capacity);
3911 for (
int index = 0; index < argc; index++) {
3912 VALUE obj = argv[index];
3917 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3918 char byte = (char)(
NUM2INT(obj) & 0xFF);
3932 rb_bug(
"append_as_bytes arguments should have been validated");
3936 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3937 TERM_FILL(sptr, TERM_LEN(str));
3942 for (
int index = 0; index < argc; index++) {
3943 VALUE obj = argv[index];
3960 rb_bug(
"append_as_bytes arguments should have been validated");
4039 if (rb_num_to_uint(str2, &code) == 0) {
4052 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4055 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4058 long pos = RSTRING_LEN(str1);
4063 switch (
len = rb_enc_codelen(code, enc)) {
4064 case ONIGERR_INVALID_CODE_POINT_VALUE:
4065 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4067 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4073 rb_enc_mbcput(code, buf, enc);
4074 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4075 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4077 rb_str_resize(str1, pos+
len);
4078 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4091rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4093 int encidx = rb_enc_to_index(enc);
4095 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4100 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4101 return ENCINDEX_ASCII_8BIT;
4123rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4125 str_modifiable(str);
4130 else if (argc > 1) {
4133 rb_enc_copy(arg_str, str);
4134 for (i = 0; i < argc; i++) {
4147 st_index_t precomputed_hash;
4148 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4150 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4151 return precomputed_hash;
4154 return str_do_hash(str);
4161 const char *ptr1, *ptr2;
4164 return (len1 != len2 ||
4166 memcmp(ptr1, ptr2, len1) != 0);
4178rb_str_hash_m(
VALUE str)
4184#define lesser(a,b) (((a)>(b))?(b):(a))
4192 if (RSTRING_LEN(str1) == 0)
return TRUE;
4193 if (RSTRING_LEN(str2) == 0)
return TRUE;
4196 if (idx1 == idx2)
return TRUE;
4201 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4205 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4215 const char *ptr1, *ptr2;
4218 if (str1 == str2)
return 0;
4221 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4230 if (len1 > len2)
return 1;
4233 if (retval > 0)
return 1;
4267 if (str1 == str2)
return Qtrue;
4274 return rb_str_eql_internal(str1, str2);
4288 if (str1 == str2)
return Qtrue;
4290 return rb_str_eql_internal(str1, str2);
4322 return rb_invcmp(str1, str2);
4364 return str_casecmp(str1, s);
4372 const char *p1, *p1end, *p2, *p2end;
4374 enc = rb_enc_compatible(str1, str2);
4379 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4380 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4381 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4382 while (p1 < p1end && p2 < p2end) {
4384 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4385 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4387 return INT2FIX(c1 < c2 ? -1 : 1);
4394 while (p1 < p1end && p2 < p2end) {
4395 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4396 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4398 if (0 <= c1 && 0 <= c2) {
4402 return INT2FIX(c1 < c2 ? -1 : 1);
4406 l1 = rb_enc_mbclen(p1, p1end, enc);
4407 l2 = rb_enc_mbclen(p2, p2end, enc);
4408 len = l1 < l2 ? l1 : l2;
4409 r = memcmp(p1, p2,
len);
4411 return INT2FIX(r < 0 ? -1 : 1);
4413 return INT2FIX(l1 < l2 ? -1 : 1);
4419 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4420 if (p1 == p1end)
return INT2FIX(-1);
4453 return str_casecmp_p(str1, s);
4460 VALUE folded_str1, folded_str2;
4461 VALUE fold_opt = sym_fold;
4463 enc = rb_enc_compatible(str1, str2);
4468 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4469 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4471 return rb_str_eql(folded_str1, folded_str2);
4475strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4476 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4478 const char *search_start = str_ptr;
4479 long pos, search_len = str_len - offset;
4483 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4484 if (pos < 0)
return pos;
4486 if (t == search_start + pos)
break;
4487 search_len -= t - search_start;
4488 if (search_len <= 0)
return -1;
4489 offset += t - search_start;
4492 return pos + offset;
4496#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4497#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4500rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4502 const char *str_ptr, *str_ptr_end, *sub_ptr;
4503 long str_len, sub_len;
4506 enc = rb_enc_check(str, sub);
4507 if (is_broken_string(sub))
return -1;
4509 str_ptr = RSTRING_PTR(str);
4511 str_len = RSTRING_LEN(str);
4512 sub_ptr = RSTRING_PTR(sub);
4513 sub_len = RSTRING_LEN(sub);
4515 if (str_len < sub_len)
return -1;
4518 long str_len_char, sub_len_char;
4519 int single_byte = single_byte_optimizable(str);
4520 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4521 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4523 offset += str_len_char;
4524 if (offset < 0)
return -1;
4526 if (str_len_char - offset < sub_len_char)
return -1;
4527 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4530 if (sub_len == 0)
return offset;
4533 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4546rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4553 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4554 long slen = str_strlen(str, enc);
4556 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4568 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4569 enc, single_byte_optimizable(str));
4580 pos = rb_str_index(str, sub, pos);
4594str_ensure_byte_pos(
VALUE str,
long pos)
4596 if (!single_byte_optimizable(str)) {
4597 const char *s = RSTRING_PTR(str);
4599 const char *p = s + pos;
4600 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4602 "offset %ld does not land on character boundary", pos);
4675rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4681 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4682 long slen = RSTRING_LEN(str);
4684 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4695 str_ensure_byte_pos(str, pos);
4707 pos = rb_str_byteindex(str, sub, pos);
4708 if (pos >= 0)
return LONG2NUM(pos);
4715memrchr(
const char *search_str,
int chr,
long search_len)
4717 const char *ptr = search_str + search_len;
4718 while (ptr > search_str) {
4719 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4729 char *hit, *adjusted;
4731 long slen, searchlen;
4734 sbeg = RSTRING_PTR(str);
4735 slen = RSTRING_LEN(sub);
4736 if (slen == 0)
return s - sbeg;
4738 t = RSTRING_PTR(sub);
4740 searchlen = s - sbeg + 1;
4742 if (memcmp(s, t, slen) == 0) {
4747 hit = memrchr(sbeg, c, searchlen);
4750 if (hit != adjusted) {
4751 searchlen = adjusted - sbeg;
4754 if (memcmp(hit, t, slen) == 0)
4756 searchlen = adjusted - sbeg;
4757 }
while (searchlen > 0);
4771 enc = rb_enc_check(str, sub);
4772 if (is_broken_string(sub))
return -1;
4773 singlebyte = single_byte_optimizable(str);
4774 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4775 slen = str_strlen(sub, enc);
4778 if (
len < slen)
return -1;
4779 if (
len - pos < slen) pos =
len - slen;
4780 if (
len == 0)
return pos;
4782 sbeg = RSTRING_PTR(str);
4785 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4791 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4792 return str_rindex(str, sub, s, enc);
4804rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4809 long pos,
len = str_strlen(str, enc);
4811 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4813 if (pos < 0 && (pos +=
len) < 0) {
4819 if (pos >
len) pos =
len;
4827 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4828 enc, single_byte_optimizable(str));
4839 pos = rb_str_rindex(str, sub, pos);
4849rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4855 enc = rb_enc_check(str, sub);
4856 if (is_broken_string(sub))
return -1;
4857 len = RSTRING_LEN(str);
4858 slen = RSTRING_LEN(sub);
4861 if (
len < slen)
return -1;
4862 if (
len - pos < slen) pos =
len - slen;
4863 if (
len == 0)
return pos;
4865 sbeg = RSTRING_PTR(str);
4868 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4875 return str_rindex(str, sub, s, enc);
4965rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4969 long pos,
len = RSTRING_LEN(str);
4971 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4973 if (pos < 0 && (pos +=
len) < 0) {
4979 if (pos >
len) pos =
len;
4985 str_ensure_byte_pos(str, pos);
4997 pos = rb_str_byterindex(str, sub, pos);
4998 if (pos >= 0)
return LONG2NUM(pos);
5037 switch (OBJ_BUILTIN_TYPE(y)) {
5091rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5098 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5129rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5133 re = get_pat(argv[0]);
5134 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5143static enum neighbor_char
5149 if (rb_enc_mbminlen(enc) > 1) {
5151 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5153 return NEIGHBOR_NOT_CHAR;
5155 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5157 if (!l)
return NEIGHBOR_NOT_CHAR;
5158 if (l !=
len)
return NEIGHBOR_WRAPPED;
5159 rb_enc_mbcput(c, p, enc);
5160 r = rb_enc_precise_mbclen(p, p +
len, enc);
5162 return NEIGHBOR_NOT_CHAR;
5164 return NEIGHBOR_FOUND;
5167 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5170 return NEIGHBOR_WRAPPED;
5171 ++((
unsigned char*)p)[i];
5172 l = rb_enc_precise_mbclen(p, p+
len, enc);
5176 return NEIGHBOR_FOUND;
5179 memset(p+l, 0xff,
len-l);
5185 for (len2 =
len-1; 0 < len2; len2--) {
5186 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5190 memset(p+len2+1, 0xff,
len-(len2+1));
5195static enum neighbor_char
5200 if (rb_enc_mbminlen(enc) > 1) {
5202 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5204 return NEIGHBOR_NOT_CHAR;
5206 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5207 if (!c)
return NEIGHBOR_NOT_CHAR;
5210 if (!l)
return NEIGHBOR_NOT_CHAR;
5211 if (l !=
len)
return NEIGHBOR_WRAPPED;
5212 rb_enc_mbcput(c, p, enc);
5213 r = rb_enc_precise_mbclen(p, p +
len, enc);
5215 return NEIGHBOR_NOT_CHAR;
5217 return NEIGHBOR_FOUND;
5220 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5223 return NEIGHBOR_WRAPPED;
5224 --((
unsigned char*)p)[i];
5225 l = rb_enc_precise_mbclen(p, p+
len, enc);
5229 return NEIGHBOR_FOUND;
5232 memset(p+l, 0,
len-l);
5238 for (len2 =
len-1; 0 < len2; len2--) {
5239 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5243 memset(p+len2+1, 0,
len-(len2+1));
5257static enum neighbor_char
5258enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5260 enum neighbor_char ret;
5264 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5268 const int max_gaps = 1;
5270 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5272 ctype = ONIGENC_CTYPE_DIGIT;
5274 ctype = ONIGENC_CTYPE_ALPHA;
5276 return NEIGHBOR_NOT_CHAR;
5279 for (
try = 0;
try <= max_gaps; ++
try) {
5280 ret = enc_succ_char(p,
len, enc);
5281 if (ret == NEIGHBOR_FOUND) {
5282 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5284 return NEIGHBOR_FOUND;
5291 ret = enc_pred_char(p,
len, enc);
5292 if (ret == NEIGHBOR_FOUND) {
5293 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5306 return NEIGHBOR_NOT_CHAR;
5309 if (ctype != ONIGENC_CTYPE_DIGIT) {
5311 return NEIGHBOR_WRAPPED;
5315 enc_succ_char(carry,
len, enc);
5316 return NEIGHBOR_WRAPPED;
5334 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5335 rb_enc_cr_str_copy_for_substr(str, orig);
5336 return str_succ(str);
5343 char *sbeg, *s, *e, *last_alnum = 0;
5344 int found_alnum = 0;
5346 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5347 long carry_pos = 0, carry_len = 1;
5348 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5350 slen = RSTRING_LEN(str);
5351 if (slen == 0)
return str;
5353 enc = STR_ENC_GET(str);
5354 sbeg = RSTRING_PTR(str);
5355 s = e = sbeg + slen;
5357 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5358 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5364 l = rb_enc_precise_mbclen(s, e, enc);
5365 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5366 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5367 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5369 case NEIGHBOR_NOT_CHAR:
5371 case NEIGHBOR_FOUND:
5373 case NEIGHBOR_WRAPPED:
5378 carry_pos = s - sbeg;
5383 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5384 enum neighbor_char neighbor;
5385 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5386 l = rb_enc_precise_mbclen(s, e, enc);
5387 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5388 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5390 neighbor = enc_succ_char(tmp, l, enc);
5392 case NEIGHBOR_FOUND:
5396 case NEIGHBOR_WRAPPED:
5399 case NEIGHBOR_NOT_CHAR:
5402 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5404 enc_succ_char(s, l, enc);
5406 if (!rb_enc_asciicompat(enc)) {
5407 MEMCPY(carry, s,
char, l);
5410 carry_pos = s - sbeg;
5414 RESIZE_CAPA(str, slen + carry_len);
5415 sbeg = RSTRING_PTR(str);
5416 s = sbeg + carry_pos;
5417 memmove(s + carry_len, s, slen - carry_pos);
5418 memmove(s, carry, carry_len);
5420 STR_SET_LEN(str, slen);
5421 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5437rb_str_succ_bang(
VALUE str)
5445all_digits_p(
const char *s,
long len)
5473 VALUE end, exclusive;
5477 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5483 VALUE current, after_end;
5490 enc = rb_enc_check(beg, end);
5491 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5493 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5494 char c = RSTRING_PTR(beg)[0];
5495 char e = RSTRING_PTR(end)[0];
5497 if (c > e || (excl && c == e))
return beg;
5499 VALUE str = rb_enc_str_new(&c, 1, enc);
5501 if ((*each)(str, arg))
break;
5502 if (!excl && c == e)
break;
5504 if (excl && c == e)
break;
5509 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5510 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5511 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5516 b = rb_str_to_inum(beg, 10, FALSE);
5517 e = rb_str_to_inum(end, 10, FALSE);
5524 if (excl && bi == ei)
break;
5525 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5530 ID op = excl ?
'<' : idLE;
5531 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5536 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5537 b = rb_funcallv(b, succ, 0, 0);
5544 if (n > 0 || (excl && n == 0))
return beg;
5546 after_end = rb_funcallv(end, succ, 0, 0);
5551 next = rb_funcallv(current, succ, 0, 0);
5552 if ((*each)(current, arg))
break;
5553 if (
NIL_P(next))
break;
5557 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5572 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5573 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5574 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5576 b = rb_str_to_inum(beg, 10, FALSE);
5582 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5590 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5591 b = rb_funcallv(b, succ, 0, 0);
5597 VALUE next = rb_funcallv(current, succ, 0, 0);
5598 if ((*each)(current, arg))
break;
5601 if (RSTRING_LEN(current) == 0)
5612 if (!
rb_equal(str, *argp))
return 0;
5626 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5627 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5628 rb_enc_asciicompat(STR_ENC_GET(val))) {
5629 const char *bp = RSTRING_PTR(beg);
5630 const char *ep = RSTRING_PTR(end);
5631 const char *vp = RSTRING_PTR(val);
5632 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5633 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5641 if (b <= v && v < e)
return Qtrue;
5642 return RBOOL(!
RTEST(exclusive) && v == e);
5649 all_digits_p(bp, RSTRING_LEN(beg)) &&
5650 all_digits_p(ep, RSTRING_LEN(end))) {
5655 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5657 return RBOOL(
NIL_P(val));
5680 return rb_str_subpat(str, indx,
INT2FIX(0));
5683 if (rb_str_index(str, indx, 0) != -1)
5689 long beg,
len = str_strlen(str, NULL);
5701 return str_substr(str, idx, 1, FALSE);
5718rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5722 return rb_str_subpat(str, argv[0], argv[1]);
5725 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5729 return rb_str_aref(str, argv[0]);
5735 char *ptr = RSTRING_PTR(str);
5736 long olen = RSTRING_LEN(str), nlen;
5738 str_modifiable(str);
5739 if (
len > olen)
len = olen;
5741 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5743 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5745 ptr =
RSTRING(str)->as.embed.ary;
5746 memmove(ptr, oldptr +
len, nlen);
5747 if (fl == STR_NOEMBED)
xfree(oldptr);
5750 if (!STR_SHARED_P(str)) {
5752 rb_enc_cr_str_exact_copy(shared, str);
5757 STR_SET_LEN(str, nlen);
5759 if (!SHARABLE_MIDDLE_SUBSTRING) {
5760 TERM_FILL(ptr + nlen, TERM_LEN(str));
5767rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5773 if (beg == 0 && vlen == 0) {
5778 str_modify_keep_cr(str);
5782 RESIZE_CAPA(str, slen + vlen -
len);
5783 sptr = RSTRING_PTR(str);
5792 memmove(sptr + beg + vlen,
5794 slen - (beg +
len));
5796 if (vlen < beg &&
len < 0) {
5800 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5803 STR_SET_LEN(str, slen);
5804 TERM_FILL(&sptr[slen], TERM_LEN(str));
5811 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5820 int singlebyte = single_byte_optimizable(str);
5826 enc = rb_enc_check(str, val);
5827 slen = str_strlen(str, enc);
5829 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5838 if (
len > slen - beg) {
5841 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5846 beg = p - RSTRING_PTR(str);
5848 rb_str_update_0(str, beg,
len, val);
5849 rb_enc_associate(str, enc);
5860 long start, end,
len;
5870 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5874 nth += regs->num_regs;
5884 enc = rb_enc_check_str(str, val);
5885 rb_str_update_0(str, start,
len, val);
5886 rb_enc_associate(str, enc);
5894 switch (
TYPE(indx)) {
5896 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5900 beg = rb_str_index(str, indx, 0);
5939rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5943 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5951 return rb_str_aset(str, argv[0], argv[1]);
6003rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6011 str_modify_keep_cr(str);
6019 if ((nth += regs->num_regs) <= 0)
return Qnil;
6021 else if (nth >= regs->num_regs)
return Qnil;
6023 len = END(nth) - beg;
6026 else if (argc == 2) {
6035 beg = p - RSTRING_PTR(str);
6039 beg = rb_str_index(str, indx, 0);
6040 if (beg == -1)
return Qnil;
6041 len = RSTRING_LEN(indx);
6053 beg = p - RSTRING_PTR(str);
6062 beg = p - RSTRING_PTR(str);
6066 rb_enc_cr_str_copy_for_substr(result, str);
6074 char *sptr = RSTRING_PTR(str);
6075 long slen = RSTRING_LEN(str);
6076 if (beg +
len > slen)
6080 slen - (beg +
len));
6082 STR_SET_LEN(str, slen);
6083 TERM_FILL(&sptr[slen], TERM_LEN(str));
6094 switch (OBJ_BUILTIN_TYPE(pat)) {
6113get_pat_quoted(
VALUE pat,
int check)
6117 switch (OBJ_BUILTIN_TYPE(pat)) {
6131 if (check && is_broken_string(pat)) {
6138rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6141 pos = rb_str_byteindex(str, pat, pos);
6142 if (set_backref_str) {
6144 str = rb_str_new_frozen_String(str);
6145 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6147 *match = match_data;
6157 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6162rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6164 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6182rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6196 hash = rb_check_hash_type(argv[1]);
6202 pat = get_pat_quoted(argv[0], 1);
6204 str_modifiable(str);
6205 beg = rb_pat_search(pat, str, 0, 1);
6219 end0 = beg0 + RSTRING_LEN(pat);
6228 if (iter || !
NIL_P(hash)) {
6229 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6235 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6238 str_mod_check(str, p,
len);
6239 rb_check_frozen(str);
6245 enc = rb_enc_compatible(str, repl);
6248 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6252 rb_enc_inspect_name(str_enc),
6253 rb_enc_inspect_name(STR_ENC_GET(repl)));
6255 enc = STR_ENC_GET(repl);
6258 rb_enc_associate(str, enc);
6268 rlen = RSTRING_LEN(repl);
6269 len = RSTRING_LEN(str);
6271 RESIZE_CAPA(str,
len + rlen - plen);
6273 p = RSTRING_PTR(str);
6275 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6277 rp = RSTRING_PTR(repl);
6278 memmove(p + beg0, rp, rlen);
6280 STR_SET_LEN(str,
len);
6281 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6304 rb_str_sub_bang(argc, argv, str);
6309str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6312 long beg, beg0, end0;
6313 long offset, blen, slen,
len, last;
6314 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6316 int need_backref_str = -1;
6326 hash = rb_check_hash_type(argv[1]);
6330 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6338 rb_error_arity(argc, 1, 2);
6341 pat = get_pat_quoted(argv[0], 1);
6342 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6345 if (bang)
return Qnil;
6350 blen = RSTRING_LEN(str) + 30;
6352 sp = RSTRING_PTR(str);
6353 slen = RSTRING_LEN(str);
6355 str_enc = STR_ENC_GET(str);
6356 rb_enc_associate(dest, str_enc);
6363 end0 = beg0 + RSTRING_LEN(pat);
6377 struct RString fake_str = {RBASIC_INIT};
6379 if (mode == FAST_MAP) {
6388 val = rb_hash_aref(hash, key);
6391 str_mod_check(str, sp, slen);
6396 else if (need_backref_str) {
6398 if (need_backref_str < 0) {
6399 need_backref_str = val != repl;
6406 len = beg0 - offset;
6420 if (RSTRING_LEN(str) <= end0)
break;
6421 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6423 offset = end0 +
len;
6425 cp = RSTRING_PTR(str) + offset;
6426 if (offset > RSTRING_LEN(str))
break;
6429 if (mode != FAST_MAP && mode != STR) {
6432 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6437 if (RSTRING_LEN(str) > offset) {
6440 rb_pat_search0(pat, str, last, 1, &match);
6442 str_shared_replace(str, dest);
6467rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6469 str_modify_keep_cr(str);
6470 return str_gsub(argc, argv, str, 1);
6520 return str_gsub(argc, argv, str, 0);
6540 str_modifiable(str);
6541 if (str == str2)
return str;
6545 return str_replace(str, str2);
6562rb_str_clear(
VALUE str)
6566 STR_SET_LEN(str, 0);
6567 RSTRING_PTR(str)[0] = 0;
6568 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6584rb_str_chr(
VALUE str)
6602 pos += RSTRING_LEN(str);
6603 if (pos < 0 || RSTRING_LEN(str) <= pos)
6606 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6626 long len = RSTRING_LEN(str);
6627 char *
ptr, *head, *left = 0;
6631 if (pos < -
len ||
len <= pos)
6638 char byte = (char)(
NUM2INT(w) & 0xFF);
6640 if (!str_independent(str))
6641 str_make_independent(str);
6642 enc = STR_ENC_GET(str);
6643 head = RSTRING_PTR(str);
6645 if (!STR_EMBED_P(str)) {
6652 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6660 width = rb_enc_precise_mbclen(left, head+
len, enc);
6662 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6678str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6680 long n = RSTRING_LEN(str);
6682 if (beg > n ||
len < 0)
return Qnil;
6685 if (beg < 0)
return Qnil;
6690 if (!empty)
return Qnil;
6694 VALUE str2 = str_subseq(str, beg,
len);
6696 str_enc_copy_direct(str2, str);
6698 if (RSTRING_LEN(str2) == 0) {
6699 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6733 long beg,
len = RSTRING_LEN(str);
6741 return str_byte_substr(str, beg,
len, TRUE);
6746 return str_byte_substr(str, idx, 1, FALSE);
6758rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6763 return str_byte_substr(str, beg,
len, TRUE);
6766 return str_byte_aref(str, argv[0]);
6770str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6772 long end, slen = RSTRING_LEN(str);
6775 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6784 if (*
len > slen - *beg) {
6788 str_ensure_byte_pos(str, *beg);
6789 str_ensure_byte_pos(str, end);
6803rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6805 long beg,
len, vbeg, vlen;
6810 if (!(argc == 2 || argc == 3 || argc == 5)) {
6811 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6815 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6816 rb_builtin_class_name(argv[0]));
6823 vlen = RSTRING_LEN(val);
6828 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6829 rb_builtin_class_name(argv[2]));
6841 vlen = RSTRING_LEN(val);
6849 str_check_beg_len(str, &beg, &
len);
6850 str_check_beg_len(val, &vbeg, &vlen);
6851 str_modify_keep_cr(str);
6854 rb_enc_associate(str, rb_enc_check(str, val));
6857 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6879rb_str_reverse(
VALUE str)
6886 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6887 enc = STR_ENC_GET(str);
6893 if (RSTRING_LEN(str) > 1) {
6894 if (single_byte_optimizable(str)) {
6901 int clen = rb_enc_fast_mbclen(s, e, enc);
6909 cr = rb_enc_asciicompat(enc) ?
6912 int clen = rb_enc_mbclen(s, e, enc);
6921 STR_SET_LEN(rev, RSTRING_LEN(str));
6922 str_enc_copy_direct(rev, str);
6944rb_str_reverse_bang(
VALUE str)
6946 if (RSTRING_LEN(str) > 1) {
6947 if (single_byte_optimizable(str)) {
6950 str_modify_keep_cr(str);
6951 s = RSTRING_PTR(str);
6960 str_shared_replace(str, rb_str_reverse(str));
6964 str_modify_keep_cr(str);
6993 i = rb_str_index(str, arg, 0);
6995 return RBOOL(i != -1);
7039 rb_raise(rb_eArgError,
"invalid radix %d", base);
7041 return rb_str_to_inum(str, base, FALSE);
7066rb_str_to_f(
VALUE str)
7083rb_str_to_s(
VALUE str)
7095 char s[RUBY_MAX_CHAR_LEN];
7096 int n = rb_enc_codelen(c, enc);
7098 rb_enc_mbcput(c, s, enc);
/*
 * Size bound handed to snprintf() when a single character is written in
 * escaped form (the "\\x%02X", "\\u%04X", "\\u{%X}" and "\\x{%X}" formats).
 * The scratch buffers that receive the output are declared as
 * char buf[CHAR_ESC_LEN + 1].
 * NOTE(review): 13 appears to be the longest form, \x{FFFFFFFF}
 * (12 characters), plus the terminating NUL -- confirm.
 */
7103#define CHAR_ESC_LEN 13
7106rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7108 char buf[CHAR_ESC_LEN + 1];
7116 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7118 else if (c < 0x10000) {
7119 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7122 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7127 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7130 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7133 l = (int)strlen(buf);
7139ruby_escaped_char(
int c)
7142 case '\0':
return "\\0";
7143 case '\n':
return "\\n";
7144 case '\r':
return "\\r";
7145 case '\t':
return "\\t";
7146 case '\f':
return "\\f";
7147 case '\013':
return "\\v";
7148 case '\010':
return "\\b";
7149 case '\007':
return "\\a";
7150 case '\033':
return "\\e";
7151 case '\x7f':
return "\\c?";
7157rb_str_escape(
VALUE str)
7161 const char *p = RSTRING_PTR(str);
7163 const char *prev = p;
7164 char buf[CHAR_ESC_LEN + 1];
7166 int unicode_p = rb_enc_unicode_p(enc);
7167 int asciicompat = rb_enc_asciicompat(enc);
7172 int n = rb_enc_precise_mbclen(p, pend, enc);
7174 if (p > prev) str_buf_cat(result, prev, p - prev);
7175 n = rb_enc_mbminlen(enc);
7177 n = (int)(pend - p);
7179 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7180 str_buf_cat(result, buf, strlen(buf));
7186 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7188 cc = ruby_escaped_char(c);
7190 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7191 str_buf_cat(result, cc, strlen(cc));
7194 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7197 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7198 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7202 if (p > prev) str_buf_cat(result, prev, p - prev);
7221 const char *p, *pend, *prev;
7222 char buf[CHAR_ESC_LEN + 1];
7224 rb_encoding *resenc = rb_default_internal_encoding();
7225 int unicode_p = rb_enc_unicode_p(enc);
7226 int asciicompat = rb_enc_asciicompat(enc);
7228 if (resenc == NULL) resenc = rb_default_external_encoding();
7229 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7230 rb_enc_associate(result, resenc);
7231 str_buf_cat2(result,
"\"");
7239 n = rb_enc_precise_mbclen(p, pend, enc);
7241 if (p > prev) str_buf_cat(result, prev, p - prev);
7242 n = rb_enc_mbminlen(enc);
7244 n = (int)(pend - p);
7246 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7247 str_buf_cat(result, buf, strlen(buf));
7253 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7255 if ((asciicompat || unicode_p) &&
7256 (c ==
'"'|| c ==
'\\' ||
7261 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7262 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7263 str_buf_cat2(result,
"\\");
7264 if (asciicompat || enc == resenc) {
7270 case '\n': cc =
'n';
break;
7271 case '\r': cc =
'r';
break;
7272 case '\t': cc =
't';
break;
7273 case '\f': cc =
'f';
break;
7274 case '\013': cc =
'v';
break;
7275 case '\010': cc =
'b';
break;
7276 case '\007': cc =
'a';
break;
7277 case 033: cc =
'e';
break;
7278 default: cc = 0;
break;
7281 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7284 str_buf_cat(result, buf, 2);
7297 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7301 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7302 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7307 if (p > prev) str_buf_cat(result, prev, p - prev);
7308 str_buf_cat2(result,
"\"");
/*
 * True when p has not reached e and the byte at p is one of '$', '@' or '{'.
 * The dump code evaluates it right after reading a '#' (p already points at
 * the following byte) to decide whether a backslash must be emitted first.
 * p is evaluated several times, so pass an expression without side effects.
 */
7313#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7326 int encidx = rb_enc_get_index(str);
7329 const char *p, *pend;
7332 int u8 = (encidx == rb_utf8_encindex());
7333 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7336 if (!rb_enc_asciicompat(enc)) {
7338 len += strlen(enc->name);
7341 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7344 unsigned char c = *p++;
7347 case '"':
case '\\':
7348 case '\n':
case '\r':
7349 case '\t':
case '\f':
7350 case '\013':
case '\010':
case '\007':
case '\033':
7355 clen = IS_EVSTR(p, pend) ? 2 : 1;
7363 if (u8 && c > 0x7F) {
7364 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7366 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7369 else if (cc <= 0xFFFFF)
7382 if (clen > LONG_MAX -
len) {
7389 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7390 q = RSTRING_PTR(result); qend = q +
len + 1;
7394 unsigned char c = *p++;
7396 if (c ==
'"' || c ==
'\\') {
7400 else if (c ==
'#') {
7401 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7404 else if (c ==
'\n') {
7408 else if (c ==
'\r') {
7412 else if (c ==
'\t') {
7416 else if (c ==
'\f') {
7420 else if (c ==
'\013') {
7424 else if (c ==
'\010') {
7428 else if (c ==
'\007') {
7432 else if (c ==
'\033') {
7442 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7444 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7447 snprintf(q, qend-q,
"u%04X", cc);
7449 snprintf(q, qend-q,
"u{%X}", cc);
7454 snprintf(q, qend-q,
"x%02X", c);
7460 if (!rb_enc_asciicompat(enc)) {
7461 snprintf(q, qend-q, nonascii_suffix, enc->name);
7462 encidx = rb_ascii8bit_encindex();
7465 rb_enc_associate_index(result, encidx);
7471unescape_ascii(
unsigned int c)
7495undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7497 const char *s = *ss;
7501 unsigned char buf[6];
7519 *buf = unescape_ascii(*s);
7531 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7532 if (*penc != enc_utf8) {
7534 rb_enc_associate(undumped, enc_utf8);
7551 if (hexlen == 0 || hexlen > 6) {
7557 if (0xd800 <= c && c <= 0xdfff) {
7560 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7570 if (0xd800 <= c && c <= 0xdfff) {
7573 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
/* Forward declaration so that str_undump() can call rb_str_is_ascii_only_p(). */
7603static VALUE rb_str_is_ascii_only_p(
VALUE str);
7615str_undump(
VALUE str)
7617 const char *s = RSTRING_PTR(str);
7620 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7622 bool binary =
false;
7626 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7629 if (!str_null_check(str, &w)) {
7632 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7633 if (*s !=
'"')
goto invalid_format;
7651 static const char force_encoding_suffix[] =
".force_encoding(\"";
7652 static const char dup_suffix[] =
".dup";
7653 const char *encname;
7658 size =
sizeof(dup_suffix) - 1;
7659 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7661 size =
sizeof(force_encoding_suffix) - 1;
7662 if (s_end - s <= size)
goto invalid_format;
7663 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7667 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7671 s = memchr(s,
'"', s_end-s);
7673 if (!s)
goto invalid_format;
7674 if (s_end - s != 2)
goto invalid_format;
7675 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7677 encidx = rb_enc_find_index2(encname, (
long)size);
7681 rb_enc_associate_index(undumped, encidx);
7691 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7702 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7708 if (rb_enc_dummy_p(enc)) {
7715str_true_enc(
VALUE str)
7718 rb_str_check_dummy_enc(enc);
7722static OnigCaseFoldType
7723check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7728 rb_raise(rb_eArgError,
"too many options");
7729 if (argv[0]==sym_turkic) {
7730 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7732 if (argv[1]==sym_lithuanian)
7733 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7735 rb_raise(rb_eArgError,
"invalid second option");
7738 else if (argv[0]==sym_lithuanian) {
7739 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7741 if (argv[1]==sym_turkic)
7742 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7744 rb_raise(rb_eArgError,
"invalid second option");
7748 rb_raise(rb_eArgError,
"too many options");
7749 else if (argv[0]==sym_ascii)
7750 flags |= ONIGENC_CASE_ASCII_ONLY;
7751 else if (argv[0]==sym_fold) {
7752 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7753 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7755 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7758 rb_raise(rb_eArgError,
"invalid option");
7765 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/*
 * Constant slack, in bytes, added to the capacity computed for every
 * case-mapping buffer (remaining source length times the buffer count).
 */
7771#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7772#ifndef CASEMAP_DEBUG
7773# define CASEMAP_DEBUG 0
7781 OnigUChar space[FLEX_ARY_LEN];
7785mapping_buffer_free(
void *p)
7789 while (current_buffer) {
7790 previous_buffer = current_buffer;
7791 current_buffer = current_buffer->next;
7792 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7798 {0, mapping_buffer_free,},
7799 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7807 const OnigUChar *source_current, *source_end;
7808 int target_length = 0;
7809 VALUE buffer_anchor;
7812 size_t buffer_count = 0;
7813 int buffer_length_or_invalid;
7815 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7817 source_current = (OnigUChar*)RSTRING_PTR(source);
7822 while (source_current < source_end) {
7824 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7825 if (CASEMAP_DEBUG) {
7826 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7829 *pre_buffer = current_buffer;
7830 pre_buffer = ¤t_buffer->next;
7831 current_buffer->next = NULL;
7832 current_buffer->capa =
capa;
7833 buffer_length_or_invalid = enc->case_map(flags,
7834 &source_current, source_end,
7835 current_buffer->space,
7836 current_buffer->space+current_buffer->capa,
7838 if (buffer_length_or_invalid < 0) {
7839 current_buffer =
DATA_PTR(buffer_anchor);
7841 mapping_buffer_free(current_buffer);
7842 rb_raise(rb_eArgError,
"input string invalid");
7844 target_length += current_buffer->used = buffer_length_or_invalid;
7846 if (CASEMAP_DEBUG) {
7847 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7850 if (buffer_count==1) {
7851 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7854 char *target_current;
7857 target_current = RSTRING_PTR(target);
7858 current_buffer =
DATA_PTR(buffer_anchor);
7859 while (current_buffer) {
7860 memcpy(target_current, current_buffer->space, current_buffer->used);
7861 target_current += current_buffer->used;
7862 current_buffer = current_buffer->next;
7865 current_buffer =
DATA_PTR(buffer_anchor);
7867 mapping_buffer_free(current_buffer);
7872 str_enc_copy_direct(target, source);
7881 const OnigUChar *source_current, *source_end;
7882 OnigUChar *target_current, *target_end;
7883 long old_length = RSTRING_LEN(source);
7884 int length_or_invalid;
7886 if (old_length == 0)
return Qnil;
7888 source_current = (OnigUChar*)RSTRING_PTR(source);
7890 if (source == target) {
7891 target_current = (OnigUChar*)source_current;
7892 target_end = (OnigUChar*)source_end;
7895 target_current = (OnigUChar*)RSTRING_PTR(target);
7899 length_or_invalid = onigenc_ascii_only_case_map(flags,
7900 &source_current, source_end,
7901 target_current, target_end, enc);
7902 if (length_or_invalid < 0)
7903 rb_raise(rb_eArgError,
"input string invalid");
7904 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7905 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7906 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7907 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7908 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7911 str_enc_copy(target, source);
7917upcase_single(
VALUE str)
7919 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7920 bool modified =
false;
7923 unsigned int c = *(
unsigned char*)s;
7925 if (
'a' <= c && c <=
'z') {
7926 *s =
'A' + (c -
'a');
7947rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7950 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7952 flags = check_case_options(argc, argv, flags);
7953 str_modify_keep_cr(str);
7954 enc = str_true_enc(str);
7955 if (case_option_single_p(flags, enc, str)) {
7956 if (upcase_single(str))
7957 flags |= ONIGENC_CASE_MODIFIED;
7959 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7960 rb_str_ascii_casemap(str, str, &flags, enc);
7962 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7964 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7977rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7980 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7983 flags = check_case_options(argc, argv, flags);
7984 enc = str_true_enc(str);
7985 if (case_option_single_p(flags, enc, str)) {
7986 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7987 str_enc_copy_direct(ret, str);
7990 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7992 rb_str_ascii_casemap(str, ret, &flags, enc);
7995 ret = rb_str_casemap(str, &flags, enc);
8002downcase_single(
VALUE str)
8004 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8005 bool modified =
false;
8008 unsigned int c = *(
unsigned char*)s;
8010 if (
'A' <= c && c <=
'Z') {
8011 *s =
'a' + (c -
'A');
8033rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8036 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8038 flags = check_case_options(argc, argv, flags);
8039 str_modify_keep_cr(str);
8040 enc = str_true_enc(str);
8041 if (case_option_single_p(flags, enc, str)) {
8042 if (downcase_single(str))
8043 flags |= ONIGENC_CASE_MODIFIED;
8045 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8046 rb_str_ascii_casemap(str, str, &flags, enc);
8048 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8050 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8064rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8067 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8070 flags = check_case_options(argc, argv, flags);
8071 enc = str_true_enc(str);
8072 if (case_option_single_p(flags, enc, str)) {
8073 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8074 str_enc_copy_direct(ret, str);
8075 downcase_single(ret);
8077 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8079 rb_str_ascii_casemap(str, ret, &flags, enc);
8082 ret = rb_str_casemap(str, &flags, enc);
8102rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8105 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8107 flags = check_case_options(argc, argv, flags);
8108 str_modify_keep_cr(str);
8109 enc = str_true_enc(str);
8110 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8111 if (flags&ONIGENC_CASE_ASCII_ONLY)
8112 rb_str_ascii_casemap(str, str, &flags, enc);
8114 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8116 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8130rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8133 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8136 flags = check_case_options(argc, argv, flags);
8137 enc = str_true_enc(str);
8138 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8139 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8141 rb_str_ascii_casemap(str, ret, &flags, enc);
8144 ret = rb_str_casemap(str, &flags, enc);
8163rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8166 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8168 flags = check_case_options(argc, argv, flags);
8169 str_modify_keep_cr(str);
8170 enc = str_true_enc(str);
8171 if (flags&ONIGENC_CASE_ASCII_ONLY)
8172 rb_str_ascii_casemap(str, str, &flags, enc);
8174 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8176 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8190rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8193 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8196 flags = check_case_options(argc, argv, flags);
8197 enc = str_true_enc(str);
8198 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8199 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8201 rb_str_ascii_casemap(str, ret, &flags, enc);
8204 ret = rb_str_casemap(str, &flags, enc);
/* Shorthand for a pointer to unsigned char. */
8209typedef unsigned char *USTR;
8213 unsigned int now, max;
8225 if (t->p == t->pend)
return -1;
8226 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8229 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8231 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8233 if (t->p < t->pend) {
8234 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8237 if (t->now < 0x80 && c < 0x80) {
8238 rb_raise(rb_eArgError,
8239 "invalid range \"%c-%c\" in string transliteration",
8243 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8247 else if (t->now < c) {
8256 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8257 if (t->now == t->max) {
8262 if (t->now < t->max) {
8278 const unsigned int errc = -1;
8279 unsigned int trans[256];
8281 struct tr trsrc, trrepl;
8283 unsigned int c, c0, last = 0;
8284 int modify = 0, i, l;
8285 unsigned char *s, *send;
8287 int singlebyte = single_byte_optimizable(str);
8291#define CHECK_IF_ASCII(c) \
8292 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8293 (cr = ENC_CODERANGE_VALID) : 0)
8297 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8298 if (RSTRING_LEN(repl) == 0) {
8299 return rb_str_delete_bang(1, &src, str);
8303 e1 = rb_enc_check(str, src);
8304 e2 = rb_enc_check(str, repl);
8309 enc = rb_enc_check(src, repl);
8311 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8312 if (RSTRING_LEN(src) > 1 &&
8313 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8314 trsrc.p + l < trsrc.pend) {
8318 trrepl.p = RSTRING_PTR(repl);
8319 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8320 trsrc.gen = trrepl.gen = 0;
8321 trsrc.now = trrepl.now = 0;
8322 trsrc.max = trrepl.max = 0;
8325 for (i=0; i<256; i++) {
8328 while ((c = trnext(&trsrc, enc)) != errc) {
8333 if (!hash) hash = rb_hash_new();
8337 while ((c = trnext(&trrepl, enc)) != errc)
8340 for (i=0; i<256; i++) {
8341 if (trans[i] != errc) {
8349 for (i=0; i<256; i++) {
8352 while ((c = trnext(&trsrc, enc)) != errc) {
8353 r = trnext(&trrepl, enc);
8354 if (r == errc) r = trrepl.now;
8357 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8360 if (!hash) hash = rb_hash_new();
8368 str_modify_keep_cr(str);
8369 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8370 termlen = rb_enc_mbminlen(enc);
8373 long offset, max = RSTRING_LEN(str);
8374 unsigned int save = -1;
8375 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8380 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8383 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8386 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8388 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8397 if (cflag) c = last;
8400 else if (cflag) c = errc;
8406 if (c != (
unsigned int)-1) {
8412 tlen = rb_enc_codelen(c, enc);
8418 if (enc != e1) may_modify = 1;
8420 if ((offset = t - buf) + tlen > max) {
8421 size_t MAYBE_UNUSED(old) = max + termlen;
8422 max = offset + tlen + (send - s);
8423 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8426 rb_enc_mbcput(c, t, enc);
8427 if (may_modify && memcmp(s, t, tlen) != 0) {
8433 if (!STR_EMBED_P(str)) {
8434 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8436 TERM_FILL((
char *)t, termlen);
8437 RSTRING(str)->as.heap.ptr = (
char *)buf;
8438 STR_SET_LEN(str, t - buf);
8439 STR_SET_NOEMBED(str);
8440 RSTRING(str)->as.heap.aux.capa = max;
8444 c = (
unsigned char)*s;
8445 if (trans[c] != errc) {
8462 long offset, max = (long)((send - s) * 1.2);
8463 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8468 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8471 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8474 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8476 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8484 if (cflag) c = last;
8487 else if (cflag) c = errc;
8491 c = cflag ? last : errc;
8494 tlen = rb_enc_codelen(c, enc);
8499 if (enc != e1) may_modify = 1;
8501 if ((offset = t - buf) + tlen > max) {
8502 size_t MAYBE_UNUSED(old) = max + termlen;
8503 max = offset + tlen + (long)((send - s) * 1.2);
8504 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8508 rb_enc_mbcput(c, t, enc);
8509 if (may_modify && memcmp(s, t, tlen) != 0) {
8517 if (!STR_EMBED_P(str)) {
8518 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8520 TERM_FILL((
char *)t, termlen);
8521 RSTRING(str)->as.heap.ptr = (
char *)buf;
8522 STR_SET_LEN(str, t - buf);
8523 STR_SET_NOEMBED(str);
8524 RSTRING(str)->as.heap.aux.capa = max;
8530 rb_enc_associate(str, enc);
8552 return tr_trans(str, src, repl, 0);
8597 tr_trans(str, src, repl, 0);
/*
 * TR_TABLE_MAX is the number of distinct unsigned char values; code points
 * below it are looked up directly by index in the byte table.
 * TR_TABLE_SIZE reserves one extra trailing slot, table[TR_TABLE_MAX]:
 * tr_setup_table() stores its cflag there, and tr_find() returns that slot
 * as its final fallback for code points at or above TR_TABLE_MAX.
 */
8601#define TR_TABLE_MAX (UCHAR_MAX+1)
8602#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8604tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8607 const unsigned int errc = -1;
8608 char buf[TR_TABLE_MAX];
8611 VALUE table = 0, ptable = 0;
8612 int i, l, cflag = 0;
8614 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8615 tr.gen =
tr.now =
tr.max = 0;
8617 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8622 for (i=0; i<TR_TABLE_MAX; i++) {
8625 stable[TR_TABLE_MAX] = cflag;
8627 else if (stable[TR_TABLE_MAX] && !cflag) {
8628 stable[TR_TABLE_MAX] = 0;
8630 for (i=0; i<TR_TABLE_MAX; i++) {
8634 while ((c = trnext(&
tr, enc)) != errc) {
8635 if (c < TR_TABLE_MAX) {
8636 buf[(
unsigned char)c] = !cflag;
8641 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8644 table = ptable ? ptable : rb_hash_new();
8648 table = rb_hash_new();
8653 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8654 rb_hash_aset(table, key,
Qtrue);
8658 for (i=0; i<TR_TABLE_MAX; i++) {
8659 stable[i] = stable[i] && buf[i];
8661 if (!table && !cflag) {
8668tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8670 if (c < TR_TABLE_MAX) {
8671 return table[c] != 0;
8677 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8678 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8682 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8685 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8700rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8702 char squeez[TR_TABLE_SIZE];
8705 VALUE del = 0, nodel = 0;
8707 int i, ascompat, cr;
8709 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8711 for (i=0; i<argc; i++) {
8715 enc = rb_enc_check(str, s);
8716 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8719 str_modify_keep_cr(str);
8720 ascompat = rb_enc_asciicompat(enc);
8721 s = t = RSTRING_PTR(str);
8728 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8739 c = rb_enc_codepoint_len(s, send, &clen, enc);
8741 if (tr_find(c, squeez, del, nodel)) {
8745 if (t != s) rb_enc_mbcput(c, t, enc);
8752 TERM_FILL(t, TERM_LEN(str));
8753 STR_SET_LEN(str, t - RSTRING_PTR(str));
8756 if (modify)
return str;
8770rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8773 rb_str_delete_bang(argc, argv, str);
8791rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8793 char squeez[TR_TABLE_SIZE];
8795 VALUE del = 0, nodel = 0;
8796 unsigned char *s, *send, *t;
8798 int ascompat, singlebyte = single_byte_optimizable(str);
8802 enc = STR_ENC_GET(str);
8805 for (i=0; i<argc; i++) {
8809 enc = rb_enc_check(str, s);
8810 if (singlebyte && !single_byte_optimizable(s))
8812 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8816 str_modify_keep_cr(str);
8817 s = t = (
unsigned char *)RSTRING_PTR(str);
8818 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8821 ascompat = rb_enc_asciicompat(enc);
8825 unsigned int c = *s++;
8826 if (c != save || (argc > 0 && !squeez[c])) {
8836 if (ascompat && (c = *s) < 0x80) {
8837 if (c != save || (argc > 0 && !squeez[c])) {
8843 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8845 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8846 if (t != s) rb_enc_mbcput(c, t, enc);
8855 TERM_FILL((
char *)t, TERM_LEN(str));
8856 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8857 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8861 if (modify)
return str;
8875rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8878 rb_str_squeeze_bang(argc, argv, str);
8898 return tr_trans(str, src, repl, 1);
8926 tr_trans(str, src, repl, 1);
8939rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8941 char table[TR_TABLE_SIZE];
8943 VALUE del = 0, nodel = 0, tstr;
8953 enc = rb_enc_check(str, tstr);
8956 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8957 (ptstr = RSTRING_PTR(tstr),
8958 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8959 !is_broken_string(str)) {
8961 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8963 s = RSTRING_PTR(str);
8964 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8967 if (*(
unsigned char*)s++ == c) n++;
8973 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8974 for (i=1; i<argc; i++) {
8977 enc = rb_enc_check(str, tstr);
8978 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8981 s = RSTRING_PTR(str);
8982 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8984 ascompat = rb_enc_asciicompat(enc);
8988 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8996 c = rb_enc_codepoint_len(s, send, &clen, enc);
8997 if (tr_find(c, table, del, nodel)) {
9008rb_fs_check(
VALUE val)
9012 if (
NIL_P(val))
return 0;
/*
 * Byte-indexed lookup table (16 rows of 16 = 256 entries) consulted by
 * ascii_isspace().  Only indices 9..13 ('\t', '\n', '\v', '\f', '\r') and
 * 32 (' ') hold 1; every other entry, including index 0 (NUL) and all
 * bytes >= 0x80, is 0.
 */
9017static const char isspacetable[256] = {
9018 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9019 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9020 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9021 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9022 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9023 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9024 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9025 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9026 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9027 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9028 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9029 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9030 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9031 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9032 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9033 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Nonzero when the byte `c` is marked in isspacetable.  The argument is
 * converted to unsigned char before indexing, so a negative `char` value
 * cannot index in front of the table.
 */
9036#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9039split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9041 if (empty_count >= 0 &&
len == 0) {
9042 return empty_count + 1;
9044 if (empty_count > 0) {
9049 }
while (--empty_count > 0);
9053 rb_yield(str_new_empty_String(str));
9054 }
while (--empty_count > 0);
9068 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9072literal_split_pattern(
VALUE spat, split_type_t default_type)
9080 return SPLIT_TYPE_CHARS;
9082 else if (rb_enc_asciicompat(enc)) {
9083 if (
len == 1 && ptr[0] ==
' ') {
9084 return SPLIT_TYPE_AWK;
9089 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9090 return SPLIT_TYPE_AWK;
9093 return default_type;
9106rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9111 split_type_t split_type;
9112 long beg, end, i = 0, empty_count = -1;
9117 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9119 if (lim <= 0) limit =
Qnil;
9120 else if (lim == 1) {
9121 if (RSTRING_LEN(str) == 0)
9132 if (
NIL_P(limit) && !lim) empty_count = 0;
9134 enc = STR_ENC_GET(str);
9135 split_type = SPLIT_TYPE_REGEXP;
9137 spat = get_pat_quoted(spat, 0);
9139 else if (
NIL_P(spat = rb_fs)) {
9140 split_type = SPLIT_TYPE_AWK;
9142 else if (!(spat = rb_fs_check(spat))) {
9143 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9148 if (split_type != SPLIT_TYPE_AWK) {
9153 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9154 if (split_type == SPLIT_TYPE_AWK) {
9156 split_type = SPLIT_TYPE_STRING;
9161 mustnot_broken(spat);
9162 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Comma expression used at each split point: passes (beg, len) to
 * split_string() together with the enclosing function's `result`, `str`
 * and `empty_count`, stores the returned value back into `empty_count`,
 * and then runs str_mod_check(str, str_start, str_len).  All of those
 * locals must be in scope wherever the macro is expanded.
 * NOTE(review): str_mod_check presumably detects `str` being changed while
 * split_string() runs (split_string may yield to a block) -- confirm
 * against its definition.
 */
9170#define SPLIT_STR(beg, len) ( \
9171 empty_count = split_string(result, str, beg, len, empty_count), \
9172 str_mod_check(str, str_start, str_len))
9175 char *ptr = RSTRING_PTR(str);
9176 char *
const str_start = ptr;
9177 const long str_len = RSTRING_LEN(str);
9178 char *
const eptr = str_start + str_len;
9179 if (split_type == SPLIT_TYPE_AWK) {
9186 if (is_ascii_string(str)) {
9187 while (ptr < eptr) {
9188 c = (
unsigned char)*ptr++;
9190 if (ascii_isspace(c)) {
9196 if (!
NIL_P(limit) && lim <= i)
break;
9199 else if (ascii_isspace(c)) {
9200 SPLIT_STR(beg, end-beg);
9203 if (!
NIL_P(limit)) ++i;
9211 while (ptr < eptr) {
9214 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9223 if (!
NIL_P(limit) && lim <= i)
break;
9227 SPLIT_STR(beg, end-beg);
9230 if (!
NIL_P(limit)) ++i;
9238 else if (split_type == SPLIT_TYPE_STRING) {
9239 char *substr_start = ptr;
9240 char *sptr = RSTRING_PTR(spat);
9241 long slen = RSTRING_LEN(spat);
9244 mustnot_broken(str);
9245 enc = rb_enc_check(str, spat);
9246 while (ptr < eptr &&
9247 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9250 if (t != ptr + end) {
9254 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9255 str_mod_check(spat, sptr, slen);
9258 if (!
NIL_P(limit) && lim <= ++i)
break;
9260 beg = ptr - str_start;
9262 else if (split_type == SPLIT_TYPE_CHARS) {
9266 mustnot_broken(str);
9267 enc = rb_enc_get(str);
9268 while (ptr < eptr &&
9269 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9270 SPLIT_STR(ptr - str_start, n);
9272 if (!
NIL_P(limit) && lim <= ++i)
break;
9274 beg = ptr - str_start;
9278 long len = RSTRING_LEN(str);
9286 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9291 if (start == end && BEG(0) == END(0)) {
9296 else if (last_null == 1) {
9297 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9304 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9310 SPLIT_STR(beg, end-beg);
9311 beg = start = END(0);
9315 for (idx=1; idx < regs->num_regs; idx++) {
9316 if (BEG(idx) == -1)
continue;
9317 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9319 if (!
NIL_P(limit) && lim <= ++i)
break;
9321 if (match) rb_match_unbusy(match);
9323 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9324 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9327 return result ? result : str;
9337 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to a new array from rb_ary_new_capa(size) when
 * rb_block_given_p() is false, and to 0 otherwise.  The first parameter
 * `m` does not appear in the expansion.
 */
9340#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
/* Shorthand that forwards (ary, e) unchanged to enumerator_element(). */
9355#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9358chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9360 const char *prev = rb_enc_prev_char(p, e, e, enc);
9363 prev = rb_enc_prev_char(p, e, e, enc);
9364 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9376 RSTRING_LEN(rs) != 1 ||
9377 RSTRING_PTR(rs)[0] !=
'\n')) {
/* From this point on, each use of `rb_rs` expands to a call to get_rs(). */
9383#define rb_rs get_rs()
9390 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9391 long pos,
len, rslen;
9397 static ID keywords[1];
9402 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9406 if (!ENUM_ELEM(ary, str)) {
9414 if (!RSTRING_LEN(str))
goto end;
9416 ptr = subptr = RSTRING_PTR(str);
9418 len = RSTRING_LEN(str);
9420 rslen = RSTRING_LEN(rs);
9423 enc = rb_enc_get(str);
9425 enc = rb_enc_check(str, rs);
9430 const char *eol = NULL;
9432 while (subend < pend) {
9433 long chomp_rslen = 0;
9435 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9437 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9439 if (eol == subend)
break;
9443 chomp_rslen = -rslen;
9447 if (!subptr) subptr = subend;
9451 }
while (subend < pend);
9453 if (rslen == 0) chomp_rslen = 0;
9455 subend - subptr + (chomp ? chomp_rslen : rslen));
9456 if (ENUM_ELEM(ary, line)) {
9457 str_mod_check(str, ptr,
len);
9459 subptr = eol = NULL;
9464 rsptr = RSTRING_PTR(rs);
9465 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9474 rsptr = RSTRING_PTR(rs);
9475 rslen = RSTRING_LEN(rs);
9478 while (subptr < pend) {
9479 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9483 if (hit != adjusted) {
9487 subend = hit += rslen;
9490 subend = chomp_newline(subptr, subend, enc);
9497 if (ENUM_ELEM(ary, line)) {
9498 str_mod_check(str, ptr,
len);
9503 if (subptr != pend) {
9506 pend = chomp_newline(subptr, pend, enc);
9508 else if (pend - subptr >= rslen &&
9509 memcmp(pend - rslen, rsptr, rslen) == 0) {
9514 ENUM_ELEM(ary, line);
9535rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9538 return rb_str_enumerate_lines(argc, argv, str, 0);
9593rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9595 VALUE ary = WANTARRAY(
"lines", 0);
9596 return rb_str_enumerate_lines(argc, argv, str, ary);
9610 for (i=0; i<RSTRING_LEN(str); i++) {
9611 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9629rb_str_each_byte(
VALUE str)
9632 return rb_str_enumerate_bytes(str, 0);
9644rb_str_bytes(
VALUE str)
9646 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9647 return rb_str_enumerate_bytes(str, ary);
9665 ptr = RSTRING_PTR(str);
9666 len = RSTRING_LEN(str);
9667 enc = rb_enc_get(str);
9670 for (i = 0; i <
len; i += n) {
9671 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9676 for (i = 0; i <
len; i += n) {
9677 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9698rb_str_each_char(
VALUE str)
9701 return rb_str_enumerate_chars(str, 0);
9713rb_str_chars(
VALUE str)
9716 return rb_str_enumerate_chars(str, ary);
9720rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9725 const char *ptr, *end;
9728 if (single_byte_optimizable(str))
9729 return rb_str_enumerate_bytes(str, ary);
9732 ptr = RSTRING_PTR(str);
9734 enc = STR_ENC_GET(str);
9737 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9758rb_str_each_codepoint(
VALUE str)
9761 return rb_str_enumerate_codepoints(str, 0);
9773rb_str_codepoints(
VALUE str)
9776 return rb_str_enumerate_codepoints(str, ary);
9782 int encidx = rb_enc_to_index(enc);
9784 const OnigUChar source_ascii[] =
"\\X";
9785 const OnigUChar *source = source_ascii;
9786 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Helpers that expand one code unit `x` into a comma-separated list of
 * initializers, each cast to OnigUChar:
 *   CHARS_16BE(x): (x >> 8), then x
 *   CHARS_16LE(x): x, then (x >> 8)
 *   CHARS_32BE(x): CHARS_16BE(x >> 16), then CHARS_16BE(x)
 *   CHARS_32LE(x): CHARS_16LE(x), then CHARS_16LE(x >> 16)
 * They are selected by token pasting (CHARS_##e) in CASE_UTF.
 */
9789#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9790#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9791#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9792#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9793#define CASE_UTF(e) \
9794 case ENCINDEX_UTF_##e: { \
9795 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9796 source = source_UTF_##e; \
9797 source_len = sizeof(source_UTF_##e); \
9800 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9808 regex_t *reg_grapheme_cluster;
9810 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9811 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9813 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9814 onig_error_code_to_str(message, r, &einfo);
9815 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9818 return reg_grapheme_cluster;
9824 int encidx = rb_enc_to_index(enc);
9825 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9827 if (encidx == rb_utf8_encindex()) {
9828 if (!reg_grapheme_cluster_utf8) {
9829 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9832 return reg_grapheme_cluster_utf8;
9841 size_t grapheme_cluster_count = 0;
9843 const char *ptr, *end;
9845 if (!rb_enc_unicode_p(enc)) {
9849 bool cached_reg_grapheme_cluster =
true;
9850 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9851 if (!reg_grapheme_cluster) {
9852 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9853 cached_reg_grapheme_cluster =
false;
9856 ptr = RSTRING_PTR(str);
9860 OnigPosition
len = onig_match(reg_grapheme_cluster,
9861 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9862 (
const OnigUChar *)ptr, NULL, 0);
9863 if (
len <= 0)
break;
9864 grapheme_cluster_count++;
9868 if (!cached_reg_grapheme_cluster) {
9869 onig_free(reg_grapheme_cluster);
9872 return SIZET2NUM(grapheme_cluster_count);
9876rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9880 const char *ptr0, *ptr, *end;
9882 if (!rb_enc_unicode_p(enc)) {
9883 return rb_str_enumerate_chars(str, ary);
9888 bool cached_reg_grapheme_cluster =
true;
9889 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9890 if (!reg_grapheme_cluster) {
9891 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9892 cached_reg_grapheme_cluster =
false;
9895 ptr0 = ptr = RSTRING_PTR(str);
9899 OnigPosition
len = onig_match(reg_grapheme_cluster,
9900 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9901 (
const OnigUChar *)ptr, NULL, 0);
9902 if (
len <= 0)
break;
9907 if (!cached_reg_grapheme_cluster) {
9908 onig_free(reg_grapheme_cluster);
9928rb_str_each_grapheme_cluster(
VALUE str)
9931 return rb_str_enumerate_grapheme_clusters(str, 0);
9943rb_str_grapheme_clusters(
VALUE str)
9946 return rb_str_enumerate_grapheme_clusters(str, ary);
9950chopped_length(
VALUE str)
9953 const char *p, *p2, *beg, *end;
9955 beg = RSTRING_PTR(str);
9956 end = beg + RSTRING_LEN(str);
9957 if (beg >= end)
return 0;
9958 p = rb_enc_prev_char(beg, end, end, enc);
9960 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9961 p2 = rb_enc_prev_char(beg, p, end, enc);
9962 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9980rb_str_chop_bang(
VALUE str)
9982 str_modify_keep_cr(str);
9983 if (RSTRING_LEN(str) > 0) {
9985 len = chopped_length(str);
9986 STR_SET_LEN(str,
len);
9987 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10006rb_str_chop(
VALUE str)
10012smart_chomp(
VALUE str,
const char *e,
const char *p)
10015 if (rb_enc_mbminlen(enc) > 1) {
10020 pp = e - rb_enc_mbminlen(enc);
10023 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10031 if (--e > p && *(e-1) ==
'\r') {
10048 char *pp, *e, *rsptr;
10050 char *
const p = RSTRING_PTR(str);
10051 long len = RSTRING_LEN(str);
10053 if (
len == 0)
return 0;
10056 return smart_chomp(str, e, p);
10059 enc = rb_enc_get(str);
10062 if (rb_enc_mbminlen(enc) > 1) {
10067 pp -= rb_enc_mbminlen(enc);
10070 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10077 while (e > p && *(e-1) ==
'\n') {
10079 if (e > p && *(e-1) ==
'\r')
10085 if (rslen >
len)
return len;
10087 enc = rb_enc_get(rs);
10088 newline = rsptr[rslen-1];
10089 if (rslen == rb_enc_mbminlen(enc)) {
10091 if (newline ==
'\n')
10092 return smart_chomp(str, e, p);
10096 return smart_chomp(str, e, p);
10100 enc = rb_enc_check(str, rs);
10101 if (is_broken_string(rs)) {
10105 if (p[
len-1] == newline &&
10107 memcmp(rsptr, pp, rslen) == 0)) {
10108 if (at_char_boundary(p, pp, e, enc))
10109 return len - rslen;
10121chomp_rs(
int argc,
const VALUE *argv)
10125 VALUE rs = argv[0];
10137 long olen = RSTRING_LEN(str);
10138 long len = chompped_length(str, rs);
10139 if (
len >= olen)
return Qnil;
10140 str_modify_keep_cr(str);
10141 STR_SET_LEN(str,
len);
10142 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10162rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10165 str_modifiable(str);
10166 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10167 rs = chomp_rs(argc, argv);
10169 return rb_str_chomp_string(str, rs);
10182rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10184 VALUE rs = chomp_rs(argc, argv);
10190tr_setup_table_multi(
char table[TR_TABLE_SIZE],
VALUE *tablep,
VALUE *ctablep,
10191 VALUE str,
int num_selectors,
VALUE *selectors)
10195 for (i=0; i<num_selectors; i++) {
10196 VALUE selector = selectors[i];
10200 enc = rb_enc_check(str, selector);
10201 tr_setup_table(selector, table, i==0, tablep, ctablep, enc);
10208 const char *
const start = s;
10210 if (!s || s >= e)
return 0;
10213 if (single_byte_optimizable(str)) {
10214 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10219 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10229lstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10230 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10232 const char *
const start = s;
10234 if (!s || s >= e)
return 0;
10239 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10241 if (!tr_find(cc, table, del, nodel))
break;
10260rb_str_lstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10264 long olen, loffset;
10266 str_modify_keep_cr(str);
10267 enc = STR_ENC_GET(str);
10270 char table[TR_TABLE_SIZE];
10271 VALUE del = 0, nodel = 0;
10273 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10274 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10277 loffset = lstrip_offset(str, start, start+olen, enc);
10281 long len = olen-loffset;
10282 s = start + loffset;
10283 memmove(start, s,
len);
10284 STR_SET_LEN(str,
len);
10285 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10320rb_str_lstrip(
int argc,
VALUE *argv,
VALUE str)
10327 char table[TR_TABLE_SIZE];
10328 VALUE del = 0, nodel = 0;
10330 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10331 loffset = lstrip_offset_table(str, start, start+
len, STR_ENC_GET(str), table, del, nodel);
10334 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10336 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10345 rb_str_check_dummy_enc(enc);
10349 if (!s || s >= e)
return 0;
10353 if (single_byte_optimizable(str)) {
10355 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10360 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10370rstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10371 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10376 rb_str_check_dummy_enc(enc);
10380 if (!s || s >= e)
return 0;
10384 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10386 if (!tr_find(c, table, del, nodel))
break;
10406rb_str_rstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10410 long olen, roffset;
10412 str_modify_keep_cr(str);
10413 enc = STR_ENC_GET(str);
10416 char table[TR_TABLE_SIZE];
10417 VALUE del = 0, nodel = 0;
10419 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10420 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10423 roffset = rstrip_offset(str, start, start+olen, enc);
10426 long len = olen - roffset;
10428 STR_SET_LEN(str,
len);
10429 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10463rb_str_rstrip(
int argc,
VALUE *argv,
VALUE str)
10467 long olen, roffset;
10469 enc = STR_ENC_GET(str);
10472 char table[TR_TABLE_SIZE];
10473 VALUE del = 0, nodel = 0;
10475 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10476 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10479 roffset = rstrip_offset(str, start, start+olen, enc);
10481 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10499rb_str_strip_bang(
int argc,
VALUE *argv,
VALUE str)
10502 long olen, loffset, roffset;
10505 str_modify_keep_cr(str);
10506 enc = STR_ENC_GET(str);
10510 char table[TR_TABLE_SIZE];
10511 VALUE del = 0, nodel = 0;
10513 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10514 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10515 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10518 loffset = lstrip_offset(str, start, start+olen, enc);
10519 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10522 if (loffset > 0 || roffset > 0) {
10523 long len = olen-roffset;
10526 memmove(start, start + loffset,
len);
10528 STR_SET_LEN(str,
len);
10529 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10564rb_str_strip(
int argc,
VALUE *argv,
VALUE str)
10567 long olen, loffset, roffset;
10573 char table[TR_TABLE_SIZE];
10574 VALUE del = 0, nodel = 0;
10576 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10577 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10578 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10581 loffset = lstrip_offset(str, start, start+olen, enc);
10582 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10585 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10590scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10593 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10599 end = pos + RSTRING_LEN(pat);
10613 if (RSTRING_LEN(str) > end)
10614 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10623 if (!regs || regs->num_regs == 1) {
10629 for (
int i = 1; i < regs->num_regs; i++) {
10660 long last = -1, prev = 0;
10661 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10663 pat = get_pat_quoted(pat, 1);
10664 mustnot_broken(str);
10668 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10673 if (last >= 0) rb_pat_search(pat, str, last, 1);
10678 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10682 str_mod_check(str, p,
len);
10684 if (last >= 0) rb_pat_search(pat, str, last, 1);
10736rb_str_hex(
VALUE str)
10738 return rb_str_to_inum(str, 16, FALSE);
10822rb_str_oct(
VALUE str)
10824 return rb_str_to_inum(str, -8, FALSE);
10827#ifndef HAVE_CRYPT_R
10832 rb_nativethread_lock_t lock;
10833} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10902# define CRYPT_END() ALLOCV_END(databuf)
10905 extern char *crypt(
const char *,
const char *);
10906# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10909 const char *s, *saltp;
10912 char salt_8bit_clean[3];
10916 mustnot_wchar(str);
10917 mustnot_wchar(salt);
10919 saltp = RSTRING_PTR(salt);
10920 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10921 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10925 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10926 salt_8bit_clean[0] = saltp[0] & 0x7f;
10927 salt_8bit_clean[1] = saltp[1] & 0x7f;
10928 salt_8bit_clean[2] =
'\0';
10929 saltp = salt_8bit_clean;
10934# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10935 data->initialized = 0;
10937 res = crypt_r(s, saltp, data);
10940 res = crypt(s, saltp);
10955 size_t res_size = strlen(res)+1;
10956 tmp_buf =
ALLOCA_N(
char, res_size);
10957 memcpy(tmp_buf, res, res_size);
10994 char *ptr, *p, *pend;
10997 unsigned long sum0 = 0;
11002 ptr = p = RSTRING_PTR(str);
11003 len = RSTRING_LEN(str);
11009 str_mod_check(str, ptr,
len);
11012 sum0 += (
unsigned char)*p;
11023 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11024 sum0 &= (((
unsigned long)1)<<bits)-1;
11044rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11048 long width,
len, flen = 1, fclen = 1;
11051 const char *f =
" ";
11052 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11054 int singlebyte = 1, cr;
11058 enc = STR_ENC_GET(str);
11059 termlen = rb_enc_mbminlen(enc);
11063 enc = rb_enc_check(str, pad);
11064 f = RSTRING_PTR(pad);
11065 flen = RSTRING_LEN(pad);
11066 fclen = str_strlen(pad, enc);
11067 singlebyte = single_byte_optimizable(pad);
11068 if (flen == 0 || fclen == 0) {
11069 rb_raise(rb_eArgError,
"zero width padding");
11072 len = str_strlen(str, enc);
11073 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11075 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11079 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11080 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11082 size = RSTRING_LEN(str);
11083 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11084 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11085 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11086 rb_raise(rb_eArgError,
"argument too big");
11090 p = RSTRING_PTR(res);
11092 memset(p, *f, llen);
11096 while (llen >= fclen) {
11102 memcpy(p, f, llen2);
11106 memcpy(p, RSTRING_PTR(str), size);
11109 memset(p, *f, rlen);
11113 while (rlen >= fclen) {
11119 memcpy(p, f, rlen2);
11123 TERM_FILL(p, termlen);
11124 STR_SET_LEN(res, p-RSTRING_PTR(res));
11145rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11147 return rb_str_justify(argc, argv, str,
'l');
11159rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11161 return rb_str_justify(argc, argv, str,
'r');
11174rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11176 return rb_str_justify(argc, argv, str,
'c');
11192 sep = get_pat_quoted(sep, 0);
11204 pos = rb_str_index(str, sep, 0);
11205 if (pos < 0)
goto failed;
11210 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11213 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11227 long pos = RSTRING_LEN(str);
11229 sep = get_pat_quoted(sep, 0);
11242 pos = rb_str_rindex(str, sep, pos);
11251 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11253 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11265rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11269 for (i=0; i<argc; i++) {
11270 VALUE tmp = argv[i];
11272 if (rb_reg_start_with_p(tmp, str))
11276 const char *p, *s, *e;
11281 enc = rb_enc_check(str, tmp);
11282 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11283 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11284 p = RSTRING_PTR(str);
11287 if (!at_char_right_boundary(p, s, e, enc))
11289 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11305rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11309 for (i=0; i<argc; i++) {
11310 VALUE tmp = argv[i];
11311 const char *p, *s, *e;
11316 enc = rb_enc_check(str, tmp);
11317 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11318 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11319 p = RSTRING_PTR(str);
11322 if (!at_char_boundary(p, s, e, enc))
11324 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11340deleted_prefix_length(
VALUE str,
VALUE prefix)
11342 const char *strptr, *prefixptr;
11343 long olen, prefixlen;
11348 if (!is_broken_string(prefix) ||
11349 !rb_enc_asciicompat(enc) ||
11350 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11351 enc = rb_enc_check(str, prefix);
11355 prefixlen = RSTRING_LEN(prefix);
11356 if (prefixlen <= 0)
return 0;
11357 olen = RSTRING_LEN(str);
11358 if (olen < prefixlen)
return 0;
11359 strptr = RSTRING_PTR(str);
11360 prefixptr = RSTRING_PTR(prefix);
11361 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11362 if (is_broken_string(prefix)) {
11363 if (!is_broken_string(str)) {
11367 const char *strend = strptr + olen;
11368 const char *after_prefix = strptr + prefixlen;
11369 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11390rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11393 str_modify_keep_cr(str);
11395 prefixlen = deleted_prefix_length(str, prefix);
11396 if (prefixlen <= 0)
return Qnil;
11410rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11414 prefixlen = deleted_prefix_length(str, prefix);
11415 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11417 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11430deleted_suffix_length(
VALUE str,
VALUE suffix)
11432 const char *strptr, *suffixptr;
11433 long olen, suffixlen;
11437 if (is_broken_string(suffix))
return 0;
11438 enc = rb_enc_check(str, suffix);
11441 suffixlen = RSTRING_LEN(suffix);
11442 if (suffixlen <= 0)
return 0;
11443 olen = RSTRING_LEN(str);
11444 if (olen < suffixlen)
return 0;
11445 strptr = RSTRING_PTR(str);
11446 suffixptr = RSTRING_PTR(suffix);
11447 const char *strend = strptr + olen;
11448 const char *before_suffix = strend - suffixlen;
11449 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11450 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11466rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11468 long olen, suffixlen,
len;
11469 str_modifiable(str);
11471 suffixlen = deleted_suffix_length(str, suffix);
11472 if (suffixlen <= 0)
return Qnil;
11474 olen = RSTRING_LEN(str);
11475 str_modify_keep_cr(str);
11476 len = olen - suffixlen;
11477 STR_SET_LEN(str,
len);
11478 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11494rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11498 suffixlen = deleted_suffix_length(str, suffix);
11499 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11501 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11508 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11514nil_setter_warning(
ID id)
11516 rb_warn_deprecated(
"non-nil '%"PRIsVALUE
"'", NULL, rb_id2str(
id));
11523 if (!
NIL_P(*var)) {
11524 nil_setter_warning(
id);
11531 val = rb_fs_check(val);
11534 "value of %"PRIsVALUE
" must be String or Regexp",
11538 nil_setter_warning(
id);
11555 str_modifiable(str);
11558 int idx = rb_enc_to_index(encoding);
11565 rb_enc_associate_index(str, idx);
11589 if (STR_EMBED_P(str)) {
11590 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11595 str_replace_shared_without_enc(str2, str);
11597 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11627rb_str_valid_encoding_p(
VALUE str)
11647rb_str_is_ascii_only_p(
VALUE str)
11657 static const char ellipsis[] =
"...";
11658 const long ellipsislen =
sizeof(ellipsis) - 1;
11660 const long blen = RSTRING_LEN(str);
11661 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11662 VALUE estr, ret = 0;
11665 if (
len * rb_enc_mbminlen(enc) >= blen ||
11669 else if (
len <= ellipsislen ||
11671 if (rb_enc_asciicompat(enc)) {
11673 rb_enc_associate(ret, enc);
11680 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11685 rb_enc_from_encoding(enc), 0,
Qnil);
11698 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11704 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11723 if (enc == STR_ENC_GET(str)) {
11728 return enc_str_scrub(enc, str, repl, cr);
11736 const char *rep, *p, *e, *p1, *sp;
11742 rb_raise(rb_eArgError,
"both of block and replacement given");
11749 if (!
NIL_P(repl)) {
11750 repl = str_compat_and_valid(repl, enc);
11753 if (rb_enc_dummy_p(enc)) {
11756 encidx = rb_enc_to_index(enc);
11758#define DEFAULT_REPLACE_CHAR(str) do { \
11759 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11760 rep = replace; replen = (int)sizeof(replace); \
11763 slen = RSTRING_LEN(str);
11764 p = RSTRING_PTR(str);
11769 if (rb_enc_asciicompat(enc)) {
11775 else if (!
NIL_P(repl)) {
11776 rep = RSTRING_PTR(repl);
11777 replen = RSTRING_LEN(repl);
11780 else if (encidx == rb_utf8_encindex()) {
11781 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11785 DEFAULT_REPLACE_CHAR(
"?");
11790 p = search_nonascii(p, e);
11795 int ret = rb_enc_precise_mbclen(p, e, enc);
11814 if (e - p < clen) clen = e - p;
11821 for (; clen > 1; clen--) {
11822 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11833 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11834 str_mod_check(str, sp, slen);
11835 repl = str_compat_and_valid(repl, enc);
11842 p = search_nonascii(p, e);
11868 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11869 str_mod_check(str, sp, slen);
11870 repl = str_compat_and_valid(repl, enc);
11879 long mbminlen = rb_enc_mbminlen(enc);
11883 else if (!
NIL_P(repl)) {
11884 rep = RSTRING_PTR(repl);
11885 replen = RSTRING_LEN(repl);
11887 else if (encidx == ENCINDEX_UTF_16BE) {
11888 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11890 else if (encidx == ENCINDEX_UTF_16LE) {
11891 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11893 else if (encidx == ENCINDEX_UTF_32BE) {
11894 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11896 else if (encidx == ENCINDEX_UTF_32LE) {
11897 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11900 DEFAULT_REPLACE_CHAR(
"?");
11904 int ret = rb_enc_precise_mbclen(p, e, enc);
11917 if (e - p < clen) clen = e - p;
11918 if (clen <= mbminlen * 2) {
11923 for (; clen > mbminlen; clen-=mbminlen) {
11924 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11934 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11935 str_mod_check(str, sp, slen);
11936 repl = str_compat_and_valid(repl, enc);
11961 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11962 str_mod_check(str, sp, slen);
11963 repl = str_compat_and_valid(repl, enc);
12003str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
12011static ID id_normalize;
12012static ID id_normalized_p;
12013static VALUE mUnicodeNormalize;
12016unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12018 static int UnicodeNormalizeRequired = 0;
12021 if (!UnicodeNormalizeRequired) {
12022 rb_require(
"unicode_normalize/normalize.rb");
12023 UnicodeNormalizeRequired = 1;
12027 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12038rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12040 return unicode_normalize_common(argc, argv, str, id_normalize);
12054rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12056 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12083rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12085 return unicode_normalize_common(argc, argv, str, id_normalized_p);
/* Alias: every use of `sym_equal` expands to rb_obj_equal. */
12217#define sym_equal rb_obj_equal
12220sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12224 int c = rb_enc_precise_mbclen(s, send, enc);
12228 c = rb_enc_mbc_to_codepoint(s, send, enc);
12236rb_str_symname_p(
VALUE sym)
12241 rb_encoding *resenc = rb_default_internal_encoding();
12243 if (resenc == NULL) resenc = rb_default_external_encoding();
12244 enc = STR_ENC_GET(sym);
12245 ptr = RSTRING_PTR(sym);
12246 len = RSTRING_LEN(sym);
12247 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12255rb_str_quote_unprintable(
VALUE str)
12263 resenc = rb_default_internal_encoding();
12264 if (resenc == NULL) resenc = rb_default_external_encoding();
12265 enc = STR_ENC_GET(str);
12266 ptr = RSTRING_PTR(str);
12267 len = RSTRING_LEN(str);
12268 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12269 !sym_printable(ptr, ptr +
len, enc)) {
12270 return rb_str_escape(str);
12276rb_id_quote_unprintable(
ID id)
12278 VALUE str = rb_id2str(
id);
12279 if (!rb_str_symname_p(str)) {
12280 return rb_str_escape(str);
12298sym_inspect(
VALUE sym)
12305 if (!rb_str_symname_p(str)) {
12307 len = RSTRING_LEN(str);
12308 rb_str_resize(str,
len + 1);
12309 dest = RSTRING_PTR(str);
12310 memmove(dest + 1, dest,
len);
12314 VALUE orig_str = str;
12316 len = RSTRING_LEN(orig_str);
12317 str = rb_enc_str_new(0,
len + 1, enc);
12320 ptr = RSTRING_PTR(orig_str);
12321 dest = RSTRING_PTR(str);
12322 memcpy(dest + 1, ptr,
len);
12342rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12347 rb_raise(rb_eArgError,
"no receiver given");
12444 return rb_str_match(
rb_sym2str(sym), other);
12459sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12461 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12474sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12476 return rb_str_match_m_p(argc, argv, sym);
12494 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12505sym_length(
VALUE sym)
12519sym_empty(
VALUE sym)
12553sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12569sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12585sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12599sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12601 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12614sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12616 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12628sym_encoding(
VALUE sym)
12634string_for_symbol(
VALUE name)
12639 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12653 name = string_for_symbol(name);
12654 return rb_intern_str(name);
12663 name = string_for_symbol(name);
12687 return rb_fstring(str);
12693 struct RString fake_str = {RBASIC_INIT};
12694 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12706 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12707 rb_enc_autoload(enc);
12710 struct RString fake_str = {RBASIC_INIT};
12711 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12717 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12718 rb_enc_autoload(enc);
12721 struct RString fake_str = {RBASIC_INIT};
12722 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12733#if USE_YJIT || USE_ZJIT
12735rb_jit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12740 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12741 rb_str_buf_cat_byte(str, (
char) code);
12751fstring_set_class_i(
VALUE *str,
void *data)
12755 return ST_CONTINUE;
12763 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12930 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module into a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_usascii_str_new_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously been classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
union RString::@51::@52::@54 aux
Auxiliary info.
long len
Length of the string, not including terminating NUL character.
struct RString::@51::@53 embed
Embedded contents.
VALUE shared
Parent of the string.
union RString::@51 as
String's specific fields.
char * ptr
Pointer to the contents of the string.
struct RString::@51::@52 heap
Strings that use separated memory region for contents use this pattern.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.