14 #include "ruby/internal/config.h"
24 #include "debug_counter.h"
28 #include "internal/array.h"
29 #include "internal/compar.h"
30 #include "internal/compilers.h"
31 #include "internal/encoding.h"
32 #include "internal/error.h"
33 #include "internal/gc.h"
34 #include "internal/numeric.h"
35 #include "internal/object.h"
36 #include "internal/proc.h"
37 #include "internal/re.h"
38 #include "internal/sanitizers.h"
39 #include "internal/string.h"
40 #include "internal/transcode.h"
45 #include "ruby_assert.h"
48 #if defined HAVE_CRYPT_R
49 # if defined HAVE_CRYPT_H
52 #elif !defined HAVE_CRYPT
53 # include "missing/crypt.h"
54 # define HAVE_CRYPT_R 1
/*
 * Shorthands for entry `no` of the `beg` / `end` arrays reached through a
 * pointer named `regs`; that variable must be in scope at every expansion.
 */
57 #define BEG(no) (regs->beg[(no)])
58 #define END(no) (regs->end[(no)])
61 #undef rb_usascii_str_new
62 #undef rb_utf8_str_new
64 #undef rb_str_new_cstr
65 #undef rb_usascii_str_new_cstr
66 #undef rb_utf8_str_new_cstr
67 #undef rb_enc_str_new_cstr
68 #undef rb_external_str_new_cstr
69 #undef rb_locale_str_new_cstr
70 #undef rb_str_dup_frozen
71 #undef rb_str_buf_new_cstr
73 #undef rb_str_buf_cat2
75 #undef rb_str_cat_cstr
76 #undef rb_fstring_cstr
/* NOTE(review): presumably an upper bound on the byte length of one character -- confirm against its users. */
125 #define RUBY_MAX_CHAR_LEN 16
/* String-specific names for generic FL_USERn flag bits. */
/* Set by str_store_precomputed_hash() after it copies the hash into the bytes following the terminator. */
126 #define STR_PRECOMPUTED_HASH FL_USER4
/* Set by STR_SET_SHARED on the string that another string records in as.heap.aux.shared. */
127 #define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on a shared string whose RBASIC_CLASS() is 0. */
128 #define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(); also part of STR_UNMODIFIABLE_MASK. */
129 #define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); str_discard() frees the heap buffer only when this and STR_SHARED are both clear. */
130 #define STR_NOFREE FL_USER18
/* STR_SET_SHARED guards its work on this bit being clear. */
131 #define STR_FAKESTR FL_USER19
133 #define STR_SET_NOEMBED(str) do {\
134 FL_SET((str), STR_NOEMBED);\
135 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Clears the STR_NOEMBED, STR_SHARED and STR_NOFREE flags on `str` in one FL_UNSET call. */
137 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
139 #define STR_SET_LEN(str, n) do { \
140 RSTRING(str)->len = (n); \
144 str_encindex_fastpath(
int encindex)
148 case ENCINDEX_ASCII_8BIT:
150 case ENCINDEX_US_ASCII:
158 str_enc_fastpath(
VALUE str)
/*
 * Terminator length for `str`: 1 when str_enc_fastpath(str) holds, otherwise
 * rb_enc_mbminlen() of the encoding looked up from ENCODING_GET(str).
 * `str` is expanded twice, so pass an expression without side effects.
 */
163 #define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
164 #define TERM_FILL(ptr, termlen) do {\
165 char *const term_fill_ptr = (ptr);\
166 const int term_fill_len = (termlen);\
167 *term_fill_ptr = '\0';\
168 if (UNLIKELY(term_fill_len > 1))\
169 memset(term_fill_ptr, 0, term_fill_len);\
172 #define RESIZE_CAPA(str,capacity) do {\
173 const int termlen = TERM_LEN(str);\
174 RESIZE_CAPA_TERM(str,capacity,termlen);\
176 #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
177 if (STR_EMBED_P(str)) {\
178 if (str_embed_capa(str) < capacity + termlen) {\
179 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
180 const long tlen = RSTRING_LEN(str);\
181 memcpy(tmp, RSTRING_PTR(str), tlen);\
182 RSTRING(str)->as.heap.ptr = tmp;\
183 RSTRING(str)->len = tlen;\
184 STR_SET_NOEMBED(str);\
185 RSTRING(str)->as.heap.aux.capa = (capacity);\
189 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
190 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
191 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
192 RSTRING(str)->as.heap.aux.capa = (capacity);\
196 #define STR_SET_SHARED(str, shared_str) do { \
197 if (!FL_TEST(str, STR_FAKESTR)) { \
198 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
199 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
200 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
201 FL_SET((str), STR_SHARED); \
202 FL_SET((shared_str), STR_SHARED_ROOT); \
203 if (RBASIC_CLASS((shared_str)) == 0) \
204 FL_SET_RAW((shared_str), STR_BORROWED); \
/* The as.heap.ptr member of `str`; reads the heap layout without checking whether the string is embedded. */
208 #define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/*
 * as.heap.aux.capa plus TERM_LEN(str), as a size_t. This is the size handed
 * to SIZED_REALLOC_N and ruby_sized_xfree for the heap buffer.
 */
209 #define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias for get_encoding(str). */
212 #define STR_ENC_GET(str) get_encoding(str)
214 #if !defined SHARABLE_MIDDLE_SUBSTRING
215 # define SHARABLE_MIDDLE_SUBSTRING 0
217 #if !SHARABLE_MIDDLE_SUBSTRING
218 #define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
220 #define SHARABLE_SUBSTRING_P(beg, len, end) 1
225 str_embed_capa(
VALUE str)
227 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
231 rb_str_reembeddable_p(
VALUE str)
233 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
237 rb_str_embed_size(
long capa)
243 rb_str_size_as_embedded(
VALUE str)
246 if (STR_EMBED_P(str)) {
247 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
251 else if (rb_str_reembeddable_p(str)) {
252 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
255 real_size =
sizeof(
struct RString);
259 real_size +=
sizeof(st_index_t);
266 STR_EMBEDDABLE_P(
long len,
long termlen)
268 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
273 static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
274 static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
276 static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
277 static inline void str_modifiable(
VALUE str);
282 str_make_independent(
VALUE str)
285 int termlen = TERM_LEN(str);
286 str_make_independent_expand((str),
len, 0L, termlen);
289 static inline int str_dependent_p(
VALUE str);
292 rb_str_make_independent(
VALUE str)
294 if (str_dependent_p(str)) {
295 str_make_independent(str);
300 rb_str_make_embedded(
VALUE str)
305 char *buf =
RSTRING(str)->as.heap.ptr;
309 STR_SET_LEN(str,
len);
316 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
320 rb_debug_rstring_null_ptr(
const char *func)
322 fprintf(stderr,
"%s is returning NULL!! "
323 "SIGSEGV is highly expected to follow immediately.\n"
324 "If you could reproduce, attach your debugger here, "
325 "and look at the passed string.\n",
330 static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
333 get_encoding(
VALUE str)
339 mustnot_broken(
VALUE str)
341 if (is_broken_string(str)) {
347 mustnot_wchar(
VALUE str)
357 static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
359 #if SIZEOF_LONG == SIZEOF_VOIDP
360 #define PRECOMPUTED_FAKESTR_HASH 1
364 #ifdef PRECOMPUTED_FAKESTR_HASH
366 fstring_hash(
VALUE str)
370 return (st_index_t)
RSTRING(str)->as.heap.aux.capa;
377 #define fstring_hash rb_str_hash
/* True when `str` does not have FL_EXIVAR set and its class is exactly rb_cString. */
385 #define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
387 static inline st_index_t
388 str_do_hash(
VALUE str)
392 if (e && !is_ascii_string(str)) {
399 str_store_precomputed_hash(
VALUE str, st_index_t hash)
405 size_t used_bytes = (
RSTRING_LEN(str) + TERM_LEN(str));
406 size_t free_bytes = str_embed_capa(str) - used_bytes;
410 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
412 FL_SET(str, STR_PRECOMPUTED_HASH);
420 bool force_precompute_hash;
424 fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
433 if (rb_objspace_garbage_object_p(str)) {
449 long capa =
len +
sizeof(st_index_t);
450 int term_len = TERM_LEN(str);
452 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
458 str_store_precomputed_hash(new_str, fstring_hash(str));
463 #ifdef PRECOMPUTED_FAKESTR_HASH
465 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
479 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
482 if (STR_SHARED_P(str)) {
484 str_make_independent(str);
487 if (!BARE_STRING_P(str)) {
493 RBASIC(str)->flags |= RSTRING_FSTR;
495 *key = *value = arg->fstr = str;
501 rb_fstring(
VALUE str)
508 if (
FL_TEST(str, RSTRING_FSTR))
511 bare = BARE_STRING_P(str);
513 if (STR_EMBED_P(str)) {
518 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
527 fstr = register_fstring(str,
false,
false);
530 str_replace_shared_without_enc(str, fstr);
538 register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
542 .force_precompute_hash = force_precompute_hash
545 #if SIZEOF_VOIDP == SIZEOF_LONG
549 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
555 st_table *frozen_strings = rb_vm_fstring_table();
558 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
559 }
while (UNDEF_P(args.fstr));
572 setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
585 fake_str->
as.
heap.ptr = (
char *)name;
587 return (
VALUE)fake_str;
605 rb_fstring_new(
const char *
ptr,
long len)
608 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
615 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
619 rb_fstring_cstr(
const char *
ptr)
621 return rb_fstring_new(
ptr, strlen(
ptr));
625 fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
635 const char *aptr, *bptr;
638 return (alen != blen ||
640 memcmp(aptr, bptr, alen) != 0);
644 single_byte_optimizable(
VALUE str)
648 case ENCINDEX_ASCII_8BIT:
649 case ENCINDEX_US_ASCII:
671 static inline const char *
672 search_nonascii(
const char *p,
const char *e)
674 const uintptr_t *s, *t;
676 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
677 # if SIZEOF_UINTPTR_T == 8
678 # define NONASCII_MASK UINT64_C(0x8080808080808080)
679 # elif SIZEOF_UINTPTR_T == 4
680 # define NONASCII_MASK UINT32_C(0x80808080)
682 # error "don't know what to do."
685 # if SIZEOF_UINTPTR_T == 8
686 # define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
687 # elif SIZEOF_UINTPTR_T == 4
688 # define NONASCII_MASK 0x80808080UL
690 # error "don't know what to do."
694 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
695 #if !UNALIGNED_WORD_ACCESS
696 if ((uintptr_t)p % SIZEOF_VOIDP) {
697 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
702 case 7:
if (p[-7]&0x80)
return p-7;
703 case 6:
if (p[-6]&0x80)
return p-6;
704 case 5:
if (p[-5]&0x80)
return p-5;
705 case 4:
if (p[-4]&0x80)
return p-4;
707 case 3:
if (p[-3]&0x80)
return p-3;
708 case 2:
if (p[-2]&0x80)
return p-2;
709 case 1:
if (p[-1]&0x80)
return p-1;
714 #if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
715 #define aligned_ptr(value) \
716 __builtin_assume_aligned((value), sizeof(uintptr_t))
718 #define aligned_ptr(value) (uintptr_t *)(value)
721 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
724 if (*s & NONASCII_MASK) {
725 #ifdef WORDS_BIGENDIAN
726 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
728 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
738 case 7:
if (e[-7]&0x80)
return e-7;
739 case 6:
if (e[-6]&0x80)
return e-6;
740 case 5:
if (e[-5]&0x80)
return e-5;
741 case 4:
if (e[-4]&0x80)
return e-4;
743 case 3:
if (e[-3]&0x80)
return e-3;
744 case 2:
if (e[-2]&0x80)
return e-2;
745 case 1:
if (e[-1]&0x80)
return e-1;
753 const char *e = p +
len;
757 p = search_nonascii(p, e);
762 p = search_nonascii(p, e);
769 p = search_nonascii(p, e);
794 p = search_nonascii(p, e);
799 p = search_nonascii(p, e);
812 p = search_nonascii(p, e);
851 rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
856 str_enc_copy(dest, src);
881 rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
883 str_enc_copy(dest, src);
896 return enc_coderange_scan(str, enc);
905 cr = enc_coderange_scan(str, get_encoding(str));
912 rb_enc_str_asciicompat(
VALUE str)
915 return str_encindex_fastpath(encindex) ||
rb_enc_asciicompat(rb_enc_get_from_index(encindex));
923 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
932 str_mod_check(
VALUE s,
const char *p,
long len)
940 str_capacity(
VALUE str,
const int termlen)
942 if (STR_EMBED_P(str)) {
943 return str_embed_capa(str) - termlen;
945 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
949 return RSTRING(str)->as.heap.aux.capa;
956 return str_capacity(str, TERM_LEN(str));
960 must_not_null(
const char *
ptr)
968 str_alloc_embed(
VALUE klass,
size_t capa)
970 size_t size = rb_str_embed_size(
capa);
974 NEWOBJ_OF(str,
struct RString, klass,
981 str_alloc_heap(
VALUE klass)
983 NEWOBJ_OF(str,
struct RString, klass,
990 empty_str_alloc(
VALUE klass)
992 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
993 VALUE str = str_alloc_embed(klass, 0);
994 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1012 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1016 if (STR_EMBEDDABLE_P(
len, termlen)) {
1017 str = str_alloc_embed(klass,
len + termlen);
1023 str = str_alloc_heap(klass);
1029 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1032 rb_enc_raw_set(str, enc);
1038 STR_SET_LEN(str,
len);
1081 __msan_unpoison_string(
ptr);
1108 str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1120 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1121 str = str_alloc_heap(klass);
1125 RBASIC(str)->flags |= STR_NOFREE;
1155 static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1157 int ecflags,
VALUE ecopts);
1164 return is_ascii_string(str);
1175 if (!to)
return str;
1177 if (from == to)
return str;
1179 rb_is_ascii8bit_enc(to)) {
1180 if (STR_ENC_GET(str) != to) {
1189 from, to, ecflags, ecopts);
1190 if (
NIL_P(newstr)) {
1198 rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1204 if (ofs < -olen || olen < ofs)
1206 if (ofs < 0) ofs += olen;
1208 STR_SET_LEN(newstr, ofs);
1213 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1221 STR_SET_LEN(str, 0);
1228 str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1230 int ecflags,
VALUE ecopts)
1235 VALUE econv_wrapper;
1236 const unsigned char *start, *sp;
1237 unsigned char *dest, *dp;
1238 size_t converted_output = (size_t)ofs;
1243 RBASIC_CLEAR_CLASS(econv_wrapper);
1245 if (!ec)
return Qnil;
1248 sp = (
unsigned char*)
ptr;
1250 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1251 (dp = dest + converted_output),
1255 size_t converted_input = sp - start;
1256 size_t rest =
len - converted_input;
1257 converted_output = dp - dest;
1259 if (converted_input && converted_output &&
1260 rest < (LONG_MAX / converted_output)) {
1261 rest = (rest * converted_output) / converted_input;
1266 olen += rest < 2 ? 2 : rest;
1308 if (!ienc || eenc == ienc) {
1322 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1323 rb_str_initialize(str,
ptr,
len, eenc);
1333 !is_ascii_string(str)) {
1396 str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1398 const int termlen = TERM_LEN(str);
1403 if (str_embed_capa(str2) >=
len + termlen) {
1404 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1405 STR_SET_EMBED(str2);
1407 TERM_FILL(ptr2+
len, termlen);
1411 if (STR_SHARED_P(str)) {
1412 root =
RSTRING(str)->as.heap.aux.shared;
1421 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1423 rb_fatal(
"about to free a possible shared root");
1425 char *ptr2 = STR_HEAP_PTR(str2);
1427 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1430 FL_SET(str2, STR_NOEMBED);
1432 STR_SET_SHARED(str2, root);
1435 STR_SET_LEN(str2,
len);
1443 str_replace_shared_without_enc(str2, str);
1444 rb_enc_cr_str_exact_copy(str2, str);
1451 return str_replace_shared(str_alloc_heap(klass), str);
1468 rb_str_new_frozen_String(
VALUE orig)
1475 rb_str_tmp_frozen_acquire(
VALUE orig)
1478 return str_new_frozen_buffer(0, orig, FALSE);
1482 rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1484 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1485 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1487 VALUE str = str_alloc_heap(0);
1490 FL_SET(str, STR_SHARED_ROOT);
1492 size_t capa = str_capacity(orig, TERM_LEN(orig));
1498 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1499 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1506 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1507 RBASIC(orig)->flags &= ~STR_NOFREE;
1508 STR_SET_SHARED(orig, str);
1518 rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1523 if (STR_EMBED_P(tmp)) {
1536 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1537 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1542 STR_SET_LEN(tmp, 0);
1550 return str_new_frozen_buffer(klass, orig, TRUE);
1554 heap_str_make_shared(
VALUE klass,
VALUE orig)
1559 VALUE str = str_alloc_heap(klass);
1562 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1563 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1564 RBASIC(orig)->flags &= ~STR_NOFREE;
1565 STR_SET_SHARED(orig, str);
1572 str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1578 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1580 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1594 if ((ofs > 0) || (rest > 0) ||
1597 str = str_new_shared(klass,
shared);
1599 RSTRING(str)->as.heap.ptr += ofs;
1600 STR_SET_LEN(str,
RSTRING_LEN(str) - (ofs + rest));
1608 else if (STR_EMBEDDABLE_P(
RSTRING_LEN(orig), TERM_LEN(orig))) {
1609 str = str_alloc_embed(klass,
RSTRING_LEN(orig) + TERM_LEN(orig));
1617 str = heap_str_make_shared(klass, orig);
1621 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1633 str_new_empty_String(
VALUE str)
/* Lower bound for a requested buffer capacity: rb_str_init() raises any smaller `capa` to this value. */
1640 #define STR_BUF_MIN_SIZE 63
1645 if (STR_EMBEDDABLE_P(
capa, 1)) {
1653 RSTRING(str)->as.heap.ptr[0] =
'\0';
1673 return str_new(0, 0,
len);
1679 if (STR_EMBED_P(str)) {
1680 RB_DEBUG_COUNTER_INC(obj_str_embed);
1682 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1683 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1684 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1687 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1688 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1693 rb_str_memsize(
VALUE str)
1695 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1696 return STR_HEAP_SIZE(str);
1706 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1709 static inline void str_discard(
VALUE str);
1710 static void str_shared_replace(
VALUE str,
VALUE str2);
1715 if (str != str2) str_shared_replace(str, str2);
1726 enc = STR_ENC_GET(str2);
1733 if (str_embed_capa(str) >=
RSTRING_LEN(str2) + termlen) {
1740 if (STR_EMBED_P(str2)) {
1745 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1746 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1747 RSTRING(str2)->as.heap.ptr = new_ptr;
1748 STR_SET_LEN(str2,
len);
1750 STR_SET_NOEMBED(str2);
1753 STR_SET_NOEMBED(str);
1757 if (
FL_TEST(str2, STR_SHARED)) {
1759 STR_SET_SHARED(str,
shared);
1762 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1766 STR_SET_EMBED(str2);
1768 STR_SET_LEN(str2, 0);
1783 return rb_obj_as_string_result(str, obj);
1787 rb_obj_as_string_result(
VALUE str,
VALUE obj)
1800 if (STR_SHARED_P(str2)) {
1803 STR_SET_NOEMBED(str);
1804 STR_SET_LEN(str,
len);
1806 STR_SET_SHARED(str,
shared);
1807 rb_enc_cr_str_exact_copy(str, str2);
1810 str_replace_shared(str, str2);
1819 size_t size = rb_str_embed_size(
capa);
1823 NEWOBJ_OF(str,
struct RString, klass,
1832 NEWOBJ_OF(str,
struct RString, klass,
1863 return str_duplicate_setup_encoding(str, dup, flags);
1872 root =
RSTRING(str)->as.heap.aux.shared;
1874 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1875 root = str = str_new_frozen(klass, str);
1882 FL_SET(root, STR_SHARED_ROOT);
1884 flags |= RSTRING_NOEMBED | STR_SHARED;
1887 return str_duplicate_setup_encoding(str, dup, flags);
1893 if (STR_EMBED_P(str)) {
1894 return str_duplicate_setup_embed(klass, str, dup);
1897 return str_duplicate_setup_heap(klass, str, dup);
1905 if (STR_EMBED_P(str)) {
1906 dup = str_alloc_embed(klass,
RSTRING_LEN(str) + TERM_LEN(str));
1909 dup = str_alloc_heap(klass);
1912 return str_duplicate_setup(klass, str, dup);
1923 rb_str_dup_m(
VALUE str)
1925 if (LIKELY(BARE_STRING_P(str))) {
1936 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1943 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1947 new_str = ec_str_alloc_embed(ec, klass,
RSTRING_LEN(str) + TERM_LEN(str));
1948 str_duplicate_setup_embed(klass, str, new_str);
1951 new_str = ec_str_alloc_heap(ec, klass);
1952 str_duplicate_setup_heap(klass, str, new_str);
1961 rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
1980 rb_str_init(
int argc,
VALUE *argv,
VALUE str)
1982 static ID keyword_ids[2];
1983 VALUE orig, opt, venc, vcapa;
1988 if (!keyword_ids[0]) {
1989 keyword_ids[0] = rb_id_encoding();
1990 CONST_ID(keyword_ids[1],
"capacity");
1998 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2001 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2006 if (
capa < STR_BUF_MIN_SIZE) {
2007 capa = STR_BUF_MIN_SIZE;
2015 if (orig == str) n = 0;
2017 str_modifiable(str);
2018 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2020 const size_t size = (size_t)
capa + termlen;
2022 const size_t osize =
RSTRING_LEN(str) + TERM_LEN(str);
2023 char *new_ptr =
ALLOC_N(
char, size);
2024 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2025 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2027 RSTRING(str)->as.heap.ptr = new_ptr;
2029 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2030 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2031 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2033 STR_SET_LEN(str,
len);
2037 rb_enc_cr_str_exact_copy(str, orig);
2039 FL_SET(str, STR_NOEMBED);
2058 rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2064 static ID keyword_ids[2];
2074 keyword_ids[0] = rb_id_encoding();
2075 CONST_ID(keyword_ids[1],
"capacity");
2077 encoding = kwargs[0];
2078 capacity = kwargs[1];
2087 if (UNDEF_P(encoding)) {
2093 if (!UNDEF_P(encoding)) {
2098 if (UNDEF_P(capacity)) {
2100 VALUE empty_str = str_new(klass,
"", 0);
2106 VALUE copy = str_duplicate(klass, orig);
2120 if (orig_capa >
capa) {
2125 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2126 STR_SET_LEN(str, 0);
2136 #ifdef NONASCII_MASK
/* Nonzero unless the top two bits of `c` are 10 (a byte of the form 10xxxxxx). */
2137 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2152 static inline uintptr_t
2153 count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2158 d = (d>>6) | (~d>>7);
2159 d &= NONASCII_MASK >> 7;
2162 #if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2164 return rb_popcount_intptr(d);
2168 # if SIZEOF_VOIDP == 8
2177 enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2183 long diff = (long)(e - p);
2186 #ifdef NONASCII_MASK
2189 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2190 const uintptr_t *s, *t;
2191 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2192 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2193 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2194 while (p < (
const char *)s) {
2195 if (is_utf8_lead_byte(*p))
len++;
2199 len += count_utf8_lead_bytes_with_word(s);
2202 p = (
const char *)s;
2205 if (is_utf8_lead_byte(*p))
len++;
2216 q = search_nonascii(p, e);
2229 q = search_nonascii(p, e);
2242 for (c=0; p<e; c++) {
2258 rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2266 long diff = (long)(e - p);
2273 q = search_nonascii(p, e);
2296 for (c=0; p<e; c++) {
2321 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2322 if (!enc) enc = STR_ENC_GET(str);
2328 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2333 return enc_strlen(p, e, enc, cr);
2340 return str_strlen(str, NULL);
2354 return LONG2NUM(str_strlen(str, NULL));
2366 rb_str_bytesize(
VALUE str)
2384 rb_str_empty(
VALUE str)
2404 char *ptr1, *ptr2, *ptr3;
2409 enc = rb_enc_check_str(str1, str2);
2413 if (len1 > LONG_MAX - len2) {
2416 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2418 memcpy(ptr3, ptr1, len1);
2419 memcpy(ptr3+len1, ptr2, len2);
2420 TERM_FILL(&ptr3[len1+len2], termlen);
2436 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2445 else if (enc2 < 0) {
2448 else if (enc1 != enc2) {
2451 else if (len1 > LONG_MAX - len2) {
2492 if (STR_EMBEDDABLE_P(
len, 1)) {
2501 STR_SET_LEN(str2,
len);
2510 termlen = TERM_LEN(str);
2516 while (n <=
len/2) {
2517 memcpy(ptr2 + n, ptr2, n);
2520 memcpy(ptr2 + n, ptr2,
len-n);
2522 STR_SET_LEN(str2,
len);
2523 TERM_FILL(&ptr2[
len], termlen);
2524 rb_enc_cr_str_copy_for_substr(str2, str);
2559 rb_check_lockedtmp(
VALUE str)
2561 if (
FL_TEST(str, STR_TMPLOCK)) {
/*
 * Union of the frozen, temp-locked and chilled flag bits -- the same three
 * conditions str_modifiable() checks one by one.
 */
2568 #define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2570 str_modifiable(
VALUE str)
2573 if (CHILLED_STRING_P(str)) {
2574 CHILLED_STRING_MUTATED(str);
2576 rb_check_lockedtmp(str);
2577 rb_check_frozen(str);
2582 str_dependent_p(
VALUE str)
2584 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/*
 * STR_UNMODIFIABLE_MASK plus STR_SHARED and STR_NOFREE, the two bits
 * str_dependent_p() tests on a non-embedded string.
 */
2594 #define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2596 str_independent(
VALUE str)
2599 str_modifiable(str);
2600 return !str_dependent_p(str);
2606 str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2614 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2619 STR_SET_LEN(str,
len);
2626 memcpy(
ptr, oldptr,
len);
2628 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2631 STR_SET_NOEMBED(str);
2632 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2633 TERM_FILL(
ptr +
len, termlen);
2635 STR_SET_LEN(str,
len);
2642 if (!str_independent(str))
2643 str_make_independent(str);
2650 int termlen = TERM_LEN(str);
2656 if (expand >= LONG_MAX -
len) {
2660 if (!str_independent(str)) {
2661 str_make_independent_expand(str,
len, expand, termlen);
2663 else if (expand > 0) {
2664 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2671 str_modify_keep_cr(
VALUE str)
2673 if (!str_independent(str))
2674 str_make_independent(str);
2681 str_discard(
VALUE str)
2683 str_modifiable(str);
2684 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2685 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2686 RSTRING(str)->as.heap.ptr = 0;
2687 STR_SET_LEN(str, 0);
2700 if (
RB_LIKELY(str_encindex_fastpath(encindex))) {
2729 zero_filled(
const char *s,
int n)
2731 for (; n > 0; --n) {
2738 str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2740 const char *e = s +
len;
2743 if (zero_filled(s, minlen))
return s;
2749 str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2754 if (str_dependent_p(str)) {
2755 if (!zero_filled(s +
len, termlen))
2756 str_make_independent_expand(str,
len, 0L, termlen);
2759 TERM_FILL(s +
len, termlen);
2766 rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2768 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2773 rb_check_lockedtmp(str);
2774 str_make_independent_expand(str,
len, 0L, termlen);
2776 else if (str_dependent_p(str)) {
2777 if (termlen > oldtermlen)
2778 str_make_independent_expand(str,
len, 0L, termlen);
2781 if (!STR_EMBED_P(str)) {
2786 if (termlen > oldtermlen) {
2795 str_null_check(
VALUE str,
int *w)
2804 if (str_null_char(s,
len, minlen, enc)) {
2807 return str_fill_term(str, s,
len, minlen);
2810 if (!s || memchr(s, 0,
len)) {
2814 s = str_fill_term(str, s,
len, minlen);
2820 rb_str_to_cstr(
VALUE str)
2823 return str_null_check(str, &w);
2831 char *s = str_null_check(str, &w);
2842 rb_str_fill_terminator(
VALUE str,
const int newminlen)
2846 return str_fill_term(str, s,
len, newminlen);
2852 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2876 str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2886 const char *p2, *e2;
2889 while (p < e && 0 < nth) {
2896 p2 = search_nonascii(p, e2);
2916 while (p < e && nth--) {
2928 return str_nth_len(p, e, &nth, enc);
2932 str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2937 p = str_nth_len(p, e, &nth, enc);
2946 str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2948 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2949 if (!pp)
return e - p;
2957 STR_ENC_GET(str), single_byte_optimizable(str));
2960 #ifdef NONASCII_MASK
2962 str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2965 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2966 const uintptr_t *s, *t;
2967 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2968 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2969 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2970 while (p < (
const char *)s) {
2971 if (is_utf8_lead_byte(*p)) nth--;
2975 nth -= count_utf8_lead_bytes_with_word(s);
2977 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2981 if (is_utf8_lead_byte(*p)) {
2982 if (nth == 0)
break;
2992 str_utf8_offset(
const char *p,
const char *e,
long nth)
2994 const char *pp = str_utf8_nth(p, e, &nth);
3003 if (single_byte_optimizable(str) || pos < 0)
3007 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3012 str_subseq(
VALUE str,
long beg,
long len)
3020 const int termlen = TERM_LEN(str);
3028 if (str_embed_capa(str2) >=
len + termlen) {
3029 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3030 STR_SET_EMBED(str2);
3032 TERM_FILL(ptr2+
len, termlen);
3034 STR_SET_LEN(str2,
len);
3038 str_replace_shared(str2, str);
3041 RSTRING(str2)->as.heap.ptr += beg;
3043 STR_SET_LEN(str2,
len);
3053 VALUE str2 = str_subseq(str, beg,
len);
3054 rb_enc_cr_str_copy_for_substr(str2, str);
3067 if (
len < 0)
return 0;
3068 if (beg < 0 && -beg < 0)
return 0;
3072 if (single_byte_optimizable(str)) {
3073 if (beg > blen)
return 0;
3076 if (beg < 0)
return 0;
3078 if (
len > blen - beg)
3080 if (
len < 0)
return 0;
3085 if (
len > -beg)
len = -beg;
3098 slen = str_strlen(str, enc);
3100 if (beg < 0)
return 0;
3102 if (
len == 0)
goto end;
3105 else if (beg > 0 && beg > blen) {
3109 if (beg > str_strlen(str, enc))
return 0;
3112 #ifdef NONASCII_MASK
3115 p = str_utf8_nth(s, e, &beg);
3116 if (beg > 0)
return 0;
3117 len = str_utf8_offset(p, e,
len);
3123 p = s + beg * char_sz;
3127 else if (
len * char_sz > e - p)
3132 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3133 if (beg > 0)
return 0;
3137 len = str_offset(p, e,
len, enc, 0);
3145 static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3150 return str_substr(str, beg,
len, TRUE);
3160 str_substr(
VALUE str,
long beg,
long len,
int empty)
3164 if (!p)
return Qnil;
3165 if (!
len && !empty)
return Qnil;
3169 VALUE str2 = str_subseq(str, beg,
len);
3170 rb_enc_cr_str_copy_for_substr(str2, str);
3178 if (CHILLED_STRING_P(str)) {
3196 str_uplus(
VALUE str)
3198 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3228 str_uminus(
VALUE str)
3233 return rb_fstring(str);
/* Re-establishes rb_str_dup_frozen (which is #undef'd earlier in this file) as an alias for rb_str_new_frozen. */
3237 #define rb_str_dup_frozen rb_str_new_frozen
3242 if (
FL_TEST(str, STR_TMPLOCK)) {
3245 FL_SET(str, STR_TMPLOCK);
3252 if (!
FL_TEST(str, STR_TMPLOCK)) {
3270 const int termlen = TERM_LEN(str);
3272 str_modifiable(str);
3273 if (STR_SHARED_P(str)) {
3276 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3310 STR_SET_LEN(str,
len);
3321 int independent = str_independent(str);
3323 const int termlen = TERM_LEN(str);
3325 if (slen >
len || (termlen != 1 && slen <
len)) {
3331 if (STR_EMBED_P(str)) {
3332 if (
len == slen)
return str;
3333 if (str_embed_capa(str) >=
len + termlen) {
3334 STR_SET_LEN(str,
len);
3338 str_make_independent_expand(str, slen,
len - slen, termlen);
3340 else if (str_embed_capa(str) >=
len + termlen) {
3341 char *
ptr = STR_HEAP_PTR(str);
3343 if (slen >
len) slen =
len;
3346 STR_SET_LEN(str,
len);
3350 else if (!independent) {
3351 if (
len == slen)
return str;
3352 str_make_independent_expand(str, slen,
len - slen, termlen);
3356 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3357 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3360 else if (
len == slen)
return str;
3361 STR_SET_LEN(str,
len);
3368 str_ensure_available_capa(
VALUE str,
long len)
3370 str_modify_keep_cr(str);
3372 const int termlen = TERM_LEN(str);
3379 long total = olen +
len;
3380 long capa = str_capacity(str, termlen);
3383 if (total >= LONG_MAX / 2) {
3386 while (total >
capa) {
3389 RESIZE_CAPA_TERM(str,
capa, termlen);
3394 str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3397 str_modify_keep_cr(str);
3402 if (
len == 0)
return 0;
3404 long total, olen,
off = -1;
3406 const int termlen = TERM_LEN(str);
3409 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3413 long capa = str_capacity(str, termlen);
3415 if (olen > LONG_MAX -
len) {
3420 if (total >= LONG_MAX / 2) {
3423 while (total >
capa) {
3426 RESIZE_CAPA_TERM(str,
capa, termlen);
3432 memcpy(sptr + olen,
ptr,
len);
3433 STR_SET_LEN(str, total);
3434 TERM_FILL(sptr + total, termlen);
/* str_buf_cat4() with keep_cr fixed to false. */
3439 #define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
/*
 * Same as str_buf_cat, with the length taken from rb_strlen_lit(ptr).
 * NOTE(review): presumably `ptr` must be a string literal for rb_strlen_lit -- confirm.
 */
3440 #define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3445 if (
len == 0)
return str;
3449 return str_buf_cat(str,
ptr,
len);
3460 rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3465 if (UNLIKELY(!str_independent(str))) {
3466 str_make_independent(str);
3469 long string_length = -1;
3470 const int null_terminator_length = 1;
3475 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3479 long string_capacity = str_capacity(str, null_terminator_length);
3485 if (LIKELY(string_capacity >= string_length + 1)) {
3487 sptr[string_length] = byte;
3488 STR_SET_LEN(str, string_length + 1);
3489 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3493 str_buf_cat(str, (
char *)&
byte, 1);
3520 rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3521 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3530 if (str_encindex == ptr_encindex) {
3550 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3559 *ptr_cr_ret = ptr_cr;
3561 if (str_encindex != ptr_encindex &&
3570 res_encindex = str_encindex;
3575 res_encindex = str_encindex;
3579 res_encindex = ptr_encindex;
3584 res_encindex = str_encindex;
3591 res_encindex = str_encindex;
3599 str_buf_cat(str,
ptr,
len);
3605 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3612 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3623 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3629 unsigned int c = (
unsigned char)*
ptr;
3632 rb_enc_cr_str_buf_cat(str, buf,
len,
3645 if (str_enc_fastpath(str)) {
3682 rb_str_concat_literals(
size_t num,
const VALUE *strary)
3686 unsigned long len = 1;
3693 str_enc_copy_direct(str, strary[0]);
3695 for (i = s; i < num; ++i) {
3696 const VALUE v = strary[i];
3700 if (encidx != ENCINDEX_US_ASCII) {
3727 rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3729 str_modifiable(str);
3734 else if (argc > 1) {
3738 for (i = 0; i < argc; i++) {
3771 rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3773 long needed_capacity = 0;
3777 for (
int index = 0; index < argc; index++) {
3778 VALUE obj = argv[index];
3791 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3798 str_ensure_available_capa(str, needed_capacity);
3801 for (
int index = 0; index < argc; index++) {
3802 VALUE obj = argv[index];
3807 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3808 char byte = (char)(
NUM2INT(obj) & 0xFF);
3822 rb_bug(
"append_as_bytes arguments should have been validated");
3826 STR_SET_LEN(str,
RSTRING_LEN(str) + needed_capacity);
3827 TERM_FILL(sptr, TERM_LEN(str));
3832 for (
int index = 0; index < argc; index++) {
3833 VALUE obj = argv[index];
3850 rb_bug(
"append_as_bytes arguments should have been validated");
3924 if (rb_num_to_uint(str2, &code) == 0) {
3937 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3940 rb_str_buf_cat_byte(str1, (
unsigned char)code);
3949 case ONIGERR_INVALID_CODE_POINT_VALUE:
3952 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3976 rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3980 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3985 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3986 return ENCINDEX_ASCII_8BIT;
4009 rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4011 str_modifiable(str);
4016 else if (argc > 1) {
4020 for (i = 0; i < argc; i++) {
4033 st_index_t precomputed_hash;
4034 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4036 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4037 return precomputed_hash;
4040 return str_do_hash(str);
4047 const char *ptr1, *ptr2;
4050 return (len1 != len2 ||
4052 memcmp(ptr1, ptr2, len1) != 0);
4066 rb_str_hash_m(
VALUE str)
/*
 * Yields the smaller of a and b (a when the two compare equal).
 * Each argument can be evaluated twice, so do not pass expressions with
 * side effects.
 */
4072 #define lesser(a,b) (((a)>(b))?(b):(a))
4084 if (idx1 == idx2)
return TRUE;
4103 const char *ptr1, *ptr2;
4106 if (str1 == str2)
return 0;
4109 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4118 if (len1 > len2)
return 1;
4121 if (retval > 0)
return 1;
4148 if (str1 == str2)
return Qtrue;
4155 return rb_str_eql_internal(str1, str2);
4179 if (str1 == str2)
return Qtrue;
4181 return rb_str_eql_internal(str1, str2);
4212 return rb_invcmp(str1, str2);
4254 return str_casecmp(str1, s);
4262 const char *p1, *p1end, *p2, *p2end;
4271 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4272 while (p1 < p1end && p2 < p2end) {
4274 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4275 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4277 return INT2FIX(c1 < c2 ? -1 : 1);
4284 while (p1 < p1end && p2 < p2end) {
4288 if (0 <= c1 && 0 <= c2) {
4292 return INT2FIX(c1 < c2 ? -1 : 1);
4298 len = l1 < l2 ? l1 : l2;
4299 r = memcmp(p1, p2,
len);
4301 return INT2FIX(r < 0 ? -1 : 1);
4303 return INT2FIX(l1 < l2 ? -1 : 1);
4344 return str_casecmp_p(str1, s);
4351 VALUE folded_str1, folded_str2;
4352 VALUE fold_opt = sym_fold;
4359 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4360 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4362 return rb_str_eql(folded_str1, folded_str2);
4366 strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4367 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4369 const char *search_start = str_ptr;
4370 long pos, search_len = str_len - offset;
4374 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4375 if (pos < 0)
return pos;
4377 if (t == search_start + pos)
break;
4378 search_len -= t - search_start;
4379 if (search_len <= 0)
return -1;
4380 offset += t - search_start;
4383 return pos + offset;
/*
 * Convenience wrappers around rb_strseq_index(); they differ only in the
 * in_byte flag passed as the last argument.
 * With in_byte == 0 the lengths compared against offset come from
 * str_strlen() and offset is converted with str_offset(); with in_byte == 1
 * the byte lengths are used and that conversion is skipped.
 */
4387 #define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4388 #define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4391 rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4393 const char *str_ptr, *str_ptr_end, *sub_ptr;
4394 long str_len, sub_len;
4398 if (is_broken_string(sub))
return -1;
4406 if (str_len < sub_len)
return -1;
4409 long str_len_char, sub_len_char;
4410 int single_byte = single_byte_optimizable(str);
4411 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4412 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4414 offset += str_len_char;
4415 if (offset < 0)
return -1;
4417 if (str_len_char - offset < sub_len_char)
return -1;
4418 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4421 if (sub_len == 0)
return offset;
4424 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4438 rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4445 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4446 long slen = str_strlen(str, enc);
4448 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4461 enc, single_byte_optimizable(str));
4472 pos = rb_str_index(str, sub, pos);
4486 str_ensure_byte_pos(
VALUE str,
long pos)
4488 if (!single_byte_optimizable(str)) {
4491 const char *p = s + pos;
4492 if (!at_char_boundary(s, p, e,
rb_enc_get(str))) {
4494 "offset %ld does not land on character boundary", pos);
4541 rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4547 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4550 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4561 str_ensure_byte_pos(str, pos);
4573 pos = rb_str_byteindex(str, sub, pos);
4574 if (pos >= 0)
return LONG2NUM(pos);
4579 #ifndef HAVE_MEMRCHR
4581 memrchr(
const char *search_str,
int chr,
long search_len)
4583 const char *
ptr = search_str + search_len;
4584 while (
ptr > search_str) {
4585 if ((
unsigned char)*(--
ptr) == chr)
return (
void *)
ptr;
4595 char *hit, *adjusted;
4597 long slen, searchlen;
4602 if (slen == 0)
return s - sbeg;
4606 searchlen = s - sbeg + 1;
4608 if (memcmp(s, t, slen) == 0) {
4613 hit = memrchr(sbeg, c, searchlen);
4616 if (hit != adjusted) {
4617 searchlen = adjusted - sbeg;
4620 if (memcmp(hit, t, slen) == 0)
4622 searchlen = adjusted - sbeg;
4623 }
while (searchlen > 0);
4630 rb_str_rindex(
VALUE str,
VALUE sub,
long pos)
4638 if (is_broken_string(sub))
return -1;
4639 singlebyte = single_byte_optimizable(str);
4641 slen = str_strlen(sub, enc);
4644 if (
len < slen)
return -1;
4645 if (
len - pos < slen) pos =
len - slen;
4646 if (
len == 0)
return pos;
4657 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4658 return str_rindex(str, sub, s, enc);
4719 rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4724 long pos,
len = str_strlen(str, enc);
4726 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4728 if (pos < 0 && (pos +=
len) < 0) {
4734 if (pos >
len) pos =
len;
4743 enc, single_byte_optimizable(str));
4754 pos = rb_str_rindex(str, sub, pos);
4764 rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4771 if (is_broken_string(sub))
return -1;
4776 if (
len < slen)
return -1;
4777 if (
len - pos < slen) pos =
len - slen;
4778 if (
len == 0)
return pos;
4790 return str_rindex(str, sub, s, enc);
4855 rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4861 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4863 if (pos < 0 && (pos +=
len) < 0) {
4869 if (pos >
len) pos =
len;
4875 str_ensure_byte_pos(str, pos);
4887 pos = rb_str_byterindex(str, sub, pos);
4888 if (pos >= 0)
return LONG2NUM(pos);
4924 switch (OBJ_BUILTIN_TYPE(y)) {
4976 rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5015 rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5019 re = get_pat(argv[0]);
5020 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5023 enum neighbor_char {
5029 static enum neighbor_char
5039 return NEIGHBOR_NOT_CHAR;
5043 if (!l)
return NEIGHBOR_NOT_CHAR;
5044 if (l !=
len)
return NEIGHBOR_WRAPPED;
5048 return NEIGHBOR_NOT_CHAR;
5050 return NEIGHBOR_FOUND;
5053 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5056 return NEIGHBOR_WRAPPED;
5057 ++((
unsigned char*)p)[i];
5062 return NEIGHBOR_FOUND;
5065 memset(p+l, 0xff,
len-l);
5071 for (len2 =
len-1; 0 < len2; len2--) {
5076 memset(p+len2+1, 0xff,
len-(len2+1));
5081 static enum neighbor_char
5090 return NEIGHBOR_NOT_CHAR;
5093 if (!c)
return NEIGHBOR_NOT_CHAR;
5096 if (!l)
return NEIGHBOR_NOT_CHAR;
5097 if (l !=
len)
return NEIGHBOR_WRAPPED;
5101 return NEIGHBOR_NOT_CHAR;
5103 return NEIGHBOR_FOUND;
5106 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5109 return NEIGHBOR_WRAPPED;
5110 --((
unsigned char*)p)[i];
5115 return NEIGHBOR_FOUND;
5118 memset(p+l, 0,
len-l);
5124 for (len2 =
len-1; 0 < len2; len2--) {
5129 memset(p+len2+1, 0,
len-(len2+1));
5143 static enum neighbor_char
5144 enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5146 enum neighbor_char ret;
5150 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5154 const int max_gaps = 1;
5158 ctype = ONIGENC_CTYPE_DIGIT;
5160 ctype = ONIGENC_CTYPE_ALPHA;
5162 return NEIGHBOR_NOT_CHAR;
5165 for (
try = 0;
try <= max_gaps; ++
try) {
5166 ret = enc_succ_char(p,
len, enc);
5167 if (ret == NEIGHBOR_FOUND) {
5170 return NEIGHBOR_FOUND;
5177 ret = enc_pred_char(p,
len, enc);
5178 if (ret == NEIGHBOR_FOUND) {
5192 return NEIGHBOR_NOT_CHAR;
5195 if (ctype != ONIGENC_CTYPE_DIGIT) {
5197 return NEIGHBOR_WRAPPED;
5201 enc_succ_char(carry,
len, enc);
5202 return NEIGHBOR_WRAPPED;
5271 rb_enc_cr_str_copy_for_substr(str, orig);
5272 return str_succ(str);
5279 char *sbeg, *s, *e, *last_alnum = 0;
5280 int found_alnum = 0;
5282 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5283 long carry_pos = 0, carry_len = 1;
5284 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5287 if (slen == 0)
return str;
5289 enc = STR_ENC_GET(str);
5291 s = e = sbeg + slen;
5294 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5301 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5302 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5303 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5305 case NEIGHBOR_NOT_CHAR:
5307 case NEIGHBOR_FOUND:
5309 case NEIGHBOR_WRAPPED:
5314 carry_pos = s - sbeg;
5320 enum neighbor_char neighbor;
5321 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5323 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5324 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5326 neighbor = enc_succ_char(tmp, l, enc);
5328 case NEIGHBOR_FOUND:
5332 case NEIGHBOR_WRAPPED:
5335 case NEIGHBOR_NOT_CHAR:
5340 enc_succ_char(s, l, enc);
5343 MEMCPY(carry, s,
char, l);
5346 carry_pos = s - sbeg;
5350 RESIZE_CAPA(str, slen + carry_len);
5352 s = sbeg + carry_pos;
5353 memmove(s + carry_len, s, slen - carry_pos);
5354 memmove(s, carry, carry_len);
5356 STR_SET_LEN(str, slen);
5371 rb_str_succ_bang(
VALUE str)
5379 all_digits_p(
const char *s,
long len)
5431 rb_str_upto(
int argc,
VALUE *argv,
VALUE beg)
5433 VALUE end, exclusive;
5437 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5443 VALUE current, after_end;
5451 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5457 if (c > e || (excl && c == e))
return beg;
5461 if ((*each)(str, arg))
break;
5462 if (!excl && c == e)
break;
5464 if (excl && c == e)
break;
5484 if (excl && bi == ei)
break;
5485 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5490 ID op = excl ?
'<' : idLE;
5491 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5496 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5504 if (n > 0 || (excl && n == 0))
return beg;
5512 if ((*each)(current, arg))
break;
5513 if (
NIL_P(next))
break;
5534 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5542 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5550 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5558 if ((*each)(current, arg))
break;
5572 if (!
rb_equal(str, *argp))
return 0;
5601 if (b <= v && v < e)
return Qtrue;
5602 return RBOOL(!
RTEST(exclusive) && v == e);
5615 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5617 return RBOOL(
NIL_P(val));
5640 return rb_str_subpat(str, indx,
INT2FIX(0));
5643 if (rb_str_index(str, indx, 0) != -1)
5649 long beg,
len = str_strlen(str, NULL);
5661 return str_substr(str, idx, 1, FALSE);
5680 rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5684 return rb_str_subpat(str, argv[0], argv[1]);
5687 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5691 return rb_str_aref(str, argv[0]);
5700 str_modifiable(str);
5701 if (
len > olen)
len = olen;
5703 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5705 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5708 memmove(
ptr, oldptr +
len, nlen);
5709 if (fl == STR_NOEMBED)
xfree(oldptr);
5712 if (!STR_SHARED_P(str)) {
5714 rb_enc_cr_str_exact_copy(shared, str);
5719 STR_SET_LEN(str, nlen);
5721 if (!SHARABLE_MIDDLE_SUBSTRING) {
5722 TERM_FILL(
ptr + nlen, TERM_LEN(str));
5729 rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5735 if (beg == 0 && vlen == 0) {
5740 str_modify_keep_cr(str);
5744 RESIZE_CAPA(str, slen + vlen -
len);
5754 memmove(sptr + beg + vlen,
5756 slen - (beg +
len));
5758 if (vlen < beg &&
len < 0) {
5762 memmove(sptr + beg,
RSTRING_PTR(val) + vbeg, vlen);
5765 STR_SET_LEN(str, slen);
5766 TERM_FILL(&sptr[slen], TERM_LEN(str));
5782 int singlebyte = single_byte_optimizable(str);
5789 slen = str_strlen(str, enc);
5791 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5800 if (
len > slen - beg) {
5810 rb_str_update_0(str, beg,
len, val);
5822 long start, end,
len;
5832 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5836 nth += regs->num_regs;
5846 enc = rb_enc_check_str(str, val);
5847 rb_str_update_0(str, start,
len, val);
5856 switch (
TYPE(indx)) {
5858 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5862 beg = rb_str_index(str, indx, 0);
5916 rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5920 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5928 return rb_str_aset(str, argv[0], argv[1]);
5988 rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5996 str_modify_keep_cr(str);
6004 if ((nth += regs->num_regs) <= 0)
return Qnil;
6006 else if (nth >= regs->num_regs)
return Qnil;
6008 len = END(nth) - beg;
6011 else if (argc == 2) {
6024 beg = rb_str_index(str, indx, 0);
6025 if (beg == -1)
return Qnil;
6051 rb_enc_cr_str_copy_for_substr(result, str);
6061 if (beg +
len > slen)
6065 slen - (beg +
len));
6067 STR_SET_LEN(str, slen);
6068 TERM_FILL(&sptr[slen], TERM_LEN(str));
6079 switch (OBJ_BUILTIN_TYPE(pat)) {
6098 get_pat_quoted(
VALUE pat,
int check)
6102 switch (OBJ_BUILTIN_TYPE(pat)) {
6116 if (check && is_broken_string(pat)) {
6123 rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6126 pos = rb_str_byteindex(str, pat, pos);
6127 if (set_backref_str) {
6129 str = rb_str_new_frozen_String(str);
6130 rb_backref_set_string(str, pos,
RSTRING_LEN(pat));
6139 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
6159 rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6179 pat = get_pat_quoted(argv[0], 1);
6181 str_modifiable(str);
6182 beg = rb_pat_search(pat, str, 0, 1);
6205 if (iter || !
NIL_P(hash)) {
6215 str_mod_check(str, p,
len);
6216 rb_check_frozen(str);
6229 rb_enc_inspect_name(str_enc),
6230 rb_enc_inspect_name(STR_ENC_GET(repl)));
6232 enc = STR_ENC_GET(repl);
6248 RESIZE_CAPA(str,
len + rlen - plen);
6252 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6255 memmove(p + beg0, rp, rlen);
6257 STR_SET_LEN(str,
len);
6287 rb_str_sub_bang(argc, argv, str);
6292 str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6295 long beg, beg0, end0;
6296 long offset, blen, slen,
len, last;
6297 enum {STR, ITER, MAP} mode = STR;
6299 int need_backref = -1;
6318 rb_error_arity(argc, 1, 2);
6321 pat = get_pat_quoted(argv[0], 1);
6322 beg = rb_pat_search(pat, str, 0, need_backref);
6324 if (bang)
return Qnil;
6334 str_enc = STR_ENC_GET(str);
6360 str_mod_check(str, sp, slen);
6365 else if (need_backref) {
6367 if (need_backref < 0) {
6368 need_backref = val != repl;
6375 len = beg0 - offset;
6392 offset = end0 +
len;
6396 beg = rb_pat_search(pat, str, offset, need_backref);
6403 rb_pat_search(pat, str, last, 1);
6405 str_shared_replace(str, dest);
6433 rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6435 str_modify_keep_cr(str);
6436 return str_gsub(argc, argv, str, 1);
6457 rb_str_gsub(
int argc,
VALUE *argv,
VALUE str)
6459 return str_gsub(argc, argv, str, 0);
6477 str_modifiable(str);
6478 if (str == str2)
return str;
6482 return str_replace(str, str2);
6497 rb_str_clear(
VALUE str)
6501 STR_SET_LEN(str, 0);
6522 rb_str_chr(
VALUE str)
6570 char *
ptr, *head, *left = 0;
6574 if (pos < -
len ||
len <= pos)
6581 char byte = (char)(
NUM2INT(w) & 0xFF);
6583 if (!str_independent(str))
6584 str_make_independent(str);
6585 enc = STR_ENC_GET(str);
6588 if (!STR_EMBED_P(str)) {
6621 str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6625 if (beg > n ||
len < 0)
return Qnil;
6628 if (beg < 0)
return Qnil;
6633 if (!empty)
return Qnil;
6637 VALUE str2 = str_subseq(str, beg,
len);
6639 str_enc_copy_direct(str2, str);
6684 return str_byte_substr(str, beg,
len, TRUE);
6689 return str_byte_substr(str, idx, 1, FALSE);
6736 rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6741 return str_byte_substr(str, beg,
len, TRUE);
6744 return str_byte_aref(str, argv[0]);
6748 str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6753 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6762 if (*
len > slen - *beg) {
6766 str_ensure_byte_pos(str, *beg);
6767 str_ensure_byte_pos(str, end);
6792 rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6794 long beg,
len, vbeg, vlen;
6799 if (!(argc == 2 || argc == 3 || argc == 5)) {
6805 rb_builtin_class_name(argv[0]));
6818 rb_builtin_class_name(argv[2]));
6838 str_check_beg_len(str, &beg, &
len);
6839 str_check_beg_len(val, &vbeg, &vlen);
6840 str_modify_keep_cr(str);
6846 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6864 rb_str_reverse(
VALUE str)
6872 enc = STR_ENC_GET(str);
6879 if (single_byte_optimizable(str)) {
6907 str_enc_copy_direct(rev, str);
6927 rb_str_reverse_bang(
VALUE str)
6930 if (single_byte_optimizable(str)) {
6933 str_modify_keep_cr(str);
6943 str_shared_replace(str, rb_str_reverse(str));
6947 str_modify_keep_cr(str);
6972 i = rb_str_index(str, arg, 0);
6974 return RBOOL(i != -1);
7011 rb_str_to_i(
int argc,
VALUE *argv,
VALUE str)
7042 rb_str_to_f(
VALUE str)
7057 rb_str_to_s(
VALUE str)
7069 char s[RUBY_MAX_CHAR_LEN];
/*
 * Size bound handed to snprintf() when formatting one escaped character
 * ("\\x%02X", "\\u%04X", "\\u{%X}", "\\x{%X}").  The buffers that receive
 * the output are declared with CHAR_ESC_LEN + 1 bytes.
 */
7077 #define CHAR_ESC_LEN 13
7080 rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7082 char buf[CHAR_ESC_LEN + 1];
7090 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7092 else if (c < 0x10000) {
7093 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7096 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7101 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7104 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7107 l = (int)strlen(buf);
7113 ruby_escaped_char(
int c)
7116 case '\0':
return "\\0";
7117 case '\n':
return "\\n";
7118 case '\r':
return "\\r";
7119 case '\t':
return "\\t";
7120 case '\f':
return "\\f";
7121 case '\013':
return "\\v";
7122 case '\010':
return "\\b";
7123 case '\007':
return "\\a";
7124 case '\033':
return "\\e";
7125 case '\x7f':
return "\\c?";
7131 rb_str_escape(
VALUE str)
7137 const char *prev = p;
7138 char buf[CHAR_ESC_LEN + 1];
7148 if (p > prev) str_buf_cat(result, prev, p - prev);
7151 n = (int)(pend - p);
7153 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7154 str_buf_cat(result, buf, strlen(buf));
7162 cc = ruby_escaped_char(c);
7164 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7165 str_buf_cat(result, cc, strlen(cc));
7171 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7172 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7176 if (p > prev) str_buf_cat(result, prev, p - prev);
7200 const char *p, *pend, *prev;
7201 char buf[CHAR_ESC_LEN + 1];
7210 str_buf_cat2(result,
"\"");
7220 if (p > prev) str_buf_cat(result, prev, p - prev);
7223 n = (int)(pend - p);
7225 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7226 str_buf_cat(result, buf, strlen(buf));
7234 if ((asciicompat || unicode_p) &&
7235 (c ==
'"'|| c ==
'\\' ||
7240 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7241 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7242 str_buf_cat2(result,
"\\");
7243 if (asciicompat || enc == resenc) {
7249 case '\n': cc =
'n';
break;
7250 case '\r': cc =
'r';
break;
7251 case '\t': cc =
't';
break;
7252 case '\f': cc =
'f';
break;
7253 case '\013': cc =
'v';
break;
7254 case '\010': cc =
'b';
break;
7255 case '\007': cc =
'a';
break;
7256 case 033: cc =
'e';
break;
7257 default: cc = 0;
break;
7260 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7263 str_buf_cat(result, buf, 2);
7280 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7281 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7286 if (p > prev) str_buf_cat(result, prev, p - prev);
7287 str_buf_cat2(result,
"\"");
/*
 * True when p has not reached e and the byte at p is '$', '@' or '{'.
 * The dump code tests this for the byte following a '#' and, when it holds,
 * emits a backslash before the '#'.
 * p is evaluated several times; pass a plain pointer expression.
 */
7292 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7315 const char *p, *pend;
7319 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7324 len += strlen(enc->name);
7330 unsigned char c = *p++;
7333 case '"':
case '\\':
7334 case '\n':
case '\r':
7335 case '\t':
case '\f':
7336 case '\013':
case '\010':
case '\007':
case '\033':
7341 clen = IS_EVSTR(p, pend) ? 2 : 1;
7349 if (u8 && c > 0x7F) {
7355 else if (cc <= 0xFFFFF)
7368 if (clen > LONG_MAX -
len) {
7380 unsigned char c = *p++;
7382 if (c ==
'"' || c ==
'\\') {
7386 else if (c ==
'#') {
7387 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7390 else if (c ==
'\n') {
7394 else if (c ==
'\r') {
7398 else if (c ==
'\t') {
7402 else if (c ==
'\f') {
7406 else if (c ==
'\013') {
7410 else if (c ==
'\010') {
7414 else if (c ==
'\007') {
7418 else if (c ==
'\033') {
7433 snprintf(q, qend-q,
"u%04X", cc);
7435 snprintf(q, qend-q,
"u{%X}", cc);
7440 snprintf(q, qend-q,
"x%02X", c);
7447 snprintf(q, qend-q, nonascii_suffix, enc->name);
7457 unescape_ascii(
unsigned int c)
7481 undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7483 const char *s = *ss;
7487 unsigned char buf[6];
7505 *buf = unescape_ascii(*s);
7518 if (*penc != enc_utf8) {
7537 if (hexlen == 0 || hexlen > 6) {
7543 if (0xd800 <= c && c <= 0xdfff) {
7556 if (0xd800 <= c && c <= 0xdfff) {
7587 static VALUE rb_str_is_ascii_only_p(
VALUE str);
7605 str_undump(
VALUE str)
7612 bool binary =
false;
7616 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7619 if (!str_null_check(str, &w)) {
7623 if (*s !=
'"')
goto invalid_format;
7641 static const char force_encoding_suffix[] =
".force_encoding(\"";
7642 static const char dup_suffix[] =
".dup";
7643 const char *encname;
7648 size =
sizeof(dup_suffix) - 1;
7649 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7651 size =
sizeof(force_encoding_suffix) - 1;
7652 if (s_end - s <= size)
goto invalid_format;
7653 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7661 s = memchr(s,
'"', s_end-s);
7663 if (!s)
goto invalid_format;
7664 if (s_end - s != 2)
goto invalid_format;
7665 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7667 encidx = rb_enc_find_index2(encname, (
long)size);
7681 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7692 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7705 str_true_enc(
VALUE str)
7708 rb_str_check_dummy_enc(enc);
7712 static OnigCaseFoldType
7713 check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7719 if (argv[0]==sym_turkic) {
7720 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7722 if (argv[1]==sym_lithuanian)
7723 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7728 else if (argv[0]==sym_lithuanian) {
7729 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7731 if (argv[1]==sym_turkic)
7732 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7739 else if (argv[0]==sym_ascii)
7740 flags |= ONIGENC_CASE_ASCII_ONLY;
7741 else if (argv[0]==sym_fold) {
7742 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7743 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
/*
 * Fixed number of extra bytes added to the capacity of every case-mapping
 * buffer, on top of the part that is proportional to the remaining source
 * length.
 */
7761 #define CASE_MAPPING_ADDITIONAL_LENGTH 20
7762 #ifndef CASEMAP_DEBUG
7763 # define CASEMAP_DEBUG 0
7771 OnigUChar space[FLEX_ARY_LEN];
7775 mapping_buffer_free(
void *p)
7779 while (current_buffer) {
7780 previous_buffer = current_buffer;
7781 current_buffer = current_buffer->next;
7782 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7788 {0, mapping_buffer_free,},
7789 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7797 const OnigUChar *source_current, *source_end;
7798 int target_length = 0;
7799 VALUE buffer_anchor;
7802 size_t buffer_count = 0;
7803 int buffer_length_or_invalid;
7812 while (source_current < source_end) {
7814 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7815 if (CASEMAP_DEBUG) {
7816 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
/*
 * Append the freshly allocated buffer to the chain: store it in the link
 * slot pre_buffer points at, then make pre_buffer point at the new buffer's
 * own next field so the following allocation is linked after it.
 */
7819 *pre_buffer = current_buffer;
7820 pre_buffer = &current_buffer->next;
7821 current_buffer->next = NULL;
7822 current_buffer->capa =
capa;
7823 buffer_length_or_invalid = enc->case_map(flags,
7824 &source_current, source_end,
7825 current_buffer->space,
7826 current_buffer->space+current_buffer->capa,
7828 if (buffer_length_or_invalid < 0) {
7829 current_buffer =
DATA_PTR(buffer_anchor);
7831 mapping_buffer_free(current_buffer);
7834 target_length += current_buffer->used = buffer_length_or_invalid;
7836 if (CASEMAP_DEBUG) {
7837 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7840 if (buffer_count==1) {
7841 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7844 char *target_current;
7848 current_buffer =
DATA_PTR(buffer_anchor);
7849 while (current_buffer) {
7850 memcpy(target_current, current_buffer->space, current_buffer->used);
7851 target_current += current_buffer->used;
7852 current_buffer = current_buffer->next;
7855 current_buffer =
DATA_PTR(buffer_anchor);
7857 mapping_buffer_free(current_buffer);
7862 str_enc_copy_direct(target, source);
7871 const OnigUChar *source_current, *source_end;
7872 OnigUChar *target_current, *target_end;
7874 int length_or_invalid;
7876 if (old_length == 0)
return Qnil;
7880 if (source == target) {
7881 target_current = (OnigUChar*)source_current;
7882 target_end = (OnigUChar*)source_end;
7889 length_or_invalid = onigenc_ascii_only_case_map(flags,
7890 &source_current, source_end,
7891 target_current, target_end, enc);
7892 if (length_or_invalid < 0)
7894 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7895 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7896 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7898 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7901 str_enc_copy(target, source);
7907 upcase_single(
VALUE str)
7910 bool modified =
false;
7913 unsigned int c = *(
unsigned char*)s;
7915 if (
'a' <= c && c <=
'z') {
7916 *s =
'A' + (c -
'a');
7944 rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7947 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7949 flags = check_case_options(argc, argv, flags);
7950 str_modify_keep_cr(str);
7951 enc = str_true_enc(str);
7952 if (case_option_single_p(flags, enc, str)) {
7953 if (upcase_single(str))
7954 flags |= ONIGENC_CASE_MODIFIED;
7956 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7957 rb_str_ascii_casemap(str, str, &flags, enc);
7959 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7961 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7983 rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7986 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7989 flags = check_case_options(argc, argv, flags);
7990 enc = str_true_enc(str);
7991 if (case_option_single_p(flags, enc, str)) {
7993 str_enc_copy_direct(ret, str);
7996 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7998 rb_str_ascii_casemap(str, ret, &flags, enc);
8001 ret = rb_str_casemap(str, &flags, enc);
8008 downcase_single(
VALUE str)
8011 bool modified =
false;
8014 unsigned int c = *(
unsigned char*)s;
8016 if (
'A' <= c && c <=
'Z') {
8017 *s =
'a' + (c -
'A');
8046 rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8049 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8051 flags = check_case_options(argc, argv, flags);
8052 str_modify_keep_cr(str);
8053 enc = str_true_enc(str);
8054 if (case_option_single_p(flags, enc, str)) {
8055 if (downcase_single(str))
8056 flags |= ONIGENC_CASE_MODIFIED;
8058 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8059 rb_str_ascii_casemap(str, str, &flags, enc);
8061 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8063 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8085 rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8088 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8091 flags = check_case_options(argc, argv, flags);
8092 enc = str_true_enc(str);
8093 if (case_option_single_p(flags, enc, str)) {
8095 str_enc_copy_direct(ret, str);
8096 downcase_single(ret);
8098 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8100 rb_str_ascii_casemap(str, ret, &flags, enc);
8103 ret = rb_str_casemap(str, &flags, enc);
8131 rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8134 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8136 flags = check_case_options(argc, argv, flags);
8137 str_modify_keep_cr(str);
8138 enc = str_true_enc(str);
8140 if (flags&ONIGENC_CASE_ASCII_ONLY)
8141 rb_str_ascii_casemap(str, str, &flags, enc);
8143 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8145 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8169 rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8172 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8175 flags = check_case_options(argc, argv, flags);
8176 enc = str_true_enc(str);
8178 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8180 rb_str_ascii_casemap(str, ret, &flags, enc);
8183 ret = rb_str_casemap(str, &flags, enc);
8210 rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8213 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8215 flags = check_case_options(argc, argv, flags);
8216 str_modify_keep_cr(str);
8217 enc = str_true_enc(str);
8218 if (flags&ONIGENC_CASE_ASCII_ONLY)
8219 rb_str_ascii_casemap(str, str, &flags, enc);
8221 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8223 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8247 rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8250 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8253 flags = check_case_options(argc, argv, flags);
8254 enc = str_true_enc(str);
8256 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8258 rb_str_ascii_casemap(str, ret, &flags, enc);
8261 ret = rb_str_casemap(str, &flags, enc);
8266 typedef unsigned char *USTR;
8270 unsigned int now, max;
8282 if (t->p == t->pend)
return -1;
8283 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8288 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8290 if (t->p < t->pend) {
8294 if (t->now < 0x80 && c < 0x80) {
8296 "invalid range \"%c-%c\" in string transliteration",
8304 else if (t->now < c) {
8313 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8314 if (t->now == t->max) {
8319 if (t->now < t->max) {
8335 const unsigned int errc = -1;
8336 unsigned int trans[256];
8338 struct tr trsrc, trrepl;
8340 unsigned int c, c0, last = 0;
8341 int modify = 0, i, l;
8342 unsigned char *s, *send;
8344 int singlebyte = single_byte_optimizable(str);
/*
 * Updates the code-range variable `cr` that must be in scope at the point of
 * use: if `cr` is currently ENC_CODERANGE_7BIT and `c` is not ASCII (per
 * rb_isascii), `cr` becomes ENC_CODERANGE_VALID; otherwise `cr` is left
 * alone. The whole expression is cast to void, so the macro is used purely
 * for that side effect. `c` appears once in the expansion.
 */
8348 #define CHECK_IF_ASCII(c) \
8349 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8350 (cr = ENC_CODERANGE_VALID) : 0)
8356 return rb_str_delete_bang(1, &src, str);
8371 trsrc.p + l < trsrc.pend) {
8377 trsrc.gen = trrepl.gen = 0;
8378 trsrc.now = trrepl.now = 0;
8379 trsrc.max = trrepl.max = 0;
8382 for (i=0; i<256; i++) {
8385 while ((c = trnext(&trsrc, enc)) != errc) {
8394 while ((c = trnext(&trrepl, enc)) != errc)
8397 for (i=0; i<256; i++) {
8398 if (trans[i] != errc) {
8406 for (i=0; i<256; i++) {
8409 while ((c = trnext(&trsrc, enc)) != errc) {
8410 r = trnext(&trrepl, enc);
8411 if (r == errc) r = trrepl.now;
8425 str_modify_keep_cr(str);
8431 unsigned int save = -1;
8432 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8454 if (cflag) c = last;
8457 else if (cflag) c = errc;
8463 if (c != (
unsigned int)-1) {
8475 if (enc != e1) may_modify = 1;
8477 if ((offset = t - buf) + tlen > max) {
8478 size_t MAYBE_UNUSED(old) = max + termlen;
8479 max = offset + tlen + (send - s);
8480 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8484 if (may_modify && memcmp(s, t, tlen) != 0) {
8490 if (!STR_EMBED_P(str)) {
8491 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8493 TERM_FILL((
char *)t, termlen);
8494 RSTRING(str)->as.heap.ptr = (
char *)buf;
8495 STR_SET_LEN(str, t - buf);
8496 STR_SET_NOEMBED(str);
8497 RSTRING(str)->as.heap.aux.capa = max;
8501 c = (
unsigned char)*s;
8502 if (trans[c] != errc) {
8519 long offset, max = (long)((send - s) * 1.2);
8520 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8541 if (cflag) c = last;
8544 else if (cflag) c = errc;
8548 c = cflag ? last : errc;
8556 if (enc != e1) may_modify = 1;
8558 if ((offset = t - buf) + tlen > max) {
8559 size_t MAYBE_UNUSED(old) = max + termlen;
8560 max = offset + tlen + (long)((send - s) * 1.2);
8561 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8566 if (may_modify && memcmp(s, t, tlen) != 0) {
8574 if (!STR_EMBED_P(str)) {
8575 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8577 TERM_FILL((
char *)t, termlen);
8578 RSTRING(str)->as.heap.ptr = (
char *)buf;
8579 STR_SET_LEN(str, t - buf);
8580 STR_SET_NOEMBED(str);
8581 RSTRING(str)->as.heap.aux.capa = max;
8606 return tr_trans(str, src, repl, 0);
8653 tr_trans(str, src, repl, 0);
/* Number of per-byte entries in a tr lookup table: one for each possible
 * unsigned char value (0..UCHAR_MAX). */
8657 #define TR_TABLE_MAX (UCHAR_MAX+1)
/* Full table length: the per-byte entries plus one extra trailing slot at
 * index TR_TABLE_MAX. tr_setup_table stores its `cflag` in that slot and
 * tr_find reads it back from there. */
8658 #define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8660 tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8663 const unsigned int errc = -1;
8664 char buf[TR_TABLE_MAX];
8667 VALUE table = 0, ptable = 0;
8668 int i, l, cflag = 0;
8671 tr.gen =
tr.now =
tr.max = 0;
8678 for (i=0; i<TR_TABLE_MAX; i++) {
8681 stable[TR_TABLE_MAX] = cflag;
8683 else if (stable[TR_TABLE_MAX] && !cflag) {
8684 stable[TR_TABLE_MAX] = 0;
8686 for (i=0; i<TR_TABLE_MAX; i++) {
8690 while ((c = trnext(&
tr, enc)) != errc) {
8691 if (c < TR_TABLE_MAX) {
8692 buf[(
unsigned char)c] = !cflag;
8697 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8714 for (i=0; i<TR_TABLE_MAX; i++) {
8715 stable[i] = stable[i] && buf[i];
8717 if (!table && !cflag) {
8724 tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8726 if (c < TR_TABLE_MAX) {
8727 return table[c] != 0;
8741 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8755 rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8757 char squeez[TR_TABLE_SIZE];
8760 VALUE del = 0, nodel = 0;
8762 int i, ascompat, cr;
8766 for (i=0; i<argc; i++) {
8771 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8774 str_modify_keep_cr(str);
8783 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8796 if (tr_find(c, squeez, del, nodel)) {
8807 TERM_FILL(t, TERM_LEN(str));
8811 if (modify)
return str;
8831 rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8834 rb_str_delete_bang(argc, argv, str);
8848 rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8850 char squeez[TR_TABLE_SIZE];
8852 VALUE del = 0, nodel = 0;
8853 unsigned char *s, *send, *t;
8855 int ascompat, singlebyte = single_byte_optimizable(str);
8859 enc = STR_ENC_GET(str);
8862 for (i=0; i<argc; i++) {
8867 if (singlebyte && !single_byte_optimizable(s))
8869 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8873 str_modify_keep_cr(str);
8882 unsigned int c = *s++;
8883 if (c != save || (argc > 0 && !squeez[c])) {
8893 if (ascompat && (c = *s) < 0x80) {
8894 if (c != save || (argc > 0 && !squeez[c])) {
8902 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8912 TERM_FILL((
char *)t, TERM_LEN(str));
8918 if (modify)
return str;
8941 rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8944 rb_str_squeeze_bang(argc, argv, str);
8962 return tr_trans(str, src, repl, 1);
8985 tr_trans(str, src, repl, 1);
9014 rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9016 char table[TR_TABLE_SIZE];
9018 VALUE del = 0, nodel = 0, tstr;
9033 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9034 !is_broken_string(str)) {
9042 if (*(
unsigned char*)s++ == c) n++;
9048 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9049 for (i=1; i<argc; i++) {
9053 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9063 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9072 if (tr_find(c, table, del, nodel)) {
9083 rb_fs_check(
VALUE val)
9087 if (
NIL_P(val))
return 0;
/*
 * Byte-indexed lookup table, 16 entries per row. An entry is 1 for byte
 * values 0x09..0x0D (TAB, LF, VT, FF, CR) and 0x20 (space), and 0 for every
 * other byte value, including everything at or above 0x80.
 */
9092 static const char isspacetable[256] = {
9093 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9094 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9095 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9096 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9097 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9098 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9099 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9104 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9108 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Looks `c` up in isspacetable. The cast to unsigned char makes the index
 * non-negative even when `c` is a plain char holding a negative value. */
9111 #define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9114 split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9116 if (empty_count >= 0 &&
len == 0) {
9117 return empty_count + 1;
9119 if (empty_count > 0) {
9124 }
while (--empty_count > 0);
9128 rb_yield(str_new_empty_String(str));
9129 }
while (--empty_count > 0);
9143 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9147 literal_split_pattern(
VALUE spat, split_type_t default_type)
9155 return SPLIT_TYPE_CHARS;
9158 if (
len == 1 &&
ptr[0] ==
' ') {
9159 return SPLIT_TYPE_AWK;
9165 return SPLIT_TYPE_AWK;
9168 return default_type;
9181 rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9186 split_type_t split_type;
9187 long beg, end, i = 0, empty_count = -1;
9192 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9194 if (lim <= 0) limit =
Qnil;
9195 else if (lim == 1) {
9207 if (
NIL_P(limit) && !lim) empty_count = 0;
9209 enc = STR_ENC_GET(str);
9210 split_type = SPLIT_TYPE_REGEXP;
9212 spat = get_pat_quoted(spat, 0);
9215 split_type = SPLIT_TYPE_AWK;
9217 else if (!(spat = rb_fs_check(spat))) {
9223 if (split_type != SPLIT_TYPE_AWK) {
9228 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9229 if (split_type == SPLIT_TYPE_AWK) {
9231 split_type = SPLIT_TYPE_STRING;
9236 mustnot_broken(spat);
9237 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/* Shorthand for one split_string() call: passes `result`, `str` and
 * `empty_count` from the expansion site together with the given beg/len, and
 * stores split_string()'s return value back into `empty_count`. All three
 * names must therefore be in scope wherever the macro is used. */
9245 #define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
9250 if (split_type == SPLIT_TYPE_AWK) {
9257 if (is_ascii_string(str)) {
9258 while (
ptr < eptr) {
9259 c = (
unsigned char)*
ptr++;
9261 if (ascii_isspace(c)) {
9267 if (!
NIL_P(limit) && lim <= i)
break;
9270 else if (ascii_isspace(c)) {
9271 SPLIT_STR(beg, end-beg);
9274 if (!
NIL_P(limit)) ++i;
9282 while (
ptr < eptr) {
9294 if (!
NIL_P(limit) && lim <= i)
break;
9298 SPLIT_STR(beg, end-beg);
9301 if (!
NIL_P(limit)) ++i;
9309 else if (split_type == SPLIT_TYPE_STRING) {
9310 char *str_start =
ptr;
9311 char *substr_start =
ptr;
9316 mustnot_broken(str);
9318 while (
ptr < eptr &&
9322 if (t !=
ptr + end) {
9326 SPLIT_STR(substr_start - str_start, (
ptr+end) - substr_start);
9329 if (!
NIL_P(limit) && lim <= ++i)
break;
9331 beg =
ptr - str_start;
9333 else if (split_type == SPLIT_TYPE_CHARS) {
9334 char *str_start =
ptr;
9338 mustnot_broken(str);
9340 while (
ptr < eptr &&
9342 SPLIT_STR(
ptr - str_start, n);
9344 if (!
NIL_P(limit) && lim <= ++i)
break;
9346 beg =
ptr - str_start;
9358 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (
void)0)) {
9363 if (start == end && BEG(0) == END(0)) {
9368 else if (last_null == 1) {
9382 SPLIT_STR(beg, end-beg);
9383 beg = start = END(0);
9387 for (idx=1; idx < regs->num_regs; idx++) {
9388 if (BEG(idx) == -1)
continue;
9389 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9391 if (!
NIL_P(limit) && lim <= ++i)
break;
9393 if (match) rb_match_unbusy(match);
9399 return result ? result : str;
9409 return rb_str_split_m(1, &sep, str);
/* Evaluates to rb_ary_new_capa(size) when rb_block_given_p() is false, and
 * to 0 when a block is given. The first argument `m` does not appear in the
 * expansion. */
9412 #define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
/* Thin alias: forwards both arguments unchanged to enumerator_element(). */
9427 #define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9430 chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
/* After this definition, each textual use of `rb_rs` expands to a call to
 * get_rs() (defined outside this view) instead of naming a variable. */
9455 #define rb_rs get_rs()
9462 const char *
ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9463 long pos,
len, rslen;
9469 static ID keywords[1];
9474 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9478 if (!ENUM_ELEM(ary, str)) {
9502 const char *eol = NULL;
9504 while (subend < pend) {
9505 long chomp_rslen = 0;
9511 if (eol == subend)
break;
9515 chomp_rslen = -rslen;
9519 if (!subptr) subptr = subend;
9523 }
while (subend < pend);
9525 if (rslen == 0) chomp_rslen = 0;
9527 subend - subptr + (chomp ? chomp_rslen : rslen));
9528 if (ENUM_ELEM(ary, line)) {
9529 str_mod_check(str,
ptr,
len);
9531 subptr = eol = NULL;
9550 while (subptr < pend) {
9551 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9555 if (hit != adjusted) {
9559 subend = hit += rslen;
9562 subend = chomp_newline(subptr, subend, enc);
9569 if (ENUM_ELEM(ary, line)) {
9570 str_mod_check(str,
ptr,
len);
9575 if (subptr != pend) {
9578 pend = chomp_newline(subptr, pend, enc);
9580 else if (pend - subptr >= rslen &&
9581 memcmp(pend - rslen, rsptr, rslen) == 0) {
9586 ENUM_ELEM(ary, line);
9607 rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9610 return rb_str_enumerate_lines(argc, argv, str, 0);
9623 rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9625 VALUE ary = WANTARRAY(
"lines", 0);
9626 return rb_str_enumerate_lines(argc, argv, str, ary);
9659 rb_str_each_byte(
VALUE str)
9662 return rb_str_enumerate_bytes(str, 0);
9674 rb_str_bytes(
VALUE str)
9677 return rb_str_enumerate_bytes(str, ary);
9700 for (i = 0; i <
len; i += n) {
9706 for (i = 0; i <
len; i += n) {
9728 rb_str_each_char(
VALUE str)
9731 return rb_str_enumerate_chars(str, 0);
9743 rb_str_chars(
VALUE str)
9746 return rb_str_enumerate_chars(str, ary);
9750 rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9755 const char *
ptr, *end;
9758 if (single_byte_optimizable(str))
9759 return rb_str_enumerate_bytes(str, ary);
9764 enc = STR_ENC_GET(str);
9788 rb_str_each_codepoint(
VALUE str)
9791 return rb_str_enumerate_codepoints(str, 0);
9803 rb_str_codepoints(
VALUE str)
9806 return rb_str_enumerate_codepoints(str, ary);
9814 const OnigUChar source_ascii[] =
"\\X";
9815 const OnigUChar *source = source_ascii;
9816 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Helpers that expand to a comma-separated list of OnigUChar values, so they
 * can be dropped into an array initializer (as CASE_UTF does):
 *   CHARS_16BE(x) - two values: (x)>>8 first, then x
 *   CHARS_16LE(x) - two values: x first, then (x)>>8
 *   CHARS_32BE(x) - four values: CHARS_16BE of (x)>>16, then CHARS_16BE of x
 *   CHARS_32LE(x) - four values: CHARS_16LE of x, then CHARS_16LE of (x)>>16
 * Each value is converted with an (OnigUChar) cast. NOTE(review): this
 * presumably truncates to the low 8 bits; confirm OnigUChar is a byte type.
 * `x` is expanded more than once, so pass only side-effect-free arguments.
 */
9819 #define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9820 #define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9821 #define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9822 #define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9823 #define CASE_UTF(e) \
9824 case ENCINDEX_UTF_##e: { \
9825 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9826 source = source_UTF_##e; \
9827 source_len = sizeof(source_UTF_##e); \
9830 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9838 regex_t *reg_grapheme_cluster;
9840 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9841 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9843 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9844 onig_error_code_to_str(message, r, &einfo);
9845 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9848 return reg_grapheme_cluster;
9855 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9858 if (!reg_grapheme_cluster_utf8) {
9859 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9862 return reg_grapheme_cluster_utf8;
9871 size_t grapheme_cluster_count = 0;
9873 const char *
ptr, *end;
9879 bool cached_reg_grapheme_cluster =
true;
9880 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9881 if (!reg_grapheme_cluster) {
9882 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9883 cached_reg_grapheme_cluster =
false;
9890 OnigPosition
len = onig_match(reg_grapheme_cluster,
9891 (
const OnigUChar *)
ptr, (
const OnigUChar *)end,
9892 (
const OnigUChar *)
ptr, NULL, 0);
9893 if (
len <= 0)
break;
9894 grapheme_cluster_count++;
9898 if (!cached_reg_grapheme_cluster) {
9899 onig_free(reg_grapheme_cluster);
9902 return SIZET2NUM(grapheme_cluster_count);
9906 rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9910 const char *ptr0, *
ptr, *end;
9913 return rb_str_enumerate_chars(str, ary);
9918 bool cached_reg_grapheme_cluster =
true;
9919 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9920 if (!reg_grapheme_cluster) {
9921 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9922 cached_reg_grapheme_cluster =
false;
9929 OnigPosition
len = onig_match(reg_grapheme_cluster,
9930 (
const OnigUChar *)
ptr, (
const OnigUChar *)end,
9931 (
const OnigUChar *)
ptr, NULL, 0);
9932 if (
len <= 0)
break;
9937 if (!cached_reg_grapheme_cluster) {
9938 onig_free(reg_grapheme_cluster);
9958 rb_str_each_grapheme_cluster(
VALUE str)
9961 return rb_str_enumerate_grapheme_clusters(str, 0);
9973 rb_str_grapheme_clusters(
VALUE str)
9976 return rb_str_enumerate_grapheme_clusters(str, ary);
9980 chopped_length(
VALUE str)
9983 const char *p, *p2, *beg, *end;
9987 if (beg >= end)
return 0;
10008 rb_str_chop_bang(
VALUE str)
10010 str_modify_keep_cr(str);
10013 len = chopped_length(str);
10014 STR_SET_LEN(str,
len);
10034 rb_str_chop(
VALUE str)
10040 smart_chomp(
VALUE str,
const char *e,
const char *p)
10059 if (--e > p && *(e-1) ==
'\r') {
10076 char *pp, *e, *rsptr;
10081 if (
len == 0)
return 0;
10084 return smart_chomp(str, e, p);
10105 while (e > p && *(e-1) ==
'\n') {
10107 if (e > p && *(e-1) ==
'\r')
10113 if (rslen >
len)
return len;
10116 newline = rsptr[rslen-1];
10119 if (newline ==
'\n')
10120 return smart_chomp(str, e, p);
10124 return smart_chomp(str, e, p);
10129 if (is_broken_string(rs)) {
10133 if (p[
len-1] == newline &&
10135 memcmp(rsptr, pp, rslen) == 0)) {
10136 if (at_char_boundary(p, pp, e, enc))
10137 return len - rslen;
10149 chomp_rs(
int argc,
const VALUE *argv)
10153 VALUE rs = argv[0];
10166 long len = chompped_length(str, rs);
10167 if (
len >= olen)
return Qnil;
10168 str_modify_keep_cr(str);
10169 STR_SET_LEN(str,
len);
10187 rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10190 str_modifiable(str);
10192 rs = chomp_rs(argc, argv);
10194 return rb_str_chomp_string(str, rs);
10207 rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10209 VALUE rs = chomp_rs(argc, argv);
10217 const char *
const start = s;
10219 if (!s || s >= e)
return 0;
10222 if (single_byte_optimizable(str)) {
10223 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10248 rb_str_lstrip_bang(
VALUE str)
10252 long olen, loffset;
10254 str_modify_keep_cr(str);
10255 enc = STR_ENC_GET(str);
10257 loffset = lstrip_offset(str, start, start+olen, enc);
10259 long len = olen-loffset;
10260 s = start + loffset;
10261 memmove(start, s,
len);
10262 STR_SET_LEN(str,
len);
10286 rb_str_lstrip(
VALUE str)
10291 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10292 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10301 rb_str_check_dummy_enc(enc);
10305 if (!s || s >= e)
return 0;
10309 if (single_byte_optimizable(str)) {
10311 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10336 rb_str_rstrip_bang(
VALUE str)
10340 long olen, roffset;
10342 str_modify_keep_cr(str);
10343 enc = STR_ENC_GET(str);
10345 roffset = rstrip_offset(str, start, start+olen, enc);
10347 long len = olen - roffset;
10349 STR_SET_LEN(str,
len);
10373 rb_str_rstrip(
VALUE str)
10377 long olen, roffset;
10379 enc = STR_ENC_GET(str);
10381 roffset = rstrip_offset(str, start, start+olen, enc);
10383 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10399 rb_str_strip_bang(
VALUE str)
10402 long olen, loffset, roffset;
10405 str_modify_keep_cr(str);
10406 enc = STR_ENC_GET(str);
10408 loffset = lstrip_offset(str, start, start+olen, enc);
10409 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10411 if (loffset > 0 || roffset > 0) {
10412 long len = olen-roffset;
10415 memmove(start, start + loffset,
len);
10417 STR_SET_LEN(str,
len);
10441 rb_str_strip(
VALUE str)
10444 long olen, loffset, roffset;
10448 loffset = lstrip_offset(str, start, start+olen, enc);
10449 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10451 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10456 scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10459 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10489 if (!regs || regs->num_regs == 1) {
10495 for (
int i = 1; i < regs->num_regs; i++) {
10556 long last = -1, prev = 0;
10559 pat = get_pat_quoted(pat, 1);
10560 mustnot_broken(str);
10564 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10569 if (last >= 0) rb_pat_search(pat, str, last, 1);
10574 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10578 str_mod_check(str, p,
len);
10580 if (last >= 0) rb_pat_search(pat, str, last, 1);
10604 rb_str_hex(
VALUE str)
10631 rb_str_oct(
VALUE str)
10636 #ifndef HAVE_CRYPT_R
10641 rb_nativethread_lock_t lock;
10642 } crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10645 crypt_mutex_initialize(
void)
10713 #ifdef HAVE_CRYPT_R
10716 # define CRYPT_END() ALLOCV_END(databuf)
10718 extern char *crypt(
const char *,
const char *);
10719 # define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10722 const char *s, *saltp;
10724 #ifdef BROKEN_CRYPT
10725 char salt_8bit_clean[3];
10729 mustnot_wchar(str);
10730 mustnot_wchar(salt);
10733 if (
RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10737 #ifdef BROKEN_CRYPT
10738 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10739 salt_8bit_clean[0] = saltp[0] & 0x7f;
10740 salt_8bit_clean[1] = saltp[1] & 0x7f;
10741 salt_8bit_clean[2] =
'\0';
10742 saltp = salt_8bit_clean;
10745 #ifdef HAVE_CRYPT_R
10747 # ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10748 data->initialized = 0;
10750 res = crypt_r(s, saltp, data);
10752 crypt_mutex_initialize();
10754 res = crypt(s, saltp);
10776 rb_str_ord(
VALUE s)
10792 rb_str_sum(
int argc,
VALUE *argv,
VALUE str)
10795 char *
ptr, *p, *pend;
10798 unsigned long sum0 = 0;
10810 str_mod_check(str,
ptr,
len);
10813 sum0 += (
unsigned char)*p;
10824 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10825 sum0 &= (((
unsigned long)1)<<bits)-1;
10845 rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10849 long width,
len, flen = 1, fclen = 1;
10852 const char *f =
" ";
10853 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10855 int singlebyte = 1, cr;
10859 enc = STR_ENC_GET(str);
10867 fclen = str_strlen(pad, enc);
10868 singlebyte = single_byte_optimizable(pad);
10869 if (flen == 0 || fclen == 0) {
10873 len = str_strlen(str, enc);
10874 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10876 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10880 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10881 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10884 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10885 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10886 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10893 memset(p, *f, llen);
10897 while (llen >= fclen) {
10903 memcpy(p, f, llen2);
10910 memset(p, *f, rlen);
10914 while (rlen >= fclen) {
10920 memcpy(p, f, rlen2);
10924 TERM_FILL(p, termlen);
10948 rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10950 return rb_str_justify(argc, argv, str,
'l');
10964 rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10966 return rb_str_justify(argc, argv, str,
'r');
10981 rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10983 return rb_str_justify(argc, argv, str,
'c');
10999 sep = get_pat_quoted(sep, 0);
11011 pos = rb_str_index(str, sep, 0);
11012 if (pos < 0)
goto failed;
11020 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11036 sep = get_pat_quoted(sep, 0);
11049 pos = rb_str_rindex(str, sep, pos);
11060 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11072 rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11076 for (i=0; i<argc; i++) {
11077 VALUE tmp = argv[i];
11079 if (rb_reg_start_with_p(tmp, str))
11083 const char *p, *s, *e;
11094 if (!at_char_right_boundary(p, s, e, enc))
11112 rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11116 for (i=0; i<argc; i++) {
11117 VALUE tmp = argv[i];
11118 const char *p, *s, *e;
11129 if (!at_char_boundary(p, s, e, enc))
11147 deleted_prefix_length(
VALUE str,
VALUE prefix)
11149 const char *strptr, *prefixptr;
11150 long olen, prefixlen;
11155 if (!is_broken_string(prefix) ||
11163 if (prefixlen <= 0)
return 0;
11165 if (olen < prefixlen)
return 0;
11168 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11169 if (is_broken_string(prefix)) {
11170 if (!is_broken_string(str)) {
11174 const char *strend = strptr + olen;
11175 const char *after_prefix = strptr + prefixlen;
11176 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11196 rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11199 str_modify_keep_cr(str);
11201 prefixlen = deleted_prefix_length(str, prefix);
11202 if (prefixlen <= 0)
return Qnil;
11216 rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11220 prefixlen = deleted_prefix_length(str, prefix);
11221 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11236 deleted_suffix_length(
VALUE str,
VALUE suffix)
11238 const char *strptr, *suffixptr;
11239 long olen, suffixlen;
11243 if (is_broken_string(suffix))
return 0;
11248 if (suffixlen <= 0)
return 0;
11250 if (olen < suffixlen)
return 0;
11253 const char *strend = strptr + olen;
11254 const char *before_suffix = strend - suffixlen;
11255 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11256 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11271 rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11273 long olen, suffixlen,
len;
11274 str_modifiable(str);
11276 suffixlen = deleted_suffix_length(str, suffix);
11277 if (suffixlen <= 0)
return Qnil;
11280 str_modify_keep_cr(str);
11281 len = olen - suffixlen;
11282 STR_SET_LEN(str,
len);
11299 rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11303 suffixlen = deleted_suffix_length(str, suffix);
11304 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11321 val = rb_fs_check(val);
11324 "value of %"PRIsVALUE
" must be String or Regexp",
11328 rb_warn_deprecated(
"'$;'", NULL);
11345 str_modifiable(str);
11376 rb_str_b(
VALUE str)
11379 if (STR_EMBED_P(str)) {
11385 str_replace_shared_without_enc(str2, str);
11420 rb_str_valid_encoding_p(
VALUE str)
11440 rb_str_is_ascii_only_p(
VALUE str)
11450 static const char ellipsis[] =
"...";
11451 const long ellipsislen =
sizeof(ellipsis) - 1;
11454 const char *
const p =
RSTRING_PTR(str), *e = p + blen;
11455 VALUE estr, ret = 0;
11462 else if (
len <= ellipsislen ||
11497 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11516 if (enc == STR_ENC_GET(str)) {
11521 return enc_str_scrub(enc, str, repl, cr);
11529 const char *rep, *p, *e, *p1, *sp;
11542 if (!
NIL_P(repl)) {
11543 repl = str_compat_and_valid(repl, enc);
11551 #define DEFAULT_REPLACE_CHAR(str) do { \
11552 static const char replace[sizeof(str)-1] = str; \
11553 rep = replace; replen = (int)sizeof(replace); \
11568 else if (!
NIL_P(repl)) {
11574 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11578 DEFAULT_REPLACE_CHAR(
"?");
11583 p = search_nonascii(p, e);
11607 if (e - p < clen) clen = e - p;
11614 for (; clen > 1; clen--) {
11627 str_mod_check(str, sp, slen);
11628 repl = str_compat_and_valid(repl, enc);
11635 p = search_nonascii(p, e);
11662 str_mod_check(str, sp, slen);
11663 repl = str_compat_and_valid(repl, enc);
11676 else if (!
NIL_P(repl)) {
11680 else if (encidx == ENCINDEX_UTF_16BE) {
11681 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11683 else if (encidx == ENCINDEX_UTF_16LE) {
11684 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11686 else if (encidx == ENCINDEX_UTF_32BE) {
11687 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11689 else if (encidx == ENCINDEX_UTF_32LE) {
11690 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11693 DEFAULT_REPLACE_CHAR(
"?");
11710 if (e - p < clen) clen = e - p;
11711 if (clen <= mbminlen * 2) {
11716 for (; clen > mbminlen; clen-=mbminlen) {
11728 str_mod_check(str, sp, slen);
11729 repl = str_compat_and_valid(repl, enc);
11755 str_mod_check(str, sp, slen);
11756 repl = str_compat_and_valid(repl, enc);
11792 str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11800 static ID id_normalize;
11801 static ID id_normalized_p;
11802 static VALUE mUnicodeNormalize;
11805 unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11807 static int UnicodeNormalizeRequired = 0;
11810 if (!UnicodeNormalizeRequired) {
11811 rb_require(
"unicode_normalize/normalize.rb");
11812 UnicodeNormalizeRequired = 1;
11816 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11853 rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11855 return unicode_normalize_common(argc, argv, str, id_normalize);
11869 rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11871 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11898 rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11900 return unicode_normalize_common(argc, argv, str, id_normalized_p);
/* sym_equal is just another name for rb_obj_equal; no separate function is
 * defined for it. */
12032 #define sym_equal rb_obj_equal
12035 sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12051 rb_str_symname_p(
VALUE sym)
12059 enc = STR_ENC_GET(sym);
12062 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(
ptr) ||
12070 rb_str_quote_unprintable(
VALUE str)
12080 enc = STR_ENC_GET(str);
12083 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12085 return rb_str_escape(str);
12091 rb_id_quote_unprintable(
ID id)
12094 if (!rb_str_symname_p(str)) {
12095 return rb_str_escape(str);
12113 sym_inspect(
VALUE sym)
12120 if (!rb_str_symname_p(str)) {
12125 memmove(dest + 1, dest,
len);
12129 VALUE orig_str = str;
12137 memcpy(dest + 1,
ptr,
len);
12157 rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12180 sym_succ(
VALUE sym)
12259 return rb_str_match(
rb_sym2str(sym), other);
12274 sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12276 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12289 sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12291 return rb_str_match_m_p(argc, argv, sym);
12309 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12320 sym_length(
VALUE sym)
12334 sym_empty(
VALUE sym)
12350 sym_upcase(
int argc,
VALUE *argv,
VALUE sym)
12368 sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12384 sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12400 sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12414 sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12416 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12429 sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12431 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12443 sym_encoding(
VALUE sym)
12449 string_for_symbol(
VALUE name)
12468 name = string_for_symbol(name);
12478 name = string_for_symbol(name);
12494 sym_all_symbols(
VALUE _)
12502 return rb_fstring(str);
12509 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12521 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12522 rb_enc_autoload(enc);
12526 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12532 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12533 rb_enc_autoload(enc);
12537 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12548 rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12553 if (
RB_LIKELY(code >= 0 && code < 0xff)) {
12554 rb_str_buf_cat_byte(str, (
char) code);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG is truthy or the built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RB_LIKELY(x)
Asserts that the given Boolean expression likely holds.
#define RB_UNLIKELY(x)
Asserts that the given Boolean expression likely doesn't hold.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Undefines a method, i.e. defines an "undef" entry for it in klass.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_raise(VALUE exc_class, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_ensure(VALUE(*b_proc)(VALUE), VALUE data1, VALUE(*e_proc)(VALUE), VALUE data2)
An equivalent to ensure clause.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords, if any, to the #initialize method.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
int rb_enc_get_index(VALUE obj)
Queries the index of the encoding of the passed object, if any.
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
int rb_enc_codelen(int code, rb_encoding *enc)
Queries the number of bytes required to represent the passed code point using the passed encoding.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_mbclen() unless the character at p overruns e.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Look for the "common" encoding between the two.
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
int rb_enc_to_index(rb_encoding *enc)
Queries the index of the encoding.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
rb_encoding * rb_to_encoding(VALUE obj)
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL.
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT a.k.a. binary.
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL.
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
static char * rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the previous (left) character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
rb_encoding * rb_enc_from_index(int idx)
Identical to rb_find_encoding(), except it takes an encoding index instead of a Ruby object.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
VALUE rb_enc_associate_index(VALUE obj, int encindex)
Identical to rb_enc_set_index(), except it additionally does contents fix-up depending on the passed ...
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_obj_encoding(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcallv(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcall(), except it takes the method arguments as a C array.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the given array.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
void rb_gc_register_address(VALUE *valptr)
Inform the garbage collector that the global or static variable pointed to by valptr stores a live Ruby object.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
VALUE rb_ary_new_from_args(long n,...)
Constructs an array from the passed objects.
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Identical to rb_cstr2inum(), except it takes Ruby's strings instead of C's.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_check_hash_type(VALUE obj)
Try converting an object to its hash representation using its to_hash method, if any.
VALUE rb_hash_aref(VALUE hash, VALUE key)
Queries the given key in the given hash table.
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Inserts or replaces ("upsert"s) the objects into the given hash table.
VALUE rb_hash_lookup(VALUE hash, VALUE key)
Identical to rb_hash_aref(), except it always returns RUBY_Qnil for misshits.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_rs
The record separator character for inputs, or the $/.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby's string instead of C's.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_utf8_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "UTF-8" encoding.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_utf8_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
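I guess there is no use case of this function in extension libraries, but this is a routine identical to rb_str_dup(), except it always creates an instance of rb_cString regardless of the given object's class.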
VALUE rb_usascii_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
VALUE rb_usascii_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_buf_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_str_buf_new_cstr(const char *ptr)
This is a rb_str_buf_new() + rb_str_buf_cat() combo.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string to be a NUL-terminated ASCII string.
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
VALUE rb_str_dup_frozen(VALUE)
Just another name of rb_str_new_frozen.
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
VALUE rb_locale_str_new_cstr(const char *ptr)
Identical to rb_locale_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method, if any.
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_str_new_cstr(const char *ptr)
Identical to rb_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary".
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_external_str_new_cstr(const char *ptr)
Identical to rb_external_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_str_cat_cstr(VALUE dst, const char *src)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "binary".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
ID rb_intern(const char *name)
Finds or creates a symbol of the given name.
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
VALUE rb_id2str(ID id)
Identical to rb_id2name(), except it returns a frozen Ruby String instead of a C String.
void rb_define_hooked_variable(const char *name, VALUE *var, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter)
Identical to rb_define_virtual_variable(), but can also specify a storage.
int capa
Designed capacity of the buffer.
char * ptr
Pointer to the underlying memory region, of at least capa bytes.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
VALUE type(ANYARGS)
ANYARGS-ed function type.
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks the contents for viability as a C string.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into the default external encoding.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks the contents for viability as a C string.
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@52 as
String's specific fields.
struct RBasic basic
Basic part, including flags and class.
long len
Length of the string, not including terminating NUL character.
struct RString::@52::@54 embed
Embedded contents.
VALUE shared
Parent of the string.
struct RString::@52::@53 heap
Strings that use separated memory region for contents use this pattern.
struct rb_data_type_struct
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.
void ruby_xfree(void *ptr)
Deallocates a storage instance.