14 #include "ruby/internal/config.h"
24 #include "debug_counter.h"
28 #include "internal/array.h"
29 #include "internal/compar.h"
30 #include "internal/compilers.h"
31 #include "internal/encoding.h"
32 #include "internal/error.h"
33 #include "internal/gc.h"
34 #include "internal/numeric.h"
35 #include "internal/object.h"
36 #include "internal/proc.h"
37 #include "internal/re.h"
38 #include "internal/sanitizers.h"
39 #include "internal/string.h"
40 #include "internal/transcode.h"
45 #include "ruby_assert.h"
48 #if defined HAVE_CRYPT_R
49 # if defined HAVE_CRYPT_H
52 #elif !defined HAVE_CRYPT
53 # include "missing/crypt.h"
54 # define HAVE_CRYPT_R 1
/* Shorthand accessors for entry `no` of the beg[] / end[] arrays of a
 * variable named `regs`; that variable must be in scope wherever these
 * macros are used. */
57 #define BEG(no) (regs->beg[(no)])
58 #define END(no) (regs->end[(no)])
61 #undef rb_usascii_str_new
62 #undef rb_utf8_str_new
64 #undef rb_str_new_cstr
65 #undef rb_usascii_str_new_cstr
66 #undef rb_utf8_str_new_cstr
67 #undef rb_enc_str_new_cstr
68 #undef rb_external_str_new_cstr
69 #undef rb_locale_str_new_cstr
70 #undef rb_str_dup_frozen
71 #undef rb_str_buf_new_cstr
73 #undef rb_str_buf_cat2
75 #undef rb_str_cat_cstr
76 #undef rb_fstring_cstr
/* NOTE(review): presumably an upper bound on the byte length of a single
 * character; its use is not visible in this excerpt -- confirm. */
125 #define RUBY_MAX_CHAR_LEN 16
/* String-specific flag bits, each aliased to one of the generic FL_USERn bits. */
/* Set by str_store_precomputed_hash() after it copies the hash value in
 * behind the string's terminator. */
126 #define STR_PRECOMPUTED_HASH FL_USER4
/* Set (e.g. by STR_SET_SHARED) on the string whose buffer another string shares. */
127 #define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on a shared root whose RBASIC_CLASS is 0. */
128 #define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(); also part of STR_UNMODIFIABLE_MASK. */
129 #define STR_TMPLOCK FL_USER7
/* str_discard() only frees the heap buffer when neither this flag nor
 * STR_SHARED is set. */
130 #define STR_NOFREE FL_USER18
/* STR_SET_SHARED guards its body with !FL_TEST(str, STR_FAKESTR). */
131 #define STR_FAKESTR FL_USER19
133 #define STR_SET_NOEMBED(str) do {\
134 FL_SET((str), STR_NOEMBED);\
135 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Clear the STR_NOEMBED, STR_SHARED and STR_NOFREE flags on `str` in a
 * single FL_UNSET call (STR_SET_NOEMBED is the macro that sets STR_NOEMBED). */
137 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
139 #define STR_SET_LEN(str, n) do { \
140 RSTRING(str)->len = (n); \
144 str_encindex_fastpath(
int encindex)
148 case ENCINDEX_ASCII_8BIT:
150 case ENCINDEX_US_ASCII:
158 str_enc_fastpath(
VALUE str)
/* Terminator length for `str`: 1 when str_enc_fastpath(str) holds, otherwise
 * rb_enc_mbminlen() of the encoding looked up via ENCODING_GET(str).
 * `str` can be evaluated twice, so pass an expression without side effects. */
163 #define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
164 #define TERM_FILL(ptr, termlen) do {\
165 char *const term_fill_ptr = (ptr);\
166 const int term_fill_len = (termlen);\
167 *term_fill_ptr = '\0';\
168 if (UNLIKELY(term_fill_len > 1))\
169 memset(term_fill_ptr, 0, term_fill_len);\
172 #define RESIZE_CAPA(str,capacity) do {\
173 const int termlen = TERM_LEN(str);\
174 RESIZE_CAPA_TERM(str,capacity,termlen);\
176 #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
177 if (STR_EMBED_P(str)) {\
178 if (str_embed_capa(str) < capacity + termlen) {\
179 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
180 const long tlen = RSTRING_LEN(str);\
181 memcpy(tmp, RSTRING_PTR(str), tlen);\
182 RSTRING(str)->as.heap.ptr = tmp;\
183 RSTRING(str)->len = tlen;\
184 STR_SET_NOEMBED(str);\
185 RSTRING(str)->as.heap.aux.capa = (capacity);\
189 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
190 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
191 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
192 RSTRING(str)->as.heap.aux.capa = (capacity);\
196 #define STR_SET_SHARED(str, shared_str) do { \
197 if (!FL_TEST(str, STR_FAKESTR)) { \
198 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
199 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
200 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
201 FL_SET((str), STR_SHARED); \
202 FL_SET((shared_str), STR_SHARED_ROOT); \
203 if (RBASIC_CLASS((shared_str)) == 0) \
204 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Reads as.heap.ptr of `str` directly; performs no check that the string
 * actually uses the heap representation. */
208 #define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Byte size used when reallocating/freeing the heap buffer: the recorded
 * capacity (as.heap.aux.capa) plus the terminator length from TERM_LEN. */
209 #define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Encoding of `str`, obtained through get_encoding(). */
212 #define STR_ENC_GET(str) get_encoding(str)
214 #if !defined SHARABLE_MIDDLE_SUBSTRING
215 # define SHARABLE_MIDDLE_SUBSTRING 0
217 #if !SHARABLE_MIDDLE_SUBSTRING
218 #define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
220 #define SHARABLE_SUBSTRING_P(beg, len, end) 1
225 str_embed_capa(
VALUE str)
227 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
231 rb_str_reembeddable_p(
VALUE str)
233 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
237 rb_str_embed_size(
long capa)
243 rb_str_size_as_embedded(
VALUE str)
246 if (STR_EMBED_P(str)) {
247 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
251 else if (rb_str_reembeddable_p(str)) {
252 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
255 real_size =
sizeof(
struct RString);
259 real_size +=
sizeof(st_index_t);
266 STR_EMBEDDABLE_P(
long len,
long termlen)
268 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
273 static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
274 static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
276 static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
277 static inline void str_modifiable(
VALUE str);
282 str_make_independent(
VALUE str)
285 int termlen = TERM_LEN(str);
286 str_make_independent_expand((str),
len, 0L, termlen);
289 static inline int str_dependent_p(
VALUE str);
292 rb_str_make_independent(
VALUE str)
294 if (str_dependent_p(str)) {
295 str_make_independent(str);
300 rb_str_make_embedded(
VALUE str)
305 char *buf =
RSTRING(str)->as.heap.ptr;
309 STR_SET_LEN(str,
len);
316 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
320 rb_debug_rstring_null_ptr(
const char *func)
322 fprintf(stderr,
"%s is returning NULL!! "
323 "SIGSEGV is highly expected to follow immediately.\n"
324 "If you could reproduce, attach your debugger here, "
325 "and look at the passed string.\n",
330 static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
333 get_encoding(
VALUE str)
339 mustnot_broken(
VALUE str)
341 if (is_broken_string(str)) {
347 mustnot_wchar(
VALUE str)
357 static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
359 #if SIZEOF_LONG == SIZEOF_VOIDP
360 #define PRECOMPUTED_FAKESTR_HASH 1
364 #ifdef PRECOMPUTED_FAKESTR_HASH
366 fstring_hash(
VALUE str)
370 return (st_index_t)
RSTRING(str)->as.heap.aux.capa;
377 #define fstring_hash rb_str_hash
/* True when `str` does not have the FL_EXIVAR flag set and its class is
 * exactly rb_cString. */
385 #define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
387 static inline st_index_t
388 str_do_hash(
VALUE str)
392 if (e && !is_ascii_string(str)) {
399 str_store_precomputed_hash(
VALUE str, st_index_t hash)
405 size_t used_bytes = (
RSTRING_LEN(str) + TERM_LEN(str));
406 size_t free_bytes = str_embed_capa(str) - used_bytes;
410 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
412 FL_SET(str, STR_PRECOMPUTED_HASH);
420 bool force_precompute_hash;
424 fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
433 if (rb_objspace_garbage_object_p(str)) {
449 long capa =
len +
sizeof(st_index_t);
450 int term_len = TERM_LEN(str);
452 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
458 str_store_precomputed_hash(new_str, fstring_hash(str));
463 #ifdef PRECOMPUTED_FAKESTR_HASH
465 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
479 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
482 if (STR_SHARED_P(str)) {
484 str_make_independent(str);
487 if (!BARE_STRING_P(str)) {
493 RBASIC(str)->flags |= RSTRING_FSTR;
495 *key = *value = arg->fstr = str;
501 rb_fstring(
VALUE str)
508 if (
FL_TEST(str, RSTRING_FSTR))
511 bare = BARE_STRING_P(str);
513 if (STR_EMBED_P(str)) {
518 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
527 fstr = register_fstring(str,
false,
false);
530 str_replace_shared_without_enc(str, fstr);
538 register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
542 .force_precompute_hash = force_precompute_hash
545 #if SIZEOF_VOIDP == SIZEOF_LONG
549 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
555 st_table *frozen_strings = rb_vm_fstring_table();
558 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
559 }
while (UNDEF_P(args.fstr));
572 setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
585 fake_str->
as.
heap.ptr = (
char *)name;
587 return (
VALUE)fake_str;
605 rb_fstring_new(
const char *
ptr,
long len)
608 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
615 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
619 rb_fstring_cstr(
const char *
ptr)
621 return rb_fstring_new(
ptr, strlen(
ptr));
625 fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
635 const char *aptr, *bptr;
638 return (alen != blen ||
640 memcmp(aptr, bptr, alen) != 0);
644 single_byte_optimizable(
VALUE str)
648 case ENCINDEX_ASCII_8BIT:
649 case ENCINDEX_US_ASCII:
671 static inline const char *
672 search_nonascii(
const char *p,
const char *e)
674 const uintptr_t *s, *t;
676 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
677 # if SIZEOF_UINTPTR_T == 8
678 # define NONASCII_MASK UINT64_C(0x8080808080808080)
679 # elif SIZEOF_UINTPTR_T == 4
680 # define NONASCII_MASK UINT32_C(0x80808080)
682 # error "don't know what to do."
685 # if SIZEOF_UINTPTR_T == 8
686 # define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
687 # elif SIZEOF_UINTPTR_T == 4
688 # define NONASCII_MASK 0x80808080UL
690 # error "don't know what to do."
694 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
695 #if !UNALIGNED_WORD_ACCESS
696 if ((uintptr_t)p % SIZEOF_VOIDP) {
697 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
702 case 7:
if (p[-7]&0x80)
return p-7;
703 case 6:
if (p[-6]&0x80)
return p-6;
704 case 5:
if (p[-5]&0x80)
return p-5;
705 case 4:
if (p[-4]&0x80)
return p-4;
707 case 3:
if (p[-3]&0x80)
return p-3;
708 case 2:
if (p[-2]&0x80)
return p-2;
709 case 1:
if (p[-1]&0x80)
return p-1;
714 #if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
715 #define aligned_ptr(value) \
716 __builtin_assume_aligned((value), sizeof(uintptr_t))
718 #define aligned_ptr(value) (uintptr_t *)(value)
721 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
724 if (*s & NONASCII_MASK) {
725 #ifdef WORDS_BIGENDIAN
726 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
728 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
738 case 7:
if (e[-7]&0x80)
return e-7;
739 case 6:
if (e[-6]&0x80)
return e-6;
740 case 5:
if (e[-5]&0x80)
return e-5;
741 case 4:
if (e[-4]&0x80)
return e-4;
743 case 3:
if (e[-3]&0x80)
return e-3;
744 case 2:
if (e[-2]&0x80)
return e-2;
745 case 1:
if (e[-1]&0x80)
return e-1;
753 const char *e = p +
len;
757 p = search_nonascii(p, e);
762 p = search_nonascii(p, e);
769 p = search_nonascii(p, e);
794 p = search_nonascii(p, e);
799 p = search_nonascii(p, e);
812 p = search_nonascii(p, e);
851 rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
856 str_enc_copy(dest, src);
881 rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
883 str_enc_copy(dest, src);
896 return enc_coderange_scan(str, enc);
905 cr = enc_coderange_scan(str, get_encoding(str));
912 rb_enc_str_asciicompat(
VALUE str)
915 return str_encindex_fastpath(encindex) ||
rb_enc_asciicompat(rb_enc_get_from_index(encindex));
923 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
932 str_mod_check(
VALUE s,
const char *p,
long len)
940 str_capacity(
VALUE str,
const int termlen)
942 if (STR_EMBED_P(str)) {
943 return str_embed_capa(str) - termlen;
945 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
949 return RSTRING(str)->as.heap.aux.capa;
956 return str_capacity(str, TERM_LEN(str));
960 must_not_null(
const char *
ptr)
968 str_alloc_embed(
VALUE klass,
size_t capa)
970 size_t size = rb_str_embed_size(
capa);
974 NEWOBJ_OF(str,
struct RString, klass,
981 str_alloc_heap(
VALUE klass)
983 NEWOBJ_OF(str,
struct RString, klass,
990 empty_str_alloc(
VALUE klass)
992 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
993 VALUE str = str_alloc_embed(klass, 0);
994 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1012 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1016 if (STR_EMBEDDABLE_P(
len, termlen)) {
1017 str = str_alloc_embed(klass,
len + termlen);
1023 str = str_alloc_heap(klass);
1029 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1032 rb_enc_raw_set(str, enc);
1038 STR_SET_LEN(str,
len);
1081 __msan_unpoison_string(
ptr);
1108 str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1120 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1121 str = str_alloc_heap(klass);
1125 RBASIC(str)->flags |= STR_NOFREE;
1155 static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1157 int ecflags,
VALUE ecopts);
1164 return is_ascii_string(str);
1175 if (!to)
return str;
1177 if (from == to)
return str;
1179 rb_is_ascii8bit_enc(to)) {
1180 if (STR_ENC_GET(str) != to) {
1189 from, to, ecflags, ecopts);
1190 if (
NIL_P(newstr)) {
1198 rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1204 if (ofs < -olen || olen < ofs)
1206 if (ofs < 0) ofs += olen;
1208 STR_SET_LEN(newstr, ofs);
1213 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1221 STR_SET_LEN(str, 0);
1228 str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1230 int ecflags,
VALUE ecopts)
1235 VALUE econv_wrapper;
1236 const unsigned char *start, *sp;
1237 unsigned char *dest, *dp;
1238 size_t converted_output = (size_t)ofs;
1243 RBASIC_CLEAR_CLASS(econv_wrapper);
1245 if (!ec)
return Qnil;
1248 sp = (
unsigned char*)
ptr;
1250 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1251 (dp = dest + converted_output),
1255 size_t converted_input = sp - start;
1256 size_t rest =
len - converted_input;
1257 converted_output = dp - dest;
1259 if (converted_input && converted_output &&
1260 rest < (LONG_MAX / converted_output)) {
1261 rest = (rest * converted_output) / converted_input;
1266 olen += rest < 2 ? 2 : rest;
1308 if (!ienc || eenc == ienc) {
1322 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1323 rb_str_initialize(str,
ptr,
len, eenc);
1333 !is_ascii_string(str)) {
1396 str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1398 const int termlen = TERM_LEN(str);
1403 if (str_embed_capa(str2) >=
len + termlen) {
1404 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1405 STR_SET_EMBED(str2);
1407 TERM_FILL(ptr2+
len, termlen);
1411 if (STR_SHARED_P(str)) {
1412 root =
RSTRING(str)->as.heap.aux.shared;
1421 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1423 rb_fatal(
"about to free a possible shared root");
1425 char *ptr2 = STR_HEAP_PTR(str2);
1427 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1430 FL_SET(str2, STR_NOEMBED);
1432 STR_SET_SHARED(str2, root);
1435 STR_SET_LEN(str2,
len);
1443 str_replace_shared_without_enc(str2, str);
1444 rb_enc_cr_str_exact_copy(str2, str);
1451 return str_replace_shared(str_alloc_heap(klass), str);
1468 rb_str_new_frozen_String(
VALUE orig)
1475 rb_str_tmp_frozen_acquire(
VALUE orig)
1478 return str_new_frozen_buffer(0, orig, FALSE);
1482 rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1484 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1485 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1487 VALUE str = str_alloc_heap(0);
1490 FL_SET(str, STR_SHARED_ROOT);
1492 size_t capa = str_capacity(orig, TERM_LEN(orig));
1498 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1499 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1506 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1507 RBASIC(orig)->flags &= ~STR_NOFREE;
1508 STR_SET_SHARED(orig, str);
1518 rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1523 if (STR_EMBED_P(tmp)) {
1536 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1537 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1542 STR_SET_LEN(tmp, 0);
1550 return str_new_frozen_buffer(klass, orig, TRUE);
1554 heap_str_make_shared(
VALUE klass,
VALUE orig)
1559 VALUE str = str_alloc_heap(klass);
1562 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1563 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1564 RBASIC(orig)->flags &= ~STR_NOFREE;
1565 STR_SET_SHARED(orig, str);
1572 str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1578 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1580 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1594 if ((ofs > 0) || (rest > 0) ||
1597 str = str_new_shared(klass,
shared);
1599 RSTRING(str)->as.heap.ptr += ofs;
1600 STR_SET_LEN(str,
RSTRING_LEN(str) - (ofs + rest));
1608 else if (STR_EMBEDDABLE_P(
RSTRING_LEN(orig), TERM_LEN(orig))) {
1609 str = str_alloc_embed(klass,
RSTRING_LEN(orig) + TERM_LEN(orig));
1617 str = heap_str_make_shared(klass, orig);
1621 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1633 str_new_empty_String(
VALUE str)
/* Floor for requested buffer capacities: rb_str_init() raises a smaller
 * `capa` to this value. */
1640 #define STR_BUF_MIN_SIZE 63
1645 if (STR_EMBEDDABLE_P(
capa, 1)) {
1653 RSTRING(str)->as.heap.ptr[0] =
'\0';
1673 return str_new(0, 0,
len);
1679 if (
FL_TEST(str, RSTRING_FSTR)) {
1680 st_data_t fstr = (st_data_t)str;
1684 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1685 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1690 if (STR_EMBED_P(str)) {
1691 RB_DEBUG_COUNTER_INC(obj_str_embed);
1693 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1694 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1695 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1698 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1699 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1704 rb_str_memsize(
VALUE str)
1706 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1707 return STR_HEAP_SIZE(str);
1717 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1720 static inline void str_discard(
VALUE str);
1721 static void str_shared_replace(
VALUE str,
VALUE str2);
1726 if (str != str2) str_shared_replace(str, str2);
1737 enc = STR_ENC_GET(str2);
1744 if (str_embed_capa(str) >=
RSTRING_LEN(str2) + termlen) {
1751 if (STR_EMBED_P(str2)) {
1756 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1757 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1758 RSTRING(str2)->as.heap.ptr = new_ptr;
1759 STR_SET_LEN(str2,
len);
1761 STR_SET_NOEMBED(str2);
1764 STR_SET_NOEMBED(str);
1768 if (
FL_TEST(str2, STR_SHARED)) {
1770 STR_SET_SHARED(str,
shared);
1773 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1777 STR_SET_EMBED(str2);
1779 STR_SET_LEN(str2, 0);
1794 return rb_obj_as_string_result(str, obj);
1798 rb_obj_as_string_result(
VALUE str,
VALUE obj)
1811 if (STR_SHARED_P(str2)) {
1814 STR_SET_NOEMBED(str);
1815 STR_SET_LEN(str,
len);
1817 STR_SET_SHARED(str,
shared);
1818 rb_enc_cr_str_exact_copy(str, str2);
1821 str_replace_shared(str, str2);
1830 size_t size = rb_str_embed_size(
capa);
1834 NEWOBJ_OF(str,
struct RString, klass,
1843 NEWOBJ_OF(str,
struct RString, klass,
1874 return str_duplicate_setup_encoding(str, dup, flags);
1883 root =
RSTRING(str)->as.heap.aux.shared;
1885 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1886 root = str = str_new_frozen(klass, str);
1893 FL_SET(root, STR_SHARED_ROOT);
1895 flags |= RSTRING_NOEMBED | STR_SHARED;
1898 return str_duplicate_setup_encoding(str, dup, flags);
1904 if (STR_EMBED_P(str)) {
1905 return str_duplicate_setup_embed(klass, str, dup);
1908 return str_duplicate_setup_heap(klass, str, dup);
1916 if (STR_EMBED_P(str)) {
1917 dup = str_alloc_embed(klass,
RSTRING_LEN(str) + TERM_LEN(str));
1920 dup = str_alloc_heap(klass);
1923 return str_duplicate_setup(klass, str, dup);
1934 rb_str_dup_m(
VALUE str)
1936 if (LIKELY(BARE_STRING_P(str))) {
1947 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1954 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1958 new_str = ec_str_alloc_embed(ec, klass,
RSTRING_LEN(str) + TERM_LEN(str));
1959 str_duplicate_setup_embed(klass, str, new_str);
1962 new_str = ec_str_alloc_heap(ec, klass);
1963 str_duplicate_setup_heap(klass, str, new_str);
1972 rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
1991 rb_str_init(
int argc,
VALUE *argv,
VALUE str)
1993 static ID keyword_ids[2];
1994 VALUE orig, opt, venc, vcapa;
1999 if (!keyword_ids[0]) {
2000 keyword_ids[0] = rb_id_encoding();
2001 CONST_ID(keyword_ids[1],
"capacity");
2009 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2012 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2017 if (
capa < STR_BUF_MIN_SIZE) {
2018 capa = STR_BUF_MIN_SIZE;
2026 if (orig == str) n = 0;
2028 str_modifiable(str);
2029 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2031 const size_t size = (size_t)
capa + termlen;
2033 const size_t osize =
RSTRING_LEN(str) + TERM_LEN(str);
2034 char *new_ptr =
ALLOC_N(
char, size);
2035 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2036 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2038 RSTRING(str)->as.heap.ptr = new_ptr;
2040 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2041 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2042 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2044 STR_SET_LEN(str,
len);
2048 rb_enc_cr_str_exact_copy(str, orig);
2050 FL_SET(str, STR_NOEMBED);
2069 rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2075 static ID keyword_ids[2];
2085 keyword_ids[0] = rb_id_encoding();
2086 CONST_ID(keyword_ids[1],
"capacity");
2088 encoding = kwargs[0];
2089 capacity = kwargs[1];
2098 if (UNDEF_P(encoding)) {
2104 if (!UNDEF_P(encoding)) {
2109 if (UNDEF_P(capacity)) {
2111 VALUE empty_str = str_new(klass,
"", 0);
2117 VALUE copy = str_duplicate(klass, orig);
2131 if (orig_capa >
capa) {
2136 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2137 STR_SET_LEN(str, 0);
2147 #ifdef NONASCII_MASK
/* True unless the top two bits of `c` are 10, i.e. unless `c` has the
 * 10xxxxxx bit pattern of a UTF-8 continuation byte. */
2148 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2163 static inline uintptr_t
2164 count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2169 d = (d>>6) | (~d>>7);
2170 d &= NONASCII_MASK >> 7;
2173 #if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2175 return rb_popcount_intptr(d);
2179 # if SIZEOF_VOIDP == 8
2188 enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2194 long diff = (long)(e - p);
2197 #ifdef NONASCII_MASK
2200 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2201 const uintptr_t *s, *t;
2202 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2203 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2204 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2205 while (p < (
const char *)s) {
2206 if (is_utf8_lead_byte(*p))
len++;
2210 len += count_utf8_lead_bytes_with_word(s);
2213 p = (
const char *)s;
2216 if (is_utf8_lead_byte(*p))
len++;
2227 q = search_nonascii(p, e);
2240 q = search_nonascii(p, e);
2253 for (c=0; p<e; c++) {
2269 rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2277 long diff = (long)(e - p);
2284 q = search_nonascii(p, e);
2307 for (c=0; p<e; c++) {
2332 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2333 if (!enc) enc = STR_ENC_GET(str);
2339 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2344 return enc_strlen(p, e, enc, cr);
2351 return str_strlen(str, NULL);
2365 return LONG2NUM(str_strlen(str, NULL));
2377 rb_str_bytesize(
VALUE str)
2395 rb_str_empty(
VALUE str)
2415 char *ptr1, *ptr2, *ptr3;
2420 enc = rb_enc_check_str(str1, str2);
2424 if (len1 > LONG_MAX - len2) {
2427 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2429 memcpy(ptr3, ptr1, len1);
2430 memcpy(ptr3+len1, ptr2, len2);
2431 TERM_FILL(&ptr3[len1+len2], termlen);
2447 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2456 else if (enc2 < 0) {
2459 else if (enc1 != enc2) {
2462 else if (len1 > LONG_MAX - len2) {
2503 if (STR_EMBEDDABLE_P(
len, 1)) {
2512 STR_SET_LEN(str2,
len);
2521 termlen = TERM_LEN(str);
2527 while (n <=
len/2) {
2528 memcpy(ptr2 + n, ptr2, n);
2531 memcpy(ptr2 + n, ptr2,
len-n);
2533 STR_SET_LEN(str2,
len);
2534 TERM_FILL(&ptr2[
len], termlen);
2535 rb_enc_cr_str_copy_for_substr(str2, str);
2570 rb_check_lockedtmp(
VALUE str)
2572 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Union of the FL_FREEZE, STR_TMPLOCK and STR_CHILLED flag bits. */
2579 #define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2581 str_modifiable(
VALUE str)
2584 if (CHILLED_STRING_P(str)) {
2585 CHILLED_STRING_MUTATED(str);
2587 rb_check_lockedtmp(str);
2588 rb_check_frozen(str);
2593 str_dependent_p(
VALUE str)
2595 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK extended with the STR_SHARED and STR_NOFREE bits. */
2605 #define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2607 str_independent(
VALUE str)
2610 str_modifiable(str);
2611 return !str_dependent_p(str);
2617 str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2625 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2630 STR_SET_LEN(str,
len);
2637 memcpy(
ptr, oldptr,
len);
2639 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2642 STR_SET_NOEMBED(str);
2643 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2644 TERM_FILL(
ptr +
len, termlen);
2646 STR_SET_LEN(str,
len);
2653 if (!str_independent(str))
2654 str_make_independent(str);
2661 int termlen = TERM_LEN(str);
2667 if (expand >= LONG_MAX -
len) {
2671 if (!str_independent(str)) {
2672 str_make_independent_expand(str,
len, expand, termlen);
2674 else if (expand > 0) {
2675 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2682 str_modify_keep_cr(
VALUE str)
2684 if (!str_independent(str))
2685 str_make_independent(str);
2692 str_discard(
VALUE str)
2694 str_modifiable(str);
2695 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2696 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2697 RSTRING(str)->as.heap.ptr = 0;
2698 STR_SET_LEN(str, 0);
2733 zero_filled(
const char *s,
int n)
2735 for (; n > 0; --n) {
2742 str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2744 const char *e = s +
len;
2747 if (zero_filled(s, minlen))
return s;
2753 str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2758 if (str_dependent_p(str)) {
2759 if (!zero_filled(s +
len, termlen))
2760 str_make_independent_expand(str,
len, 0L, termlen);
2763 TERM_FILL(s +
len, termlen);
2770 rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2772 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2777 rb_check_lockedtmp(str);
2778 str_make_independent_expand(str,
len, 0L, termlen);
2780 else if (str_dependent_p(str)) {
2781 if (termlen > oldtermlen)
2782 str_make_independent_expand(str,
len, 0L, termlen);
2785 if (!STR_EMBED_P(str)) {
2790 if (termlen > oldtermlen) {
2799 str_null_check(
VALUE str,
int *w)
2808 if (str_null_char(s,
len, minlen, enc)) {
2811 return str_fill_term(str, s,
len, minlen);
2814 if (!s || memchr(s, 0,
len)) {
2818 s = str_fill_term(str, s,
len, minlen);
2824 rb_str_to_cstr(
VALUE str)
2827 return str_null_check(str, &w);
2835 char *s = str_null_check(str, &w);
2846 rb_str_fill_terminator(
VALUE str,
const int newminlen)
2850 return str_fill_term(str, s,
len, newminlen);
2856 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2880 str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2890 const char *p2, *e2;
2893 while (p < e && 0 < nth) {
2900 p2 = search_nonascii(p, e2);
2920 while (p < e && nth--) {
2932 return str_nth_len(p, e, &nth, enc);
2936 str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2941 p = str_nth_len(p, e, &nth, enc);
2950 str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2952 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2953 if (!pp)
return e - p;
2961 STR_ENC_GET(str), single_byte_optimizable(str));
2964 #ifdef NONASCII_MASK
2966 str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2969 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2970 const uintptr_t *s, *t;
2971 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2972 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2973 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2974 while (p < (
const char *)s) {
2975 if (is_utf8_lead_byte(*p)) nth--;
2979 nth -= count_utf8_lead_bytes_with_word(s);
2981 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2985 if (is_utf8_lead_byte(*p)) {
2986 if (nth == 0)
break;
2996 str_utf8_offset(
const char *p,
const char *e,
long nth)
2998 const char *pp = str_utf8_nth(p, e, &nth);
3007 if (single_byte_optimizable(str) || pos < 0)
3011 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3016 str_subseq(
VALUE str,
long beg,
long len)
3024 const int termlen = TERM_LEN(str);
3032 if (str_embed_capa(str2) >=
len + termlen) {
3033 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3034 STR_SET_EMBED(str2);
3036 TERM_FILL(ptr2+
len, termlen);
3038 STR_SET_LEN(str2,
len);
3042 str_replace_shared(str2, str);
3045 RSTRING(str2)->as.heap.ptr += beg;
3047 STR_SET_LEN(str2,
len);
3057 VALUE str2 = str_subseq(str, beg,
len);
3058 rb_enc_cr_str_copy_for_substr(str2, str);
3071 if (
len < 0)
return 0;
3075 if (single_byte_optimizable(str)) {
3076 if (beg > blen)
return 0;
3079 if (beg < 0)
return 0;
3081 if (
len > blen - beg)
3083 if (
len < 0)
return 0;
3088 if (
len > -beg)
len = -beg;
3100 slen = str_strlen(str, enc);
3102 if (beg < 0)
return 0;
3104 if (
len == 0)
goto end;
3111 if (beg > str_strlen(str, enc))
return 0;
3114 #ifdef NONASCII_MASK
3117 p = str_utf8_nth(s, e, &beg);
3118 if (beg > 0)
return 0;
3119 len = str_utf8_offset(p, e,
len);
3125 p = s + beg * char_sz;
3129 else if (
len * char_sz > e - p)
3134 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3135 if (beg > 0)
return 0;
3139 len = str_offset(p, e,
len, enc, 0);
3147 static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3152 return str_substr(str, beg,
len, TRUE);
3162 str_substr(
VALUE str,
long beg,
long len,
int empty)
3166 if (!p)
return Qnil;
3167 if (!
len && !empty)
return Qnil;
3171 VALUE str2 = str_subseq(str, beg,
len);
3172 rb_enc_cr_str_copy_for_substr(str2, str);
3180 if (CHILLED_STRING_P(str)) {
3198 str_uplus(
VALUE str)
3200 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3230 str_uminus(
VALUE str)
3235 return rb_fstring(str);
/* From this point on, rb_str_dup_frozen is a plain alias for
 * rb_str_new_frozen (the name was #undef'ed near the top of this file). */
3239 #define rb_str_dup_frozen rb_str_new_frozen
3244 if (
FL_TEST(str, STR_TMPLOCK)) {
3247 FL_SET(str, STR_TMPLOCK);
3254 if (!
FL_TEST(str, STR_TMPLOCK)) {
3272 const int termlen = TERM_LEN(str);
3274 str_modifiable(str);
3275 if (STR_SHARED_P(str)) {
3278 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3312 STR_SET_LEN(str,
len);
3323 int independent = str_independent(str);
3325 const int termlen = TERM_LEN(str);
3327 if (slen >
len || (termlen != 1 && slen <
len)) {
3333 if (STR_EMBED_P(str)) {
3334 if (
len == slen)
return str;
3335 if (str_embed_capa(str) >=
len + termlen) {
3336 STR_SET_LEN(str,
len);
3340 str_make_independent_expand(str, slen,
len - slen, termlen);
3342 else if (str_embed_capa(str) >=
len + termlen) {
3343 char *
ptr = STR_HEAP_PTR(str);
3345 if (slen >
len) slen =
len;
3348 STR_SET_LEN(str,
len);
3352 else if (!independent) {
3353 if (
len == slen)
return str;
3354 str_make_independent_expand(str, slen,
len - slen, termlen);
3358 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3359 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3362 else if (
len == slen)
return str;
3363 STR_SET_LEN(str,
len);
3370 str_ensure_available_capa(
VALUE str,
long len)
3372 str_modify_keep_cr(str);
3374 const int termlen = TERM_LEN(str);
3381 long total = olen +
len;
3382 long capa = str_capacity(str, termlen);
3385 if (total >= LONG_MAX / 2) {
3388 while (total >
capa) {
3391 RESIZE_CAPA_TERM(str,
capa, termlen);
3396 str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3399 str_modify_keep_cr(str);
3404 if (
len == 0)
return 0;
3406 long total, olen,
off = -1;
3408 const int termlen = TERM_LEN(str);
3411 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3415 long capa = str_capacity(str, termlen);
3417 if (olen > LONG_MAX -
len) {
3422 if (total >= LONG_MAX / 2) {
3425 while (total >
capa) {
3428 RESIZE_CAPA_TERM(str,
capa, termlen);
3434 memcpy(sptr + olen,
ptr,
len);
3435 STR_SET_LEN(str, total);
3436 TERM_FILL(sptr + total, termlen);
/*
 * Convenience wrappers around str_buf_cat4() that always pass `false` for
 * its final `keep_cr` argument.
 *   str_buf_cat  - the caller supplies the byte length explicitly.
 *   str_buf_cat2 - the length comes from rb_strlen_lit(ptr); presumably
 *                  `ptr` must be a string literal (confirm against the
 *                  definition of rb_strlen_lit).
 */
3441 #define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3442 #define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3447 if (
len == 0)
return str;
3451 return str_buf_cat(str,
ptr,
len);
3462 rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3467 if (UNLIKELY(!str_independent(str))) {
3468 str_make_independent(str);
3471 long string_length = -1;
3472 const int null_terminator_length = 1;
3477 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3481 long string_capacity = str_capacity(str, null_terminator_length);
3487 if (LIKELY(string_capacity >= string_length + 1)) {
3489 sptr[string_length] = byte;
3490 STR_SET_LEN(str, string_length + 1);
3491 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3495 str_buf_cat(str, (
char *)&
byte, 1);
3522 rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3523 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3532 if (str_encindex == ptr_encindex) {
3552 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3561 *ptr_cr_ret = ptr_cr;
3563 if (str_encindex != ptr_encindex &&
3572 res_encindex = str_encindex;
3577 res_encindex = str_encindex;
3581 res_encindex = ptr_encindex;
3586 res_encindex = str_encindex;
3593 res_encindex = str_encindex;
3601 str_buf_cat(str,
ptr,
len);
3607 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3614 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3625 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3631 unsigned int c = (
unsigned char)*
ptr;
3634 rb_enc_cr_str_buf_cat(str, buf,
len,
3647 if (str_enc_fastpath(str)) {
3684 rb_str_concat_literals(
size_t num,
const VALUE *strary)
3688 unsigned long len = 1;
3695 str_enc_copy_direct(str, strary[0]);
3697 for (i = s; i < num; ++i) {
3698 const VALUE v = strary[i];
3702 if (encidx != ENCINDEX_US_ASCII) {
3729 rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3731 str_modifiable(str);
3736 else if (argc > 1) {
3740 for (i = 0; i < argc; i++) {
3773 rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3775 long needed_capacity = 0;
3779 for (
int index = 0; index < argc; index++) {
3780 VALUE obj = argv[index];
3793 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3800 str_ensure_available_capa(str, needed_capacity);
3803 for (
int index = 0; index < argc; index++) {
3804 VALUE obj = argv[index];
3809 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3810 char byte = (char)(
NUM2INT(obj) & 0xFF);
3824 rb_bug(
"append_as_bytes arguments should have been validated");
3828 STR_SET_LEN(str,
RSTRING_LEN(str) + needed_capacity);
3829 TERM_FILL(sptr, TERM_LEN(str));
3834 for (
int index = 0; index < argc; index++) {
3835 VALUE obj = argv[index];
3852 rb_bug(
"append_as_bytes arguments should have been validated");
3926 if (rb_num_to_uint(str2, &code) == 0) {
3939 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3942 rb_str_buf_cat_byte(str1, (
unsigned char)code);
3951 case ONIGERR_INVALID_CODE_POINT_VALUE:
3954 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3978 rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3982 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3987 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3988 return ENCINDEX_ASCII_8BIT;
4011 rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4013 str_modifiable(str);
4018 else if (argc > 1) {
4022 for (i = 0; i < argc; i++) {
4035 st_index_t precomputed_hash;
4036 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4038 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4039 return precomputed_hash;
4042 return str_do_hash(str);
4049 const char *ptr1, *ptr2;
4052 return (len1 != len2 ||
4054 memcmp(ptr1, ptr2, len1) != 0);
4068 rb_str_hash_m(
VALUE str)
/*
 * Evaluates to the smaller of `a` and `b` (yields `a` when they compare
 * equal).  Each argument appears twice in the expansion, so do not pass
 * expressions with side effects.
 */
4074 #define lesser(a,b) (((a)>(b))?(b):(a))
4086 if (idx1 == idx2)
return TRUE;
4105 const char *ptr1, *ptr2;
4108 if (str1 == str2)
return 0;
4111 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4120 if (len1 > len2)
return 1;
4123 if (retval > 0)
return 1;
4150 if (str1 == str2)
return Qtrue;
4157 return rb_str_eql_internal(str1, str2);
4181 if (str1 == str2)
return Qtrue;
4183 return rb_str_eql_internal(str1, str2);
4214 return rb_invcmp(str1, str2);
4256 return str_casecmp(str1, s);
4264 const char *p1, *p1end, *p2, *p2end;
4273 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4274 while (p1 < p1end && p2 < p2end) {
4276 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4277 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4279 return INT2FIX(c1 < c2 ? -1 : 1);
4286 while (p1 < p1end && p2 < p2end) {
4290 if (0 <= c1 && 0 <= c2) {
4294 return INT2FIX(c1 < c2 ? -1 : 1);
4300 len = l1 < l2 ? l1 : l2;
4301 r = memcmp(p1, p2,
len);
4303 return INT2FIX(r < 0 ? -1 : 1);
4305 return INT2FIX(l1 < l2 ? -1 : 1);
4346 return str_casecmp_p(str1, s);
4353 VALUE folded_str1, folded_str2;
4354 VALUE fold_opt = sym_fold;
4361 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4362 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4364 return rb_str_eql(folded_str1, folded_str2);
4368 strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4369 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4371 const char *search_start = str_ptr;
4372 long pos, search_len = str_len - offset;
4376 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4377 if (pos < 0)
return pos;
4379 if (t == search_start + pos)
break;
4380 search_len -= t - search_start;
4381 if (search_len <= 0)
return -1;
4382 offset += t - search_start;
4385 return pos + offset;
/*
 * Shorthands for rb_strseq_index() that fix its last argument (`in_byte`):
 *   rb_str_index     - passes 0.
 *   rb_str_byteindex - passes 1.
 */
4389 #define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4390 #define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4393 rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4395 const char *str_ptr, *str_ptr_end, *sub_ptr;
4396 long str_len, sub_len;
4400 if (is_broken_string(sub))
return -1;
4408 if (str_len < sub_len)
return -1;
4411 long str_len_char, sub_len_char;
4412 int single_byte = single_byte_optimizable(str);
4413 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4414 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4416 offset += str_len_char;
4417 if (offset < 0)
return -1;
4419 if (str_len_char - offset < sub_len_char)
return -1;
4420 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4423 if (sub_len == 0)
return offset;
4426 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4440 rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4447 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4448 long slen = str_strlen(str, enc);
4450 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4463 enc, single_byte_optimizable(str));
4474 pos = rb_str_index(str, sub, pos);
4488 str_ensure_byte_pos(
VALUE str,
long pos)
4490 if (!single_byte_optimizable(str)) {
4493 const char *p = s + pos;
4494 if (!at_char_boundary(s, p, e,
rb_enc_get(str))) {
4496 "offset %ld does not land on character boundary", pos);
4543 rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4549 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4552 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4563 str_ensure_byte_pos(str, pos);
4575 pos = rb_str_byteindex(str, sub, pos);
4576 if (pos >= 0)
return LONG2NUM(pos);
/*
 * Fallback memrchr(), compiled only when HAVE_MEMRCHR is not defined.
 * Starts one past the last byte (search_str + search_len) and steps
 * backwards, returning a pointer to the first byte encountered whose value,
 * read as unsigned char, equals `chr`.  The result when no byte matches is
 * not visible in this excerpt.
 */
4581 #ifndef HAVE_MEMRCHR
4583 memrchr(
const char *search_str,
int chr,
long search_len)
4585 const char *
ptr = search_str + search_len;
4586 while (
ptr > search_str) {
/* Pre-decrement so the byte just before the current position is tested. */
4587 if ((
unsigned char)*(--
ptr) == chr)
return (
void *)
ptr;
4597 char *hit, *adjusted;
4599 long slen, searchlen;
4604 if (slen == 0)
return s - sbeg;
4608 searchlen = s - sbeg + 1;
4610 if (memcmp(s, t, slen) == 0) {
4615 hit = memrchr(sbeg, c, searchlen);
4618 if (hit != adjusted) {
4619 searchlen = adjusted - sbeg;
4622 if (memcmp(hit, t, slen) == 0)
4624 searchlen = adjusted - sbeg;
4625 }
while (searchlen > 0);
4632 rb_str_rindex(
VALUE str,
VALUE sub,
long pos)
4640 if (is_broken_string(sub))
return -1;
4641 singlebyte = single_byte_optimizable(str);
4643 slen = str_strlen(sub, enc);
4646 if (
len < slen)
return -1;
4647 if (
len - pos < slen) pos =
len - slen;
4648 if (
len == 0)
return pos;
4659 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4660 return str_rindex(str, sub, s, enc);
4721 rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4726 long pos,
len = str_strlen(str, enc);
4728 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4730 if (pos < 0 && (pos +=
len) < 0) {
4736 if (pos >
len) pos =
len;
4745 enc, single_byte_optimizable(str));
4756 pos = rb_str_rindex(str, sub, pos);
4766 rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4773 if (is_broken_string(sub))
return -1;
4778 if (
len < slen)
return -1;
4779 if (
len - pos < slen) pos =
len - slen;
4780 if (
len == 0)
return pos;
4792 return str_rindex(str, sub, s, enc);
4857 rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4863 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4865 if (pos < 0 && (pos +=
len) < 0) {
4871 if (pos >
len) pos =
len;
4877 str_ensure_byte_pos(str, pos);
4889 pos = rb_str_byterindex(str, sub, pos);
4890 if (pos >= 0)
return LONG2NUM(pos);
4926 switch (OBJ_BUILTIN_TYPE(y)) {
4978 rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5017 rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5021 re = get_pat(argv[0]);
5022 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5025 enum neighbor_char {
5031 static enum neighbor_char
5041 return NEIGHBOR_NOT_CHAR;
5045 if (!l)
return NEIGHBOR_NOT_CHAR;
5046 if (l !=
len)
return NEIGHBOR_WRAPPED;
5050 return NEIGHBOR_NOT_CHAR;
5052 return NEIGHBOR_FOUND;
5055 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5058 return NEIGHBOR_WRAPPED;
5059 ++((
unsigned char*)p)[i];
5064 return NEIGHBOR_FOUND;
5067 memset(p+l, 0xff,
len-l);
5073 for (len2 =
len-1; 0 < len2; len2--) {
5078 memset(p+len2+1, 0xff,
len-(len2+1));
5083 static enum neighbor_char
5092 return NEIGHBOR_NOT_CHAR;
5095 if (!c)
return NEIGHBOR_NOT_CHAR;
5098 if (!l)
return NEIGHBOR_NOT_CHAR;
5099 if (l !=
len)
return NEIGHBOR_WRAPPED;
5103 return NEIGHBOR_NOT_CHAR;
5105 return NEIGHBOR_FOUND;
5108 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5111 return NEIGHBOR_WRAPPED;
5112 --((
unsigned char*)p)[i];
5117 return NEIGHBOR_FOUND;
5120 memset(p+l, 0,
len-l);
5126 for (len2 =
len-1; 0 < len2; len2--) {
5131 memset(p+len2+1, 0,
len-(len2+1));
5145 static enum neighbor_char
5146 enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5148 enum neighbor_char ret;
5152 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5156 const int max_gaps = 1;
5160 ctype = ONIGENC_CTYPE_DIGIT;
5162 ctype = ONIGENC_CTYPE_ALPHA;
5164 return NEIGHBOR_NOT_CHAR;
5167 for (
try = 0;
try <= max_gaps; ++
try) {
5168 ret = enc_succ_char(p,
len, enc);
5169 if (ret == NEIGHBOR_FOUND) {
5172 return NEIGHBOR_FOUND;
5179 ret = enc_pred_char(p,
len, enc);
5180 if (ret == NEIGHBOR_FOUND) {
5194 return NEIGHBOR_NOT_CHAR;
5197 if (ctype != ONIGENC_CTYPE_DIGIT) {
5199 return NEIGHBOR_WRAPPED;
5203 enc_succ_char(carry,
len, enc);
5204 return NEIGHBOR_WRAPPED;
5273 rb_enc_cr_str_copy_for_substr(str, orig);
5274 return str_succ(str);
5281 char *sbeg, *s, *e, *last_alnum = 0;
5282 int found_alnum = 0;
5284 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5285 long carry_pos = 0, carry_len = 1;
5286 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5289 if (slen == 0)
return str;
5291 enc = STR_ENC_GET(str);
5293 s = e = sbeg + slen;
5296 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5303 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5304 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5305 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5307 case NEIGHBOR_NOT_CHAR:
5309 case NEIGHBOR_FOUND:
5311 case NEIGHBOR_WRAPPED:
5316 carry_pos = s - sbeg;
5322 enum neighbor_char neighbor;
5323 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5325 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5326 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5328 neighbor = enc_succ_char(tmp, l, enc);
5330 case NEIGHBOR_FOUND:
5334 case NEIGHBOR_WRAPPED:
5337 case NEIGHBOR_NOT_CHAR:
5342 enc_succ_char(s, l, enc);
5345 MEMCPY(carry, s,
char, l);
5348 carry_pos = s - sbeg;
5352 RESIZE_CAPA(str, slen + carry_len);
5354 s = sbeg + carry_pos;
5355 memmove(s + carry_len, s, slen - carry_pos);
5356 memmove(s, carry, carry_len);
5358 STR_SET_LEN(str, slen);
5373 rb_str_succ_bang(
VALUE str)
5381 all_digits_p(
const char *s,
long len)
5433 rb_str_upto(
int argc,
VALUE *argv,
VALUE beg)
5435 VALUE end, exclusive;
5439 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5445 VALUE current, after_end;
5453 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5459 if (c > e || (excl && c == e))
return beg;
5463 if ((*each)(str, arg))
break;
5464 if (!excl && c == e)
break;
5466 if (excl && c == e)
break;
5486 if (excl && bi == ei)
break;
5487 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5492 ID op = excl ?
'<' : idLE;
5493 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5498 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5506 if (n > 0 || (excl && n == 0))
return beg;
5514 if ((*each)(current, arg))
break;
5515 if (
NIL_P(next))
break;
5536 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5544 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5552 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5560 if ((*each)(current, arg))
break;
5574 if (!
rb_equal(str, *argp))
return 0;
5603 if (b <= v && v < e)
return Qtrue;
5604 return RBOOL(!
RTEST(exclusive) && v == e);
5617 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5619 return RBOOL(
NIL_P(val));
5642 return rb_str_subpat(str, indx,
INT2FIX(0));
5645 if (rb_str_index(str, indx, 0) != -1)
5651 long beg,
len = str_strlen(str, NULL);
5663 return str_substr(str, idx, 1, FALSE);
5682 rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5686 return rb_str_subpat(str, argv[0], argv[1]);
5689 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5693 return rb_str_aref(str, argv[0]);
5702 str_modifiable(str);
5703 if (
len > olen)
len = olen;
5705 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5707 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5710 memmove(
ptr, oldptr +
len, nlen);
5711 if (fl == STR_NOEMBED)
xfree(oldptr);
5714 if (!STR_SHARED_P(str)) {
5716 rb_enc_cr_str_exact_copy(shared, str);
5721 STR_SET_LEN(str, nlen);
5723 if (!SHARABLE_MIDDLE_SUBSTRING) {
5724 TERM_FILL(
ptr + nlen, TERM_LEN(str));
5731 rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5737 if (beg == 0 && vlen == 0) {
5742 str_modify_keep_cr(str);
5746 RESIZE_CAPA(str, slen + vlen -
len);
5756 memmove(sptr + beg + vlen,
5758 slen - (beg +
len));
5760 if (vlen < beg &&
len < 0) {
5764 memmove(sptr + beg,
RSTRING_PTR(val) + vbeg, vlen);
5767 STR_SET_LEN(str, slen);
5768 TERM_FILL(&sptr[slen], TERM_LEN(str));
5784 int singlebyte = single_byte_optimizable(str);
5791 slen = str_strlen(str, enc);
5793 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5802 if (
len > slen - beg) {
5812 rb_str_update_0(str, beg,
len, val);
5824 long start, end,
len;
5834 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5838 nth += regs->num_regs;
5848 enc = rb_enc_check_str(str, val);
5849 rb_str_update_0(str, start,
len, val);
5858 switch (
TYPE(indx)) {
5860 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5864 beg = rb_str_index(str, indx, 0);
5918 rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5922 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5930 return rb_str_aset(str, argv[0], argv[1]);
5990 rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5998 str_modify_keep_cr(str);
6006 if ((nth += regs->num_regs) <= 0)
return Qnil;
6008 else if (nth >= regs->num_regs)
return Qnil;
6010 len = END(nth) - beg;
6013 else if (argc == 2) {
6026 beg = rb_str_index(str, indx, 0);
6027 if (beg == -1)
return Qnil;
6053 rb_enc_cr_str_copy_for_substr(result, str);
6063 if (beg +
len > slen)
6067 slen - (beg +
len));
6069 STR_SET_LEN(str, slen);
6070 TERM_FILL(&sptr[slen], TERM_LEN(str));
6081 switch (OBJ_BUILTIN_TYPE(pat)) {
6100 get_pat_quoted(
VALUE pat,
int check)
6104 switch (OBJ_BUILTIN_TYPE(pat)) {
6118 if (check && is_broken_string(pat)) {
6125 rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6128 pos = rb_str_byteindex(str, pat, pos);
6129 if (set_backref_str) {
6131 str = rb_str_new_frozen_String(str);
6132 rb_backref_set_string(str, pos,
RSTRING_LEN(pat));
6141 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
6161 rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6181 pat = get_pat_quoted(argv[0], 1);
6183 str_modifiable(str);
6184 beg = rb_pat_search(pat, str, 0, 1);
6207 if (iter || !
NIL_P(hash)) {
6217 str_mod_check(str, p,
len);
6218 rb_check_frozen(str);
6231 rb_enc_inspect_name(str_enc),
6232 rb_enc_inspect_name(STR_ENC_GET(repl)));
6234 enc = STR_ENC_GET(repl);
6250 RESIZE_CAPA(str,
len + rlen - plen);
6254 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6257 memmove(p + beg0, rp, rlen);
6259 STR_SET_LEN(str,
len);
6289 rb_str_sub_bang(argc, argv, str);
6294 str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6297 long beg, beg0, end0;
6298 long offset, blen, slen,
len, last;
6299 enum {STR, ITER, MAP} mode = STR;
6301 int need_backref = -1;
6320 rb_error_arity(argc, 1, 2);
6323 pat = get_pat_quoted(argv[0], 1);
6324 beg = rb_pat_search(pat, str, 0, need_backref);
6326 if (bang)
return Qnil;
6336 str_enc = STR_ENC_GET(str);
6362 str_mod_check(str, sp, slen);
6367 else if (need_backref) {
6369 if (need_backref < 0) {
6370 need_backref = val != repl;
6377 len = beg0 - offset;
6394 offset = end0 +
len;
6398 beg = rb_pat_search(pat, str, offset, need_backref);
6405 rb_pat_search(pat, str, last, 1);
6407 str_shared_replace(str, dest);
6435 rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6437 str_modify_keep_cr(str);
6438 return str_gsub(argc, argv, str, 1);
6459 rb_str_gsub(
int argc,
VALUE *argv,
VALUE str)
6461 return str_gsub(argc, argv, str, 0);
6479 str_modifiable(str);
6480 if (str == str2)
return str;
6484 return str_replace(str, str2);
6499 rb_str_clear(
VALUE str)
6503 STR_SET_LEN(str, 0);
6524 rb_str_chr(
VALUE str)
6572 char *
ptr, *head, *left = 0;
6576 if (pos < -
len ||
len <= pos)
6583 char byte = (char)(
NUM2INT(w) & 0xFF);
6585 if (!str_independent(str))
6586 str_make_independent(str);
6587 enc = STR_ENC_GET(str);
6590 if (!STR_EMBED_P(str)) {
6623 str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6627 if (beg > n ||
len < 0)
return Qnil;
6630 if (beg < 0)
return Qnil;
6635 if (!empty)
return Qnil;
6639 VALUE str2 = str_subseq(str, beg,
len);
6641 str_enc_copy_direct(str2, str);
6686 return str_byte_substr(str, beg,
len, TRUE);
6691 return str_byte_substr(str, idx, 1, FALSE);
6738 rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6743 return str_byte_substr(str, beg,
len, TRUE);
6746 return str_byte_aref(str, argv[0]);
6750 str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6755 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6764 if (*
len > slen - *beg) {
6768 str_ensure_byte_pos(str, *beg);
6769 str_ensure_byte_pos(str, end);
6794 rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6796 long beg,
len, vbeg, vlen;
6801 if (!(argc == 2 || argc == 3 || argc == 5)) {
6807 rb_builtin_class_name(argv[0]));
6820 rb_builtin_class_name(argv[2]));
6840 str_check_beg_len(str, &beg, &
len);
6841 str_check_beg_len(val, &vbeg, &vlen);
6842 str_modify_keep_cr(str);
6848 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6866 rb_str_reverse(
VALUE str)
6874 enc = STR_ENC_GET(str);
6881 if (single_byte_optimizable(str)) {
6909 str_enc_copy_direct(rev, str);
6929 rb_str_reverse_bang(
VALUE str)
6932 if (single_byte_optimizable(str)) {
6935 str_modify_keep_cr(str);
6945 str_shared_replace(str, rb_str_reverse(str));
6949 str_modify_keep_cr(str);
6974 i = rb_str_index(str, arg, 0);
6976 return RBOOL(i != -1);
7013 rb_str_to_i(
int argc,
VALUE *argv,
VALUE str)
7044 rb_str_to_f(
VALUE str)
7059 rb_str_to_s(
VALUE str)
7071 char s[RUBY_MAX_CHAR_LEN];
/*
 * Size limit handed to snprintf() when formatting an escaped character.
 * The buffers that receive that output in this file are declared as
 * `char buf[CHAR_ESC_LEN + 1]`.
 */
7079 #define CHAR_ESC_LEN 13
7082 rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7084 char buf[CHAR_ESC_LEN + 1];
7092 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7094 else if (c < 0x10000) {
7095 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7098 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7103 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7106 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7109 l = (int)strlen(buf);
/*
 * Maps specific characters to the text of their backslash escape.  The
 * visible cases cover NUL, newline, carriage return, tab, form feed,
 * vertical tab (013), backspace (010), bell (007), escape (033) and
 * DEL (0x7f).  What is returned for any other value of `c` is not visible
 * in this excerpt.
 */
7115 ruby_escaped_char(
int c)
7118 case '\0':
return "\\0";
7119 case '\n':
return "\\n";
7120 case '\r':
return "\\r";
7121 case '\t':
return "\\t";
7122 case '\f':
return "\\f";
7123 case '\013':
return "\\v";
7124 case '\010':
return "\\b";
7125 case '\007':
return "\\a";
7126 case '\033':
return "\\e";
7127 case '\x7f':
return "\\c?";
7133 rb_str_escape(
VALUE str)
7139 const char *prev = p;
7140 char buf[CHAR_ESC_LEN + 1];
7150 if (p > prev) str_buf_cat(result, prev, p - prev);
7153 n = (int)(pend - p);
7155 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7156 str_buf_cat(result, buf, strlen(buf));
7164 cc = ruby_escaped_char(c);
7166 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7167 str_buf_cat(result, cc, strlen(cc));
7173 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7174 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7178 if (p > prev) str_buf_cat(result, prev, p - prev);
7202 const char *p, *pend, *prev;
7203 char buf[CHAR_ESC_LEN + 1];
7212 str_buf_cat2(result,
"\"");
7222 if (p > prev) str_buf_cat(result, prev, p - prev);
7225 n = (int)(pend - p);
7227 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7228 str_buf_cat(result, buf, strlen(buf));
7236 if ((asciicompat || unicode_p) &&
7237 (c ==
'"'|| c ==
'\\' ||
7242 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7243 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7244 str_buf_cat2(result,
"\\");
7245 if (asciicompat || enc == resenc) {
7251 case '\n': cc =
'n';
break;
7252 case '\r': cc =
'r';
break;
7253 case '\t': cc =
't';
break;
7254 case '\f': cc =
'f';
break;
7255 case '\013': cc =
'v';
break;
7256 case '\010': cc =
'b';
break;
7257 case '\007': cc =
'a';
break;
7258 case 033: cc =
'e';
break;
7259 default: cc = 0;
break;
7262 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7265 str_buf_cat(result, buf, 2);
7282 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7283 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7288 if (p > prev) str_buf_cat(result, prev, p - prev);
7289 str_buf_cat2(result,
"\"");
/*
 * True when `p` is still before `e` and the byte at `p` is '$', '@' or '{'.
 * `p` is expanded several times, so pass a side-effect-free expression.
 */
7294 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7317 const char *p, *pend;
7321 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7326 len += strlen(enc->name);
7332 unsigned char c = *p++;
7335 case '"':
case '\\':
7336 case '\n':
case '\r':
7337 case '\t':
case '\f':
7338 case '\013':
case '\010':
case '\007':
case '\033':
7343 clen = IS_EVSTR(p, pend) ? 2 : 1;
7351 if (u8 && c > 0x7F) {
7357 else if (cc <= 0xFFFFF)
7370 if (clen > LONG_MAX -
len) {
7382 unsigned char c = *p++;
7384 if (c ==
'"' || c ==
'\\') {
7388 else if (c ==
'#') {
7389 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7392 else if (c ==
'\n') {
7396 else if (c ==
'\r') {
7400 else if (c ==
'\t') {
7404 else if (c ==
'\f') {
7408 else if (c ==
'\013') {
7412 else if (c ==
'\010') {
7416 else if (c ==
'\007') {
7420 else if (c ==
'\033') {
7435 snprintf(q, qend-q,
"u%04X", cc);
7437 snprintf(q, qend-q,
"u{%X}", cc);
7442 snprintf(q, qend-q,
"x%02X", c);
7449 snprintf(q, qend-q, nonascii_suffix, enc->name);
7459 unescape_ascii(
unsigned int c)
7483 undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7485 const char *s = *ss;
7489 unsigned char buf[6];
7507 *buf = unescape_ascii(*s);
7520 if (*penc != enc_utf8) {
7539 if (hexlen == 0 || hexlen > 6) {
7545 if (0xd800 <= c && c <= 0xdfff) {
7558 if (0xd800 <= c && c <= 0xdfff) {
7589 static VALUE rb_str_is_ascii_only_p(
VALUE str);
7607 str_undump(
VALUE str)
7614 bool binary =
false;
7618 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7621 if (!str_null_check(str, &w)) {
7625 if (*s !=
'"')
goto invalid_format;
7643 static const char force_encoding_suffix[] =
".force_encoding(\"";
7644 static const char dup_suffix[] =
".dup";
7645 const char *encname;
7650 size =
sizeof(dup_suffix) - 1;
7651 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7653 size =
sizeof(force_encoding_suffix) - 1;
7654 if (s_end - s <= size)
goto invalid_format;
7655 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7663 s = memchr(s,
'"', s_end-s);
7665 if (!s)
goto invalid_format;
7666 if (s_end - s != 2)
goto invalid_format;
7667 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7669 encidx = rb_enc_find_index2(encname, (
long)size);
7683 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7694 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7707 str_true_enc(
VALUE str)
7710 rb_str_check_dummy_enc(enc);
7714 static OnigCaseFoldType
7715 check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7721 if (argv[0]==sym_turkic) {
7722 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7724 if (argv[1]==sym_lithuanian)
7725 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7730 else if (argv[0]==sym_lithuanian) {
7731 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7733 if (argv[1]==sym_turkic)
7734 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7741 else if (argv[0]==sym_ascii)
7742 flags |= ONIGENC_CASE_ASCII_ONLY;
7743 else if (argv[0]==sym_fold) {
7744 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7745 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
/* Constant added to the computed capacity of each case-mapping buffer. */
7763 #define CASE_MAPPING_ADDITIONAL_LENGTH 20
/*
 * CASEMAP_DEBUG defaults to 0 unless it was already defined.  When it is
 * non-zero, the `if (CASEMAP_DEBUG)` blocks in this file print diagnostics
 * to stderr.
 */
7764 #ifndef CASEMAP_DEBUG
7765 # define CASEMAP_DEBUG 0
7773 OnigUChar space[FLEX_ARY_LEN];
7777 mapping_buffer_free(
void *p)
7781 while (current_buffer) {
7782 previous_buffer = current_buffer;
7783 current_buffer = current_buffer->next;
7784 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7790 {0, mapping_buffer_free,},
7791 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7799 const OnigUChar *source_current, *source_end;
7800 int target_length = 0;
7801 VALUE buffer_anchor;
7804 size_t buffer_count = 0;
7805 int buffer_length_or_invalid;
7814 while (source_current < source_end) {
7816 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7817 if (CASEMAP_DEBUG) {
7818 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
/*
 * Link this iteration's buffer onto the end of the singly linked chain:
 * store it through pre_buffer, then point pre_buffer at the buffer's own
 * `next` slot so that the next buffer is linked after it.  The address-of
 * expression had been corrupted by an HTML-entity decoding of "&curren";
 * it is restored to `&current_buffer->next` here.
 */
7821 *pre_buffer = current_buffer;
7822 pre_buffer = &current_buffer->next;
7823 current_buffer->next = NULL;
7824 current_buffer->capa =
capa;
7825 buffer_length_or_invalid = enc->case_map(flags,
7826 &source_current, source_end,
7827 current_buffer->space,
7828 current_buffer->space+current_buffer->capa,
7830 if (buffer_length_or_invalid < 0) {
7831 current_buffer =
DATA_PTR(buffer_anchor);
7833 mapping_buffer_free(current_buffer);
7836 target_length += current_buffer->used = buffer_length_or_invalid;
7838 if (CASEMAP_DEBUG) {
7839 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7842 if (buffer_count==1) {
7843 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7846 char *target_current;
7850 current_buffer =
DATA_PTR(buffer_anchor);
7851 while (current_buffer) {
7852 memcpy(target_current, current_buffer->space, current_buffer->used);
7853 target_current += current_buffer->used;
7854 current_buffer = current_buffer->next;
7857 current_buffer =
DATA_PTR(buffer_anchor);
7859 mapping_buffer_free(current_buffer);
7864 str_enc_copy_direct(target, source);
7873 const OnigUChar *source_current, *source_end;
7874 OnigUChar *target_current, *target_end;
7876 int length_or_invalid;
7878 if (old_length == 0)
return Qnil;
7882 if (source == target) {
7883 target_current = (OnigUChar*)source_current;
7884 target_end = (OnigUChar*)source_end;
7891 length_or_invalid = onigenc_ascii_only_case_map(flags,
7892 &source_current, source_end,
7893 target_current, target_end, enc);
7894 if (length_or_invalid < 0)
7896 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7897 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7898 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7900 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7903 str_enc_copy(target, source);
7909 upcase_single(
VALUE str)
7912 bool modified =
false;
7915 unsigned int c = *(
unsigned char*)s;
7917 if (
'a' <= c && c <=
'z') {
7918 *s =
'A' + (c -
'a');
7946 rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7949 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7951 flags = check_case_options(argc, argv, flags);
7952 str_modify_keep_cr(str);
7953 enc = str_true_enc(str);
7954 if (case_option_single_p(flags, enc, str)) {
7955 if (upcase_single(str))
7956 flags |= ONIGENC_CASE_MODIFIED;
7958 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7959 rb_str_ascii_casemap(str, str, &flags, enc);
7961 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7963 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7985 rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7988 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7991 flags = check_case_options(argc, argv, flags);
7992 enc = str_true_enc(str);
7993 if (case_option_single_p(flags, enc, str)) {
7995 str_enc_copy_direct(ret, str);
7998 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8000 rb_str_ascii_casemap(str, ret, &flags, enc);
8003 ret = rb_str_casemap(str, &flags, enc);
8010 downcase_single(
VALUE str)
8013 bool modified =
false;
8016 unsigned int c = *(
unsigned char*)s;
8018 if (
'A' <= c && c <=
'Z') {
8019 *s =
'a' + (c -
'A');
8048 rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8051 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8053 flags = check_case_options(argc, argv, flags);
8054 str_modify_keep_cr(str);
8055 enc = str_true_enc(str);
8056 if (case_option_single_p(flags, enc, str)) {
8057 if (downcase_single(str))
8058 flags |= ONIGENC_CASE_MODIFIED;
8060 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8061 rb_str_ascii_casemap(str, str, &flags, enc);
8063 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8065 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8087 rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8090 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8093 flags = check_case_options(argc, argv, flags);
8094 enc = str_true_enc(str);
8095 if (case_option_single_p(flags, enc, str)) {
8097 str_enc_copy_direct(ret, str);
8098 downcase_single(ret);
8100 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8102 rb_str_ascii_casemap(str, ret, &flags, enc);
8105 ret = rb_str_casemap(str, &flags, enc);
8133 rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8136 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8138 flags = check_case_options(argc, argv, flags);
8139 str_modify_keep_cr(str);
8140 enc = str_true_enc(str);
8142 if (flags&ONIGENC_CASE_ASCII_ONLY)
8143 rb_str_ascii_casemap(str, str, &flags, enc);
8145 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8147 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8171 rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8174 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8177 flags = check_case_options(argc, argv, flags);
8178 enc = str_true_enc(str);
8180 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8182 rb_str_ascii_casemap(str, ret, &flags, enc);
8185 ret = rb_str_casemap(str, &flags, enc);
8212 rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8215 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8217 flags = check_case_options(argc, argv, flags);
8218 str_modify_keep_cr(str);
8219 enc = str_true_enc(str);
8220 if (flags&ONIGENC_CASE_ASCII_ONLY)
8221 rb_str_ascii_casemap(str, str, &flags, enc);
8223 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8225 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8249 rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8252 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8255 flags = check_case_options(argc, argv, flags);
8256 enc = str_true_enc(str);
8258 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8260 rb_str_ascii_casemap(str, ret, &flags, enc);
8263 ret = rb_str_casemap(str, &flags, enc);
8268 typedef unsigned char *USTR;
8272 unsigned int now, max;
8284 if (t->p == t->pend)
return -1;
8285 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8290 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8292 if (t->p < t->pend) {
8296 if (t->now < 0x80 && c < 0x80) {
8298 "invalid range \"%c-%c\" in string transliteration",
8306 else if (t->now < c) {
8315 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8316 if (t->now == t->max) {
8321 if (t->now < t->max) {
8337 const unsigned int errc = -1;
8338 unsigned int trans[256];
8340 struct tr trsrc, trrepl;
8342 unsigned int c, c0, last = 0;
8343 int modify = 0, i, l;
8344 unsigned char *s, *send;
8346 int singlebyte = single_byte_optimizable(str);
/*
 * Updates a variable named `cr` that must be visible at the expansion site:
 * when `cr` is ENC_CODERANGE_7BIT and `c` is not ASCII (per rb_isascii), `cr`
 * is set to ENC_CODERANGE_VALID; otherwise `cr` is left untouched.
 * The (void) cast discards the value of the conditional expression.
 */
8350 #define CHECK_IF_ASCII(c) \
8351 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8352 (cr = ENC_CODERANGE_VALID) : 0)
8358 return rb_str_delete_bang(1, &src, str);
8373 trsrc.p + l < trsrc.pend) {
8379 trsrc.gen = trrepl.gen = 0;
8380 trsrc.now = trrepl.now = 0;
8381 trsrc.max = trrepl.max = 0;
8384 for (i=0; i<256; i++) {
8387 while ((c = trnext(&trsrc, enc)) != errc) {
8396 while ((c = trnext(&trrepl, enc)) != errc)
8399 for (i=0; i<256; i++) {
8400 if (trans[i] != errc) {
8408 for (i=0; i<256; i++) {
8411 while ((c = trnext(&trsrc, enc)) != errc) {
8412 r = trnext(&trrepl, enc);
8413 if (r == errc) r = trrepl.now;
8427 str_modify_keep_cr(str);
8433 unsigned int save = -1;
8434 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8456 if (cflag) c = last;
8459 else if (cflag) c = errc;
8465 if (c != (
unsigned int)-1) {
8477 if (enc != e1) may_modify = 1;
8479 if ((offset = t - buf) + tlen > max) {
8480 size_t MAYBE_UNUSED(old) = max + termlen;
8481 max = offset + tlen + (send - s);
8482 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8486 if (may_modify && memcmp(s, t, tlen) != 0) {
8492 if (!STR_EMBED_P(str)) {
8493 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8495 TERM_FILL((
char *)t, termlen);
8496 RSTRING(str)->as.heap.ptr = (
char *)buf;
8497 STR_SET_LEN(str, t - buf);
8498 STR_SET_NOEMBED(str);
8499 RSTRING(str)->as.heap.aux.capa = max;
8503 c = (
unsigned char)*s;
8504 if (trans[c] != errc) {
8521 long offset, max = (long)((send - s) * 1.2);
8522 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8543 if (cflag) c = last;
8546 else if (cflag) c = errc;
8550 c = cflag ? last : errc;
8558 if (enc != e1) may_modify = 1;
8560 if ((offset = t - buf) + tlen > max) {
8561 size_t MAYBE_UNUSED(old) = max + termlen;
8562 max = offset + tlen + (long)((send - s) * 1.2);
8563 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8568 if (may_modify && memcmp(s, t, tlen) != 0) {
8576 if (!STR_EMBED_P(str)) {
8577 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8579 TERM_FILL((
char *)t, termlen);
8580 RSTRING(str)->as.heap.ptr = (
char *)buf;
8581 STR_SET_LEN(str, t - buf);
8582 STR_SET_NOEMBED(str);
8583 RSTRING(str)->as.heap.aux.capa = max;
8608 return tr_trans(str, src, repl, 0);
8655 tr_trans(str, src, repl, 0);
/* Number of per-byte slots: one for every possible unsigned char value. */
8659 #define TR_TABLE_MAX (UCHAR_MAX+1)
/*
 * Full table length: the per-byte slots plus one extra trailing slot at index
 * TR_TABLE_MAX (tr_setup_table stores its cflag there, tr_find reads it back).
 */
8660 #define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8662 tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8665 const unsigned int errc = -1;
8666 char buf[TR_TABLE_MAX];
8669 VALUE table = 0, ptable = 0;
8670 int i, l, cflag = 0;
8673 tr.gen =
tr.now =
tr.max = 0;
8680 for (i=0; i<TR_TABLE_MAX; i++) {
8683 stable[TR_TABLE_MAX] = cflag;
8685 else if (stable[TR_TABLE_MAX] && !cflag) {
8686 stable[TR_TABLE_MAX] = 0;
8688 for (i=0; i<TR_TABLE_MAX; i++) {
8692 while ((c = trnext(&
tr, enc)) != errc) {
8693 if (c < TR_TABLE_MAX) {
8694 buf[(
unsigned char)c] = !cflag;
8699 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8716 for (i=0; i<TR_TABLE_MAX; i++) {
8717 stable[i] = stable[i] && buf[i];
8719 if (!table && !cflag) {
8726 tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8728 if (c < TR_TABLE_MAX) {
8729 return table[c] != 0;
8743 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8757 rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8759 char squeez[TR_TABLE_SIZE];
8762 VALUE del = 0, nodel = 0;
8764 int i, ascompat, cr;
8768 for (i=0; i<argc; i++) {
8773 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8776 str_modify_keep_cr(str);
8785 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8798 if (tr_find(c, squeez, del, nodel)) {
8809 TERM_FILL(t, TERM_LEN(str));
8813 if (modify)
return str;
8833 rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8836 rb_str_delete_bang(argc, argv, str);
8850 rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8852 char squeez[TR_TABLE_SIZE];
8854 VALUE del = 0, nodel = 0;
8855 unsigned char *s, *send, *t;
8857 int ascompat, singlebyte = single_byte_optimizable(str);
8861 enc = STR_ENC_GET(str);
8864 for (i=0; i<argc; i++) {
8869 if (singlebyte && !single_byte_optimizable(s))
8871 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8875 str_modify_keep_cr(str);
8884 unsigned int c = *s++;
8885 if (c != save || (argc > 0 && !squeez[c])) {
8895 if (ascompat && (c = *s) < 0x80) {
8896 if (c != save || (argc > 0 && !squeez[c])) {
8904 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8914 TERM_FILL((
char *)t, TERM_LEN(str));
8920 if (modify)
return str;
8943 rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8946 rb_str_squeeze_bang(argc, argv, str);
8964 return tr_trans(str, src, repl, 1);
8987 tr_trans(str, src, repl, 1);
9016 rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9018 char table[TR_TABLE_SIZE];
9020 VALUE del = 0, nodel = 0, tstr;
9035 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9036 !is_broken_string(str)) {
9044 if (*(
unsigned char*)s++ == c) n++;
9050 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9051 for (i=1; i<argc; i++) {
9055 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9065 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9074 if (tr_find(c, table, del, nodel)) {
9085 rb_fs_check(
VALUE val)
9089 if (
NIL_P(val))
return 0;
/*
 * Lookup table indexed by byte value (0..255).  An entry is 1 for the bytes
 * 0x09-0x0D (tab, line feed, vertical tab, form feed, carriage return) and
 * 0x20 (space); every other entry, including all bytes >= 0x80, is 0.
 */
9094 static const char isspacetable[256] = {
9095 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9096 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9097 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9098 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9099 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9104 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9108 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9110 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Table-driven whitespace test.  The argument is cast to unsigned char first,
 * so a negative char value cannot produce a negative index.
 */
9113 #define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9116 split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9118 if (empty_count >= 0 &&
len == 0) {
9119 return empty_count + 1;
9121 if (empty_count > 0) {
9126 }
while (--empty_count > 0);
9130 rb_yield(str_new_empty_String(str));
9131 }
while (--empty_count > 0);
9145 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9149 literal_split_pattern(
VALUE spat, split_type_t default_type)
9157 return SPLIT_TYPE_CHARS;
9160 if (
len == 1 &&
ptr[0] ==
' ') {
9161 return SPLIT_TYPE_AWK;
9167 return SPLIT_TYPE_AWK;
9170 return default_type;
9183 rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9188 split_type_t split_type;
9189 long beg, end, i = 0, empty_count = -1;
9194 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9196 if (lim <= 0) limit =
Qnil;
9197 else if (lim == 1) {
9209 if (
NIL_P(limit) && !lim) empty_count = 0;
9211 enc = STR_ENC_GET(str);
9212 split_type = SPLIT_TYPE_REGEXP;
9214 spat = get_pat_quoted(spat, 0);
9217 split_type = SPLIT_TYPE_AWK;
9219 else if (!(spat = rb_fs_check(spat))) {
9225 if (split_type != SPLIT_TYPE_AWK) {
9230 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9231 if (split_type == SPLIT_TYPE_AWK) {
9233 split_type = SPLIT_TYPE_STRING;
9238 mustnot_broken(spat);
9239 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Passes the range (beg, len) to split_string() together with `result`, `str`
 * and `empty_count` taken from the expansion site, and stores the value
 * split_string() returns back into `empty_count`.
 */
9247 #define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
9252 if (split_type == SPLIT_TYPE_AWK) {
9259 if (is_ascii_string(str)) {
9260 while (
ptr < eptr) {
9261 c = (
unsigned char)*
ptr++;
9263 if (ascii_isspace(c)) {
9269 if (!
NIL_P(limit) && lim <= i)
break;
9272 else if (ascii_isspace(c)) {
9273 SPLIT_STR(beg, end-beg);
9276 if (!
NIL_P(limit)) ++i;
9284 while (
ptr < eptr) {
9296 if (!
NIL_P(limit) && lim <= i)
break;
9300 SPLIT_STR(beg, end-beg);
9303 if (!
NIL_P(limit)) ++i;
9311 else if (split_type == SPLIT_TYPE_STRING) {
9312 char *str_start =
ptr;
9313 char *substr_start =
ptr;
9318 mustnot_broken(str);
9320 while (
ptr < eptr &&
9324 if (t !=
ptr + end) {
9328 SPLIT_STR(substr_start - str_start, (
ptr+end) - substr_start);
9331 if (!
NIL_P(limit) && lim <= ++i)
break;
9333 beg =
ptr - str_start;
9335 else if (split_type == SPLIT_TYPE_CHARS) {
9336 char *str_start =
ptr;
9340 mustnot_broken(str);
9342 while (
ptr < eptr &&
9344 SPLIT_STR(
ptr - str_start, n);
9346 if (!
NIL_P(limit) && lim <= ++i)
break;
9348 beg =
ptr - str_start;
9360 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (
void)0)) {
9365 if (start == end && BEG(0) == END(0)) {
9370 else if (last_null == 1) {
9384 SPLIT_STR(beg, end-beg);
9385 beg = start = END(0);
9389 for (idx=1; idx < regs->num_regs; idx++) {
9390 if (BEG(idx) == -1)
continue;
9391 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9393 if (!
NIL_P(limit) && lim <= ++i)
break;
9395 if (match) rb_match_unbusy(match);
9401 return result ? result : str;
9411 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to a new array created with rb_ary_new_capa(size) when no block
 * is given to the current method, and to 0 when a block is given.
 * The first argument `m` is not used in the expansion.
 */
9414 #define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
/* Shorthand that forwards both arguments unchanged to enumerator_element(). */
9429 #define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9432 chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
/* From here on, every use of the name rb_rs expands to a call of get_rs(). */
9457 #define rb_rs get_rs()
9464 const char *
ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9465 long pos,
len, rslen;
9471 static ID keywords[1];
9476 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9480 if (!ENUM_ELEM(ary, str)) {
9504 const char *eol = NULL;
9506 while (subend < pend) {
9507 long chomp_rslen = 0;
9513 if (eol == subend)
break;
9517 chomp_rslen = -rslen;
9521 if (!subptr) subptr = subend;
9525 }
while (subend < pend);
9527 if (rslen == 0) chomp_rslen = 0;
9529 subend - subptr + (chomp ? chomp_rslen : rslen));
9530 if (ENUM_ELEM(ary, line)) {
9531 str_mod_check(str,
ptr,
len);
9533 subptr = eol = NULL;
9552 while (subptr < pend) {
9553 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9557 if (hit != adjusted) {
9561 subend = hit += rslen;
9564 subend = chomp_newline(subptr, subend, enc);
9571 if (ENUM_ELEM(ary, line)) {
9572 str_mod_check(str,
ptr,
len);
9577 if (subptr != pend) {
9580 pend = chomp_newline(subptr, pend, enc);
9582 else if (pend - subptr >= rslen &&
9583 memcmp(pend - rslen, rsptr, rslen) == 0) {
9588 ENUM_ELEM(ary, line);
9609 rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9612 return rb_str_enumerate_lines(argc, argv, str, 0);
9625 rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9627 VALUE ary = WANTARRAY(
"lines", 0);
9628 return rb_str_enumerate_lines(argc, argv, str, ary);
9661 rb_str_each_byte(
VALUE str)
9664 return rb_str_enumerate_bytes(str, 0);
9676 rb_str_bytes(
VALUE str)
9679 return rb_str_enumerate_bytes(str, ary);
9702 for (i = 0; i <
len; i += n) {
9708 for (i = 0; i <
len; i += n) {
9730 rb_str_each_char(
VALUE str)
9733 return rb_str_enumerate_chars(str, 0);
9745 rb_str_chars(
VALUE str)
9748 return rb_str_enumerate_chars(str, ary);
9752 rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9757 const char *
ptr, *end;
9760 if (single_byte_optimizable(str))
9761 return rb_str_enumerate_bytes(str, ary);
9766 enc = STR_ENC_GET(str);
9790 rb_str_each_codepoint(
VALUE str)
9793 return rb_str_enumerate_codepoints(str, 0);
9805 rb_str_codepoints(
VALUE str)
9808 return rb_str_enumerate_codepoints(str, ary);
9816 const OnigUChar source_ascii[] =
"\\X";
9817 const OnigUChar *source = source_ascii;
9818 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Helpers that expand a code unit `x` into a comma-separated list of
 * OnigUChar bytes, suitable for use inside an array initializer.
 *   CHARS_16BE: two bytes, high byte first.
 *   CHARS_16LE: two bytes, low byte first.
 *   CHARS_32BE: four bytes, built from the upper 16 bits then the lower 16
 *               bits, each in big-endian order.
 *   CHARS_32LE: four bytes, built from the lower 16 bits then the upper 16
 *               bits, each in little-endian order.
 * Each (OnigUChar) cast keeps only the low-order byte of its operand.
 */
9821 #define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9822 #define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9823 #define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9824 #define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9825 #define CASE_UTF(e) \
9826 case ENCINDEX_UTF_##e: { \
9827 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9828 source = source_UTF_##e; \
9829 source_len = sizeof(source_UTF_##e); \
9832 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9840 regex_t *reg_grapheme_cluster;
9842 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9843 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9845 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9846 onig_error_code_to_str(message, r, &einfo);
9847 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9850 return reg_grapheme_cluster;
9857 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9860 if (!reg_grapheme_cluster_utf8) {
9861 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9864 return reg_grapheme_cluster_utf8;
9873 size_t grapheme_cluster_count = 0;
9875 const char *
ptr, *end;
9881 bool cached_reg_grapheme_cluster =
true;
9882 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9883 if (!reg_grapheme_cluster) {
9884 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9885 cached_reg_grapheme_cluster =
false;
9892 OnigPosition
len = onig_match(reg_grapheme_cluster,
9893 (
const OnigUChar *)
ptr, (
const OnigUChar *)end,
9894 (
const OnigUChar *)
ptr, NULL, 0);
9895 if (
len <= 0)
break;
9896 grapheme_cluster_count++;
9900 if (!cached_reg_grapheme_cluster) {
9901 onig_free(reg_grapheme_cluster);
9904 return SIZET2NUM(grapheme_cluster_count);
9908 rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9912 const char *ptr0, *
ptr, *end;
9915 return rb_str_enumerate_chars(str, ary);
9920 bool cached_reg_grapheme_cluster =
true;
9921 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9922 if (!reg_grapheme_cluster) {
9923 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9924 cached_reg_grapheme_cluster =
false;
9931 OnigPosition
len = onig_match(reg_grapheme_cluster,
9932 (
const OnigUChar *)
ptr, (
const OnigUChar *)end,
9933 (
const OnigUChar *)
ptr, NULL, 0);
9934 if (
len <= 0)
break;
9939 if (!cached_reg_grapheme_cluster) {
9940 onig_free(reg_grapheme_cluster);
9960 rb_str_each_grapheme_cluster(
VALUE str)
9963 return rb_str_enumerate_grapheme_clusters(str, 0);
9975 rb_str_grapheme_clusters(
VALUE str)
9978 return rb_str_enumerate_grapheme_clusters(str, ary);
9982 chopped_length(
VALUE str)
9985 const char *p, *p2, *beg, *end;
9989 if (beg >= end)
return 0;
10010 rb_str_chop_bang(
VALUE str)
10012 str_modify_keep_cr(str);
10015 len = chopped_length(str);
10016 STR_SET_LEN(str,
len);
10036 rb_str_chop(
VALUE str)
10042 smart_chomp(
VALUE str,
const char *e,
const char *p)
10061 if (--e > p && *(e-1) ==
'\r') {
10078 char *pp, *e, *rsptr;
10083 if (
len == 0)
return 0;
10086 return smart_chomp(str, e, p);
10107 while (e > p && *(e-1) ==
'\n') {
10109 if (e > p && *(e-1) ==
'\r')
10115 if (rslen >
len)
return len;
10118 newline = rsptr[rslen-1];
10121 if (newline ==
'\n')
10122 return smart_chomp(str, e, p);
10126 return smart_chomp(str, e, p);
10131 if (is_broken_string(rs)) {
10135 if (p[
len-1] == newline &&
10137 memcmp(rsptr, pp, rslen) == 0)) {
10138 if (at_char_boundary(p, pp, e, enc))
10139 return len - rslen;
10151 chomp_rs(
int argc,
const VALUE *argv)
10155 VALUE rs = argv[0];
10168 long len = chompped_length(str, rs);
10169 if (
len >= olen)
return Qnil;
10170 str_modify_keep_cr(str);
10171 STR_SET_LEN(str,
len);
10189 rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10192 str_modifiable(str);
10194 rs = chomp_rs(argc, argv);
10196 return rb_str_chomp_string(str, rs);
10209 rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10211 VALUE rs = chomp_rs(argc, argv);
10219 const char *
const start = s;
10221 if (!s || s >= e)
return 0;
10224 if (single_byte_optimizable(str)) {
10225 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10250 rb_str_lstrip_bang(
VALUE str)
10254 long olen, loffset;
10256 str_modify_keep_cr(str);
10257 enc = STR_ENC_GET(str);
10259 loffset = lstrip_offset(str, start, start+olen, enc);
10261 long len = olen-loffset;
10262 s = start + loffset;
10263 memmove(start, s,
len);
10264 STR_SET_LEN(str,
len);
10288 rb_str_lstrip(
VALUE str)
10293 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10294 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10303 rb_str_check_dummy_enc(enc);
10307 if (!s || s >= e)
return 0;
10311 if (single_byte_optimizable(str)) {
10313 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10338 rb_str_rstrip_bang(
VALUE str)
10342 long olen, roffset;
10344 str_modify_keep_cr(str);
10345 enc = STR_ENC_GET(str);
10347 roffset = rstrip_offset(str, start, start+olen, enc);
10349 long len = olen - roffset;
10351 STR_SET_LEN(str,
len);
10375 rb_str_rstrip(
VALUE str)
10379 long olen, roffset;
10381 enc = STR_ENC_GET(str);
10383 roffset = rstrip_offset(str, start, start+olen, enc);
10385 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10401 rb_str_strip_bang(
VALUE str)
10404 long olen, loffset, roffset;
10407 str_modify_keep_cr(str);
10408 enc = STR_ENC_GET(str);
10410 loffset = lstrip_offset(str, start, start+olen, enc);
10411 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10413 if (loffset > 0 || roffset > 0) {
10414 long len = olen-roffset;
10417 memmove(start, start + loffset,
len);
10419 STR_SET_LEN(str,
len);
10443 rb_str_strip(
VALUE str)
10446 long olen, loffset, roffset;
10450 loffset = lstrip_offset(str, start, start+olen, enc);
10451 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10453 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10458 scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10461 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10491 if (!regs || regs->num_regs == 1) {
10497 for (
int i = 1; i < regs->num_regs; i++) {
10558 long last = -1, prev = 0;
10561 pat = get_pat_quoted(pat, 1);
10562 mustnot_broken(str);
10566 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10571 if (last >= 0) rb_pat_search(pat, str, last, 1);
10576 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10580 str_mod_check(str, p,
len);
10582 if (last >= 0) rb_pat_search(pat, str, last, 1);
10606 rb_str_hex(
VALUE str)
10633 rb_str_oct(
VALUE str)
10638 #ifndef HAVE_CRYPT_R
10643 rb_nativethread_lock_t lock;
10644 } crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10647 crypt_mutex_initialize(
void)
10715 #ifdef HAVE_CRYPT_R
10718 # define CRYPT_END() ALLOCV_END(databuf)
10720 extern char *crypt(
const char *,
const char *);
10721 # define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10724 const char *s, *saltp;
10726 #ifdef BROKEN_CRYPT
10727 char salt_8bit_clean[3];
10731 mustnot_wchar(str);
10732 mustnot_wchar(salt);
10735 if (
RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10739 #ifdef BROKEN_CRYPT
10740 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10741 salt_8bit_clean[0] = saltp[0] & 0x7f;
10742 salt_8bit_clean[1] = saltp[1] & 0x7f;
10743 salt_8bit_clean[2] =
'\0';
10744 saltp = salt_8bit_clean;
10747 #ifdef HAVE_CRYPT_R
10749 # ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10750 data->initialized = 0;
10752 res = crypt_r(s, saltp, data);
10754 crypt_mutex_initialize();
10756 res = crypt(s, saltp);
10778 rb_str_ord(
VALUE s)
10794 rb_str_sum(
int argc,
VALUE *argv,
VALUE str)
10797 char *
ptr, *p, *pend;
10800 unsigned long sum0 = 0;
10812 str_mod_check(str,
ptr,
len);
10815 sum0 += (
unsigned char)*p;
10826 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10827 sum0 &= (((
unsigned long)1)<<bits)-1;
10847 rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10851 long width,
len, flen = 1, fclen = 1;
10854 const char *f =
" ";
10855 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10857 int singlebyte = 1, cr;
10861 enc = STR_ENC_GET(str);
10869 fclen = str_strlen(pad, enc);
10870 singlebyte = single_byte_optimizable(pad);
10871 if (flen == 0 || fclen == 0) {
10875 len = str_strlen(str, enc);
10876 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10878 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10882 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10883 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10886 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10887 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10888 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10895 memset(p, *f, llen);
10899 while (llen >= fclen) {
10905 memcpy(p, f, llen2);
10912 memset(p, *f, rlen);
10916 while (rlen >= fclen) {
10922 memcpy(p, f, rlen2);
10926 TERM_FILL(p, termlen);
10950 rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10952 return rb_str_justify(argc, argv, str,
'l');
10966 rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10968 return rb_str_justify(argc, argv, str,
'r');
10983 rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10985 return rb_str_justify(argc, argv, str,
'c');
11001 sep = get_pat_quoted(sep, 0);
11013 pos = rb_str_index(str, sep, 0);
11014 if (pos < 0)
goto failed;
11022 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11038 sep = get_pat_quoted(sep, 0);
11051 pos = rb_str_rindex(str, sep, pos);
11062 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11074 rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11078 for (i=0; i<argc; i++) {
11079 VALUE tmp = argv[i];
11081 if (rb_reg_start_with_p(tmp, str))
11085 const char *p, *s, *e;
11096 if (!at_char_right_boundary(p, s, e, enc))
11114 rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11118 for (i=0; i<argc; i++) {
11119 VALUE tmp = argv[i];
11120 const char *p, *s, *e;
11131 if (!at_char_boundary(p, s, e, enc))
11149 deleted_prefix_length(
VALUE str,
VALUE prefix)
11151 const char *strptr, *prefixptr;
11152 long olen, prefixlen;
11157 if (!is_broken_string(prefix) ||
11165 if (prefixlen <= 0)
return 0;
11167 if (olen < prefixlen)
return 0;
11170 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11171 if (is_broken_string(prefix)) {
11172 if (!is_broken_string(str)) {
11176 const char *strend = strptr + olen;
11177 const char *after_prefix = strptr + prefixlen;
11178 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11198 rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11201 str_modify_keep_cr(str);
11203 prefixlen = deleted_prefix_length(str, prefix);
11204 if (prefixlen <= 0)
return Qnil;
11218 rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11222 prefixlen = deleted_prefix_length(str, prefix);
11223 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11238 deleted_suffix_length(
VALUE str,
VALUE suffix)
11240 const char *strptr, *suffixptr;
11241 long olen, suffixlen;
11245 if (is_broken_string(suffix))
return 0;
11250 if (suffixlen <= 0)
return 0;
11252 if (olen < suffixlen)
return 0;
11255 const char *strend = strptr + olen;
11256 const char *before_suffix = strend - suffixlen;
11257 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11258 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11273 rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11275 long olen, suffixlen,
len;
11276 str_modifiable(str);
11278 suffixlen = deleted_suffix_length(str, suffix);
11279 if (suffixlen <= 0)
return Qnil;
11282 str_modify_keep_cr(str);
11283 len = olen - suffixlen;
11284 STR_SET_LEN(str,
len);
11301 rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11305 suffixlen = deleted_suffix_length(str, suffix);
11306 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11323 val = rb_fs_check(val);
11326 "value of %"PRIsVALUE
" must be String or Regexp",
11330 rb_warn_deprecated(
"'$;'", NULL);
11347 str_modifiable(str);
11378 rb_str_b(
VALUE str)
11381 if (STR_EMBED_P(str)) {
11387 str_replace_shared_without_enc(str2, str);
11422 rb_str_valid_encoding_p(
VALUE str)
11442 rb_str_is_ascii_only_p(
VALUE str)
11452 static const char ellipsis[] =
"...";
11453 const long ellipsislen =
sizeof(ellipsis) - 1;
11456 const char *
const p =
RSTRING_PTR(str), *e = p + blen;
11457 VALUE estr, ret = 0;
11464 else if (
len <= ellipsislen ||
11499 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11518 if (enc == STR_ENC_GET(str)) {
11523 return enc_str_scrub(enc, str, repl, cr);
11531 const char *rep, *p, *e, *p1, *sp;
11544 if (!
NIL_P(repl)) {
11545 repl = str_compat_and_valid(repl, enc);
11553 #define DEFAULT_REPLACE_CHAR(str) do { \
11554 static const char replace[sizeof(str)-1] = str; \
11555 rep = replace; replen = (int)sizeof(replace); \
11570 else if (!
NIL_P(repl)) {
11576 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11580 DEFAULT_REPLACE_CHAR(
"?");
11585 p = search_nonascii(p, e);
11609 if (e - p < clen) clen = e - p;
11616 for (; clen > 1; clen--) {
11629 str_mod_check(str, sp, slen);
11630 repl = str_compat_and_valid(repl, enc);
11637 p = search_nonascii(p, e);
11664 str_mod_check(str, sp, slen);
11665 repl = str_compat_and_valid(repl, enc);
11678 else if (!
NIL_P(repl)) {
11682 else if (encidx == ENCINDEX_UTF_16BE) {
11683 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11685 else if (encidx == ENCINDEX_UTF_16LE) {
11686 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11688 else if (encidx == ENCINDEX_UTF_32BE) {
11689 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11691 else if (encidx == ENCINDEX_UTF_32LE) {
11692 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11695 DEFAULT_REPLACE_CHAR(
"?");
11712 if (e - p < clen) clen = e - p;
11713 if (clen <= mbminlen * 2) {
11718 for (; clen > mbminlen; clen-=mbminlen) {
11730 str_mod_check(str, sp, slen);
11731 repl = str_compat_and_valid(repl, enc);
11757 str_mod_check(str, sp, slen);
11758 repl = str_compat_and_valid(repl, enc);
11794 str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11802 static ID id_normalize;
11803 static ID id_normalized_p;
11804 static VALUE mUnicodeNormalize;
11807 unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11809 static int UnicodeNormalizeRequired = 0;
11812 if (!UnicodeNormalizeRequired) {
11813 rb_require(
"unicode_normalize/normalize.rb");
11814 UnicodeNormalizeRequired = 1;
11818 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11855 rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11857 return unicode_normalize_common(argc, argv, str, id_normalize);
11871 rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11873 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11900 rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11902 return unicode_normalize_common(argc, argv, str, id_normalized_p);
/* sym_equal is simply another name for rb_obj_equal. */
12034 #define sym_equal rb_obj_equal
12037 sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12053 rb_str_symname_p(
VALUE sym)
12061 enc = STR_ENC_GET(sym);
12064 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(
ptr) ||
12072 rb_str_quote_unprintable(
VALUE str)
12082 enc = STR_ENC_GET(str);
12085 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12087 return rb_str_escape(str);
12093 rb_id_quote_unprintable(
ID id)
12096 if (!rb_str_symname_p(str)) {
12097 return rb_str_escape(str);
12115 sym_inspect(
VALUE sym)
12122 if (!rb_str_symname_p(str)) {
12127 memmove(dest + 1, dest,
len);
12131 VALUE orig_str = str;
12139 memcpy(dest + 1,
ptr,
len);
12159 rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12182 sym_succ(
VALUE sym)
12261 return rb_str_match(
rb_sym2str(sym), other);
12276 sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12278 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12291 sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12293 return rb_str_match_m_p(argc, argv, sym);
12311 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12322 sym_length(
VALUE sym)
12336 sym_empty(
VALUE sym)
12352 sym_upcase(
int argc,
VALUE *argv,
VALUE sym)
12370 sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12386 sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12402 sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12416 sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12418 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12431 sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12433 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12445 sym_encoding(
VALUE sym)
12451 string_for_symbol(
VALUE name)
12470 name = string_for_symbol(name);
12480 name = string_for_symbol(name);
12496 sym_all_symbols(
VALUE _)
12504 return rb_fstring(str);
12511 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12523 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12524 rb_enc_autoload(enc);
12528 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12534 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12535 rb_enc_autoload(enc);
12539 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12550 rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12555 if (
RB_LIKELY(code >= 0 && code < 0xff)) {
12556 rb_str_buf_cat_byte(str, (
char) code);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy; the assertion is checked only when RUBY_DEBUG is truthy.
#define RB_LIKELY(x)
Asserts that the given Boolean expression likely holds.
#define RB_UNLIKELY(x)
Asserts that the given Boolean expression likely doesn't hold.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Undefines a method of the given class.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_ensure(VALUE(*b_proc)(VALUE), VALUE data1, VALUE(*e_proc)(VALUE), VALUE data2)
An equivalent to ensure clause.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords, if any, to the #initialize method.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
int rb_enc_get_index(VALUE obj)
Queries the index of the encoding of the passed object, if any.
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
int rb_enc_codelen(int code, rb_encoding *enc)
Queries the number of bytes requested to represent the passed code point using the passed encoding.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_mbclen() unless the character at p overruns e.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Look for the "common" encoding between the two.
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
int rb_enc_to_index(rb_encoding *enc)
Queries the index of the encoding.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
rb_encoding * rb_to_encoding(VALUE obj)
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL.
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT, a.k.a. binary.
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL.
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
static char * rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the previous (left) character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
rb_encoding * rb_enc_from_index(int idx)
Identical to rb_find_encoding(), except it takes an encoding index instead of a Ruby object.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
VALUE rb_enc_associate_index(VALUE obj, int encindex)
Identical to rb_enc_set_index(), except it additionally does contents fix-up depending on the passed encoding.
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_obj_encoding(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcallv(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcall(), except it takes the method arguments as a C array.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the given array.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
void rb_gc_register_address(VALUE *valptr)
Inform the garbage collector that the global or static variable pointed by valptr stores a live Ruby ...
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how much room for objects it should allocate.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
VALUE rb_ary_new_from_args(long n,...)
Constructs an array from the passed objects.
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Identical to rb_cstr2inum(), except it takes Ruby's strings instead of C's.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_check_hash_type(VALUE obj)
Try converting an object to its hash representation using its to_hash method, if any.
VALUE rb_hash_aref(VALUE hash, VALUE key)
Queries the given key in the given hash table.
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Inserts or replaces ("upsert"s) the objects into the given hash table.
VALUE rb_hash_lookup(VALUE hash, VALUE key)
Identical to rb_hash_aref(), except it always returns RUBY_Qnil for misshits.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_rs
The record separator character for inputs, or the $/.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby's string instead of C's.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_utf8_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "UTF-8" encoding.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_utf8_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
VALUE rb_usascii_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
VALUE rb_usascii_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_buf_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_str_buf_new_cstr(const char *ptr)
This is a rb_str_buf_new() + rb_str_buf_cat() combo.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string is a NUL-terminated ASCII string.
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
VALUE rb_str_dup_frozen(VALUE)
Just another name of rb_str_new_frozen.
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
VALUE rb_locale_str_new_cstr(const char *ptr)
Identical to rb_locale_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_str_new_cstr(const char *ptr)
Identical to rb_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary".
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_external_str_new_cstr(const char *ptr)
Identical to rb_external_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_str_cat_cstr(VALUE dst, const char *src)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "binary".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
ID rb_intern(const char *name)
Finds or creates a symbol of the given name.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
VALUE rb_id2str(ID id)
Identical to rb_id2name(), except it returns a Ruby's String instead of C's.
void rb_define_hooked_variable(const char *name, VALUE *var, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter)
Identical to rb_define_virtual_variable(), but can also specify a storage.
int capa
Designed capacity of the buffer.
char * ptr
Pointer to the underlying memory region, of at least capa bytes.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
VALUE type(ANYARGS)
ANYARGS-ed function type.
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks the contents for viability as a C string.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encoding.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks the contents for viability as a C string.
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
union RString::@48 as
String's specific fields.
long len
Length of the string, not including terminating NUL character.
struct RString::@48::@50 embed
Embedded contents.
struct RString::@48::@49 heap
Strings that use separated memory region for contents use this pattern.
VALUE shared
Parent of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions when the type check fails.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.
void ruby_xfree(void *ptr)
Deallocates a storage instance.