14 #include "ruby/internal/config.h"
24 #include "debug_counter.h"
28 #include "internal/array.h"
29 #include "internal/compar.h"
30 #include "internal/compilers.h"
31 #include "internal/encoding.h"
32 #include "internal/error.h"
33 #include "internal/gc.h"
34 #include "internal/numeric.h"
35 #include "internal/object.h"
36 #include "internal/proc.h"
37 #include "internal/re.h"
38 #include "internal/sanitizers.h"
39 #include "internal/string.h"
40 #include "internal/transcode.h"
45 #include "ruby_assert.h"
48 #if defined HAVE_CRYPT_R
49 # if defined HAVE_CRYPT_H
52 #elif !defined HAVE_CRYPT
53 # include "missing/crypt.h"
54 # define HAVE_CRYPT_R 1
/* Index the `beg` / `end` arrays reached through a variable named `regs`,
 * which must be in scope wherever these macros are expanded.
 * NOTE(review): `regs` is presumably a regexp match-register struct; the
 * expansion sites are outside this excerpt -- confirm. */
57 #define BEG(no) (regs->beg[(no)])
58 #define END(no) (regs->end[(no)])
61 #undef rb_usascii_str_new
62 #undef rb_utf8_str_new
64 #undef rb_str_new_cstr
65 #undef rb_usascii_str_new_cstr
66 #undef rb_utf8_str_new_cstr
67 #undef rb_enc_str_new_cstr
68 #undef rb_external_str_new_cstr
69 #undef rb_locale_str_new_cstr
70 #undef rb_str_dup_frozen
71 #undef rb_str_buf_new_cstr
73 #undef rb_str_buf_cat2
75 #undef rb_str_cat_cstr
76 #undef rb_fstring_cstr
/* Defined as 16; no use is visible in this excerpt.
 * NOTE(review): presumably an upper bound on the byte length of a single
 * character -- confirm against its users. */
121 #define RUBY_MAX_CHAR_LEN 16
/* String-specific flag bits, each aliased to one of the FL_USERn flags. */
/* Set by str_store_precomputed_hash() after it copies a hash value to the
 * bytes just past the string's terminator. */
122 #define STR_PRECOMPUTED_HASH FL_USER4
/* Set by STR_SET_SHARED on the string that another string's buffer is shared
 * from; rb_str_reembeddable_p() rejects strings carrying it. */
123 #define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on a shared root whose RBASIC_CLASS is 0; cleared by
 * STR_SET_NOEMBED. */
124 #define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(); also part of STR_UNMODIFIABLE_MASK. */
125 #define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); str_discard() does not free the heap buffer of a
 * string that has this flag (or STR_SHARED) set. */
126 #define STR_NOFREE FL_USER18
/* STR_SET_SHARED guards its body with a test that this flag is NOT set. */
127 #define STR_FAKESTR FL_USER19
129 #define STR_SET_NOEMBED(str) do {\
130 FL_SET((str), STR_NOEMBED);\
131 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Marks `str` as embedded by clearing the STR_NOEMBED, STR_SHARED and
 * STR_NOFREE flags in a single FL_UNSET. It only touches flags; copying the
 * bytes into the embedded area is the caller's job. */
133 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
135 #define STR_SET_LEN(str, n) do { \
136 RSTRING(str)->len = (n); \
140 str_encindex_fastpath(
int encindex)
144 case ENCINDEX_ASCII_8BIT:
146 case ENCINDEX_US_ASCII:
154 str_enc_fastpath(
VALUE str)
/* Terminator length for `str`: 1 when str_enc_fastpath(str) is true,
 * otherwise rb_enc_mbminlen() of the encoding looked up via
 * rb_enc_from_index(ENCODING_GET(str)).
 * `str` appears twice in the expansion, so pass a side-effect-free
 * expression. */
159 #define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
160 #define TERM_FILL(ptr, termlen) do {\
161 char *const term_fill_ptr = (ptr);\
162 const int term_fill_len = (termlen);\
163 *term_fill_ptr = '\0';\
164 if (UNLIKELY(term_fill_len > 1))\
165 memset(term_fill_ptr, 0, term_fill_len);\
168 #define RESIZE_CAPA(str,capacity) do {\
169 const int termlen = TERM_LEN(str);\
170 RESIZE_CAPA_TERM(str,capacity,termlen);\
172 #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
173 if (STR_EMBED_P(str)) {\
174 if (str_embed_capa(str) < capacity + termlen) {\
175 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
176 const long tlen = RSTRING_LEN(str);\
177 memcpy(tmp, RSTRING_PTR(str), tlen);\
178 RSTRING(str)->as.heap.ptr = tmp;\
179 RSTRING(str)->len = tlen;\
180 STR_SET_NOEMBED(str);\
181 RSTRING(str)->as.heap.aux.capa = (capacity);\
185 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
186 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
187 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
188 RSTRING(str)->as.heap.aux.capa = (capacity);\
192 #define STR_SET_SHARED(str, shared_str) do { \
193 if (!FL_TEST(str, STR_FAKESTR)) { \
194 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
195 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
196 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
197 FL_SET((str), STR_SHARED); \
198 FL_SET((shared_str), STR_SHARED_ROOT); \
199 if (RBASIC_CLASS((shared_str)) == 0) \
200 FL_SET_RAW((shared_str), STR_BORROWED); \
/* The heap buffer pointer field of `str` (as.heap.ptr); usable as an lvalue. */
204 #define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* as.heap.aux.capa (cast to size_t) plus TERM_LEN(str). This file passes it
 * as the current allocation size to SIZED_REALLOC_N and ruby_sized_xfree. */
205 #define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Thin alias for get_encoding(str). */
208 #define STR_ENC_GET(str) get_encoding(str)
/* SHARABLE_MIDDLE_SUBSTRING defaults to 0 unless it was defined beforehand. */
210 #if !defined SHARABLE_MIDDLE_SUBSTRING
211 # define SHARABLE_MIDDLE_SUBSTRING 0
213 #if !SHARABLE_MIDDLE_SUBSTRING
/* With SHARABLE_MIDDLE_SUBSTRING == 0: true only when the range beg..beg+len
 * finishes exactly at `end` (beg + len == end). */
214 #define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
/* Alternative definition: always true.
 * NOTE(review): presumably the #else branch of the conditional above; the
 * directive itself is not visible in this excerpt. */
216 #define SHARABLE_SUBSTRING_P(beg, len, end) 1
221 str_embed_capa(
VALUE str)
223 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
227 rb_str_reembeddable_p(
VALUE str)
229 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
233 rb_str_embed_size(
long capa)
239 rb_str_size_as_embedded(
VALUE str)
242 if (STR_EMBED_P(str)) {
243 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
247 else if (rb_str_reembeddable_p(str)) {
248 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
251 real_size =
sizeof(
struct RString);
255 real_size +=
sizeof(st_index_t);
262 STR_EMBEDDABLE_P(
long len,
long termlen)
264 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
/* Forward declarations of helpers whose definitions appear further down. */
/* In its definition a nonzero `copy_encoding` makes the terminator length
 * follow TERM_LEN(orig) (otherwise 1 is used) and triggers
 * rb_enc_cr_str_exact_copy() onto the result. */
269 static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
/* Its definition allocates with str_alloc_heap() and sets STR_NOFREE on the
 * resulting string. */
270 static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
/* str_make_independent() calls this with `expand` == 0L and
 * `termlen` == TERM_LEN(str). */
272 static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
/* Its definition handles chilled strings via CHILLED_STRING_MUTATED() and
 * calls rb_check_lockedtmp() and rb_check_frozen() on `str`. */
273 static inline void str_modifiable(
VALUE str);
278 str_make_independent(
VALUE str)
281 int termlen = TERM_LEN(str);
282 str_make_independent_expand((str),
len, 0L, termlen);
/* Forward declaration. rb_str_make_independent() calls str_make_independent()
 * only when this predicate is nonzero; the definition examines STR_EMBED_P
 * and the STR_SHARED|STR_NOFREE flags. */
285 static inline int str_dependent_p(
VALUE str);
288 rb_str_make_independent(
VALUE str)
290 if (str_dependent_p(str)) {
291 str_make_independent(str);
296 rb_str_make_embedded(
VALUE str)
301 char *buf =
RSTRING(str)->as.heap.ptr;
305 STR_SET_LEN(str,
len);
312 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
316 rb_debug_rstring_null_ptr(
const char *func)
318 fprintf(stderr,
"%s is returning NULL!! "
319 "SIGSEGV is highly expected to follow immediately.\n"
320 "If you could reproduce, attach your debugger here, "
321 "and look at the passed string.\n",
326 static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
329 get_encoding(
VALUE str)
335 mustnot_broken(
VALUE str)
337 if (is_broken_string(str)) {
343 mustnot_wchar(
VALUE str)
/* Forward declaration; defined further down. The callers visible in this
 * file (rb_fstring(), rb_fstring_new()) pass copy = false and
 * precompute_hash = false. */
353 static VALUE register_fstring(
VALUE str,
bool copy,
bool precompute_hash);
355 #if SIZEOF_LONG == SIZEOF_VOIDP
357 fstring_hash(
VALUE str)
361 return (st_index_t)
RSTRING(str)->as.heap.aux.capa;
/* Makes fstring_hash an alias of rb_str_hash.
 * NOTE(review): a fstring_hash function is defined above under
 * `#if SIZEOF_LONG == SIZEOF_VOIDP`; this macro is presumably the fallback
 * for the other case, but the #else directive is not visible -- confirm. */
368 #define fstring_hash rb_str_hash
/* True when `str` has no FL_EXIVAR flag set and its class is exactly
 * rb_cString (RBASIC_CLASS compared by identity). `str` is expanded twice. */
375 #define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
377 static inline st_index_t
378 str_do_hash(
VALUE str)
382 if (e && !is_ascii_string(str)) {
389 str_store_precomputed_hash(
VALUE str, st_index_t hash)
395 size_t used_bytes = (
RSTRING_LEN(str) + TERM_LEN(str));
396 size_t free_bytes = str_embed_capa(str) - used_bytes;
400 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
402 FL_SET(str, STR_PRECOMPUTED_HASH);
410 bool precompute_hash;
414 fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
423 if (rb_objspace_garbage_object_p(str)) {
436 long capa =
len +
sizeof(st_index_t);
437 int term_len = TERM_LEN(str);
439 if (arg->precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
445 str_store_precomputed_hash(new_str, fstring_hash(str));
461 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
464 if (STR_SHARED_P(str)) {
466 str_make_independent(str);
469 if (!BARE_STRING_P(str)) {
473 RBASIC(str)->flags |= RSTRING_FSTR;
475 *key = *value = arg->fstr = str;
481 rb_fstring(
VALUE str)
488 if (
FL_TEST(str, RSTRING_FSTR))
491 bare = BARE_STRING_P(str);
493 if (STR_EMBED_P(str)) {
498 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
507 fstr = register_fstring(str,
false,
false);
510 str_replace_shared_without_enc(str, fstr);
518 register_fstring(
VALUE str,
bool copy,
bool precompute_hash)
522 .precompute_hash = precompute_hash
525 #if SIZEOF_VOIDP == SIZEOF_LONG
529 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
535 st_table *frozen_strings = rb_vm_fstring_table();
538 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
539 }
while (UNDEF_P(args.fstr));
552 setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
565 fake_str->
as.
heap.ptr = (
char *)name;
567 return (
VALUE)fake_str;
585 rb_fstring_new(
const char *
ptr,
long len)
588 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
595 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
599 rb_fstring_cstr(
const char *
ptr)
601 return rb_fstring_new(
ptr, strlen(
ptr));
605 fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
615 const char *aptr, *bptr;
618 return (alen != blen ||
620 memcmp(aptr, bptr, alen) != 0);
624 single_byte_optimizable(
VALUE str)
628 case ENCINDEX_ASCII_8BIT:
629 case ENCINDEX_US_ASCII:
651 static inline const char *
652 search_nonascii(
const char *p,
const char *e)
654 const uintptr_t *s, *t;
656 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
657 # if SIZEOF_UINTPTR_T == 8
658 # define NONASCII_MASK UINT64_C(0x8080808080808080)
659 # elif SIZEOF_UINTPTR_T == 4
660 # define NONASCII_MASK UINT32_C(0x80808080)
662 # error "don't know what to do."
665 # if SIZEOF_UINTPTR_T == 8
666 # define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
667 # elif SIZEOF_UINTPTR_T == 4
668 # define NONASCII_MASK 0x80808080UL
670 # error "don't know what to do."
674 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
675 #if !UNALIGNED_WORD_ACCESS
676 if ((uintptr_t)p % SIZEOF_VOIDP) {
677 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
682 case 7:
if (p[-7]&0x80)
return p-7;
683 case 6:
if (p[-6]&0x80)
return p-6;
684 case 5:
if (p[-5]&0x80)
return p-5;
685 case 4:
if (p[-4]&0x80)
return p-4;
687 case 3:
if (p[-3]&0x80)
return p-3;
688 case 2:
if (p[-2]&0x80)
return p-2;
689 case 1:
if (p[-1]&0x80)
return p-1;
694 #if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
695 #define aligned_ptr(value) \
696 __builtin_assume_aligned((value), sizeof(uintptr_t))
698 #define aligned_ptr(value) (uintptr_t *)(value)
701 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
704 if (*s & NONASCII_MASK) {
705 #ifdef WORDS_BIGENDIAN
706 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
708 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
718 case 7:
if (e[-7]&0x80)
return e-7;
719 case 6:
if (e[-6]&0x80)
return e-6;
720 case 5:
if (e[-5]&0x80)
return e-5;
721 case 4:
if (e[-4]&0x80)
return e-4;
723 case 3:
if (e[-3]&0x80)
return e-3;
724 case 2:
if (e[-2]&0x80)
return e-2;
725 case 1:
if (e[-1]&0x80)
return e-1;
733 const char *e = p +
len;
737 p = search_nonascii(p, e);
742 p = search_nonascii(p, e);
749 p = search_nonascii(p, e);
774 p = search_nonascii(p, e);
779 p = search_nonascii(p, e);
792 p = search_nonascii(p, e);
831 rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
836 str_enc_copy(dest, src);
861 rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
863 str_enc_copy(dest, src);
876 return enc_coderange_scan(str, enc);
885 cr = enc_coderange_scan(str, get_encoding(str));
892 rb_enc_str_asciicompat(
VALUE str)
895 return str_encindex_fastpath(encindex) ||
rb_enc_asciicompat(rb_enc_get_from_index(encindex));
903 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
912 str_mod_check(
VALUE s,
const char *p,
long len)
920 str_capacity(
VALUE str,
const int termlen)
922 if (STR_EMBED_P(str)) {
923 return str_embed_capa(str) - termlen;
925 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
929 return RSTRING(str)->as.heap.aux.capa;
936 return str_capacity(str, TERM_LEN(str));
940 must_not_null(
const char *
ptr)
948 str_alloc_embed(
VALUE klass,
size_t capa)
950 size_t size = rb_str_embed_size(
capa);
954 NEWOBJ_OF(str,
struct RString, klass,
961 str_alloc_heap(
VALUE klass)
963 NEWOBJ_OF(str,
struct RString, klass,
970 empty_str_alloc(
VALUE klass)
972 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
973 VALUE str = str_alloc_embed(klass, 0);
974 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
980 str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
988 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
990 if (STR_EMBEDDABLE_P(
len, termlen)) {
991 str = str_alloc_embed(klass,
len + termlen);
997 str = str_alloc_heap(klass);
1003 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1008 STR_SET_LEN(str,
len);
1016 return str_new0(klass,
ptr,
len, 1);
1061 __msan_unpoison_string(
ptr);
1092 str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1101 rb_encoding *enc = rb_enc_get_from_index(encindex);
1105 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1106 str = str_alloc_heap(klass);
1110 RBASIC(str)->flags |= STR_NOFREE;
1140 static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1142 int ecflags,
VALUE ecopts);
1149 return is_ascii_string(str);
1160 if (!to)
return str;
1162 if (from == to)
return str;
1164 rb_is_ascii8bit_enc(to)) {
1165 if (STR_ENC_GET(str) != to) {
1174 from, to, ecflags, ecopts);
1175 if (
NIL_P(newstr)) {
1183 rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1189 if (ofs < -olen || olen < ofs)
1191 if (ofs < 0) ofs += olen;
1193 STR_SET_LEN(newstr, ofs);
1198 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1206 STR_SET_LEN(str, 0);
1213 str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1215 int ecflags,
VALUE ecopts)
1220 VALUE econv_wrapper;
1221 const unsigned char *start, *sp;
1222 unsigned char *dest, *dp;
1223 size_t converted_output = (size_t)ofs;
1228 RBASIC_CLEAR_CLASS(econv_wrapper);
1230 if (!ec)
return Qnil;
1233 sp = (
unsigned char*)
ptr;
1235 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1236 (dp = dest + converted_output),
1240 size_t converted_input = sp - start;
1241 size_t rest =
len - converted_input;
1242 converted_output = dp - dest;
1244 if (converted_input && converted_output &&
1245 rest < (LONG_MAX / converted_output)) {
1246 rest = (rest * converted_output) / converted_input;
1251 olen += rest < 2 ? 2 : rest;
1293 if (!ienc || eenc == ienc) {
1307 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1308 rb_str_initialize(str,
ptr,
len, eenc);
1318 !is_ascii_string(str)) {
1381 str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1383 const int termlen = TERM_LEN(str);
1388 if (str_embed_capa(str2) >=
len + termlen) {
1389 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1390 STR_SET_EMBED(str2);
1392 TERM_FILL(ptr2+
len, termlen);
1396 if (STR_SHARED_P(str)) {
1397 root =
RSTRING(str)->as.heap.aux.shared;
1406 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1408 rb_fatal(
"about to free a possible shared root");
1410 char *ptr2 = STR_HEAP_PTR(str2);
1412 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1415 FL_SET(str2, STR_NOEMBED);
1417 STR_SET_SHARED(str2, root);
1420 STR_SET_LEN(str2,
len);
1428 str_replace_shared_without_enc(str2, str);
1429 rb_enc_cr_str_exact_copy(str2, str);
1436 return str_replace_shared(str_alloc_heap(klass), str);
1453 rb_str_new_frozen_String(
VALUE orig)
1460 rb_str_tmp_frozen_acquire(
VALUE orig)
1463 return str_new_frozen_buffer(0, orig, FALSE);
1467 rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1469 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1470 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1472 VALUE str = str_alloc_heap(0);
1475 FL_SET(str, STR_SHARED_ROOT);
1477 size_t capa = str_capacity(orig, TERM_LEN(orig));
1483 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1484 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1491 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1492 RBASIC(orig)->flags &= ~STR_NOFREE;
1493 STR_SET_SHARED(orig, str);
1503 rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1508 if (STR_EMBED_P(tmp)) {
1521 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1522 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1527 STR_SET_LEN(tmp, 0);
1535 return str_new_frozen_buffer(klass, orig, TRUE);
1539 heap_str_make_shared(
VALUE klass,
VALUE orig)
1544 VALUE str = str_alloc_heap(klass);
1547 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1548 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1549 RBASIC(orig)->flags &= ~STR_NOFREE;
1550 STR_SET_SHARED(orig, str);
1557 str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1562 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1564 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1578 if ((ofs > 0) || (rest > 0) ||
1581 str = str_new_shared(klass,
shared);
1583 RSTRING(str)->as.heap.ptr += ofs;
1584 STR_SET_LEN(str,
RSTRING_LEN(str) - (ofs + rest));
1592 else if (STR_EMBEDDABLE_P(
RSTRING_LEN(orig), TERM_LEN(orig))) {
1593 str = str_alloc_embed(klass,
RSTRING_LEN(orig) + TERM_LEN(orig));
1601 str = heap_str_make_shared(klass, orig);
1605 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1617 str_new_empty_String(
VALUE str)
/* Minimum buffer capacity: rb_str_init() raises a requested `capa` that is
 * below this value up to it. */
1624 #define STR_BUF_MIN_SIZE 63
1629 if (STR_EMBEDDABLE_P(
capa, 1)) {
1637 RSTRING(str)->as.heap.ptr[0] =
'\0';
1657 return str_new(0, 0,
len);
1663 if (
FL_TEST(str, RSTRING_FSTR)) {
1664 st_data_t fstr = (st_data_t)str;
1668 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1669 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1674 if (STR_EMBED_P(str)) {
1675 RB_DEBUG_COUNTER_INC(obj_str_embed);
1677 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1678 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1679 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1682 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1683 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1688 rb_str_memsize(
VALUE str)
1690 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1691 return STR_HEAP_SIZE(str);
1701 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/* Forward declarations. str_discard()'s definition further down checks
 * modifiability and, for a non-embedded string without STR_SHARED/STR_NOFREE,
 * frees the heap buffer, nulls the pointer and sets the length to 0. */
1704 static inline void str_discard(
VALUE str);
/* Called right below as str_shared_replace(str, str2), only when
 * str != str2. */
1705 static void str_shared_replace(
VALUE str,
VALUE str2);
1710 if (str != str2) str_shared_replace(str, str2);
1721 enc = STR_ENC_GET(str2);
1728 if (str_embed_capa(str) >=
RSTRING_LEN(str2) + termlen) {
1735 if (STR_EMBED_P(str2)) {
1740 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1741 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1742 RSTRING(str2)->as.heap.ptr = new_ptr;
1743 STR_SET_LEN(str2,
len);
1745 STR_SET_NOEMBED(str2);
1748 STR_SET_NOEMBED(str);
1752 if (
FL_TEST(str2, STR_SHARED)) {
1754 STR_SET_SHARED(str,
shared);
1757 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1761 STR_SET_EMBED(str2);
1763 STR_SET_LEN(str2, 0);
1778 return rb_obj_as_string_result(str, obj);
1782 rb_obj_as_string_result(
VALUE str,
VALUE obj)
1795 if (STR_SHARED_P(str2)) {
1798 STR_SET_NOEMBED(str);
1799 STR_SET_LEN(str,
len);
1801 STR_SET_SHARED(str,
shared);
1802 rb_enc_cr_str_exact_copy(str, str2);
1805 str_replace_shared(str, str2);
1814 size_t size = rb_str_embed_size(
capa);
1818 NEWOBJ_OF(str,
struct RString, klass,
1827 NEWOBJ_OF(str,
struct RString, klass,
1858 return str_duplicate_setup_encoding(str, dup, flags);
1867 root =
RSTRING(str)->as.heap.aux.shared;
1869 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1870 root = str = str_new_frozen(klass, str);
1877 FL_SET(root, STR_SHARED_ROOT);
1879 flags |= RSTRING_NOEMBED | STR_SHARED;
1882 return str_duplicate_setup_encoding(str, dup, flags);
1888 if (STR_EMBED_P(str)) {
1889 return str_duplicate_setup_embed(klass, str, dup);
1892 return str_duplicate_setup_heap(klass, str, dup);
1900 if (STR_EMBED_P(str)) {
1901 dup = str_alloc_embed(klass,
RSTRING_LEN(str) + TERM_LEN(str));
1904 dup = str_alloc_heap(klass);
1907 return str_duplicate_setup(klass, str, dup);
1918 rb_str_dup_m(
VALUE str)
1920 if (LIKELY(BARE_STRING_P(str))) {
1931 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1938 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1942 new_str = ec_str_alloc_embed(ec, klass,
RSTRING_LEN(str) + TERM_LEN(str));
1943 str_duplicate_setup_embed(klass, str, new_str);
1946 new_str = ec_str_alloc_heap(ec, klass);
1947 str_duplicate_setup_heap(klass, str, new_str);
1950 STR_CHILL_RAW(new_str);
1956 rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
1975 rb_str_init(
int argc,
VALUE *argv,
VALUE str)
1977 static ID keyword_ids[2];
1978 VALUE orig, opt, venc, vcapa;
1983 if (!keyword_ids[0]) {
1984 keyword_ids[0] = rb_id_encoding();
1985 CONST_ID(keyword_ids[1],
"capacity");
1993 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
1996 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2001 if (
capa < STR_BUF_MIN_SIZE) {
2002 capa = STR_BUF_MIN_SIZE;
2010 if (orig == str) n = 0;
2012 str_modifiable(str);
2013 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2015 const size_t size = (size_t)
capa + termlen;
2017 const size_t osize =
RSTRING_LEN(str) + TERM_LEN(str);
2018 char *new_ptr =
ALLOC_N(
char, size);
2019 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2020 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2022 RSTRING(str)->as.heap.ptr = new_ptr;
2024 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2025 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2026 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2028 STR_SET_LEN(str,
len);
2032 rb_enc_cr_str_exact_copy(str, orig);
2034 FL_SET(str, STR_NOEMBED);
2053 rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2059 static ID keyword_ids[2];
2069 keyword_ids[0] = rb_id_encoding();
2070 CONST_ID(keyword_ids[1],
"capacity");
2072 encoding = kwargs[0];
2073 capacity = kwargs[1];
2084 if (UNDEF_P(encoding)) {
2090 if (!UNDEF_P(encoding)) {
2096 if (UNDEF_P(capacity)) {
2098 VALUE empty_str = str_new(klass,
"", 0);
2104 VALUE copy = str_duplicate(klass, orig);
2118 if (orig_capa >
capa) {
2123 VALUE str = str_new0(klass, NULL,
capa, termlen);
2124 STR_SET_LEN(str, 0);
2138 #ifdef NONASCII_MASK
/* True when the top two bits of `c` are not binary 10, i.e. `c` is not a
 * UTF-8 continuation byte (0x80..0xBF). ASCII bytes therefore also count as
 * "lead" bytes. */
2139 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2154 static inline uintptr_t
2155 count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2160 d = (d>>6) | (~d>>7);
2161 d &= NONASCII_MASK >> 7;
2164 #if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2166 return rb_popcount_intptr(d);
2170 # if SIZEOF_VOIDP == 8
2179 enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2185 long diff = (long)(e - p);
2188 #ifdef NONASCII_MASK
2191 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2192 const uintptr_t *s, *t;
2193 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2194 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2195 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2196 while (p < (
const char *)s) {
2197 if (is_utf8_lead_byte(*p))
len++;
2201 len += count_utf8_lead_bytes_with_word(s);
2204 p = (
const char *)s;
2207 if (is_utf8_lead_byte(*p))
len++;
2218 q = search_nonascii(p, e);
2231 q = search_nonascii(p, e);
2244 for (c=0; p<e; c++) {
2260 rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2268 long diff = (long)(e - p);
2275 q = search_nonascii(p, e);
2298 for (c=0; p<e; c++) {
2323 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2324 if (!enc) enc = STR_ENC_GET(str);
2330 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2335 return enc_strlen(p, e, enc, cr);
2342 return str_strlen(str, NULL);
2356 return LONG2NUM(str_strlen(str, NULL));
2368 rb_str_bytesize(
VALUE str)
2386 rb_str_empty(
VALUE str)
2406 char *ptr1, *ptr2, *ptr3;
2411 enc = rb_enc_check_str(str1, str2);
2415 if (len1 > LONG_MAX - len2) {
2418 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2420 memcpy(ptr3, ptr1, len1);
2421 memcpy(ptr3+len1, ptr2, len2);
2422 TERM_FILL(&ptr3[len1+len2], termlen);
2438 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2447 else if (enc2 < 0) {
2450 else if (enc1 != enc2) {
2453 else if (len1 > LONG_MAX - len2) {
2494 if (STR_EMBEDDABLE_P(
len, 1)) {
2503 STR_SET_LEN(str2,
len);
2512 termlen = TERM_LEN(str);
2518 while (n <=
len/2) {
2519 memcpy(ptr2 + n, ptr2, n);
2522 memcpy(ptr2 + n, ptr2,
len-n);
2524 STR_SET_LEN(str2,
len);
2525 TERM_FILL(&ptr2[
len], termlen);
2526 rb_enc_cr_str_copy_for_substr(str2, str);
2561 rb_check_lockedtmp(
VALUE str)
2563 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Union of the flags FL_FREEZE, STR_TMPLOCK and STR_CHILLED. These are the
 * same three conditions (frozen, temp-locked, chilled) that str_modifiable()
 * checks one by one. */
2570 #define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2572 str_modifiable(
VALUE str)
2575 if (CHILLED_STRING_P(str)) {
2576 CHILLED_STRING_MUTATED(str);
2578 rb_check_lockedtmp(str);
2579 rb_check_frozen(str);
2584 str_dependent_p(
VALUE str)
2586 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK extended with STR_SHARED and STR_NOFREE, the two
 * flags str_dependent_p() tests. No use of this mask is visible in this
 * excerpt. */
2596 #define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2598 str_independent(
VALUE str)
2601 str_modifiable(str);
2602 return !str_dependent_p(str);
2608 str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2616 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2621 STR_SET_LEN(str,
len);
2628 memcpy(
ptr, oldptr,
len);
2630 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2633 STR_SET_NOEMBED(str);
2634 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2635 TERM_FILL(
ptr +
len, termlen);
2637 STR_SET_LEN(str,
len);
2644 if (!str_independent(str))
2645 str_make_independent(str);
2652 int termlen = TERM_LEN(str);
2658 if (expand >= LONG_MAX -
len) {
2662 if (!str_independent(str)) {
2663 str_make_independent_expand(str,
len, expand, termlen);
2665 else if (expand > 0) {
2666 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2673 str_modify_keep_cr(
VALUE str)
2675 if (!str_independent(str))
2676 str_make_independent(str);
2683 str_discard(
VALUE str)
2685 str_modifiable(str);
2686 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2687 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2688 RSTRING(str)->as.heap.ptr = 0;
2689 STR_SET_LEN(str, 0);
2724 zero_filled(
const char *s,
int n)
2726 for (; n > 0; --n) {
2733 str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2735 const char *e = s +
len;
2738 if (zero_filled(s, minlen))
return s;
2744 str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2749 if (str_dependent_p(str)) {
2750 if (!zero_filled(s +
len, termlen))
2751 str_make_independent_expand(str,
len, 0L, termlen);
2754 TERM_FILL(s +
len, termlen);
2761 rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2763 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2768 rb_check_lockedtmp(str);
2769 str_make_independent_expand(str,
len, 0L, termlen);
2771 else if (str_dependent_p(str)) {
2772 if (termlen > oldtermlen)
2773 str_make_independent_expand(str,
len, 0L, termlen);
2776 if (!STR_EMBED_P(str)) {
2781 if (termlen > oldtermlen) {
2790 str_null_check(
VALUE str,
int *w)
2799 if (str_null_char(s,
len, minlen, enc)) {
2802 return str_fill_term(str, s,
len, minlen);
2805 if (!s || memchr(s, 0,
len)) {
2809 s = str_fill_term(str, s,
len, minlen);
2815 rb_str_to_cstr(
VALUE str)
2818 return str_null_check(str, &w);
2826 char *s = str_null_check(str, &w);
2837 rb_str_fill_terminator(
VALUE str,
const int newminlen)
2841 return str_fill_term(str, s,
len, newminlen);
2847 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2871 str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2881 const char *p2, *e2;
2884 while (p < e && 0 < nth) {
2891 p2 = search_nonascii(p, e2);
2911 while (p < e && nth--) {
2923 return str_nth_len(p, e, &nth, enc);
2927 str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2932 p = str_nth_len(p, e, &nth, enc);
2941 str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2943 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2944 if (!pp)
return e - p;
2952 STR_ENC_GET(str), single_byte_optimizable(str));
2955 #ifdef NONASCII_MASK
2957 str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2960 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2961 const uintptr_t *s, *t;
2962 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2963 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2964 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2965 while (p < (
const char *)s) {
2966 if (is_utf8_lead_byte(*p)) nth--;
2970 nth -= count_utf8_lead_bytes_with_word(s);
2972 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2976 if (is_utf8_lead_byte(*p)) {
2977 if (nth == 0)
break;
2987 str_utf8_offset(
const char *p,
const char *e,
long nth)
2989 const char *pp = str_utf8_nth(p, e, &nth);
2998 if (single_byte_optimizable(str) || pos < 0)
3002 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3007 str_subseq(
VALUE str,
long beg,
long len)
3015 const int termlen = TERM_LEN(str);
3023 if (str_embed_capa(str2) >=
len + termlen) {
3024 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3025 STR_SET_EMBED(str2);
3027 TERM_FILL(ptr2+
len, termlen);
3029 STR_SET_LEN(str2,
len);
3033 str_replace_shared(str2, str);
3036 RSTRING(str2)->as.heap.ptr += beg;
3038 STR_SET_LEN(str2,
len);
3048 VALUE str2 = str_subseq(str, beg,
len);
3049 rb_enc_cr_str_copy_for_substr(str2, str);
3062 if (
len < 0)
return 0;
3066 if (single_byte_optimizable(str)) {
3067 if (beg > blen)
return 0;
3070 if (beg < 0)
return 0;
3072 if (
len > blen - beg)
3074 if (
len < 0)
return 0;
3079 if (
len > -beg)
len = -beg;
3091 slen = str_strlen(str, enc);
3093 if (beg < 0)
return 0;
3095 if (
len == 0)
goto end;
3102 if (beg > str_strlen(str, enc))
return 0;
3105 #ifdef NONASCII_MASK
3108 p = str_utf8_nth(s, e, &beg);
3109 if (beg > 0)
return 0;
3110 len = str_utf8_offset(p, e,
len);
3116 p = s + beg * char_sz;
3120 else if (
len * char_sz > e - p)
3125 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3126 if (beg > 0)
return 0;
3130 len = str_offset(p, e,
len, enc, 0);
/* Forward declaration. In the definition below, `empty` controls the
 * zero-length case: when `len` is 0 and `empty` is false the function
 * returns Qnil. The wrapper just below passes empty = TRUE. */
3138 static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3143 return str_substr(str, beg,
len, TRUE);
3147 str_substr(
VALUE str,
long beg,
long len,
int empty)
3151 if (!p)
return Qnil;
3152 if (!
len && !empty)
return Qnil;
3156 VALUE str2 = str_subseq(str, beg,
len);
3157 rb_enc_cr_str_copy_for_substr(str2, str);
3165 if (CHILLED_STRING_P(str)) {
3183 str_uplus(
VALUE str)
3185 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3215 str_uminus(
VALUE str)
3220 return rb_fstring(str);
/* Re-establishes rb_str_dup_frozen as a plain alias of rb_str_new_frozen
 * (the name was #undef'ed near the top of this file). */
3224 #define rb_str_dup_frozen rb_str_new_frozen
3229 if (
FL_TEST(str, STR_TMPLOCK)) {
3232 FL_SET(str, STR_TMPLOCK);
3239 if (!
FL_TEST(str, STR_TMPLOCK)) {
3257 const int termlen = TERM_LEN(str);
3259 str_modifiable(str);
3260 if (STR_SHARED_P(str)) {
3263 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3297 STR_SET_LEN(str,
len);
3308 int independent = str_independent(str);
3310 const int termlen = TERM_LEN(str);
3312 if (slen >
len || (termlen != 1 && slen <
len)) {
3318 if (STR_EMBED_P(str)) {
3319 if (
len == slen)
return str;
3320 if (str_embed_capa(str) >=
len + termlen) {
3321 STR_SET_LEN(str,
len);
3325 str_make_independent_expand(str, slen,
len - slen, termlen);
3327 else if (str_embed_capa(str) >=
len + termlen) {
3328 char *
ptr = STR_HEAP_PTR(str);
3330 if (slen >
len) slen =
len;
3333 STR_SET_LEN(str,
len);
3337 else if (!independent) {
3338 if (
len == slen)
return str;
3339 str_make_independent_expand(str, slen,
len - slen, termlen);
3343 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3344 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3347 else if (
len == slen)
return str;
3348 STR_SET_LEN(str,
len);
3355 str_ensure_available_capa(
VALUE str,
long len)
3357 str_modify_keep_cr(str);
3359 const int termlen = TERM_LEN(str);
3366 long total = olen +
len;
3367 long capa = str_capacity(str, termlen);
3370 if (total >= LONG_MAX / 2) {
3373 while (total >
capa) {
3376 RESIZE_CAPA_TERM(str,
capa, termlen);
3381 str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3384 str_modify_keep_cr(str);
3389 if (
len == 0)
return 0;
3391 long total, olen,
off = -1;
3393 const int termlen = TERM_LEN(str);
3396 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3400 long capa = str_capacity(str, termlen);
3402 if (olen > LONG_MAX -
len) {
3407 if (total >= LONG_MAX / 2) {
3410 while (total >
capa) {
3413 RESIZE_CAPA_TERM(str,
capa, termlen);
3419 memcpy(sptr + olen,
ptr,
len);
3420 STR_SET_LEN(str, total);
3421 TERM_FILL(sptr + total, termlen);
/* Convenience wrappers around str_buf_cat4() that always pass
 * keep_cr = false. */
/* Appends `len` bytes from `ptr`. Note that `len` is pasted without
 * parentheses. */
3426 #define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
/* Same, with the length computed by rb_strlen_lit(ptr).
 * NOTE(review): rb_strlen_lit presumably requires `ptr` to be a string
 * literal -- confirm before passing anything else. */
3427 #define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3432 if (
len == 0)
return str;
3436 return str_buf_cat(str,
ptr,
len);
3447 rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3452 if (UNLIKELY(!str_independent(str))) {
3453 str_make_independent(str);
3456 long string_length = -1;
3457 const int null_terminator_length = 1;
3462 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3466 long string_capacity = str_capacity(str, null_terminator_length);
3472 if (LIKELY(string_capacity >= string_length + 1)) {
3474 sptr[string_length] = byte;
3475 STR_SET_LEN(str, string_length + 1);
3476 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3480 str_buf_cat(str, (
char *)&
byte, 1);
3507 rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3508 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3517 if (str_encindex == ptr_encindex) {
3537 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3546 *ptr_cr_ret = ptr_cr;
3548 if (str_encindex != ptr_encindex &&
3557 res_encindex = str_encindex;
3562 res_encindex = str_encindex;
3566 res_encindex = ptr_encindex;
3571 res_encindex = str_encindex;
3578 res_encindex = str_encindex;
3586 str_buf_cat(str,
ptr,
len);
3592 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3599 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3610 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3616 unsigned int c = (
unsigned char)*
ptr;
3619 rb_enc_cr_str_buf_cat(str, buf,
len,
3632 if (str_enc_fastpath(str)) {
3669 rb_str_concat_literals(
size_t num,
const VALUE *strary)
3673 unsigned long len = 1;
3680 str_enc_copy_direct(str, strary[0]);
3682 for (i = s; i < num; ++i) {
3683 const VALUE v = strary[i];
3687 if (encidx != ENCINDEX_US_ASCII) {
3714 rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3716 str_modifiable(str);
3721 else if (argc > 1) {
3725 for (i = 0; i < argc; i++) {
3758 rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3760 long needed_capacity = 0;
3764 for (
int index = 0; index < argc; index++) {
3765 VALUE obj = argv[index];
3778 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3785 str_ensure_available_capa(str, needed_capacity);
3788 for (
int index = 0; index < argc; index++) {
3789 VALUE obj = argv[index];
3794 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3795 char byte = (char)(
NUM2INT(obj) & 0xFF);
3809 rb_bug(
"append_as_bytes arguments should have been validated");
3813 STR_SET_LEN(str,
RSTRING_LEN(str) + needed_capacity);
3814 TERM_FILL(sptr, TERM_LEN(str));
3819 for (
int index = 0; index < argc; index++) {
3820 VALUE obj = argv[index];
3837 rb_bug(
"append_as_bytes arguments should have been validated");
3911 if (rb_num_to_uint(str2, &code) == 0) {
3924 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3927 rb_str_buf_cat_byte(str1, (
unsigned char)code);
3936 case ONIGERR_INVALID_CODE_POINT_VALUE:
3939 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3963 rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3967 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3972 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3973 return ENCINDEX_ASCII_8BIT;
3996 rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
3998 str_modifiable(str);
4003 else if (argc > 1) {
4007 for (i = 0; i < argc; i++) {
4020 st_index_t precomputed_hash;
4021 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4023 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4024 return precomputed_hash;
4027 return str_do_hash(str);
4034 const char *ptr1, *ptr2;
4037 return (len1 != len2 ||
4039 memcmp(ptr1, ptr2, len1) != 0);
4053 rb_str_hash_m(
VALUE str)
4059 #define lesser(a,b) (((a)>(b))?(b):(a)) /* evaluates to the smaller of a and b (a when they compare equal); each argument may be evaluated twice, so avoid side effects */
4071 if (idx1 == idx2)
return TRUE;
4090 const char *ptr1, *ptr2;
4093 if (str1 == str2)
return 0;
4096 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4105 if (len1 > len2)
return 1;
4108 if (retval > 0)
return 1;
4135 if (str1 == str2)
return Qtrue;
4142 return rb_str_eql_internal(str1, str2);
4166 if (str1 == str2)
return Qtrue;
4168 return rb_str_eql_internal(str1, str2);
4199 return rb_invcmp(str1, str2);
4241 return str_casecmp(str1, s);
4249 const char *p1, *p1end, *p2, *p2end;
4258 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4259 while (p1 < p1end && p2 < p2end) {
4261 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4262 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4264 return INT2FIX(c1 < c2 ? -1 : 1);
4271 while (p1 < p1end && p2 < p2end) {
4275 if (0 <= c1 && 0 <= c2) {
4279 return INT2FIX(c1 < c2 ? -1 : 1);
4285 len = l1 < l2 ? l1 : l2;
4286 r = memcmp(p1, p2,
len);
4288 return INT2FIX(r < 0 ? -1 : 1);
4290 return INT2FIX(l1 < l2 ? -1 : 1);
4331 return str_casecmp_p(str1, s);
4338 VALUE folded_str1, folded_str2;
4339 VALUE fold_opt = sym_fold;
4346 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4347 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4349 return rb_str_eql(folded_str1, folded_str2);
4353 strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4354 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4356 const char *search_start = str_ptr;
4357 long pos, search_len = str_len - offset;
4361 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4362 if (pos < 0)
return pos;
4364 if (t == search_start + pos)
break;
4365 search_len -= t - search_start;
4366 if (search_len <= 0)
return -1;
4367 offset += t - search_start;
4370 return pos + offset;
4374 #define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0) /* rb_strseq_index with in_byte = 0: lengths and offset go through str_strlen/str_offset instead of being used as raw byte counts */
4375 #define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1) /* rb_strseq_index with in_byte = 1: offset and lengths are used directly as byte counts */
4378 rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4380 const char *str_ptr, *str_ptr_end, *sub_ptr;
4381 long str_len, sub_len;
4385 if (is_broken_string(sub))
return -1;
4393 if (str_len < sub_len)
return -1;
4396 long str_len_char, sub_len_char;
4397 int single_byte = single_byte_optimizable(str);
4398 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4399 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4401 offset += str_len_char;
4402 if (offset < 0)
return -1;
4404 if (str_len_char - offset < sub_len_char)
return -1;
4405 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4408 if (sub_len == 0)
return offset;
4411 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4425 rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4432 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4433 long slen = str_strlen(str, enc);
4435 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4448 enc, single_byte_optimizable(str));
4459 pos = rb_str_index(str, sub, pos);
4473 str_ensure_byte_pos(
VALUE str,
long pos)
4475 if (!single_byte_optimizable(str)) {
4478 const char *p = s + pos;
4479 if (!at_char_boundary(s, p, e,
rb_enc_get(str))) {
4481 "offset %ld does not land on character boundary", pos);
4528 rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4534 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4537 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4548 str_ensure_byte_pos(str, pos);
4560 pos = rb_str_byteindex(str, sub, pos);
4561 if (pos >= 0)
return LONG2NUM(pos);
4566 #ifndef HAVE_MEMRCHR
4568 memrchr(
const char *search_str,
int chr,
long search_len)
4570 const char *
ptr = search_str + search_len;
4571 while (
ptr > search_str) {
4572 if ((
unsigned char)*(--
ptr) == chr)
return (
void *)
ptr;
4582 char *hit, *adjusted;
4584 long slen, searchlen;
4589 if (slen == 0)
return s - sbeg;
4593 searchlen = s - sbeg + 1;
4595 if (memcmp(s, t, slen) == 0) {
4600 hit = memrchr(sbeg, c, searchlen);
4603 if (hit != adjusted) {
4604 searchlen = adjusted - sbeg;
4607 if (memcmp(hit, t, slen) == 0)
4609 searchlen = adjusted - sbeg;
4610 }
while (searchlen > 0);
4617 rb_str_rindex(
VALUE str,
VALUE sub,
long pos)
4625 if (is_broken_string(sub))
return -1;
4626 singlebyte = single_byte_optimizable(str);
4628 slen = str_strlen(sub, enc);
4631 if (
len < slen)
return -1;
4632 if (
len - pos < slen) pos =
len - slen;
4633 if (
len == 0)
return pos;
4644 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4645 return str_rindex(str, sub, s, enc);
4706 rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4711 long pos,
len = str_strlen(str, enc);
4713 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4715 if (pos < 0 && (pos +=
len) < 0) {
4721 if (pos >
len) pos =
len;
4730 enc, single_byte_optimizable(str));
4741 pos = rb_str_rindex(str, sub, pos);
4751 rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4758 if (is_broken_string(sub))
return -1;
4763 if (
len < slen)
return -1;
4764 if (
len - pos < slen) pos =
len - slen;
4765 if (
len == 0)
return pos;
4777 return str_rindex(str, sub, s, enc);
4842 rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4848 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4850 if (pos < 0 && (pos +=
len) < 0) {
4856 if (pos >
len) pos =
len;
4862 str_ensure_byte_pos(str, pos);
4874 pos = rb_str_byterindex(str, sub, pos);
4875 if (pos >= 0)
return LONG2NUM(pos);
4911 switch (OBJ_BUILTIN_TYPE(y)) {
4963 rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5002 rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5006 re = get_pat(argv[0]);
5007 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5010 enum neighbor_char {
5016 static enum neighbor_char
5026 return NEIGHBOR_NOT_CHAR;
5030 if (!l)
return NEIGHBOR_NOT_CHAR;
5031 if (l !=
len)
return NEIGHBOR_WRAPPED;
5035 return NEIGHBOR_NOT_CHAR;
5037 return NEIGHBOR_FOUND;
5040 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5043 return NEIGHBOR_WRAPPED;
5044 ++((
unsigned char*)p)[i];
5049 return NEIGHBOR_FOUND;
5052 memset(p+l, 0xff,
len-l);
5058 for (len2 =
len-1; 0 < len2; len2--) {
5063 memset(p+len2+1, 0xff,
len-(len2+1));
5068 static enum neighbor_char
5077 return NEIGHBOR_NOT_CHAR;
5080 if (!c)
return NEIGHBOR_NOT_CHAR;
5083 if (!l)
return NEIGHBOR_NOT_CHAR;
5084 if (l !=
len)
return NEIGHBOR_WRAPPED;
5088 return NEIGHBOR_NOT_CHAR;
5090 return NEIGHBOR_FOUND;
5093 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5096 return NEIGHBOR_WRAPPED;
5097 --((
unsigned char*)p)[i];
5102 return NEIGHBOR_FOUND;
5105 memset(p+l, 0,
len-l);
5111 for (len2 =
len-1; 0 < len2; len2--) {
5116 memset(p+len2+1, 0,
len-(len2+1));
5130 static enum neighbor_char
5131 enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5133 enum neighbor_char ret;
5137 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5141 const int max_gaps = 1;
5145 ctype = ONIGENC_CTYPE_DIGIT;
5147 ctype = ONIGENC_CTYPE_ALPHA;
5149 return NEIGHBOR_NOT_CHAR;
5152 for (
try = 0;
try <= max_gaps; ++
try) {
5153 ret = enc_succ_char(p,
len, enc);
5154 if (ret == NEIGHBOR_FOUND) {
5157 return NEIGHBOR_FOUND;
5164 ret = enc_pred_char(p,
len, enc);
5165 if (ret == NEIGHBOR_FOUND) {
5179 return NEIGHBOR_NOT_CHAR;
5182 if (ctype != ONIGENC_CTYPE_DIGIT) {
5184 return NEIGHBOR_WRAPPED;
5188 enc_succ_char(carry,
len, enc);
5189 return NEIGHBOR_WRAPPED;
5258 rb_enc_cr_str_copy_for_substr(str, orig);
5259 return str_succ(str);
5266 char *sbeg, *s, *e, *last_alnum = 0;
5267 int found_alnum = 0;
5269 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5270 long carry_pos = 0, carry_len = 1;
5271 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5274 if (slen == 0)
return str;
5276 enc = STR_ENC_GET(str);
5278 s = e = sbeg + slen;
5281 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5288 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5289 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5290 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5292 case NEIGHBOR_NOT_CHAR:
5294 case NEIGHBOR_FOUND:
5296 case NEIGHBOR_WRAPPED:
5301 carry_pos = s - sbeg;
5307 enum neighbor_char neighbor;
5308 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5310 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5311 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5313 neighbor = enc_succ_char(tmp, l, enc);
5315 case NEIGHBOR_FOUND:
5319 case NEIGHBOR_WRAPPED:
5322 case NEIGHBOR_NOT_CHAR:
5327 enc_succ_char(s, l, enc);
5330 MEMCPY(carry, s,
char, l);
5333 carry_pos = s - sbeg;
5337 RESIZE_CAPA(str, slen + carry_len);
5339 s = sbeg + carry_pos;
5340 memmove(s + carry_len, s, slen - carry_pos);
5341 memmove(s, carry, carry_len);
5343 STR_SET_LEN(str, slen);
5358 rb_str_succ_bang(
VALUE str)
5366 all_digits_p(
const char *s,
long len)
5418 rb_str_upto(
int argc,
VALUE *argv,
VALUE beg)
5420 VALUE end, exclusive;
5424 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5430 VALUE current, after_end;
5438 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5444 if (c > e || (excl && c == e))
return beg;
5448 if ((*each)(str, arg))
break;
5449 if (!excl && c == e)
break;
5451 if (excl && c == e)
break;
5471 if (excl && bi == ei)
break;
5472 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5477 ID op = excl ?
'<' : idLE;
5478 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5483 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5491 if (n > 0 || (excl && n == 0))
return beg;
5499 if ((*each)(current, arg))
break;
5500 if (
NIL_P(next))
break;
5521 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5529 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5537 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5545 if ((*each)(current, arg))
break;
5559 if (!
rb_equal(str, *argp))
return 0;
5588 if (b <= v && v < e)
return Qtrue;
5589 return RBOOL(!
RTEST(exclusive) && v == e);
5602 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5604 return RBOOL(
NIL_P(val));
5627 return rb_str_subpat(str, indx,
INT2FIX(0));
5630 if (rb_str_index(str, indx, 0) != -1)
5636 long beg,
len = str_strlen(str, NULL);
5648 return str_substr(str, idx, 1, FALSE);
5667 rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5671 return rb_str_subpat(str, argv[0], argv[1]);
5680 return rb_str_aref(str, argv[0]);
5689 str_modifiable(str);
5690 if (
len > olen)
len = olen;
5692 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5694 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5697 memmove(
ptr, oldptr +
len, nlen);
5698 if (fl == STR_NOEMBED)
xfree(oldptr);
5701 if (!STR_SHARED_P(str)) {
5703 rb_enc_cr_str_exact_copy(shared, str);
5708 STR_SET_LEN(str, nlen);
5710 if (!SHARABLE_MIDDLE_SUBSTRING) {
5711 TERM_FILL(
ptr + nlen, TERM_LEN(str));
5718 rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5724 if (beg == 0 && vlen == 0) {
5729 str_modify_keep_cr(str);
5733 RESIZE_CAPA(str, slen + vlen -
len);
5743 memmove(sptr + beg + vlen,
5745 slen - (beg +
len));
5747 if (vlen < beg &&
len < 0) {
5751 memmove(sptr + beg,
RSTRING_PTR(val) + vbeg, vlen);
5754 STR_SET_LEN(str, slen);
5755 TERM_FILL(&sptr[slen], TERM_LEN(str));
5771 int singlebyte = single_byte_optimizable(str);
5778 slen = str_strlen(str, enc);
5780 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5789 if (
len > slen - beg) {
5799 rb_str_update_0(str, beg,
len, val);
5811 long start, end,
len;
5821 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5825 nth += regs->num_regs;
5835 enc = rb_enc_check_str(str, val);
5836 rb_str_update_0(str, start,
len, val);
5845 switch (
TYPE(indx)) {
5847 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5851 beg = rb_str_index(str, indx, 0);
5905 rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5909 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5917 return rb_str_aset(str, argv[0], argv[1]);
5977 rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5985 str_modify_keep_cr(str);
5993 if ((nth += regs->num_regs) <= 0)
return Qnil;
5995 else if (nth >= regs->num_regs)
return Qnil;
5997 len = END(nth) - beg;
6000 else if (argc == 2) {
6013 beg = rb_str_index(str, indx, 0);
6014 if (beg == -1)
return Qnil;
6040 rb_enc_cr_str_copy_for_substr(result, str);
6050 if (beg +
len > slen)
6054 slen - (beg +
len));
6056 STR_SET_LEN(str, slen);
6057 TERM_FILL(&sptr[slen], TERM_LEN(str));
6068 switch (OBJ_BUILTIN_TYPE(pat)) {
6087 get_pat_quoted(
VALUE pat,
int check)
6091 switch (OBJ_BUILTIN_TYPE(pat)) {
6105 if (check && is_broken_string(pat)) {
6112 rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6115 pos = rb_str_byteindex(str, pat, pos);
6116 if (set_backref_str) {
6118 str = rb_str_new_frozen_String(str);
6119 rb_backref_set_string(str, pos,
RSTRING_LEN(pat));
6128 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
6148 rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6168 pat = get_pat_quoted(argv[0], 1);
6170 str_modifiable(str);
6171 beg = rb_pat_search(pat, str, 0, 1);
6194 if (iter || !
NIL_P(hash)) {
6204 str_mod_check(str, p,
len);
6205 rb_check_frozen(str);
6218 rb_enc_inspect_name(str_enc),
6219 rb_enc_inspect_name(STR_ENC_GET(repl)));
6221 enc = STR_ENC_GET(repl);
6237 RESIZE_CAPA(str,
len + rlen - plen);
6241 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6244 memmove(p + beg0, rp, rlen);
6246 STR_SET_LEN(str,
len);
6276 rb_str_sub_bang(argc, argv, str);
6281 str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6284 long beg, beg0, end0;
6285 long offset, blen, slen,
len, last;
6286 enum {STR, ITER, MAP} mode = STR;
6288 int need_backref = -1;
6307 rb_error_arity(argc, 1, 2);
6310 pat = get_pat_quoted(argv[0], 1);
6311 beg = rb_pat_search(pat, str, 0, need_backref);
6313 if (bang)
return Qnil;
6323 str_enc = STR_ENC_GET(str);
6349 str_mod_check(str, sp, slen);
6354 else if (need_backref) {
6356 if (need_backref < 0) {
6357 need_backref = val != repl;
6364 len = beg0 - offset;
6381 offset = end0 +
len;
6385 beg = rb_pat_search(pat, str, offset, need_backref);
6392 rb_pat_search(pat, str, last, 1);
6394 str_shared_replace(str, dest);
6422 rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6424 str_modify_keep_cr(str);
6425 return str_gsub(argc, argv, str, 1);
6446 rb_str_gsub(
int argc,
VALUE *argv,
VALUE str)
6448 return str_gsub(argc, argv, str, 0);
6466 str_modifiable(str);
6467 if (str == str2)
return str;
6471 return str_replace(str, str2);
6486 rb_str_clear(
VALUE str)
6490 STR_SET_LEN(str, 0);
6511 rb_str_chr(
VALUE str)
6559 char *
ptr, *head, *left = 0;
6563 if (pos < -
len ||
len <= pos)
6570 char byte = (char)(
NUM2INT(w) & 0xFF);
6572 if (!str_independent(str))
6573 str_make_independent(str);
6574 enc = STR_ENC_GET(str);
6577 if (!STR_EMBED_P(str)) {
6610 str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6614 if (beg > n ||
len < 0)
return Qnil;
6617 if (beg < 0)
return Qnil;
6622 if (!empty)
return Qnil;
6626 VALUE str2 = str_subseq(str, beg,
len);
6628 str_enc_copy_direct(str2, str);
6673 return str_byte_substr(str, beg,
len, TRUE);
6678 return str_byte_substr(str, idx, 1, FALSE);
6725 rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6730 return str_byte_substr(str, beg,
len, TRUE);
6733 return str_byte_aref(str, argv[0]);
6737 str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6742 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6751 if (*
len > slen - *beg) {
6755 str_ensure_byte_pos(str, *beg);
6756 str_ensure_byte_pos(str, end);
6781 rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6783 long beg,
len, vbeg, vlen;
6788 if (!(argc == 2 || argc == 3 || argc == 5)) {
6794 rb_builtin_class_name(argv[0]));
6807 rb_builtin_class_name(argv[2]));
6827 str_check_beg_len(str, &beg, &
len);
6828 str_check_beg_len(val, &vbeg, &vlen);
6829 str_modify_keep_cr(str);
6835 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6853 rb_str_reverse(
VALUE str)
6861 enc = STR_ENC_GET(str);
6868 if (single_byte_optimizable(str)) {
6896 str_enc_copy_direct(rev, str);
6916 rb_str_reverse_bang(
VALUE str)
6919 if (single_byte_optimizable(str)) {
6922 str_modify_keep_cr(str);
6932 str_shared_replace(str, rb_str_reverse(str));
6936 str_modify_keep_cr(str);
6961 i = rb_str_index(str, arg, 0);
6963 return RBOOL(i != -1);
7000 rb_str_to_i(
int argc,
VALUE *argv,
VALUE str)
7031 rb_str_to_f(
VALUE str)
7046 rb_str_to_s(
VALUE str)
7058 char s[RUBY_MAX_CHAR_LEN];
7066 #define CHAR_ESC_LEN 13 /* size bound passed to snprintf when formatting one escape sequence; the buffers it fills are declared with CHAR_ESC_LEN + 1 bytes */
7069 rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7071 char buf[CHAR_ESC_LEN + 1];
7079 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7081 else if (c < 0x10000) {
7082 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7085 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7090 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7093 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7096 l = (int)strlen(buf);
7102 ruby_escaped_char(
int c)
7105 case '\0':
return "\\0";
7106 case '\n':
return "\\n";
7107 case '\r':
return "\\r";
7108 case '\t':
return "\\t";
7109 case '\f':
return "\\f";
7110 case '\013':
return "\\v";
7111 case '\010':
return "\\b";
7112 case '\007':
return "\\a";
7113 case '\033':
return "\\e";
7114 case '\x7f':
return "\\c?";
7120 rb_str_escape(
VALUE str)
7126 const char *prev = p;
7127 char buf[CHAR_ESC_LEN + 1];
7137 if (p > prev) str_buf_cat(result, prev, p - prev);
7140 n = (int)(pend - p);
7142 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7143 str_buf_cat(result, buf, strlen(buf));
7151 cc = ruby_escaped_char(c);
7153 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7154 str_buf_cat(result, cc, strlen(cc));
7160 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7161 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7165 if (p > prev) str_buf_cat(result, prev, p - prev);
7189 const char *p, *pend, *prev;
7190 char buf[CHAR_ESC_LEN + 1];
7199 str_buf_cat2(result,
"\"");
7209 if (p > prev) str_buf_cat(result, prev, p - prev);
7212 n = (int)(pend - p);
7214 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7215 str_buf_cat(result, buf, strlen(buf));
7223 if ((asciicompat || unicode_p) &&
7224 (c ==
'"'|| c ==
'\\' ||
7229 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7230 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7231 str_buf_cat2(result,
"\\");
7232 if (asciicompat || enc == resenc) {
7238 case '\n': cc =
'n';
break;
7239 case '\r': cc =
'r';
break;
7240 case '\t': cc =
't';
break;
7241 case '\f': cc =
'f';
break;
7242 case '\013': cc =
'v';
break;
7243 case '\010': cc =
'b';
break;
7244 case '\007': cc =
'a';
break;
7245 case 033: cc =
'e';
break;
7246 default: cc = 0;
break;
7249 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7252 str_buf_cat(result, buf, 2);
7269 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7270 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7275 if (p > prev) str_buf_cat(result, prev, p - prev);
7276 str_buf_cat2(result,
"\"");
7281 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) /* true when p is still before e and the byte at p is '$', '@' or '{'; p is evaluated several times */
7304 const char *p, *pend;
7308 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7313 len += strlen(enc->name);
7319 unsigned char c = *p++;
7322 case '"':
case '\\':
7323 case '\n':
case '\r':
7324 case '\t':
case '\f':
7325 case '\013':
case '\010':
case '\007':
case '\033':
7330 clen = IS_EVSTR(p, pend) ? 2 : 1;
7338 if (u8 && c > 0x7F) {
7344 else if (cc <= 0xFFFFF)
7357 if (clen > LONG_MAX -
len) {
7369 unsigned char c = *p++;
7371 if (c ==
'"' || c ==
'\\') {
7375 else if (c ==
'#') {
7376 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7379 else if (c ==
'\n') {
7383 else if (c ==
'\r') {
7387 else if (c ==
'\t') {
7391 else if (c ==
'\f') {
7395 else if (c ==
'\013') {
7399 else if (c ==
'\010') {
7403 else if (c ==
'\007') {
7407 else if (c ==
'\033') {
7422 snprintf(q, qend-q,
"u%04X", cc);
7424 snprintf(q, qend-q,
"u{%X}", cc);
7429 snprintf(q, qend-q,
"x%02X", c);
7436 snprintf(q, qend-q, nonascii_suffix, enc->name);
7446 unescape_ascii(
unsigned int c)
7470 undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7472 const char *s = *ss;
7476 unsigned char buf[6];
7494 *buf = unescape_ascii(*s);
7507 if (*penc != enc_utf8) {
7526 if (hexlen == 0 || hexlen > 6) {
7532 if (0xd800 <= c && c <= 0xdfff) {
7545 if (0xd800 <= c && c <= 0xdfff) {
7576 static VALUE rb_str_is_ascii_only_p(
VALUE str);
7594 str_undump(
VALUE str)
7601 bool binary =
false;
7605 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7608 if (!str_null_check(str, &w)) {
7612 if (*s !=
'"')
goto invalid_format;
7630 static const char force_encoding_suffix[] =
".force_encoding(\"";
7631 static const char dup_suffix[] =
".dup";
7632 const char *encname;
7637 size =
sizeof(dup_suffix) - 1;
7638 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7640 size =
sizeof(force_encoding_suffix) - 1;
7641 if (s_end - s <= size)
goto invalid_format;
7642 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7650 s = memchr(s,
'"', s_end-s);
7652 if (!s)
goto invalid_format;
7653 if (s_end - s != 2)
goto invalid_format;
7654 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7656 encidx = rb_enc_find_index2(encname, (
long)size);
7670 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7681 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7694 str_true_enc(
VALUE str)
7697 rb_str_check_dummy_enc(enc);
7701 static OnigCaseFoldType
7702 check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7708 if (argv[0]==sym_turkic) {
7709 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7711 if (argv[1]==sym_lithuanian)
7712 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7717 else if (argv[0]==sym_lithuanian) {
7718 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7720 if (argv[1]==sym_turkic)
7721 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7728 else if (argv[0]==sym_ascii)
7729 flags |= ONIGENC_CASE_ASCII_ONLY;
7730 else if (argv[0]==sym_fold) {
7731 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7732 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7750 #define CASE_MAPPING_ADDITIONAL_LENGTH 20 /* fixed slack added to the computed capacity of every case-mapping buffer */
7751 #ifndef CASEMAP_DEBUG
7752 # define CASEMAP_DEBUG 0
7760 OnigUChar space[FLEX_ARY_LEN];
7764 mapping_buffer_free(
void *p)
7768 while (current_buffer) {
7769 previous_buffer = current_buffer;
7770 current_buffer = current_buffer->next;
7771 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7777 {0, mapping_buffer_free,},
7778 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7786 const OnigUChar *source_current, *source_end;
7787 int target_length = 0;
7788 VALUE buffer_anchor;
7791 size_t buffer_count = 0;
7792 int buffer_length_or_invalid;
7801 while (source_current < source_end) {
7803 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7804 if (CASEMAP_DEBUG) {
7805 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7808 *pre_buffer = current_buffer;
7809 pre_buffer = ¤t_buffer->next;
7810 current_buffer->next = NULL;
7811 current_buffer->capa =
capa;
7812 buffer_length_or_invalid = enc->case_map(flags,
7813 &source_current, source_end,
7814 current_buffer->space,
7815 current_buffer->space+current_buffer->capa,
7817 if (buffer_length_or_invalid < 0) {
7818 current_buffer =
DATA_PTR(buffer_anchor);
7820 mapping_buffer_free(current_buffer);
7823 target_length += current_buffer->used = buffer_length_or_invalid;
7825 if (CASEMAP_DEBUG) {
7826 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7829 if (buffer_count==1) {
7830 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7833 char *target_current;
7837 current_buffer =
DATA_PTR(buffer_anchor);
7838 while (current_buffer) {
7839 memcpy(target_current, current_buffer->space, current_buffer->used);
7840 target_current += current_buffer->used;
7841 current_buffer = current_buffer->next;
7844 current_buffer =
DATA_PTR(buffer_anchor);
7846 mapping_buffer_free(current_buffer);
7851 str_enc_copy_direct(target, source);
7860 const OnigUChar *source_current, *source_end;
7861 OnigUChar *target_current, *target_end;
7863 int length_or_invalid;
7865 if (old_length == 0)
return Qnil;
7869 if (source == target) {
7870 target_current = (OnigUChar*)source_current;
7871 target_end = (OnigUChar*)source_end;
7878 length_or_invalid = onigenc_ascii_only_case_map(flags,
7879 &source_current, source_end,
7880 target_current, target_end, enc);
7881 if (length_or_invalid < 0)
7883 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7884 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7885 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7887 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7890 str_enc_copy(target, source);
7896 upcase_single(
VALUE str)
7899 bool modified =
false;
7902 unsigned int c = *(
unsigned char*)s;
7904 if (
'a' <= c && c <=
'z') {
7905 *s =
'A' + (c -
'a');
7933 rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7936 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7938 flags = check_case_options(argc, argv, flags);
7939 str_modify_keep_cr(str);
7940 enc = str_true_enc(str);
7941 if (case_option_single_p(flags, enc, str)) {
7942 if (upcase_single(str))
7943 flags |= ONIGENC_CASE_MODIFIED;
7945 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7946 rb_str_ascii_casemap(str, str, &flags, enc);
7948 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7950 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7972 rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7975 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7978 flags = check_case_options(argc, argv, flags);
7979 enc = str_true_enc(str);
7980 if (case_option_single_p(flags, enc, str)) {
7982 str_enc_copy_direct(ret, str);
7985 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7987 rb_str_ascii_casemap(str, ret, &flags, enc);
7990 ret = rb_str_casemap(str, &flags, enc);
7997 downcase_single(
VALUE str)
8000 bool modified =
false;
8003 unsigned int c = *(
unsigned char*)s;
8005 if (
'A' <= c && c <=
'Z') {
8006 *s =
'a' + (c -
'A');
8035 rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8038 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8040 flags = check_case_options(argc, argv, flags);
8041 str_modify_keep_cr(str);
8042 enc = str_true_enc(str);
8043 if (case_option_single_p(flags, enc, str)) {
8044 if (downcase_single(str))
8045 flags |= ONIGENC_CASE_MODIFIED;
8047 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8048 rb_str_ascii_casemap(str, str, &flags, enc);
8050 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8052 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8074 rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8077 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8080 flags = check_case_options(argc, argv, flags);
8081 enc = str_true_enc(str);
8082 if (case_option_single_p(flags, enc, str)) {
8084 str_enc_copy_direct(ret, str);
8085 downcase_single(ret);
8087 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8089 rb_str_ascii_casemap(str, ret, &flags, enc);
8092 ret = rb_str_casemap(str, &flags, enc);
8120 rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8123 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8125 flags = check_case_options(argc, argv, flags);
8126 str_modify_keep_cr(str);
8127 enc = str_true_enc(str);
8129 if (flags&ONIGENC_CASE_ASCII_ONLY)
8130 rb_str_ascii_casemap(str, str, &flags, enc);
8132 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8134 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8158 rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8161 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8164 flags = check_case_options(argc, argv, flags);
8165 enc = str_true_enc(str);
8167 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8169 rb_str_ascii_casemap(str, ret, &flags, enc);
8172 ret = rb_str_casemap(str, &flags, enc);
8199 rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8202 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8204 flags = check_case_options(argc, argv, flags);
8205 str_modify_keep_cr(str);
8206 enc = str_true_enc(str);
8207 if (flags&ONIGENC_CASE_ASCII_ONLY)
8208 rb_str_ascii_casemap(str, str, &flags, enc);
8210 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8212 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8236 rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8239 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8242 flags = check_case_options(argc, argv, flags);
8243 enc = str_true_enc(str);
8245 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8247 rb_str_ascii_casemap(str, ret, &flags, enc);
8250 ret = rb_str_casemap(str, &flags, enc);
8255 typedef unsigned char *USTR;
8259 unsigned int now, max;
8271 if (t->p == t->pend)
return -1;
8272 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8277 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8279 if (t->p < t->pend) {
8283 if (t->now < 0x80 && c < 0x80) {
8285 "invalid range \"%c-%c\" in string transliteration",
8293 else if (t->now < c) {
8302 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8303 if (t->now == t->max) {
8308 if (t->now < t->max) {
8324 const unsigned int errc = -1;
8325 unsigned int trans[256];
8327 struct tr trsrc, trrepl;
8329 unsigned int c, c0, last = 0;
8330 int modify = 0, i, l;
8331 unsigned char *s, *send;
8333 int singlebyte = single_byte_optimizable(str);
8337 #define CHECK_IF_ASCII(c) \
8338 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8339 (cr = ENC_CODERANGE_VALID) : 0)
8345 return rb_str_delete_bang(1, &src, str);
8360 trsrc.p + l < trsrc.pend) {
8366 trsrc.gen = trrepl.gen = 0;
8367 trsrc.now = trrepl.now = 0;
8368 trsrc.max = trrepl.max = 0;
8371 for (i=0; i<256; i++) {
8374 while ((c = trnext(&trsrc, enc)) != errc) {
8383 while ((c = trnext(&trrepl, enc)) != errc)
8386 for (i=0; i<256; i++) {
8387 if (trans[i] != errc) {
8395 for (i=0; i<256; i++) {
8398 while ((c = trnext(&trsrc, enc)) != errc) {
8399 r = trnext(&trrepl, enc);
8400 if (r == errc) r = trrepl.now;
8414 str_modify_keep_cr(str);
8420 unsigned int save = -1;
8421 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8443 if (cflag) c = last;
8446 else if (cflag) c = errc;
8452 if (c != (
unsigned int)-1) {
8464 if (enc != e1) may_modify = 1;
8466 if ((offset = t - buf) + tlen > max) {
8467 size_t MAYBE_UNUSED(old) = max + termlen;
8468 max = offset + tlen + (send - s);
8469 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8473 if (may_modify && memcmp(s, t, tlen) != 0) {
8479 if (!STR_EMBED_P(str)) {
8480 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8482 TERM_FILL((
char *)t, termlen);
8483 RSTRING(str)->as.heap.ptr = (
char *)buf;
8484 STR_SET_LEN(str, t - buf);
8485 STR_SET_NOEMBED(str);
8486 RSTRING(str)->as.heap.aux.capa = max;
8490 c = (
unsigned char)*s;
8491 if (trans[c] != errc) {
8508 long offset, max = (long)((send - s) * 1.2);
8509 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8530 if (cflag) c = last;
8533 else if (cflag) c = errc;
8537 c = cflag ? last : errc;
8545 if (enc != e1) may_modify = 1;
8547 if ((offset = t - buf) + tlen > max) {
8548 size_t MAYBE_UNUSED(old) = max + termlen;
8549 max = offset + tlen + (long)((send - s) * 1.2);
8550 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8555 if (may_modify && memcmp(s, t, tlen) != 0) {
8563 if (!STR_EMBED_P(str)) {
8564 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8566 TERM_FILL((
char *)t, termlen);
8567 RSTRING(str)->as.heap.ptr = (
char *)buf;
8568 STR_SET_LEN(str, t - buf);
8569 STR_SET_NOEMBED(str);
8570 RSTRING(str)->as.heap.aux.capa = max;
8595 return tr_trans(str, src, repl, 0);
8642 tr_trans(str, src, repl, 0);
8646 #define TR_TABLE_MAX (UCHAR_MAX+1)
8647 #define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8649 tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8652 const unsigned int errc = -1;
8653 char buf[TR_TABLE_MAX];
8656 VALUE table = 0, ptable = 0;
8657 int i, l, cflag = 0;
8660 tr.gen =
tr.now =
tr.max = 0;
8667 for (i=0; i<TR_TABLE_MAX; i++) {
8670 stable[TR_TABLE_MAX] = cflag;
8672 else if (stable[TR_TABLE_MAX] && !cflag) {
8673 stable[TR_TABLE_MAX] = 0;
8675 for (i=0; i<TR_TABLE_MAX; i++) {
8679 while ((c = trnext(&
tr, enc)) != errc) {
8680 if (c < TR_TABLE_MAX) {
8681 buf[(
unsigned char)c] = !cflag;
8686 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8703 for (i=0; i<TR_TABLE_MAX; i++) {
8704 stable[i] = stable[i] && buf[i];
8706 if (!table && !cflag) {
8713 tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8715 if (c < TR_TABLE_MAX) {
8716 return table[c] != 0;
8730 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8744 rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8746 char squeez[TR_TABLE_SIZE];
8749 VALUE del = 0, nodel = 0;
8751 int i, ascompat, cr;
8755 for (i=0; i<argc; i++) {
8760 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8763 str_modify_keep_cr(str);
8772 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8785 if (tr_find(c, squeez, del, nodel)) {
8796 TERM_FILL(t, TERM_LEN(str));
8800 if (modify)
return str;
8820 rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8823 rb_str_delete_bang(argc, argv, str);
8837 rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8839 char squeez[TR_TABLE_SIZE];
8841 VALUE del = 0, nodel = 0;
8842 unsigned char *s, *send, *t;
8844 int ascompat, singlebyte = single_byte_optimizable(str);
8848 enc = STR_ENC_GET(str);
8851 for (i=0; i<argc; i++) {
8856 if (singlebyte && !single_byte_optimizable(s))
8858 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8862 str_modify_keep_cr(str);
8871 unsigned int c = *s++;
8872 if (c != save || (argc > 0 && !squeez[c])) {
8882 if (ascompat && (c = *s) < 0x80) {
8883 if (c != save || (argc > 0 && !squeez[c])) {
8891 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8901 TERM_FILL((
char *)t, TERM_LEN(str));
8907 if (modify)
return str;
8930 rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8933 rb_str_squeeze_bang(argc, argv, str);
8951 return tr_trans(str, src, repl, 1);
8974 tr_trans(str, src, repl, 1);
9003 rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9005 char table[TR_TABLE_SIZE];
9007 VALUE del = 0, nodel = 0, tstr;
9022 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9023 !is_broken_string(str)) {
9031 if (*(
unsigned char*)s++ == c) n++;
9037 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9038 for (i=1; i<argc; i++) {
9042 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9052 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9061 if (tr_find(c, table, del, nodel)) {
9072 rb_fs_check(
VALUE val)
9076 if (
NIL_P(val))
return 0;
9081 static const char isspacetable[256] = {
9082 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9083 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9084 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9085 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9086 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9087 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9088 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9089 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9090 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9091 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9092 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9093 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9094 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9095 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9096 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9097 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9100 #define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9103 split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9105 if (empty_count >= 0 &&
len == 0) {
9106 return empty_count + 1;
9108 if (empty_count > 0) {
9113 }
while (--empty_count > 0);
9117 rb_yield(str_new_empty_String(str));
9118 }
while (--empty_count > 0);
9132 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9136 literal_split_pattern(
VALUE spat, split_type_t default_type)
9144 return SPLIT_TYPE_CHARS;
9147 if (
len == 1 &&
ptr[0] ==
' ') {
9148 return SPLIT_TYPE_AWK;
9154 return SPLIT_TYPE_AWK;
9157 return default_type;
9170 rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9175 split_type_t split_type;
9176 long beg, end, i = 0, empty_count = -1;
9181 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9183 if (lim <= 0) limit =
Qnil;
9184 else if (lim == 1) {
9196 if (
NIL_P(limit) && !lim) empty_count = 0;
9198 enc = STR_ENC_GET(str);
9199 split_type = SPLIT_TYPE_REGEXP;
9201 spat = get_pat_quoted(spat, 0);
9204 split_type = SPLIT_TYPE_AWK;
9206 else if (!(spat = rb_fs_check(spat))) {
9212 if (split_type != SPLIT_TYPE_AWK) {
9217 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9218 if (split_type == SPLIT_TYPE_AWK) {
9220 split_type = SPLIT_TYPE_STRING;
9225 mustnot_broken(spat);
9226 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9234 #define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
9239 if (split_type == SPLIT_TYPE_AWK) {
9246 if (is_ascii_string(str)) {
9247 while (
ptr < eptr) {
9248 c = (
unsigned char)*
ptr++;
9250 if (ascii_isspace(c)) {
9256 if (!
NIL_P(limit) && lim <= i)
break;
9259 else if (ascii_isspace(c)) {
9260 SPLIT_STR(beg, end-beg);
9263 if (!
NIL_P(limit)) ++i;
9271 while (
ptr < eptr) {
9283 if (!
NIL_P(limit) && lim <= i)
break;
9287 SPLIT_STR(beg, end-beg);
9290 if (!
NIL_P(limit)) ++i;
9298 else if (split_type == SPLIT_TYPE_STRING) {
9299 char *str_start =
ptr;
9300 char *substr_start =
ptr;
9305 mustnot_broken(str);
9307 while (
ptr < eptr &&
9311 if (t !=
ptr + end) {
9315 SPLIT_STR(substr_start - str_start, (
ptr+end) - substr_start);
9318 if (!
NIL_P(limit) && lim <= ++i)
break;
9320 beg =
ptr - str_start;
9322 else if (split_type == SPLIT_TYPE_CHARS) {
9323 char *str_start =
ptr;
9327 mustnot_broken(str);
9329 while (
ptr < eptr &&
9331 SPLIT_STR(
ptr - str_start, n);
9333 if (!
NIL_P(limit) && lim <= ++i)
break;
9335 beg =
ptr - str_start;
9347 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (
void)0)) {
9352 if (start == end && BEG(0) == END(0)) {
9357 else if (last_null == 1) {
9371 SPLIT_STR(beg, end-beg);
9372 beg = start = END(0);
9376 for (idx=1; idx < regs->num_regs; idx++) {
9377 if (BEG(idx) == -1)
continue;
9378 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9380 if (!
NIL_P(limit) && lim <= ++i)
break;
9382 if (match) rb_match_unbusy(match);
9388 return result ? result : str;
9398 return rb_str_split_m(1, &sep, str);
9401 #define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9416 #define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9419 chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9444 #define rb_rs get_rs()
9451 const char *
ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9452 long pos,
len, rslen;
9458 static ID keywords[1];
9463 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9467 if (!ENUM_ELEM(ary, str)) {
9491 const char *eol = NULL;
9493 while (subend < pend) {
9494 long chomp_rslen = 0;
9500 if (eol == subend)
break;
9504 chomp_rslen = -rslen;
9508 if (!subptr) subptr = subend;
9512 }
while (subend < pend);
9514 if (rslen == 0) chomp_rslen = 0;
9516 subend - subptr + (chomp ? chomp_rslen : rslen));
9517 if (ENUM_ELEM(ary, line)) {
9518 str_mod_check(str,
ptr,
len);
9520 subptr = eol = NULL;
9539 while (subptr < pend) {
9540 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9544 if (hit != adjusted) {
9548 subend = hit += rslen;
9551 subend = chomp_newline(subptr, subend, enc);
9558 if (ENUM_ELEM(ary, line)) {
9559 str_mod_check(str,
ptr,
len);
9564 if (subptr != pend) {
9567 pend = chomp_newline(subptr, pend, enc);
9569 else if (pend - subptr >= rslen &&
9570 memcmp(pend - rslen, rsptr, rslen) == 0) {
9575 ENUM_ELEM(ary, line);
9596 rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9599 return rb_str_enumerate_lines(argc, argv, str, 0);
9612 rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9614 VALUE ary = WANTARRAY(
"lines", 0);
9615 return rb_str_enumerate_lines(argc, argv, str, ary);
9648 rb_str_each_byte(
VALUE str)
9651 return rb_str_enumerate_bytes(str, 0);
9663 rb_str_bytes(
VALUE str)
9666 return rb_str_enumerate_bytes(str, ary);
9689 for (i = 0; i <
len; i += n) {
9695 for (i = 0; i <
len; i += n) {
9717 rb_str_each_char(
VALUE str)
9720 return rb_str_enumerate_chars(str, 0);
9732 rb_str_chars(
VALUE str)
9735 return rb_str_enumerate_chars(str, ary);
9739 rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9744 const char *
ptr, *end;
9747 if (single_byte_optimizable(str))
9748 return rb_str_enumerate_bytes(str, ary);
9753 enc = STR_ENC_GET(str);
9777 rb_str_each_codepoint(
VALUE str)
9780 return rb_str_enumerate_codepoints(str, 0);
9792 rb_str_codepoints(
VALUE str)
9795 return rb_str_enumerate_codepoints(str, ary);
9803 const OnigUChar source_ascii[] =
"\\X";
9804 const OnigUChar *source = source_ascii;
9805 size_t source_len =
sizeof(source_ascii) - 1;
9808 #define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9809 #define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9810 #define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9811 #define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9812 #define CASE_UTF(e) \
9813 case ENCINDEX_UTF_##e: { \
9814 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9815 source = source_UTF_##e; \
9816 source_len = sizeof(source_UTF_##e); \
9819 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9827 regex_t *reg_grapheme_cluster;
9829 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9830 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9832 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9833 onig_error_code_to_str(message, r, &einfo);
9834 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9837 return reg_grapheme_cluster;
9844 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9847 if (!reg_grapheme_cluster_utf8) {
9848 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9851 return reg_grapheme_cluster_utf8;
9860 size_t grapheme_cluster_count = 0;
9862 const char *
ptr, *end;
9868 bool cached_reg_grapheme_cluster =
true;
9869 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9870 if (!reg_grapheme_cluster) {
9871 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9872 cached_reg_grapheme_cluster =
false;
9879 OnigPosition
len = onig_match(reg_grapheme_cluster,
9880 (
const OnigUChar *)
ptr, (
const OnigUChar *)end,
9881 (
const OnigUChar *)
ptr, NULL, 0);
9882 if (
len <= 0)
break;
9883 grapheme_cluster_count++;
9887 if (!cached_reg_grapheme_cluster) {
9888 onig_free(reg_grapheme_cluster);
9891 return SIZET2NUM(grapheme_cluster_count);
9895 rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9899 const char *ptr0, *
ptr, *end;
9902 return rb_str_enumerate_chars(str, ary);
9907 bool cached_reg_grapheme_cluster =
true;
9908 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9909 if (!reg_grapheme_cluster) {
9910 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9911 cached_reg_grapheme_cluster =
false;
9918 OnigPosition
len = onig_match(reg_grapheme_cluster,
9919 (
const OnigUChar *)
ptr, (
const OnigUChar *)end,
9920 (
const OnigUChar *)
ptr, NULL, 0);
9921 if (
len <= 0)
break;
9926 if (!cached_reg_grapheme_cluster) {
9927 onig_free(reg_grapheme_cluster);
9947 rb_str_each_grapheme_cluster(
VALUE str)
9950 return rb_str_enumerate_grapheme_clusters(str, 0);
9962 rb_str_grapheme_clusters(
VALUE str)
9965 return rb_str_enumerate_grapheme_clusters(str, ary);
9969 chopped_length(
VALUE str)
9972 const char *p, *p2, *beg, *end;
9976 if (beg >= end)
return 0;
9997 rb_str_chop_bang(
VALUE str)
9999 str_modify_keep_cr(str);
10002 len = chopped_length(str);
10003 STR_SET_LEN(str,
len);
10023 rb_str_chop(
VALUE str)
10029 smart_chomp(
VALUE str,
const char *e,
const char *p)
10048 if (--e > p && *(e-1) ==
'\r') {
10065 char *pp, *e, *rsptr;
10070 if (
len == 0)
return 0;
10073 return smart_chomp(str, e, p);
10094 while (e > p && *(e-1) ==
'\n') {
10096 if (e > p && *(e-1) ==
'\r')
10102 if (rslen >
len)
return len;
10105 newline = rsptr[rslen-1];
10108 if (newline ==
'\n')
10109 return smart_chomp(str, e, p);
10113 return smart_chomp(str, e, p);
10118 if (is_broken_string(rs)) {
10122 if (p[
len-1] == newline &&
10124 memcmp(rsptr, pp, rslen) == 0)) {
10125 if (at_char_boundary(p, pp, e, enc))
10126 return len - rslen;
10138 chomp_rs(
int argc,
const VALUE *argv)
10142 VALUE rs = argv[0];
10155 long len = chompped_length(str, rs);
10156 if (
len >= olen)
return Qnil;
10157 str_modify_keep_cr(str);
10158 STR_SET_LEN(str,
len);
10176 rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10179 str_modifiable(str);
10181 rs = chomp_rs(argc, argv);
10183 return rb_str_chomp_string(str, rs);
10196 rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10198 VALUE rs = chomp_rs(argc, argv);
10206 const char *
const start = s;
10208 if (!s || s >= e)
return 0;
10211 if (single_byte_optimizable(str)) {
10212 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10237 rb_str_lstrip_bang(
VALUE str)
10241 long olen, loffset;
10243 str_modify_keep_cr(str);
10244 enc = STR_ENC_GET(str);
10246 loffset = lstrip_offset(str, start, start+olen, enc);
10248 long len = olen-loffset;
10249 s = start + loffset;
10250 memmove(start, s,
len);
10251 STR_SET_LEN(str,
len);
10275 rb_str_lstrip(
VALUE str)
10280 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10281 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10290 rb_str_check_dummy_enc(enc);
10294 if (!s || s >= e)
return 0;
10298 if (single_byte_optimizable(str)) {
10300 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10325 rb_str_rstrip_bang(
VALUE str)
10329 long olen, roffset;
10331 str_modify_keep_cr(str);
10332 enc = STR_ENC_GET(str);
10334 roffset = rstrip_offset(str, start, start+olen, enc);
10336 long len = olen - roffset;
10338 STR_SET_LEN(str,
len);
10362 rb_str_rstrip(
VALUE str)
10366 long olen, roffset;
10368 enc = STR_ENC_GET(str);
10370 roffset = rstrip_offset(str, start, start+olen, enc);
10372 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10388 rb_str_strip_bang(
VALUE str)
10391 long olen, loffset, roffset;
10394 str_modify_keep_cr(str);
10395 enc = STR_ENC_GET(str);
10397 loffset = lstrip_offset(str, start, start+olen, enc);
10398 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10400 if (loffset > 0 || roffset > 0) {
10401 long len = olen-roffset;
10404 memmove(start, start + loffset,
len);
10406 STR_SET_LEN(str,
len);
10430 rb_str_strip(
VALUE str)
10433 long olen, loffset, roffset;
10437 loffset = lstrip_offset(str, start, start+olen, enc);
10438 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10440 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10445 scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10448 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10478 if (!regs || regs->num_regs == 1) {
10484 for (
int i = 1; i < regs->num_regs; i++) {
10545 long last = -1, prev = 0;
10548 pat = get_pat_quoted(pat, 1);
10549 mustnot_broken(str);
10553 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10558 if (last >= 0) rb_pat_search(pat, str, last, 1);
10563 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10567 str_mod_check(str, p,
len);
10569 if (last >= 0) rb_pat_search(pat, str, last, 1);
10593 rb_str_hex(
VALUE str)
10620 rb_str_oct(
VALUE str)
10625 #ifndef HAVE_CRYPT_R
10630 rb_nativethread_lock_t lock;
10631 } crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10634 crypt_mutex_initialize(
void)
10702 #ifdef HAVE_CRYPT_R
10705 # define CRYPT_END() ALLOCV_END(databuf)
10707 extern char *crypt(
const char *,
const char *);
10708 # define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10711 const char *s, *saltp;
10713 #ifdef BROKEN_CRYPT
10714 char salt_8bit_clean[3];
10718 mustnot_wchar(str);
10719 mustnot_wchar(salt);
10722 if (
RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10726 #ifdef BROKEN_CRYPT
10727 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10728 salt_8bit_clean[0] = saltp[0] & 0x7f;
10729 salt_8bit_clean[1] = saltp[1] & 0x7f;
10730 salt_8bit_clean[2] =
'\0';
10731 saltp = salt_8bit_clean;
10734 #ifdef HAVE_CRYPT_R
10736 # ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10737 data->initialized = 0;
10739 res = crypt_r(s, saltp, data);
10741 crypt_mutex_initialize();
10743 res = crypt(s, saltp);
10765 rb_str_ord(
VALUE s)
10781 rb_str_sum(
int argc,
VALUE *argv,
VALUE str)
10784 char *
ptr, *p, *pend;
10787 unsigned long sum0 = 0;
10799 str_mod_check(str,
ptr,
len);
10802 sum0 += (
unsigned char)*p;
10813 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10814 sum0 &= (((
unsigned long)1)<<bits)-1;
10834 rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10838 long width,
len, flen = 1, fclen = 1;
10841 const char *f =
" ";
10842 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10844 int singlebyte = 1, cr;
10848 enc = STR_ENC_GET(str);
10856 fclen = str_strlen(pad, enc);
10857 singlebyte = single_byte_optimizable(pad);
10858 if (flen == 0 || fclen == 0) {
10862 len = str_strlen(str, enc);
10863 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10865 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10869 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10870 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10873 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10874 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10875 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10882 memset(p, *f, llen);
10886 while (llen >= fclen) {
10892 memcpy(p, f, llen2);
10899 memset(p, *f, rlen);
10903 while (rlen >= fclen) {
10909 memcpy(p, f, rlen2);
10913 TERM_FILL(p, termlen);
10937 rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10939 return rb_str_justify(argc, argv, str,
'l');
10953 rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10955 return rb_str_justify(argc, argv, str,
'r');
10970 rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10972 return rb_str_justify(argc, argv, str,
'c');
10988 sep = get_pat_quoted(sep, 0);
11000 pos = rb_str_index(str, sep, 0);
11001 if (pos < 0)
goto failed;
11009 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11025 sep = get_pat_quoted(sep, 0);
11038 pos = rb_str_rindex(str, sep, pos);
11049 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11061 rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11065 for (i=0; i<argc; i++) {
11066 VALUE tmp = argv[i];
11068 if (rb_reg_start_with_p(tmp, str))
11072 const char *p, *s, *e;
11083 if (!at_char_right_boundary(p, s, e, enc))
11101 rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11105 for (i=0; i<argc; i++) {
11106 VALUE tmp = argv[i];
11107 const char *p, *s, *e;
11118 if (!at_char_boundary(p, s, e, enc))
11136 deleted_prefix_length(
VALUE str,
VALUE prefix)
11138 const char *strptr, *prefixptr;
11139 long olen, prefixlen;
11144 if (!is_broken_string(prefix) ||
11152 if (prefixlen <= 0)
return 0;
11154 if (olen < prefixlen)
return 0;
11157 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11158 if (is_broken_string(prefix)) {
11159 if (!is_broken_string(str)) {
11163 const char *strend = strptr + olen;
11164 const char *after_prefix = strptr + prefixlen;
11165 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11185 rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11188 str_modify_keep_cr(str);
11190 prefixlen = deleted_prefix_length(str, prefix);
11191 if (prefixlen <= 0)
return Qnil;
11205 rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11209 prefixlen = deleted_prefix_length(str, prefix);
11210 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11225 deleted_suffix_length(
VALUE str,
VALUE suffix)
11227 const char *strptr, *suffixptr;
11228 long olen, suffixlen;
11232 if (is_broken_string(suffix))
return 0;
11237 if (suffixlen <= 0)
return 0;
11239 if (olen < suffixlen)
return 0;
11242 const char *strend = strptr + olen;
11243 const char *before_suffix = strend - suffixlen;
11244 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11245 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11260 rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11262 long olen, suffixlen,
len;
11263 str_modifiable(str);
11265 suffixlen = deleted_suffix_length(str, suffix);
11266 if (suffixlen <= 0)
return Qnil;
11269 str_modify_keep_cr(str);
11270 len = olen - suffixlen;
11271 STR_SET_LEN(str,
len);
11288 rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11292 suffixlen = deleted_suffix_length(str, suffix);
11293 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11310 val = rb_fs_check(val);
11313 "value of %"PRIsVALUE
" must be String or Regexp",
11317 rb_warn_deprecated(
"'$;'", NULL);
11334 str_modifiable(str);
11365 rb_str_b(
VALUE str)
11368 if (STR_EMBED_P(str)) {
11374 str_replace_shared_without_enc(str2, str);
11409 rb_str_valid_encoding_p(
VALUE str)
11429 rb_str_is_ascii_only_p(
VALUE str)
11439 static const char ellipsis[] =
"...";
11440 const long ellipsislen =
sizeof(ellipsis) - 1;
11443 const char *
const p =
RSTRING_PTR(str), *e = p + blen;
11444 VALUE estr, ret = 0;
11451 else if (
len <= ellipsislen ||
11486 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11505 if (enc == STR_ENC_GET(str)) {
11510 return enc_str_scrub(enc, str, repl, cr);
11518 const char *rep, *p, *e, *p1, *sp;
11531 if (!
NIL_P(repl)) {
11532 repl = str_compat_and_valid(repl, enc);
11540 #define DEFAULT_REPLACE_CHAR(str) do { \
11541 static const char replace[sizeof(str)-1] = str; \
11542 rep = replace; replen = (int)sizeof(replace); \
11557 else if (!
NIL_P(repl)) {
11563 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11567 DEFAULT_REPLACE_CHAR(
"?");
11572 p = search_nonascii(p, e);
11596 if (e - p < clen) clen = e - p;
11603 for (; clen > 1; clen--) {
11616 str_mod_check(str, sp, slen);
11617 repl = str_compat_and_valid(repl, enc);
11624 p = search_nonascii(p, e);
11651 str_mod_check(str, sp, slen);
11652 repl = str_compat_and_valid(repl, enc);
11665 else if (!
NIL_P(repl)) {
11669 else if (encidx == ENCINDEX_UTF_16BE) {
11670 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11672 else if (encidx == ENCINDEX_UTF_16LE) {
11673 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11675 else if (encidx == ENCINDEX_UTF_32BE) {
11676 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11678 else if (encidx == ENCINDEX_UTF_32LE) {
11679 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11682 DEFAULT_REPLACE_CHAR(
"?");
11699 if (e - p < clen) clen = e - p;
11700 if (clen <= mbminlen * 2) {
11705 for (; clen > mbminlen; clen-=mbminlen) {
11717 str_mod_check(str, sp, slen);
11718 repl = str_compat_and_valid(repl, enc);
11744 str_mod_check(str, sp, slen);
11745 repl = str_compat_and_valid(repl, enc);
11781 str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11789 static ID id_normalize;
11790 static ID id_normalized_p;
11791 static VALUE mUnicodeNormalize;
11794 unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11796 static int UnicodeNormalizeRequired = 0;
11799 if (!UnicodeNormalizeRequired) {
11800 rb_require(
"unicode_normalize/normalize.rb");
11801 UnicodeNormalizeRequired = 1;
11805 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11842 rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11844 return unicode_normalize_common(argc, argv, str, id_normalize);
11858 rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11860 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11887 rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11889 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12021 #define sym_equal rb_obj_equal
12024 sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12040 rb_str_symname_p(
VALUE sym)
12048 enc = STR_ENC_GET(sym);
12051 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(
ptr) ||
12059 rb_str_quote_unprintable(
VALUE str)
12069 enc = STR_ENC_GET(str);
12072 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12074 return rb_str_escape(str);
12080 rb_id_quote_unprintable(
ID id)
12083 if (!rb_str_symname_p(str)) {
12084 return rb_str_escape(str);
12102 sym_inspect(
VALUE sym)
12109 if (!rb_str_symname_p(str)) {
12114 memmove(dest + 1, dest,
len);
12118 VALUE orig_str = str;
12126 memcpy(dest + 1,
ptr,
len);
12144 rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12167 sym_succ(
VALUE sym)
12246 return rb_str_match(
rb_sym2str(sym), other);
12261 sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12263 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12276 sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12278 return rb_str_match_m_p(argc, argv, sym);
12296 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12307 sym_length(
VALUE sym)
12321 sym_empty(
VALUE sym)
12337 sym_upcase(
int argc,
VALUE *argv,
VALUE sym)
12355 sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12371 sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12387 sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12401 sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12403 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12416 sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12418 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12430 sym_encoding(
VALUE sym)
12436 string_for_symbol(
VALUE name)
12455 name = string_for_symbol(name);
12465 name = string_for_symbol(name);
12481 sym_all_symbols(
VALUE _)
12489 return rb_fstring(str);
12496 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12508 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12509 rb_enc_autoload(enc);
12513 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12519 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12520 rb_enc_autoload(enc);
12524 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12535 rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12540 if (
RB_LIKELY(code >= 0 && code < 0xff)) {
12541 rb_str_buf_cat_byte(str, (
char) code);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RB_LIKELY(x)
Asserts that the given Boolean expression likely holds.
#define RB_UNLIKELY(x)
Asserts that the given Boolean expression likely doesn't hold.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_ensure(VALUE(*b_proc)(VALUE), VALUE data1, VALUE(*e_proc)(VALUE), VALUE data2)
An equivalent to ensure clause.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords, if any, to the #initialize method.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
int rb_enc_get_index(VALUE obj)
Queries the index of the encoding of the passed object, if any.
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
int rb_enc_codelen(int code, rb_encoding *enc)
Queries the number of bytes required to represent the passed code point using the passed encoding.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_mbclen() unless the character at p overruns e.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Look for the "common" encoding between the two.
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
int rb_enc_to_index(rb_encoding *enc)
Queries the index of the encoding.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
rb_encoding * rb_to_encoding(VALUE obj)
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL.
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL.
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
static char * rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the previous (left) character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
rb_encoding * rb_enc_from_index(int idx)
Identical to rb_find_encoding(), except it takes an encoding index instead of a Ruby object.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
VALUE rb_enc_associate_index(VALUE obj, int encindex)
Identical to rb_enc_set_index(), except it additionally does contents fix-up depending on the passed ...
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_obj_encoding(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcallv(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcall(), except it takes the method arguments as a C array.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the given array.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
void rb_gc_register_address(VALUE *valptr)
Inform the garbage collector that the global or static variable pointed by valptr stores a live Ruby ...
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
VALUE rb_ary_new_from_args(long n,...)
Constructs an array from the passed objects.
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Identical to rb_cstr2inum(), except it takes Ruby's strings instead of C's.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_check_hash_type(VALUE obj)
Try converting an object to its hash representation using its to_hash method, if any.
VALUE rb_hash_aref(VALUE hash, VALUE key)
Queries the given key in the given hash table.
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Inserts or replaces ("upsert"s) the objects into the given hash table.
VALUE rb_hash_lookup(VALUE hash, VALUE key)
Identical to rb_hash_aref(), except it always returns RUBY_Qnil for misshits.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_rs
The record separator character for inputs, or the $/.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby's string instead of C's.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_utf8_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "UTF-8" encoding.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_utf8_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
VALUE rb_usascii_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
VALUE rb_usascii_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_buf_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_str_buf_new_cstr(const char *ptr)
This is a rb_str_buf_new() + rb_str_buf_cat() combo.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string is a NUL-terminated ASCII string.
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
VALUE rb_str_dup_frozen(VALUE)
Just another name of rb_str_new_frozen.
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
VALUE rb_locale_str_new_cstr(const char *ptr)
Identical to rb_locale_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method, if any.
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_str_new_cstr(const char *ptr)
Identical to rb_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary".
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_external_str_new_cstr(const char *ptr)
Identical to rb_external_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_str_cat_cstr(VALUE dst, const char *src)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "binary".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
ID rb_intern(const char *name)
Finds or creates a symbol of the given name.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
VALUE rb_id2str(ID id)
Identical to rb_id2name(), except it returns a Ruby's String instead of C's.
void rb_define_hooked_variable(const char *name, VALUE *var, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter)
Identical to rb_define_virtual_variable(), but can also specify a storage.
int capa
Designed capacity of the buffer.
char * ptr
Pointer to the underlying memory region, of at least capa bytes.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
VALUE type(ANYARGS)
ANYARGS-ed function type.
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks the contents for viability as a C string.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into the default external encoding.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks the contents for viability as a C string.
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
union RString::@48 as
String's specific fields.
long len
Length of the string, not including terminating NUL character.
struct RString::@48::@50 embed
Embedded contents.
struct RString::@48::@49 heap
Strings that use separated memory region for contents use this pattern.
VALUE shared
Parent of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.
void ruby_xfree(void *ptr)
Deallocates a storage instance.