14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/encoding.h"
32#include "internal/error.h"
33#include "internal/gc.h"
34#include "internal/hash.h"
35#include "internal/numeric.h"
36#include "internal/object.h"
37#include "internal/proc.h"
38#include "internal/re.h"
39#include "internal/sanitizers.h"
40#include "internal/string.h"
41#include "internal/transcode.h"
47#include "ruby_assert.h"
52#if defined HAVE_CRYPT_R
53# if defined HAVE_CRYPT_H
56#elif !defined HAVE_CRYPT
57# include "missing/crypt.h"
58# define HAVE_CRYPT_R 1
/* Shorthand for entry `no` of the `beg` / `end` arrays reached through a
 * pointer named `regs`; a variable with that name must be in scope wherever
 * these macros are expanded. */
61#define BEG(no) (regs->beg[(no)])
62#define END(no) (regs->end[(no)])
65#undef rb_usascii_str_new
69#undef rb_usascii_str_new_cstr
70#undef rb_utf8_str_new_cstr
71#undef rb_enc_str_new_cstr
72#undef rb_external_str_new_cstr
73#undef rb_locale_str_new_cstr
74#undef rb_str_dup_frozen
75#undef rb_str_buf_new_cstr
/* NOTE(review): presumably an upper bound on the byte length of one
 * character; inferred from the name only -- confirm against its users. */
129#define RUBY_MAX_CHAR_LEN 16
/* String-specific flag names.  Each one is an alias for one of the generic
 * FL_USER<n> flag bits, so they are tested/set with the usual FL_* macros. */
130#define STR_PRECOMPUTED_HASH FL_USER4
131#define STR_SHARED_ROOT FL_USER5
132#define STR_BORROWED FL_USER6
133#define STR_TMPLOCK FL_USER7
134#define STR_NOFREE FL_USER18
135#define STR_FAKESTR FL_USER19
137#define STR_SET_NOEMBED(str) do {\
138 FL_SET((str), STR_NOEMBED);\
139 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Clears STR_NOEMBED, STR_SHARED and STR_NOFREE on `str` with a single
 * FL_UNSET call.  Only flag bits are touched; nothing else is modified here. */
141#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
143#define STR_SET_LEN(str, n) do { \
144 RSTRING(str)->len = (n); \
148str_encindex_fastpath(
int encindex)
152 case ENCINDEX_ASCII_8BIT:
154 case ENCINDEX_US_ASCII:
162str_enc_fastpath(
VALUE str)
/* Terminator length for `str`: 1 when str_enc_fastpath(str) is true,
 * otherwise rb_enc_mbminlen() of the encoding looked up via
 * ENCODING_GET(str).  Note that `str` appears twice in the expansion, so an
 * argument with side effects may be evaluated more than once. */
167#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
168#define TERM_FILL(ptr, termlen) do {\
169 char *const term_fill_ptr = (ptr);\
170 const int term_fill_len = (termlen);\
171 *term_fill_ptr = '\0';\
172 if (UNLIKELY(term_fill_len > 1))\
173 memset(term_fill_ptr, 0, term_fill_len);\
176#define RESIZE_CAPA(str,capacity) do {\
177 const int termlen = TERM_LEN(str);\
178 RESIZE_CAPA_TERM(str,capacity,termlen);\
180#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
181 if (STR_EMBED_P(str)) {\
182 if (str_embed_capa(str) < capacity + termlen) {\
183 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
184 const long tlen = RSTRING_LEN(str);\
185 memcpy(tmp, RSTRING_PTR(str), tlen);\
186 RSTRING(str)->as.heap.ptr = tmp;\
187 RSTRING(str)->len = tlen;\
188 STR_SET_NOEMBED(str);\
189 RSTRING(str)->as.heap.aux.capa = (capacity);\
193 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
194 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
195 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
196 RSTRING(str)->as.heap.aux.capa = (capacity);\
200#define STR_SET_SHARED(str, shared_str) do { \
201 if (!FL_TEST(str, STR_FAKESTR)) { \
202 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
203 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
204 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
205 FL_SET((str), STR_SHARED); \
206 FL_SET((shared_str), STR_SHARED_ROOT); \
207 if (RBASIC_CLASS((shared_str)) == 0) \
208 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Direct access to the `as.heap.ptr` field of `str` (no embed/heap check is
 * performed by the macro itself). */
212#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* `as.heap.aux.capa` converted to size_t, plus TERM_LEN(str). */
213#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias that forwards to get_encoding(str). */
216#define STR_ENC_GET(str) get_encoding(str)
218#if !defined SHARABLE_MIDDLE_SUBSTRING
219# define SHARABLE_MIDDLE_SUBSTRING 0
221#if !SHARABLE_MIDDLE_SUBSTRING
222#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
224#define SHARABLE_SUBSTRING_P(beg, len, end) 1
229str_embed_capa(
VALUE str)
231 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
235rb_str_reembeddable_p(
VALUE str)
237 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
241rb_str_embed_size(
long capa)
247rb_str_size_as_embedded(
VALUE str)
250 if (STR_EMBED_P(str)) {
251 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
255 else if (rb_str_reembeddable_p(str)) {
256 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
259 real_size =
sizeof(
struct RString);
263 real_size +=
sizeof(st_index_t);
270STR_EMBEDDABLE_P(
long len,
long termlen)
272 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
277static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
278static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
280static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
281static inline void str_modifiable(
VALUE str);
286str_make_independent(
VALUE str)
288 long len = RSTRING_LEN(str);
289 int termlen = TERM_LEN(str);
290 str_make_independent_expand((str),
len, 0L, termlen);
293static inline int str_dependent_p(
VALUE str);
296rb_str_make_independent(
VALUE str)
298 if (str_dependent_p(str)) {
299 str_make_independent(str);
304rb_str_make_embedded(
VALUE str)
309 char *buf =
RSTRING(str)->as.heap.ptr;
313 STR_SET_LEN(str,
len);
316 memcpy(RSTRING_PTR(str), buf,
len);
320 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
324rb_debug_rstring_null_ptr(
const char *func)
326 fprintf(stderr,
"%s is returning NULL!! "
327 "SIGSEGV is highly expected to follow immediately.\n"
328 "If you could reproduce, attach your debugger here, "
329 "and look at the passed string.\n",
334static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
337get_encoding(
VALUE str)
343mustnot_broken(
VALUE str)
345 if (is_broken_string(str)) {
346 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
351mustnot_wchar(
VALUE str)
354 if (rb_enc_mbminlen(enc) > 1) {
355 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
361static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
363#if SIZEOF_LONG == SIZEOF_VOIDP
364#define PRECOMPUTED_FAKESTR_HASH 1
368#ifdef PRECOMPUTED_FAKESTR_HASH
370fstring_hash(
VALUE str)
375 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
385#define fstring_hash rb_str_hash
389BARE_STRING_P(
VALUE str)
399static inline st_index_t
400str_do_hash(
VALUE str)
402 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
404 if (e && !is_ascii_string(str)) {
411str_store_precomputed_hash(
VALUE str, st_index_t hash)
417 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
418 size_t free_bytes = str_embed_capa(str) - used_bytes;
422 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
424 FL_SET(str, STR_PRECOMPUTED_HASH);
431 bool force_precompute_hash;
443 long len = RSTRING_LEN(str);
444 long capa =
len +
sizeof(st_index_t);
445 int term_len = TERM_LEN(str);
447 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
449 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
450 STR_SET_LEN(new_str, RSTRING_LEN(str));
452 rb_enc_copy(new_str, str);
453 str_store_precomputed_hash(new_str, str_do_hash(str));
457 rb_enc_copy(new_str, str);
458#ifdef PRECOMPUTED_FAKESTR_HASH
459 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
460 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
474 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
477 if (STR_SHARED_P(str)) {
479 str_make_independent(str);
482 if (!BARE_STRING_P(str)) {
488 RBASIC(str)->flags |= RSTRING_FSTR;
508 if (
FL_TEST(str, RSTRING_FSTR))
511 bare = BARE_STRING_P(str);
513 if (STR_EMBED_P(str)) {
518 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
525 rb_str_resize(str, RSTRING_LEN(str));
527 fstr = register_fstring(str,
false,
false);
530 str_replace_shared_without_enc(str, fstr);
/* Sentinel values for the `str` field of an fstring table entry.  The table
 * code in this file compares entry->str against these and stores
 * FSTRING_TABLE_TOMBSTONE into an entry when a string is deleted.
 * They reuse the special constants Qfalse / Qtrue / Qundef. */
537#define FSTRING_TABLE_EMPTY Qfalse
538#define FSTRING_TABLE_TOMBSTONE Qtrue
539#define FSTRING_TABLE_MOVED Qundef
548 unsigned int capacity;
549 unsigned int deleted_entries;
554fstring_table_free(
void *ptr)
557 xfree(table->entries);
561fstring_table_size(
const void *ptr)
572 .dfree = fstring_table_free,
573 .dsize = fstring_table_size,
575 .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE
579static VALUE fstring_table_obj;
582new_fstring_table(
int capacity)
587 table->capacity = capacity;
594Init_fstring_table(
void)
596 fstring_table_obj = new_fstring_table(8192);
597 rb_gc_register_address(&fstring_table_obj);
611 RUBY_ASSERT((table->capacity & (table->capacity - 1)) == 0);
612 probe->mask = table->capacity - 1;
613 probe->idx = hash_code & probe->mask;
620 probe->idx = (probe->idx + 1) & probe->mask;
637 RUBY_ASSERT((table->capacity & (table->capacity - 1)) == 0);
639 probe->mask = table->capacity - 1;
640 probe->idx = hash_code & probe->mask;
648 probe->idx = (probe->idx + probe->d) & probe->mask;
/* Loads `x` through RUBY_ATOMIC_PTR_LOAD and casts the result to VALUE. */
653#define RUBY_ATOMIC_VALUE_LOAD(x) (VALUE)(RUBY_ATOMIC_PTR_LOAD(x))
659 int idx = fstring_table_probe_start(&probe, table, hash_code);
663 VALUE candidate = entry->str;
668 if (candidate == FSTRING_TABLE_EMPTY) {
675 entry->hash = hash_code;
679 idx = fstring_table_probe_next(&probe);
685fstring_try_resize(
VALUE old_table_obj)
690 if (RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj) != old_table_obj) {
698 int expected_count =
RUBY_ATOMIC_LOAD(old_table->count) - old_table->deleted_entries;
701 int old_capacity = old_table->capacity;
702 int new_capacity = old_capacity * 2;
703 if (new_capacity > expected_count * 8) {
704 new_capacity = old_capacity / 2;
706 else if (new_capacity > expected_count * 4) {
707 new_capacity = old_capacity;
711 VALUE new_table_obj = new_fstring_table(new_capacity);
714 for (
int i = 0; i < old_capacity; i++) {
718 if (val == FSTRING_TABLE_EMPTY)
continue;
719 if (val == FSTRING_TABLE_TOMBSTONE)
continue;
720 if (rb_objspace_garbage_object_p(val))
continue;
722 VALUE hash_code = RUBY_ATOMIC_VALUE_LOAD(entry->hash);
723 if (hash_code == 0) {
726 hash_code = fstring_hash(val);
729 fstring_insert_on_resize(new_table, hash_code, val);
733 fprintf(stderr,
"resized: %p(%i) -> %p(%i) (count: %i->%i)\n", old_table, old_table->capacity, new_table, new_table->capacity, old_table->count, new_table->count);
747 bool inserting =
false;
753 table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
755 table = RTYPEDDATA_GET_DATA(table_obj);
756 idx = fstring_table_probe_start(&probe, table, hash_code);
760 VALUE candidate = RUBY_ATOMIC_VALUE_LOAD(entry->str);
762 if (candidate == FSTRING_TABLE_EMPTY) {
766 value = build_fstring(value, arg);
773 if (UNLIKELY(prev_count > table->capacity / 2)) {
774 fstring_try_resize(table_obj);
779 if (found == FSTRING_TABLE_EMPTY) {
796 else if (candidate == FSTRING_TABLE_TOMBSTONE) {
799 else if (candidate == FSTRING_TABLE_MOVED) {
807 VALUE candidate_hash = RUBY_ATOMIC_VALUE_LOAD(entry->hash);
808 if ((candidate_hash == hash_code || candidate_hash == 0) && !fstring_cmp(candidate, value)) {
810 if (UNLIKELY(rb_objspace_garbage_object_p(candidate))) {
824 idx = fstring_table_probe_next(&probe);
834 VALUE table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
839 int idx = fstring_table_probe_start(&probe, table, hash_code);
843 VALUE candidate = entry->str;
848 if (candidate == FSTRING_TABLE_EMPTY) {
852 else if (candidate == value) {
854 entry->str = FSTRING_TABLE_TOMBSTONE;
855 table->deleted_entries++;
859 idx = fstring_table_probe_next(&probe);
864register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
868 .force_precompute_hash = force_precompute_hash
871#if SIZEOF_VOIDP == SIZEOF_LONG
875 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
879 VALUE hash_code = fstring_hash(str);
880 VALUE result = fstring_find_or_insert(hash_code, str, &args);
882 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
892rb_fstring_foreach_with_replace(st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
897 VALUE table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
904 for (
unsigned int i = 0; i < table->capacity; i++) {
905 VALUE key = table->entries[i].str;
906 if(key == FSTRING_TABLE_EMPTY)
continue;
907 if(key == FSTRING_TABLE_TOMBSTONE)
continue;
909 enum st_retval retval;
910 retval = (*func)(key, key, arg, 0);
912 if (retval == ST_REPLACE && replace) {
913 st_data_t value = key;
914 retval = (*replace)(&key, &value, arg, TRUE);
915 table->entries[i].str = key;
922 rb_bug(
"unsupported");
926 table->entries[i].str = FSTRING_TABLE_TOMBSTONE;
933rb_obj_is_fstring_table(
VALUE obj)
937 return obj == fstring_table_obj;
941rb_gc_free_fstring(
VALUE obj)
946 VALUE str_hash = fstring_hash(obj);
947 fstring_delete(str_hash, obj);
949 RB_DEBUG_COUNTER_INC(obj_str_fstr);
955setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
970 return (
VALUE)fake_str;
979 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
988rb_fstring_new(
const char *ptr,
long len)
991 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
998 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
1002rb_fstring_cstr(
const char *
ptr)
1004 return rb_fstring_new(
ptr, strlen(
ptr));
1011 const char *aptr, *bptr;
1018 return (alen != blen ||
1020 memcmp(aptr, bptr, alen) != 0);
1024single_byte_optimizable(
VALUE str)
1028 case ENCINDEX_ASCII_8BIT:
1029 case ENCINDEX_US_ASCII:
1031 case ENCINDEX_UTF_8:
1051static inline const char *
1052search_nonascii(
const char *p,
const char *e)
1054 const uintptr_t *s, *t;
1056#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
1057# if SIZEOF_UINTPTR_T == 8
1058# define NONASCII_MASK UINT64_C(0x8080808080808080)
1059# elif SIZEOF_UINTPTR_T == 4
1060# define NONASCII_MASK UINT32_C(0x80808080)
1062# error "don't know what to do."
1065# if SIZEOF_UINTPTR_T == 8
1066# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
1067# elif SIZEOF_UINTPTR_T == 4
1068# define NONASCII_MASK 0x80808080UL
1070# error "don't know what to do."
1074 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
1075#if !UNALIGNED_WORD_ACCESS
1076 if ((uintptr_t)p % SIZEOF_VOIDP) {
1077 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
1082 case 7:
if (p[-7]&0x80)
return p-7;
1083 case 6:
if (p[-6]&0x80)
return p-6;
1084 case 5:
if (p[-5]&0x80)
return p-5;
1085 case 4:
if (p[-4]&0x80)
return p-4;
1087 case 3:
if (p[-3]&0x80)
return p-3;
1088 case 2:
if (p[-2]&0x80)
return p-2;
1089 case 1:
if (p[-1]&0x80)
return p-1;
1094#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
1095#define aligned_ptr(value) \
1096 __builtin_assume_aligned((value), sizeof(uintptr_t))
1098#define aligned_ptr(value) (uintptr_t *)(value)
1101 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
1104 if (*s & NONASCII_MASK) {
1105#ifdef WORDS_BIGENDIAN
1106 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
1108 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
1112 p = (
const char *)s;
1118 case 7:
if (e[-7]&0x80)
return e-7;
1119 case 6:
if (e[-6]&0x80)
return e-6;
1120 case 5:
if (e[-5]&0x80)
return e-5;
1121 case 4:
if (e[-4]&0x80)
return e-4;
1123 case 3:
if (e[-3]&0x80)
return e-3;
1124 case 2:
if (e[-2]&0x80)
return e-2;
1125 case 1:
if (e[-1]&0x80)
return e-1;
1126 case 0:
return NULL;
1133 const char *e = p +
len;
1135 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
1137 p = search_nonascii(p, e);
1141 if (rb_enc_asciicompat(enc)) {
1142 p = search_nonascii(p, e);
1145 int ret = rb_enc_precise_mbclen(p, e, enc);
1149 p = search_nonascii(p, e);
1155 int ret = rb_enc_precise_mbclen(p, e, enc);
1171 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
1174 p = search_nonascii(p, e);
1178 else if (rb_enc_asciicompat(enc)) {
1179 p = search_nonascii(p, e);
1185 int ret = rb_enc_precise_mbclen(p, e, enc);
1192 p = search_nonascii(p, e);
1198 int ret = rb_enc_precise_mbclen(p, e, enc);
1223 rb_enc_set_index(str1, rb_enc_get_index(str2));
1231rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
1236 str_enc_copy(dest, src);
1237 if (RSTRING_LEN(dest) == 0) {
1238 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
1249 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
1250 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
1261rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
1263 str_enc_copy(dest, src);
1270 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
1276 return enc_coderange_scan(str, enc);
1285 cr = enc_coderange_scan(str, get_encoding(str));
1292rb_enc_str_asciicompat(
VALUE str)
1295 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
1303 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
1312str_mod_check(
VALUE s,
const char *p,
long len)
1314 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
1320str_capacity(
VALUE str,
const int termlen)
1322 if (STR_EMBED_P(str)) {
1323 return str_embed_capa(str) - termlen;
1325 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
1329 return RSTRING(str)->as.heap.aux.capa;
1336 return str_capacity(str, TERM_LEN(str));
1340must_not_null(
const char *
ptr)
1343 rb_raise(rb_eArgError,
"NULL pointer given");
1348str_alloc_embed(
VALUE klass,
size_t capa)
1350 size_t size = rb_str_embed_size(
capa);
1354 NEWOBJ_OF(str,
struct RString, klass,
1361str_alloc_heap(
VALUE klass)
1363 NEWOBJ_OF(str,
struct RString, klass,
1370empty_str_alloc(
VALUE klass)
1372 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1373 VALUE str = str_alloc_embed(klass, 0);
1374 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1385 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1389 enc = rb_ascii8bit_encoding();
1392 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1394 int termlen = rb_enc_mbminlen(enc);
1396 if (STR_EMBEDDABLE_P(
len, termlen)) {
1397 str = str_alloc_embed(klass,
len + termlen);
1403 str = str_alloc_heap(klass);
1409 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1412 rb_enc_raw_set(str, enc);
1415 memcpy(RSTRING_PTR(str),
ptr,
len);
1418 STR_SET_LEN(str,
len);
1419 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1426 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1461 __msan_unpoison_string(
ptr);
1481 if (rb_enc_mbminlen(enc) != 1) {
1482 rb_raise(rb_eArgError,
"wchar encoding given");
1484 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1488str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1493 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1497 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1500 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1501 str = str_alloc_heap(klass);
1505 RBASIC(str)->flags |= STR_NOFREE;
1506 rb_enc_associate_index(str, encindex);
1535static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1537 int ecflags,
VALUE ecopts);
1542 int encidx = rb_enc_to_index(enc);
1543 if (rb_enc_get_index(str) == encidx)
1544 return is_ascii_string(str);
1555 if (!to)
return str;
1556 if (!from) from = rb_enc_get(str);
1557 if (from == to)
return str;
1558 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1559 rb_is_ascii8bit_enc(to)) {
1560 if (STR_ENC_GET(str) != to) {
1562 rb_enc_associate(str, to);
1569 from, to, ecflags, ecopts);
1570 if (
NIL_P(newstr)) {
1578rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1583 olen = RSTRING_LEN(newstr);
1584 if (ofs < -olen || olen < ofs)
1586 if (ofs < 0) ofs += olen;
1588 STR_SET_LEN(newstr, ofs);
1592 rb_str_modify(newstr);
1593 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1601 STR_SET_LEN(str, 0);
1602 rb_enc_associate(str, enc);
1608str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1610 int ecflags,
VALUE ecopts)
1615 VALUE econv_wrapper;
1616 const unsigned char *start, *sp;
1617 unsigned char *dest, *dp;
1618 size_t converted_output = (size_t)ofs;
1623 RBASIC_CLEAR_CLASS(econv_wrapper);
1625 if (!ec)
return Qnil;
1628 sp = (
unsigned char*)
ptr;
1630 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1631 (dp = dest + converted_output),
1635 size_t converted_input = sp - start;
1636 size_t rest =
len - converted_input;
1637 converted_output = dp - dest;
1639 if (converted_input && converted_output &&
1640 rest < (LONG_MAX / converted_output)) {
1641 rest = (rest * converted_output) / converted_input;
1646 olen += rest < 2 ? 2 : rest;
1647 rb_str_resize(newstr, olen);
1654 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1656 rb_enc_associate(newstr, to);
1675 const int eidx = rb_enc_to_index(eenc);
1678 return rb_enc_str_new(
ptr,
len, eenc);
1682 if ((eidx == rb_ascii8bit_encindex()) ||
1683 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1687 ienc = rb_default_internal_encoding();
1688 if (!ienc || eenc == ienc) {
1689 return rb_enc_str_new(
ptr,
len, eenc);
1693 if ((eidx == rb_ascii8bit_encindex()) ||
1694 (eidx == rb_usascii_encindex()) ||
1695 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1696 return rb_enc_str_new(
ptr,
len, ienc);
1699 str = rb_enc_str_new(NULL, 0, ienc);
1702 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1703 rb_str_initialize(str,
ptr,
len, eenc);
1711 int eidx = rb_enc_to_index(eenc);
1712 if (eidx == rb_usascii_encindex() &&
1713 !is_ascii_string(str)) {
1714 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1717 rb_enc_associate_index(str, eidx);
1776str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1778 const int termlen = TERM_LEN(str);
1783 if (str_embed_capa(str2) >=
len + termlen) {
1784 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1785 STR_SET_EMBED(str2);
1786 memcpy(ptr2, RSTRING_PTR(str),
len);
1787 TERM_FILL(ptr2+
len, termlen);
1791 if (STR_SHARED_P(str)) {
1792 root =
RSTRING(str)->as.heap.aux.shared;
1801 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1803 rb_fatal(
"about to free a possible shared root");
1805 char *ptr2 = STR_HEAP_PTR(str2);
1807 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1810 FL_SET(str2, STR_NOEMBED);
1812 STR_SET_SHARED(str2, root);
1815 STR_SET_LEN(str2,
len);
1823 str_replace_shared_without_enc(str2, str);
1824 rb_enc_cr_str_exact_copy(str2, str);
1831 return str_replace_shared(str_alloc_heap(klass), str);
1848rb_str_new_frozen_String(
VALUE orig)
1856rb_str_frozen_bare_string(
VALUE orig)
1858 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1863rb_str_tmp_frozen_acquire(
VALUE orig)
1866 return str_new_frozen_buffer(0, orig, FALSE);
1870rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1872 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1873 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1875 VALUE str = str_alloc_heap(0);
1878 FL_SET(str, STR_SHARED_ROOT);
1880 size_t capa = str_capacity(orig, TERM_LEN(orig));
1886 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1887 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1894 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1895 RBASIC(orig)->flags &= ~STR_NOFREE;
1896 STR_SET_SHARED(orig, str);
1906rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1911 if (STR_EMBED_P(tmp)) {
1920 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1924 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1925 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1930 STR_SET_LEN(tmp, 0);
1938 return str_new_frozen_buffer(klass, orig, TRUE);
1947 VALUE str = str_alloc_heap(klass);
1948 STR_SET_LEN(str, RSTRING_LEN(orig));
1949 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1950 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1951 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1952 RBASIC(orig)->flags &= ~STR_NOFREE;
1953 STR_SET_SHARED(orig, str);
1960str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1964 long len = RSTRING_LEN(orig);
1965 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1966 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1968 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1969 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1975 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1976 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1982 if ((ofs > 0) || (rest > 0) ||
1985 str = str_new_shared(klass,
shared);
1987 RSTRING(str)->as.heap.ptr += ofs;
1988 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1996 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1997 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1999 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
2000 STR_SET_LEN(str, RSTRING_LEN(orig));
2005 str = heap_str_make_shared(klass, orig);
2009 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
2021str_new_empty_String(
VALUE str)
2024 rb_enc_copy(v, str);
/* Minimum buffer capacity: a requested `capa` below this value is raised to
 * it where this constant is used as a lower clamp in this file. */
2028#define STR_BUF_MIN_SIZE 63
2033 if (STR_EMBEDDABLE_P(
capa, 1)) {
2041 RSTRING(str)->as.heap.ptr[0] =
'\0';
2061 return str_new(0, 0,
len);
2067 if (STR_EMBED_P(str)) {
2068 RB_DEBUG_COUNTER_INC(obj_str_embed);
2070 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
2071 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
2072 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
2075 RB_DEBUG_COUNTER_INC(obj_str_ptr);
2076 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2081rb_str_memsize(
VALUE str)
2083 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
2084 return STR_HEAP_SIZE(str);
2094 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2097static inline void str_discard(
VALUE str);
2098static void str_shared_replace(
VALUE str,
VALUE str2);
2103 if (str != str2) str_shared_replace(str, str2);
2114 enc = STR_ENC_GET(str2);
2117 termlen = rb_enc_mbminlen(enc);
2119 STR_SET_LEN(str, RSTRING_LEN(str2));
2121 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
2123 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
2124 rb_enc_associate(str, enc);
2128 if (STR_EMBED_P(str2)) {
2130 long len = RSTRING_LEN(str2);
2133 char *new_ptr =
ALLOC_N(
char,
len + termlen);
2134 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
2135 RSTRING(str2)->as.heap.ptr = new_ptr;
2136 STR_SET_LEN(str2,
len);
2138 STR_SET_NOEMBED(str2);
2141 STR_SET_NOEMBED(str);
2143 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
2145 if (
FL_TEST(str2, STR_SHARED)) {
2147 STR_SET_SHARED(str,
shared);
2150 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
2154 STR_SET_EMBED(str2);
2155 RSTRING_PTR(str2)[0] = 0;
2156 STR_SET_LEN(str2, 0);
2157 rb_enc_associate(str, enc);
2171 return rb_obj_as_string_result(str, obj);
2187 len = RSTRING_LEN(str2);
2188 if (STR_SHARED_P(str2)) {
2191 STR_SET_NOEMBED(str);
2192 STR_SET_LEN(str,
len);
2193 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
2194 STR_SET_SHARED(str,
shared);
2195 rb_enc_cr_str_exact_copy(str, str2);
2198 str_replace_shared(str, str2);
2207 size_t size = rb_str_embed_size(
capa);
2211 NEWOBJ_OF(str,
struct RString, klass,
2220 NEWOBJ_OF(str,
struct RString, klass,
2231 encidx = rb_enc_get_index(str);
2232 flags &= ~ENCODING_MASK;
2235 if (encidx) rb_enc_associate_index(dup, encidx);
2245 long len = RSTRING_LEN(str);
2250 STR_SET_LEN(dup, RSTRING_LEN(str));
2251 return str_duplicate_setup_encoding(str, dup, flags);
2260 root =
RSTRING(str)->as.heap.aux.shared;
2262 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
2263 root = str = str_new_frozen(klass, str);
2269 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
2270 FL_SET(root, STR_SHARED_ROOT);
2272 flags |= RSTRING_NOEMBED | STR_SHARED;
2274 STR_SET_LEN(dup, RSTRING_LEN(str));
2275 return str_duplicate_setup_encoding(str, dup, flags);
2281 if (STR_EMBED_P(str)) {
2282 return str_duplicate_setup_embed(klass, str, dup);
2285 return str_duplicate_setup_heap(klass, str, dup);
2293 if (STR_EMBED_P(str)) {
2294 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
2297 dup = str_alloc_heap(klass);
2300 return str_duplicate_setup(klass, str, dup);
2311rb_str_dup_m(
VALUE str)
2313 if (LIKELY(BARE_STRING_P(str))) {
2324 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2331 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2335 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2336 str_duplicate_setup_embed(klass, str, new_str);
2339 new_str = ec_str_alloc_heap(ec, klass);
2340 str_duplicate_setup_heap(klass, str, new_str);
2349rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2351 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2375 static ID keyword_ids[2];
2376 VALUE orig, opt, venc, vcapa;
2381 if (!keyword_ids[0]) {
2382 keyword_ids[0] = rb_id_encoding();
2383 CONST_ID(keyword_ids[1],
"capacity");
2391 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2392 enc = rb_to_encoding(venc);
2394 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2397 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2399 if (
capa < STR_BUF_MIN_SIZE) {
2400 capa = STR_BUF_MIN_SIZE;
2404 len = RSTRING_LEN(orig);
2408 if (orig == str) n = 0;
2410 str_modifiable(str);
2411 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2413 const size_t size = (size_t)
capa + termlen;
2414 const char *
const old_ptr = RSTRING_PTR(str);
2415 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2416 char *new_ptr =
ALLOC_N(
char, size);
2417 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2418 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2420 RSTRING(str)->as.heap.ptr = new_ptr;
2422 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2423 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2424 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2426 STR_SET_LEN(str,
len);
2429 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2430 rb_enc_cr_str_exact_copy(str, orig);
2432 FL_SET(str, STR_NOEMBED);
2439 rb_enc_associate(str, enc);
2451rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2457 static ID keyword_ids[2];
2467 keyword_ids[0] = rb_id_encoding();
2468 CONST_ID(keyword_ids[1],
"capacity");
2470 encoding = kwargs[0];
2471 capacity = kwargs[1];
2480 if (UNDEF_P(encoding)) {
2482 encoding = rb_obj_encoding(orig);
2486 if (!UNDEF_P(encoding)) {
2487 enc = rb_to_encoding(encoding);
2491 if (UNDEF_P(capacity)) {
2493 VALUE empty_str = str_new(klass,
"", 0);
2495 rb_enc_associate(empty_str, enc);
2499 VALUE copy = str_duplicate(klass, orig);
2500 rb_enc_associate(copy, enc);
2513 if (orig_capa >
capa) {
2518 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2519 STR_SET_LEN(str, 0);
/* Non-zero unless the top two bits of `c` are binary 10 (i.e. unless `c` is
 * in 0x80..0xBF, the UTF-8 continuation-byte range).  ASCII bytes therefore
 * count as lead bytes too. */
2530#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2545static inline uintptr_t
2546count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2551 d = (d>>6) | (~d>>7);
2552 d &= NONASCII_MASK >> 7;
2555#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2557 return rb_popcount_intptr(d);
2561# if SIZEOF_VOIDP == 8
2570enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2576 long diff = (long)(e - p);
2577 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2582 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2583 const uintptr_t *s, *t;
2584 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2585 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2586 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2587 while (p < (
const char *)s) {
2588 if (is_utf8_lead_byte(*p))
len++;
2592 len += count_utf8_lead_bytes_with_word(s);
2595 p = (
const char *)s;
2598 if (is_utf8_lead_byte(*p))
len++;
2604 else if (rb_enc_asciicompat(enc)) {
2609 q = search_nonascii(p, e);
2615 p += rb_enc_fast_mbclen(p, e, enc);
2622 q = search_nonascii(p, e);
2628 p += rb_enc_mbclen(p, e, enc);
2635 for (c=0; p<e; c++) {
2636 p += rb_enc_mbclen(p, e, enc);
2651rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2659 long diff = (long)(e - p);
2660 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2662 else if (rb_enc_asciicompat(enc)) {
2666 q = search_nonascii(p, e);
2674 ret = rb_enc_precise_mbclen(p, e, enc);
2689 for (c=0; p<e; c++) {
2690 ret = rb_enc_precise_mbclen(p, e, enc);
2697 if (p + rb_enc_mbminlen(enc) <= e)
2698 p += rb_enc_mbminlen(enc);
2714 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2715 if (!enc) enc = STR_ENC_GET(str);
2716 p = RSTRING_PTR(str);
2721 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2726 return enc_strlen(p, e, enc, cr);
2733 return str_strlen(str, NULL);
2747 return LONG2NUM(str_strlen(str, NULL));
2759rb_str_bytesize(
VALUE str)
2777rb_str_empty(
VALUE str)
2779 return RBOOL(RSTRING_LEN(str) == 0);
2798 char *ptr1, *ptr2, *ptr3;
2803 enc = rb_enc_check_str(str1, str2);
2806 termlen = rb_enc_mbminlen(enc);
2807 if (len1 > LONG_MAX - len2) {
2808 rb_raise(rb_eArgError,
"string size too big");
2810 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2811 ptr3 = RSTRING_PTR(str3);
2812 memcpy(ptr3, ptr1, len1);
2813 memcpy(ptr3+len1, ptr2, len2);
2814 TERM_FILL(&ptr3[len1+len2], termlen);
2830 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2833 int enc1 = rb_enc_get_index(str1);
2834 int enc2 = rb_enc_get_index(str2);
2839 else if (enc2 < 0) {
2842 else if (enc1 != enc2) {
2845 else if (len1 > LONG_MAX - len2) {
2879 rb_enc_copy(str2, str);
2884 rb_raise(rb_eArgError,
"negative argument");
2886 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2887 if (STR_EMBEDDABLE_P(
len, 1)) {
2889 memset(RSTRING_PTR(str2), 0,
len + 1);
2896 STR_SET_LEN(str2,
len);
2897 rb_enc_copy(str2, str);
2900 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2901 rb_raise(rb_eArgError,
"argument too big");
2904 len *= RSTRING_LEN(str);
2905 termlen = TERM_LEN(str);
2907 ptr2 = RSTRING_PTR(str2);
2909 n = RSTRING_LEN(str);
2910 memcpy(ptr2, RSTRING_PTR(str), n);
2911 while (n <=
len/2) {
2912 memcpy(ptr2 + n, ptr2, n);
2915 memcpy(ptr2 + n, ptr2,
len-n);
2917 STR_SET_LEN(str2,
len);
2918 TERM_FILL(&ptr2[
len], termlen);
2919 rb_enc_cr_str_copy_for_substr(str2, str);
2956rb_check_lockedtmp(
VALUE str)
2958 if (
FL_TEST(str, STR_TMPLOCK)) {
2965#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2967str_modifiable(
VALUE str)
2971 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2972 if (CHILLED_STRING_P(str)) {
2973 CHILLED_STRING_MUTATED(str);
2975 rb_check_lockedtmp(str);
2976 rb_check_frozen(str);
2981str_dependent_p(
VALUE str)
2983 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2993#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2995str_independent(
VALUE str)
2999 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
3000 str_modifiable(str);
3001 return !str_dependent_p(str);
3007str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
3017 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
3022 STR_SET_LEN(str,
len);
3027 oldptr = RSTRING_PTR(str);
3029 memcpy(
ptr, oldptr,
len);
3031 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
3034 STR_SET_NOEMBED(str);
3035 FL_UNSET(str, STR_SHARED|STR_NOFREE);
3036 TERM_FILL(
ptr +
len, termlen);
3038 STR_SET_LEN(str,
len);
3045 if (!str_independent(str))
3046 str_make_independent(str);
3055 int termlen = TERM_LEN(str);
3056 long len = RSTRING_LEN(str);
3059 rb_raise(rb_eArgError,
"negative expanding string size");
3061 if (expand >= LONG_MAX -
len) {
3062 rb_raise(rb_eArgError,
"string size too big");
3065 if (!str_independent(str)) {
3066 str_make_independent_expand(str,
len, expand, termlen);
3068 else if (expand > 0) {
3069 RESIZE_CAPA_TERM(str,
len + expand, termlen);
3076str_modify_keep_cr(
VALUE str)
3078 if (!str_independent(str))
3079 str_make_independent(str);
3086str_discard(
VALUE str)
3088 str_modifiable(str);
3089 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
3090 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
3091 RSTRING(str)->as.heap.ptr = 0;
3092 STR_SET_LEN(str, 0);
3099 int encindex = rb_enc_get_index(str);
3101 if (RB_UNLIKELY(encindex == -1)) {
3105 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
3110 if (!rb_enc_asciicompat(enc)) {
3132 return RSTRING_PTR(str);
3136zero_filled(
const char *s,
int n)
3138 for (; n > 0; --n) {
3145str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
3147 const char *e = s +
len;
3149 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
3150 if (zero_filled(s, minlen))
return s;
3156str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
3161 if (str_dependent_p(str)) {
3162 if (!zero_filled(s +
len, termlen))
3163 str_make_independent_expand(str,
len, 0L, termlen);
3166 TERM_FILL(s +
len, termlen);
3169 return RSTRING_PTR(str);
3173rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
3175 long capa = str_capacity(str, oldtermlen) + oldtermlen;
3176 long len = RSTRING_LEN(str);
3180 rb_check_lockedtmp(str);
3181 str_make_independent_expand(str,
len, 0L, termlen);
3183 else if (str_dependent_p(str)) {
3184 if (termlen > oldtermlen)
3185 str_make_independent_expand(str,
len, 0L, termlen);
3188 if (!STR_EMBED_P(str)) {
3193 if (termlen > oldtermlen) {
3194 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
3202str_null_check(
VALUE str,
int *w)
3204 char *s = RSTRING_PTR(str);
3205 long len = RSTRING_LEN(str);
3207 const int minlen = rb_enc_mbminlen(enc);
3211 if (str_null_char(s,
len, minlen, enc)) {
3214 return str_fill_term(str, s,
len, minlen);
3217 if (!s || memchr(s, 0,
len)) {
3221 s = str_fill_term(str, s,
len, minlen);
3227rb_str_to_cstr(
VALUE str)
3230 return str_null_check(str, &w);
3238 char *s = str_null_check(str, &w);
3241 rb_raise(rb_eArgError,
"string contains null char");
3243 rb_raise(rb_eArgError,
"string contains null byte");
3249rb_str_fill_terminator(
VALUE str,
const int newminlen)
3251 char *s = RSTRING_PTR(str);
3252 long len = RSTRING_LEN(str);
3253 return str_fill_term(str, s,
len, newminlen);
3259 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
3285str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
3294 else if (rb_enc_asciicompat(enc)) {
3295 const char *p2, *e2;
3298 while (p < e && 0 < nth) {
3305 p2 = search_nonascii(p, e2);
3314 n = rb_enc_mbclen(p, e, enc);
3325 while (p < e && nth--) {
3326 p += rb_enc_mbclen(p, e, enc);
3337 return str_nth_len(p, e, &nth, enc);
3341str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3346 p = str_nth_len(p, e, &nth, enc);
3355str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3357 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3358 if (!pp)
return e - p;
3365 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3366 STR_ENC_GET(str), single_byte_optimizable(str));
3371str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3374 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3375 const uintptr_t *s, *t;
3376 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3377 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3378 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3379 while (p < (
const char *)s) {
3380 if (is_utf8_lead_byte(*p)) nth--;
3384 nth -= count_utf8_lead_bytes_with_word(s);
3386 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3390 if (is_utf8_lead_byte(*p)) {
3391 if (nth == 0)
break;
3401str_utf8_offset(
const char *p,
const char *e,
long nth)
3403 const char *pp = str_utf8_nth(p, e, &nth);
3412 if (single_byte_optimizable(str) || pos < 0)
3415 char *p = RSTRING_PTR(str);
3416 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3421str_subseq(
VALUE str,
long beg,
long len)
3429 const int termlen = TERM_LEN(str);
3430 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3437 if (str_embed_capa(str2) >=
len + termlen) {
3438 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3439 STR_SET_EMBED(str2);
3440 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3441 TERM_FILL(ptr2+
len, termlen);
3443 STR_SET_LEN(str2,
len);
3447 str_replace_shared(str2, str);
3450 RSTRING(str2)->as.heap.ptr += beg;
3451 if (RSTRING_LEN(str2) >
len) {
3452 STR_SET_LEN(str2,
len);
3462 VALUE str2 = str_subseq(str, beg,
len);
3463 rb_enc_cr_str_copy_for_substr(str2, str);
3472 const long blen = RSTRING_LEN(str);
3474 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3476 if (
len < 0)
return 0;
3477 if (beg < 0 && -beg < 0)
return 0;
3481 if (single_byte_optimizable(str)) {
3482 if (beg > blen)
return 0;
3485 if (beg < 0)
return 0;
3487 if (
len > blen - beg)
3489 if (
len < 0)
return 0;
3494 if (
len > -beg)
len = -beg;
3498 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3501 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3507 slen = str_strlen(str, enc);
3509 if (beg < 0)
return 0;
3511 if (
len == 0)
goto end;
3514 else if (beg > 0 && beg > blen) {
3518 if (beg > str_strlen(str, enc))
return 0;
3523 enc == rb_utf8_encoding()) {
3524 p = str_utf8_nth(s, e, &beg);
3525 if (beg > 0)
return 0;
3526 len = str_utf8_offset(p, e,
len);
3532 p = s + beg * char_sz;
3536 else if (
len * char_sz > e - p)
3541 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3542 if (beg > 0)
return 0;
3546 len = str_offset(p, e,
len, enc, 0);
3554static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3559 return str_substr(str, beg,
len, TRUE);
3569str_substr(
VALUE str,
long beg,
long len,
int empty)
3573 if (!p)
return Qnil;
3574 if (!
len && !empty)
return Qnil;
3576 beg = p - RSTRING_PTR(str);
3578 VALUE str2 = str_subseq(str, beg,
len);
3579 rb_enc_cr_str_copy_for_substr(str2, str);
3587 if (CHILLED_STRING_P(str)) {
3592 rb_str_resize(str, RSTRING_LEN(str));
3610 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3653str_uminus(
VALUE str)
3658 return rb_fstring(str);
3662#define rb_str_dup_frozen rb_str_new_frozen
3667 if (
FL_TEST(str, STR_TMPLOCK)) {
3670 FL_SET(str, STR_TMPLOCK);
3677 if (!
FL_TEST(str, STR_TMPLOCK)) {
3697 const int termlen = TERM_LEN(str);
3699 str_modifiable(str);
3700 if (STR_SHARED_P(str)) {
3703 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3704 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3715 else if (
len > RSTRING_LEN(str)) {
3719 const char *
const new_end = RSTRING_PTR(str) +
len;
3729 else if (
len < RSTRING_LEN(str)) {
3737 STR_SET_LEN(str,
len);
3738 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3745 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3748 int independent = str_independent(str);
3749 long slen = RSTRING_LEN(str);
3750 const int termlen = TERM_LEN(str);
3752 if (slen >
len || (termlen != 1 && slen <
len)) {
3758 if (STR_EMBED_P(str)) {
3759 if (
len == slen)
return str;
3760 if (str_embed_capa(str) >=
len + termlen) {
3761 STR_SET_LEN(str,
len);
3765 str_make_independent_expand(str, slen,
len - slen, termlen);
3767 else if (str_embed_capa(str) >=
len + termlen) {
3768 char *
ptr = STR_HEAP_PTR(str);
3770 if (slen >
len) slen =
len;
3773 STR_SET_LEN(str,
len);
3774 if (independent) ruby_xfree(
ptr);
3777 else if (!independent) {
3778 if (
len == slen)
return str;
3779 str_make_independent_expand(str, slen,
len - slen, termlen);
3783 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3784 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3787 else if (
len == slen)
return str;
3788 STR_SET_LEN(str,
len);
3795str_ensure_available_capa(
VALUE str,
long len)
3797 str_modify_keep_cr(str);
3799 const int termlen = TERM_LEN(str);
3800 long olen = RSTRING_LEN(str);
3802 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3803 rb_raise(rb_eArgError,
"string sizes too big");
3806 long total = olen +
len;
3807 long capa = str_capacity(str, termlen);
3810 if (total >= LONG_MAX / 2) {
3813 while (total >
capa) {
3816 RESIZE_CAPA_TERM(str,
capa, termlen);
3821str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3824 str_modify_keep_cr(str);
3829 if (
len == 0)
return 0;
3831 long total, olen,
off = -1;
3833 const int termlen = TERM_LEN(str);
3836 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3840 long capa = str_capacity(str, termlen);
3842 if (olen > LONG_MAX -
len) {
3843 rb_raise(rb_eArgError,
"string sizes too big");
3847 if (total >= LONG_MAX / 2) {
3850 while (total >
capa) {
3853 RESIZE_CAPA_TERM(str,
capa, termlen);
3854 sptr = RSTRING_PTR(str);
3859 memcpy(sptr + olen,
ptr,
len);
3860 STR_SET_LEN(str, total);
3861 TERM_FILL(sptr + total, termlen);
3866#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3867#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3872 if (
len == 0)
return str;
3874 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3876 return str_buf_cat(str,
ptr,
len);
3887rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3892 if (UNLIKELY(!str_independent(str))) {
3893 str_make_independent(str);
3896 long string_length = -1;
3897 const int null_terminator_length = 1;
3902 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3903 rb_raise(rb_eArgError,
"string sizes too big");
3906 long string_capacity = str_capacity(str, null_terminator_length);
3912 if (LIKELY(string_capacity >= string_length + 1)) {
3914 sptr[string_length] = byte;
3915 STR_SET_LEN(str, string_length + 1);
3916 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3920 str_buf_cat(str, (
char *)&
byte, 1);
3936 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3947rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3948 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3957 if (str_encindex == ptr_encindex) {
3959 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3963 str_enc = rb_enc_from_index(str_encindex);
3964 ptr_enc = rb_enc_from_index(ptr_encindex);
3965 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3968 if (RSTRING_LEN(str) == 0) {
3971 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3977 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3986 *ptr_cr_ret = ptr_cr;
3988 if (str_encindex != ptr_encindex &&
3991 str_enc = rb_enc_from_index(str_encindex);
3992 ptr_enc = rb_enc_from_index(ptr_encindex);
3997 res_encindex = str_encindex;
4002 res_encindex = str_encindex;
4006 res_encindex = ptr_encindex;
4011 res_encindex = str_encindex;
4018 res_encindex = str_encindex;
4024 rb_raise(rb_eArgError,
"negative string size (or size too big)");
4026 str_buf_cat(str,
ptr,
len);
4032 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
4039 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
4049 if (rb_enc_asciicompat(enc)) {
4050 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
4056 unsigned int c = (
unsigned char)*
ptr;
4057 int len = rb_enc_codelen(c, enc);
4058 rb_enc_mbcput(c, buf, enc);
4059 rb_enc_cr_str_buf_cat(str, buf,
len,
4072 if (str_enc_fastpath(str)) {
4076 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
4082 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
4093 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
4109rb_str_concat_literals(
size_t num,
const VALUE *strary)
4113 unsigned long len = 1;
4118 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
4120 str_enc_copy_direct(str, strary[0]);
4122 for (i = s; i < num; ++i) {
4123 const VALUE v = strary[i];
4127 if (encidx != ENCINDEX_US_ASCII) {
4129 rb_enc_set_index(str, encidx);
4154rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
4156 str_modifiable(str);
4161 else if (argc > 1) {
4164 rb_enc_copy(arg_str, str);
4165 for (i = 0; i < argc; i++) {
4198rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
4200 long needed_capacity = 0;
4204 for (
int index = 0; index < argc; index++) {
4205 VALUE obj = argv[index];
4213 needed_capacity += RSTRING_LEN(obj);
4218 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
4225 str_ensure_available_capa(str, needed_capacity);
4228 for (
int index = 0; index < argc; index++) {
4229 VALUE obj = argv[index];
4234 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
4235 char byte = (char)(
NUM2INT(obj) & 0xFF);
4249 rb_bug(
"append_as_bytes arguments should have been validated");
4253 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
4254 TERM_FILL(sptr, TERM_LEN(str));
4259 for (
int index = 0; index < argc; index++) {
4260 VALUE obj = argv[index];
4277 rb_bug(
"append_as_bytes arguments should have been validated");
4356 if (rb_num_to_uint(str2, &code) == 0) {
4369 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4372 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4375 long pos = RSTRING_LEN(str1);
4380 switch (
len = rb_enc_codelen(code, enc)) {
4381 case ONIGERR_INVALID_CODE_POINT_VALUE:
4382 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4384 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4390 rb_enc_mbcput(code, buf, enc);
4391 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4392 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4394 rb_str_resize(str1, pos+
len);
4395 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4408rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4410 int encidx = rb_enc_to_index(enc);
4412 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4417 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4418 return ENCINDEX_ASCII_8BIT;
4441rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4443 str_modifiable(str);
4448 else if (argc > 1) {
4451 rb_enc_copy(arg_str, str);
4452 for (i = 0; i < argc; i++) {
4465 st_index_t precomputed_hash;
4466 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4468 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4469 return precomputed_hash;
4472 return str_do_hash(str);
4479 const char *ptr1, *ptr2;
4482 return (len1 != len2 ||
4484 memcmp(ptr1, ptr2, len1) != 0);
4498rb_str_hash_m(
VALUE str)
4504#define lesser(a,b) (((a)>(b))?(b):(a))
4512 if (RSTRING_LEN(str1) == 0)
return TRUE;
4513 if (RSTRING_LEN(str2) == 0)
return TRUE;
4516 if (idx1 == idx2)
return TRUE;
4521 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4525 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4535 const char *ptr1, *ptr2;
4538 if (str1 == str2)
return 0;
4541 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4550 if (len1 > len2)
return 1;
4553 if (retval > 0)
return 1;
4587 if (str1 == str2)
return Qtrue;
4594 return rb_str_eql_internal(str1, str2);
4618 if (str1 == str2)
return Qtrue;
4620 return rb_str_eql_internal(str1, str2);
4652 return rb_invcmp(str1, str2);
4694 return str_casecmp(str1, s);
4702 const char *p1, *p1end, *p2, *p2end;
4704 enc = rb_enc_compatible(str1, str2);
4709 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4710 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4711 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4712 while (p1 < p1end && p2 < p2end) {
4714 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4715 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4717 return INT2FIX(c1 < c2 ? -1 : 1);
4724 while (p1 < p1end && p2 < p2end) {
4725 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4726 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4728 if (0 <= c1 && 0 <= c2) {
4732 return INT2FIX(c1 < c2 ? -1 : 1);
4736 l1 = rb_enc_mbclen(p1, p1end, enc);
4737 l2 = rb_enc_mbclen(p2, p2end, enc);
4738 len = l1 < l2 ? l1 : l2;
4739 r = memcmp(p1, p2,
len);
4741 return INT2FIX(r < 0 ? -1 : 1);
4743 return INT2FIX(l1 < l2 ? -1 : 1);
4749 if (RSTRING_LEN(str1) == RSTRING_LEN(str2))
return INT2FIX(0);
4750 if (RSTRING_LEN(str1) > RSTRING_LEN(str2))
return INT2FIX(1);
4784 return str_casecmp_p(str1, s);
4791 VALUE folded_str1, folded_str2;
4792 VALUE fold_opt = sym_fold;
4794 enc = rb_enc_compatible(str1, str2);
4799 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4800 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4802 return rb_str_eql(folded_str1, folded_str2);
4806strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4807 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4809 const char *search_start = str_ptr;
4810 long pos, search_len = str_len - offset;
4814 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4815 if (pos < 0)
return pos;
4817 if (t == search_start + pos)
break;
4818 search_len -= t - search_start;
4819 if (search_len <= 0)
return -1;
4820 offset += t - search_start;
4823 return pos + offset;
4827#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4828#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4831rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4833 const char *str_ptr, *str_ptr_end, *sub_ptr;
4834 long str_len, sub_len;
4837 enc = rb_enc_check(str, sub);
4838 if (is_broken_string(sub))
return -1;
4840 str_ptr = RSTRING_PTR(str);
4842 str_len = RSTRING_LEN(str);
4843 sub_ptr = RSTRING_PTR(sub);
4844 sub_len = RSTRING_LEN(sub);
4846 if (str_len < sub_len)
return -1;
4849 long str_len_char, sub_len_char;
4850 int single_byte = single_byte_optimizable(str);
4851 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4852 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4854 offset += str_len_char;
4855 if (offset < 0)
return -1;
4857 if (str_len_char - offset < sub_len_char)
return -1;
4858 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4861 if (sub_len == 0)
return offset;
4864 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4878rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4885 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4886 long slen = str_strlen(str, enc);
4888 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4900 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4901 enc, single_byte_optimizable(str));
4912 pos = rb_str_index(str, sub, pos);
4926str_ensure_byte_pos(
VALUE str,
long pos)
4928 if (!single_byte_optimizable(str)) {
4929 const char *s = RSTRING_PTR(str);
4931 const char *p = s + pos;
4932 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4934 "offset %ld does not land on character boundary", pos);
4981rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4987 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4988 long slen = RSTRING_LEN(str);
4990 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
5001 str_ensure_byte_pos(str, pos);
5013 pos = rb_str_byteindex(str, sub, pos);
5014 if (pos >= 0)
return LONG2NUM(pos);
5021memrchr(
const char *search_str,
int chr,
long search_len)
5023 const char *ptr = search_str + search_len;
5024 while (ptr > search_str) {
5025 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
5035 char *hit, *adjusted;
5037 long slen, searchlen;
5040 sbeg = RSTRING_PTR(str);
5041 slen = RSTRING_LEN(sub);
5042 if (slen == 0)
return s - sbeg;
5044 t = RSTRING_PTR(sub);
5046 searchlen = s - sbeg + 1;
5048 if (memcmp(s, t, slen) == 0) {
5053 hit = memrchr(sbeg, c, searchlen);
5056 if (hit != adjusted) {
5057 searchlen = adjusted - sbeg;
5060 if (memcmp(hit, t, slen) == 0)
5062 searchlen = adjusted - sbeg;
5063 }
while (searchlen > 0);
5077 enc = rb_enc_check(str, sub);
5078 if (is_broken_string(sub))
return -1;
5079 singlebyte = single_byte_optimizable(str);
5080 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
5081 slen = str_strlen(sub, enc);
5084 if (
len < slen)
return -1;
5085 if (
len - pos < slen) pos =
len - slen;
5086 if (
len == 0)
return pos;
5088 sbeg = RSTRING_PTR(str);
5091 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
5097 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
5098 return str_rindex(str, sub, s, enc);
5159rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
5164 long pos,
len = str_strlen(str, enc);
5166 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5168 if (pos < 0 && (pos +=
len) < 0) {
5174 if (pos >
len) pos =
len;
5182 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
5183 enc, single_byte_optimizable(str));
5194 pos = rb_str_rindex(str, sub, pos);
5204rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
5210 enc = rb_enc_check(str, sub);
5211 if (is_broken_string(sub))
return -1;
5212 len = RSTRING_LEN(str);
5213 slen = RSTRING_LEN(sub);
5216 if (
len < slen)
return -1;
5217 if (
len - pos < slen) pos =
len - slen;
5218 if (
len == 0)
return pos;
5220 sbeg = RSTRING_PTR(str);
5223 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
5230 return str_rindex(str, sub, s, enc);
5295rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
5299 long pos,
len = RSTRING_LEN(str);
5301 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5303 if (pos < 0 && (pos +=
len) < 0) {
5309 if (pos >
len) pos =
len;
5315 str_ensure_byte_pos(str, pos);
5327 pos = rb_str_byterindex(str, sub, pos);
5328 if (pos >= 0)
return LONG2NUM(pos);
5367 switch (OBJ_BUILTIN_TYPE(y)) {
5419rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5426 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5458rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5462 re = get_pat(argv[0]);
5463 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5472static enum neighbor_char
5478 if (rb_enc_mbminlen(enc) > 1) {
5480 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5482 return NEIGHBOR_NOT_CHAR;
5484 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5486 if (!l)
return NEIGHBOR_NOT_CHAR;
5487 if (l !=
len)
return NEIGHBOR_WRAPPED;
5488 rb_enc_mbcput(c, p, enc);
5489 r = rb_enc_precise_mbclen(p, p +
len, enc);
5491 return NEIGHBOR_NOT_CHAR;
5493 return NEIGHBOR_FOUND;
5496 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5499 return NEIGHBOR_WRAPPED;
5500 ++((
unsigned char*)p)[i];
5501 l = rb_enc_precise_mbclen(p, p+
len, enc);
5505 return NEIGHBOR_FOUND;
5508 memset(p+l, 0xff,
len-l);
5514 for (len2 =
len-1; 0 < len2; len2--) {
5515 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5519 memset(p+len2+1, 0xff,
len-(len2+1));
5524static enum neighbor_char
5529 if (rb_enc_mbminlen(enc) > 1) {
5531 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5533 return NEIGHBOR_NOT_CHAR;
5535 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5536 if (!c)
return NEIGHBOR_NOT_CHAR;
5539 if (!l)
return NEIGHBOR_NOT_CHAR;
5540 if (l !=
len)
return NEIGHBOR_WRAPPED;
5541 rb_enc_mbcput(c, p, enc);
5542 r = rb_enc_precise_mbclen(p, p +
len, enc);
5544 return NEIGHBOR_NOT_CHAR;
5546 return NEIGHBOR_FOUND;
5549 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5552 return NEIGHBOR_WRAPPED;
5553 --((
unsigned char*)p)[i];
5554 l = rb_enc_precise_mbclen(p, p+
len, enc);
5558 return NEIGHBOR_FOUND;
5561 memset(p+l, 0,
len-l);
5567 for (len2 =
len-1; 0 < len2; len2--) {
5568 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5572 memset(p+len2+1, 0,
len-(len2+1));
5586static enum neighbor_char
5587enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5589 enum neighbor_char ret;
5593 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5597 const int max_gaps = 1;
5599 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5601 ctype = ONIGENC_CTYPE_DIGIT;
5603 ctype = ONIGENC_CTYPE_ALPHA;
5605 return NEIGHBOR_NOT_CHAR;
5608 for (
try = 0;
try <= max_gaps; ++
try) {
5609 ret = enc_succ_char(p,
len, enc);
5610 if (ret == NEIGHBOR_FOUND) {
5611 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5613 return NEIGHBOR_FOUND;
5620 ret = enc_pred_char(p,
len, enc);
5621 if (ret == NEIGHBOR_FOUND) {
5622 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5635 return NEIGHBOR_NOT_CHAR;
5638 if (ctype != ONIGENC_CTYPE_DIGIT) {
5640 return NEIGHBOR_WRAPPED;
5644 enc_succ_char(carry,
len, enc);
5645 return NEIGHBOR_WRAPPED;
5713 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5714 rb_enc_cr_str_copy_for_substr(str, orig);
5715 return str_succ(str);
5722 char *sbeg, *s, *e, *last_alnum = 0;
5723 int found_alnum = 0;
5725 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5726 long carry_pos = 0, carry_len = 1;
5727 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5729 slen = RSTRING_LEN(str);
5730 if (slen == 0)
return str;
5732 enc = STR_ENC_GET(str);
5733 sbeg = RSTRING_PTR(str);
5734 s = e = sbeg + slen;
5736 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5737 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5743 l = rb_enc_precise_mbclen(s, e, enc);
5744 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5745 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5746 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5748 case NEIGHBOR_NOT_CHAR:
5750 case NEIGHBOR_FOUND:
5752 case NEIGHBOR_WRAPPED:
5757 carry_pos = s - sbeg;
5762 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5763 enum neighbor_char neighbor;
5764 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5765 l = rb_enc_precise_mbclen(s, e, enc);
5766 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5767 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5769 neighbor = enc_succ_char(tmp, l, enc);
5771 case NEIGHBOR_FOUND:
5775 case NEIGHBOR_WRAPPED:
5778 case NEIGHBOR_NOT_CHAR:
5781 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5783 enc_succ_char(s, l, enc);
5785 if (!rb_enc_asciicompat(enc)) {
5786 MEMCPY(carry, s,
char, l);
5789 carry_pos = s - sbeg;
5793 RESIZE_CAPA(str, slen + carry_len);
5794 sbeg = RSTRING_PTR(str);
5795 s = sbeg + carry_pos;
5796 memmove(s + carry_len, s, slen - carry_pos);
5797 memmove(s, carry, carry_len);
5799 STR_SET_LEN(str, slen);
5800 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5814rb_str_succ_bang(
VALUE str)
5822all_digits_p(
const char *s,
long len)
5876 VALUE end, exclusive;
5880 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5886 VALUE current, after_end;
5893 enc = rb_enc_check(beg, end);
5894 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5896 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5897 char c = RSTRING_PTR(beg)[0];
5898 char e = RSTRING_PTR(end)[0];
5900 if (c > e || (excl && c == e))
return beg;
5902 VALUE str = rb_enc_str_new(&c, 1, enc);
5904 if ((*each)(str, arg))
break;
5905 if (!excl && c == e)
break;
5907 if (excl && c == e)
break;
5912 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5913 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5914 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5919 b = rb_str_to_inum(beg, 10, FALSE);
5920 e = rb_str_to_inum(end, 10, FALSE);
5927 if (excl && bi == ei)
break;
5928 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5933 ID op = excl ?
'<' : idLE;
5934 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5939 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5940 b = rb_funcallv(b, succ, 0, 0);
5947 if (n > 0 || (excl && n == 0))
return beg;
5949 after_end = rb_funcallv(end, succ, 0, 0);
5954 next = rb_funcallv(current, succ, 0, 0);
5955 if ((*each)(current, arg))
break;
5956 if (
NIL_P(next))
break;
5960 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5975 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5976 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5977 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5979 b = rb_str_to_inum(beg, 10, FALSE);
5985 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5993 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5994 b = rb_funcallv(b, succ, 0, 0);
6000 VALUE next = rb_funcallv(current, succ, 0, 0);
6001 if ((*each)(current, arg))
break;
6004 if (RSTRING_LEN(current) == 0)
6015 if (!
rb_equal(str, *argp))
return 0;
6029 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
6030 rb_enc_asciicompat(STR_ENC_GET(end)) &&
6031 rb_enc_asciicompat(STR_ENC_GET(val))) {
6032 const char *bp = RSTRING_PTR(beg);
6033 const char *ep = RSTRING_PTR(end);
6034 const char *vp = RSTRING_PTR(val);
6035 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
6036 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
6044 if (b <= v && v < e)
return Qtrue;
6045 return RBOOL(!
RTEST(exclusive) && v == e);
6052 all_digits_p(bp, RSTRING_LEN(beg)) &&
6053 all_digits_p(ep, RSTRING_LEN(end))) {
6058 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
6060 return RBOOL(
NIL_P(val));
6083 return rb_str_subpat(str, indx,
INT2FIX(0));
6086 if (rb_str_index(str, indx, 0) != -1)
6092 long beg,
len = str_strlen(str, NULL);
6104 return str_substr(str, idx, 1, FALSE);
6123rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
6127 return rb_str_subpat(str, argv[0], argv[1]);
6130 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
6134 return rb_str_aref(str, argv[0]);
6140 char *ptr = RSTRING_PTR(str);
6141 long olen = RSTRING_LEN(str), nlen;
6143 str_modifiable(str);
6144 if (
len > olen)
len = olen;
6146 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
6148 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
6150 ptr =
RSTRING(str)->as.embed.ary;
6151 memmove(ptr, oldptr +
len, nlen);
6152 if (fl == STR_NOEMBED)
xfree(oldptr);
6155 if (!STR_SHARED_P(str)) {
6157 rb_enc_cr_str_exact_copy(shared, str);
6162 STR_SET_LEN(str, nlen);
6164 if (!SHARABLE_MIDDLE_SUBSTRING) {
6165 TERM_FILL(ptr + nlen, TERM_LEN(str));
6172rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
6178 if (beg == 0 && vlen == 0) {
6183 str_modify_keep_cr(str);
6187 RESIZE_CAPA(str, slen + vlen -
len);
6188 sptr = RSTRING_PTR(str);
6197 memmove(sptr + beg + vlen,
6199 slen - (beg +
len));
6201 if (vlen < beg &&
len < 0) {
6205 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
6208 STR_SET_LEN(str, slen);
6209 TERM_FILL(&sptr[slen], TERM_LEN(str));
6216 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
6225 int singlebyte = single_byte_optimizable(str);
6231 enc = rb_enc_check(str, val);
6232 slen = str_strlen(str, enc);
6234 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
6243 if (
len > slen - beg) {
6246 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
6251 beg = p - RSTRING_PTR(str);
6253 rb_str_update_0(str, beg,
len, val);
6254 rb_enc_associate(str, enc);
6265 long start, end,
len;
6275 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
6279 nth += regs->num_regs;
6289 enc = rb_enc_check_str(str, val);
6290 rb_str_update_0(str, start,
len, val);
6291 rb_enc_associate(str, enc);
6299 switch (
TYPE(indx)) {
6301 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
6305 beg = rb_str_index(str, indx, 0);
6360rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
6364 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
6372 return rb_str_aset(str, argv[0], argv[1]);
6432rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6440 str_modify_keep_cr(str);
6448 if ((nth += regs->num_regs) <= 0)
return Qnil;
6450 else if (nth >= regs->num_regs)
return Qnil;
6452 len = END(nth) - beg;
6455 else if (argc == 2) {
6464 beg = p - RSTRING_PTR(str);
6468 beg = rb_str_index(str, indx, 0);
6469 if (beg == -1)
return Qnil;
6470 len = RSTRING_LEN(indx);
6482 beg = p - RSTRING_PTR(str);
6491 beg = p - RSTRING_PTR(str);
6495 rb_enc_cr_str_copy_for_substr(result, str);
6503 char *sptr = RSTRING_PTR(str);
6504 long slen = RSTRING_LEN(str);
6505 if (beg +
len > slen)
6509 slen - (beg +
len));
6511 STR_SET_LEN(str, slen);
6512 TERM_FILL(&sptr[slen], TERM_LEN(str));
6523 switch (OBJ_BUILTIN_TYPE(pat)) {
6542get_pat_quoted(
VALUE pat,
int check)
6546 switch (OBJ_BUILTIN_TYPE(pat)) {
6560 if (check && is_broken_string(pat)) {
6567rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6570 pos = rb_str_byteindex(str, pat, pos);
6571 if (set_backref_str) {
6573 str = rb_str_new_frozen_String(str);
6574 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6576 *match = match_data;
6586 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6591rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6593 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6612rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6626 hash = rb_check_hash_type(argv[1]);
6632 pat = get_pat_quoted(argv[0], 1);
6634 str_modifiable(str);
6635 beg = rb_pat_search(pat, str, 0, 1);
6649 end0 = beg0 + RSTRING_LEN(pat);
6658 if (iter || !
NIL_P(hash)) {
6659 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6665 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6668 str_mod_check(str, p,
len);
6669 rb_check_frozen(str);
6675 enc = rb_enc_compatible(str, repl);
6678 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6682 rb_enc_inspect_name(str_enc),
6683 rb_enc_inspect_name(STR_ENC_GET(repl)));
6685 enc = STR_ENC_GET(repl);
6688 rb_enc_associate(str, enc);
6698 rlen = RSTRING_LEN(repl);
6699 len = RSTRING_LEN(str);
6701 RESIZE_CAPA(str,
len + rlen - plen);
6703 p = RSTRING_PTR(str);
6705 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6707 rp = RSTRING_PTR(repl);
6708 memmove(p + beg0, rp, rlen);
6710 STR_SET_LEN(str,
len);
6711 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6740 rb_str_sub_bang(argc, argv, str);
6745str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6748 long beg, beg0, end0;
6749 long offset, blen, slen,
len, last;
6750 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6752 int need_backref_str = -1;
6762 hash = rb_check_hash_type(argv[1]);
6766 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6774 rb_error_arity(argc, 1, 2);
6777 pat = get_pat_quoted(argv[0], 1);
6778 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6781 if (bang)
return Qnil;
6786 blen = RSTRING_LEN(str) + 30;
6788 sp = RSTRING_PTR(str);
6789 slen = RSTRING_LEN(str);
6791 str_enc = STR_ENC_GET(str);
6792 rb_enc_associate(dest, str_enc);
6799 end0 = beg0 + RSTRING_LEN(pat);
6815 if (mode == FAST_MAP) {
6824 val = rb_hash_aref(hash, key);
6827 str_mod_check(str, sp, slen);
6832 else if (need_backref_str) {
6834 if (need_backref_str < 0) {
6835 need_backref_str = val != repl;
6842 len = beg0 - offset;
6856 if (RSTRING_LEN(str) <= end0)
break;
6857 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6859 offset = end0 +
len;
6861 cp = RSTRING_PTR(str) + offset;
6862 if (offset > RSTRING_LEN(str))
break;
6865 if (mode != FAST_MAP && mode != STR) {
6868 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6873 if (RSTRING_LEN(str) > offset) {
6876 rb_pat_search0(pat, str, last, 1, &match);
6878 str_shared_replace(str, dest);
6906rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6908 str_modify_keep_cr(str);
6909 return str_gsub(argc, argv, str, 1);
6932 return str_gsub(argc, argv, str, 0);
6950 str_modifiable(str);
6951 if (str == str2)
return str;
6955 return str_replace(str, str2);
6970rb_str_clear(
VALUE str)
6974 STR_SET_LEN(str, 0);
6975 RSTRING_PTR(str)[0] = 0;
6976 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6995rb_str_chr(
VALUE str)
7019 pos += RSTRING_LEN(str);
7020 if (pos < 0 || RSTRING_LEN(str) <= pos)
7023 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
7042 long len = RSTRING_LEN(str);
7043 char *
ptr, *head, *left = 0;
7047 if (pos < -
len ||
len <= pos)
7054 char byte = (char)(
NUM2INT(w) & 0xFF);
7056 if (!str_independent(str))
7057 str_make_independent(str);
7058 enc = STR_ENC_GET(str);
7059 head = RSTRING_PTR(str);
7061 if (!STR_EMBED_P(str)) {
7068 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
7076 width = rb_enc_precise_mbclen(left, head+
len, enc);
7078 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
7094str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
7096 long n = RSTRING_LEN(str);
7098 if (beg > n ||
len < 0)
return Qnil;
7101 if (beg < 0)
return Qnil;
7106 if (!empty)
return Qnil;
7110 VALUE str2 = str_subseq(str, beg,
len);
7112 str_enc_copy_direct(str2, str);
7114 if (RSTRING_LEN(str2) == 0) {
7115 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
7149 long beg,
len = RSTRING_LEN(str);
7157 return str_byte_substr(str, beg,
len, TRUE);
7162 return str_byte_substr(str, idx, 1, FALSE);
7209rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
7214 return str_byte_substr(str, beg,
len, TRUE);
7217 return str_byte_aref(str, argv[0]);
7221str_check_beg_len(
VALUE str,
long *beg,
long *
len)
7223 long end, slen = RSTRING_LEN(str);
7226 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
7235 if (*
len > slen - *beg) {
7239 str_ensure_byte_pos(str, *beg);
7240 str_ensure_byte_pos(str, end);
7265rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
7267 long beg,
len, vbeg, vlen;
7272 if (!(argc == 2 || argc == 3 || argc == 5)) {
7273 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
7277 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
7278 rb_builtin_class_name(argv[0]));
7285 vlen = RSTRING_LEN(val);
7290 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
7291 rb_builtin_class_name(argv[2]));
7303 vlen = RSTRING_LEN(val);
7311 str_check_beg_len(str, &beg, &
len);
7312 str_check_beg_len(val, &vbeg, &vlen);
7313 str_modify_keep_cr(str);
7316 rb_enc_associate(str, rb_enc_check(str, val));
7319 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
7337rb_str_reverse(
VALUE str)
7344 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
7345 enc = STR_ENC_GET(str);
7351 if (RSTRING_LEN(str) > 1) {
7352 if (single_byte_optimizable(str)) {
7359 int clen = rb_enc_fast_mbclen(s, e, enc);
7367 cr = rb_enc_asciicompat(enc) ?
7370 int clen = rb_enc_mbclen(s, e, enc);
7379 STR_SET_LEN(rev, RSTRING_LEN(str));
7380 str_enc_copy_direct(rev, str);
7400rb_str_reverse_bang(
VALUE str)
7402 if (RSTRING_LEN(str) > 1) {
7403 if (single_byte_optimizable(str)) {
7406 str_modify_keep_cr(str);
7407 s = RSTRING_PTR(str);
7416 str_shared_replace(str, rb_str_reverse(str));
7420 str_modify_keep_cr(str);
7445 i = rb_str_index(str, arg, 0);
7447 return RBOOL(i != -1);
7489 rb_raise(rb_eArgError,
"invalid radix %d", base);
7491 return rb_str_to_inum(str, base, FALSE);
7515rb_str_to_f(
VALUE str)
7530rb_str_to_s(
VALUE str)
7542 char s[RUBY_MAX_CHAR_LEN];
7543 int n = rb_enc_codelen(c, enc);
7545 rb_enc_mbcput(c, s, enc);
/*
 * Size handed to snprintf() when a single character is rendered as an
 * escape sequence (formats such as "\\x{%X}" and "\\u{%X}" with an
 * unsigned int argument).  The destination buffers in this file are
 * declared as char buf[CHAR_ESC_LEN + 1].
 * NOTE(review): 13 presumably covers "\x{" + 8 hex digits + "}" + NUL for
 * a 32-bit unsigned int -- confirm.
 */
7550#define CHAR_ESC_LEN 13
7553rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7555 char buf[CHAR_ESC_LEN + 1];
7563 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7565 else if (c < 0x10000) {
7566 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7569 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7574 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7577 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7580 l = (int)strlen(buf);
7586ruby_escaped_char(
int c)
7589 case '\0':
return "\\0";
7590 case '\n':
return "\\n";
7591 case '\r':
return "\\r";
7592 case '\t':
return "\\t";
7593 case '\f':
return "\\f";
7594 case '\013':
return "\\v";
7595 case '\010':
return "\\b";
7596 case '\007':
return "\\a";
7597 case '\033':
return "\\e";
7598 case '\x7f':
return "\\c?";
7604rb_str_escape(
VALUE str)
7608 const char *p = RSTRING_PTR(str);
7610 const char *prev = p;
7611 char buf[CHAR_ESC_LEN + 1];
7613 int unicode_p = rb_enc_unicode_p(enc);
7614 int asciicompat = rb_enc_asciicompat(enc);
7619 int n = rb_enc_precise_mbclen(p, pend, enc);
7621 if (p > prev) str_buf_cat(result, prev, p - prev);
7622 n = rb_enc_mbminlen(enc);
7624 n = (int)(pend - p);
7626 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7627 str_buf_cat(result, buf, strlen(buf));
7633 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7635 cc = ruby_escaped_char(c);
7637 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7638 str_buf_cat(result, cc, strlen(cc));
7641 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7644 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7645 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7649 if (p > prev) str_buf_cat(result, prev, p - prev);
7673 const char *p, *pend, *prev;
7674 char buf[CHAR_ESC_LEN + 1];
7676 rb_encoding *resenc = rb_default_internal_encoding();
7677 int unicode_p = rb_enc_unicode_p(enc);
7678 int asciicompat = rb_enc_asciicompat(enc);
7680 if (resenc == NULL) resenc = rb_default_external_encoding();
7681 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7682 rb_enc_associate(result, resenc);
7683 str_buf_cat2(result,
"\"");
7691 n = rb_enc_precise_mbclen(p, pend, enc);
7693 if (p > prev) str_buf_cat(result, prev, p - prev);
7694 n = rb_enc_mbminlen(enc);
7696 n = (int)(pend - p);
7698 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7699 str_buf_cat(result, buf, strlen(buf));
7705 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7707 if ((asciicompat || unicode_p) &&
7708 (c ==
'"'|| c ==
'\\' ||
7713 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7714 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7715 str_buf_cat2(result,
"\\");
7716 if (asciicompat || enc == resenc) {
7722 case '\n': cc =
'n';
break;
7723 case '\r': cc =
'r';
break;
7724 case '\t': cc =
't';
break;
7725 case '\f': cc =
'f';
break;
7726 case '\013': cc =
'v';
break;
7727 case '\010': cc =
'b';
break;
7728 case '\007': cc =
'a';
break;
7729 case 033: cc =
'e';
break;
7730 default: cc = 0;
break;
7733 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7736 str_buf_cat(result, buf, 2);
7749 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7753 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7754 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7759 if (p > prev) str_buf_cat(result, prev, p - prev);
7760 str_buf_cat2(result,
"\"");
/*
 * True when p is still before e and the byte at p is '$', '@' or '{'.
 * The dump code applies it to the byte that follows a '#' to decide
 * whether an extra backslash has to be counted/emitted.
 * Note: p is evaluated more than once, so pass a side-effect-free
 * expression.
 */
7765#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7785 int encidx = rb_enc_get_index(str);
7788 const char *p, *pend;
7791 int u8 = (encidx == rb_utf8_encindex());
7792 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7795 if (!rb_enc_asciicompat(enc)) {
7797 len += strlen(enc->name);
7800 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7803 unsigned char c = *p++;
7806 case '"':
case '\\':
7807 case '\n':
case '\r':
7808 case '\t':
case '\f':
7809 case '\013':
case '\010':
case '\007':
case '\033':
7814 clen = IS_EVSTR(p, pend) ? 2 : 1;
7822 if (u8 && c > 0x7F) {
7823 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7825 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7828 else if (cc <= 0xFFFFF)
7841 if (clen > LONG_MAX -
len) {
7848 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7849 q = RSTRING_PTR(result); qend = q +
len + 1;
7853 unsigned char c = *p++;
7855 if (c ==
'"' || c ==
'\\') {
7859 else if (c ==
'#') {
7860 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7863 else if (c ==
'\n') {
7867 else if (c ==
'\r') {
7871 else if (c ==
'\t') {
7875 else if (c ==
'\f') {
7879 else if (c ==
'\013') {
7883 else if (c ==
'\010') {
7887 else if (c ==
'\007') {
7891 else if (c ==
'\033') {
7901 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7903 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7906 snprintf(q, qend-q,
"u%04X", cc);
7908 snprintf(q, qend-q,
"u{%X}", cc);
7913 snprintf(q, qend-q,
"x%02X", c);
7919 if (!rb_enc_asciicompat(enc)) {
7920 snprintf(q, qend-q, nonascii_suffix, enc->name);
7921 encidx = rb_ascii8bit_encindex();
7924 rb_enc_associate_index(result, encidx);
7930unescape_ascii(
unsigned int c)
7954undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7956 const char *s = *ss;
7960 unsigned char buf[6];
7978 *buf = unescape_ascii(*s);
7990 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7991 if (*penc != enc_utf8) {
7993 rb_enc_associate(undumped, enc_utf8);
8010 if (hexlen == 0 || hexlen > 6) {
8016 if (0xd800 <= c && c <= 0xdfff) {
8019 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
8029 if (0xd800 <= c && c <= 0xdfff) {
8032 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
/*
 * Forward declaration, placed ahead of str_undump which calls this
 * function and compares its result against Qfalse.
 */
8060static VALUE rb_str_is_ascii_only_p(
VALUE str);
8078str_undump(
VALUE str)
8080 const char *s = RSTRING_PTR(str);
8083 VALUE undumped = rb_enc_str_new(s, 0L, enc);
8085 bool binary =
false;
8089 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
8092 if (!str_null_check(str, &w)) {
8095 if (RSTRING_LEN(str) < 2)
goto invalid_format;
8096 if (*s !=
'"')
goto invalid_format;
8114 static const char force_encoding_suffix[] =
".force_encoding(\"";
8115 static const char dup_suffix[] =
".dup";
8116 const char *encname;
8121 size =
sizeof(dup_suffix) - 1;
8122 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
8124 size =
sizeof(force_encoding_suffix) - 1;
8125 if (s_end - s <= size)
goto invalid_format;
8126 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
8130 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
8134 s = memchr(s,
'"', s_end-s);
8136 if (!s)
goto invalid_format;
8137 if (s_end - s != 2)
goto invalid_format;
8138 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
8140 encidx = rb_enc_find_index2(encname, (
long)size);
8144 rb_enc_associate_index(undumped, encidx);
8154 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
8165 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
8171 if (rb_enc_dummy_p(enc)) {
8178str_true_enc(
VALUE str)
8181 rb_str_check_dummy_enc(enc);
8185static OnigCaseFoldType
8186check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
8191 rb_raise(rb_eArgError,
"too many options");
8192 if (argv[0]==sym_turkic) {
8193 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
8195 if (argv[1]==sym_lithuanian)
8196 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
8198 rb_raise(rb_eArgError,
"invalid second option");
8201 else if (argv[0]==sym_lithuanian) {
8202 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
8204 if (argv[1]==sym_turkic)
8205 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
8207 rb_raise(rb_eArgError,
"invalid second option");
8211 rb_raise(rb_eArgError,
"too many options");
8212 else if (argv[0]==sym_ascii)
8213 flags |= ONIGENC_CASE_ASCII_ONLY;
8214 else if (argv[0]==sym_fold) {
8215 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
8216 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
8218 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
8221 rb_raise(rb_eArgError,
"invalid option");
8228 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/*
 * Fixed number of extra bytes added to the capacity of every case-mapping
 * buffer, on top of the size derived from the remaining source length.
 */
8234#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/*
 * Debug switch for the case-mapping code; defaults to 0 unless it was
 * already defined.  When non-zero, the case-mapping functions print
 * buffer diagnostics to stderr and rb_str_ascii_casemap reports a length
 * mismatch as an ArgumentError.
 */
8235#ifndef CASEMAP_DEBUG
8236# define CASEMAP_DEBUG 0
8244 OnigUChar space[FLEX_ARY_LEN];
8248mapping_buffer_free(
void *p)
8252 while (current_buffer) {
8253 previous_buffer = current_buffer;
8254 current_buffer = current_buffer->next;
8255 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
8261 {0, mapping_buffer_free,},
8262 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
8270 const OnigUChar *source_current, *source_end;
8271 int target_length = 0;
8272 VALUE buffer_anchor;
8275 size_t buffer_count = 0;
8276 int buffer_length_or_invalid;
8278 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
8280 source_current = (OnigUChar*)RSTRING_PTR(source);
8285 while (source_current < source_end) {
8287 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
8288 if (CASEMAP_DEBUG) {
8289 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
8292 *pre_buffer = current_buffer;
8293 pre_buffer = ¤t_buffer->next;
8294 current_buffer->next = NULL;
8295 current_buffer->capa =
capa;
8296 buffer_length_or_invalid = enc->case_map(flags,
8297 &source_current, source_end,
8298 current_buffer->space,
8299 current_buffer->space+current_buffer->capa,
8301 if (buffer_length_or_invalid < 0) {
8302 current_buffer =
DATA_PTR(buffer_anchor);
8304 mapping_buffer_free(current_buffer);
8305 rb_raise(rb_eArgError,
"input string invalid");
8307 target_length += current_buffer->used = buffer_length_or_invalid;
8309 if (CASEMAP_DEBUG) {
8310 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
8313 if (buffer_count==1) {
8314 target =
rb_str_new((
const char*)current_buffer->space, target_length);
8317 char *target_current;
8320 target_current = RSTRING_PTR(target);
8321 current_buffer =
DATA_PTR(buffer_anchor);
8322 while (current_buffer) {
8323 memcpy(target_current, current_buffer->space, current_buffer->used);
8324 target_current += current_buffer->used;
8325 current_buffer = current_buffer->next;
8328 current_buffer =
DATA_PTR(buffer_anchor);
8330 mapping_buffer_free(current_buffer);
8335 str_enc_copy_direct(target, source);
8344 const OnigUChar *source_current, *source_end;
8345 OnigUChar *target_current, *target_end;
8346 long old_length = RSTRING_LEN(source);
8347 int length_or_invalid;
8349 if (old_length == 0)
return Qnil;
8351 source_current = (OnigUChar*)RSTRING_PTR(source);
8353 if (source == target) {
8354 target_current = (OnigUChar*)source_current;
8355 target_end = (OnigUChar*)source_end;
8358 target_current = (OnigUChar*)RSTRING_PTR(target);
8362 length_or_invalid = onigenc_ascii_only_case_map(flags,
8363 &source_current, source_end,
8364 target_current, target_end, enc);
8365 if (length_or_invalid < 0)
8366 rb_raise(rb_eArgError,
"input string invalid");
8367 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
8368 fprintf(stderr,
"problem with rb_str_ascii_casemap"
8369 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
8370 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
8371 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
8374 str_enc_copy(target, source);
8380upcase_single(
VALUE str)
8382 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8383 bool modified =
false;
8386 unsigned int c = *(
unsigned char*)s;
8388 if (
'a' <= c && c <=
'z') {
8389 *s =
'A' + (c -
'a');
8417rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
8420 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8422 flags = check_case_options(argc, argv, flags);
8423 str_modify_keep_cr(str);
8424 enc = str_true_enc(str);
8425 if (case_option_single_p(flags, enc, str)) {
8426 if (upcase_single(str))
8427 flags |= ONIGENC_CASE_MODIFIED;
8429 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8430 rb_str_ascii_casemap(str, str, &flags, enc);
8432 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8434 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8456rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
8459 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8462 flags = check_case_options(argc, argv, flags);
8463 enc = str_true_enc(str);
8464 if (case_option_single_p(flags, enc, str)) {
8465 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8466 str_enc_copy_direct(ret, str);
8469 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8471 rb_str_ascii_casemap(str, ret, &flags, enc);
8474 ret = rb_str_casemap(str, &flags, enc);
8481downcase_single(
VALUE str)
8483 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8484 bool modified =
false;
8487 unsigned int c = *(
unsigned char*)s;
8489 if (
'A' <= c && c <=
'Z') {
8490 *s =
'a' + (c -
'A');
8519rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8522 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8524 flags = check_case_options(argc, argv, flags);
8525 str_modify_keep_cr(str);
8526 enc = str_true_enc(str);
8527 if (case_option_single_p(flags, enc, str)) {
8528 if (downcase_single(str))
8529 flags |= ONIGENC_CASE_MODIFIED;
8531 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8532 rb_str_ascii_casemap(str, str, &flags, enc);
8534 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8536 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8558rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8561 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8564 flags = check_case_options(argc, argv, flags);
8565 enc = str_true_enc(str);
8566 if (case_option_single_p(flags, enc, str)) {
8567 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8568 str_enc_copy_direct(ret, str);
8569 downcase_single(ret);
8571 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8573 rb_str_ascii_casemap(str, ret, &flags, enc);
8576 ret = rb_str_casemap(str, &flags, enc);
8604rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8607 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8609 flags = check_case_options(argc, argv, flags);
8610 str_modify_keep_cr(str);
8611 enc = str_true_enc(str);
8612 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8613 if (flags&ONIGENC_CASE_ASCII_ONLY)
8614 rb_str_ascii_casemap(str, str, &flags, enc);
8616 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8618 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8642rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8645 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8648 flags = check_case_options(argc, argv, flags);
8649 enc = str_true_enc(str);
8650 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8651 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8653 rb_str_ascii_casemap(str, ret, &flags, enc);
8656 ret = rb_str_casemap(str, &flags, enc);
8683rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8686 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8688 flags = check_case_options(argc, argv, flags);
8689 str_modify_keep_cr(str);
8690 enc = str_true_enc(str);
8691 if (flags&ONIGENC_CASE_ASCII_ONLY)
8692 rb_str_ascii_casemap(str, str, &flags, enc);
8694 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8696 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8720rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8723 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8726 flags = check_case_options(argc, argv, flags);
8727 enc = str_true_enc(str);
8728 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8729 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8731 rb_str_ascii_casemap(str, ret, &flags, enc);
8734 ret = rb_str_casemap(str, &flags, enc);
/* Shorthand for a pointer to unsigned bytes. */
8739typedef unsigned char *USTR;
8743 unsigned int now, max;
8755 if (t->p == t->pend)
return -1;
8756 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8759 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8761 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8763 if (t->p < t->pend) {
8764 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8767 if (t->now < 0x80 && c < 0x80) {
8768 rb_raise(rb_eArgError,
8769 "invalid range \"%c-%c\" in string transliteration",
8773 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8777 else if (t->now < c) {
8786 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8787 if (t->now == t->max) {
8792 if (t->now < t->max) {
8808 const unsigned int errc = -1;
8809 unsigned int trans[256];
8811 struct tr trsrc, trrepl;
8813 unsigned int c, c0, last = 0;
8814 int modify = 0, i, l;
8815 unsigned char *s, *send;
8817 int singlebyte = single_byte_optimizable(str);
/*
 * Updates the variable `cr` of the expanding scope: if cr is currently
 * ENC_CODERANGE_7BIT and c is not ASCII (per rb_isascii), cr becomes
 * ENC_CODERANGE_VALID; otherwise cr is left untouched.  The expression is
 * cast to void, so the macro is only usable as a statement.
 */
8821#define CHECK_IF_ASCII(c) \
8822 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8823 (cr = ENC_CODERANGE_VALID) : 0)
8827 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8828 if (RSTRING_LEN(repl) == 0) {
8829 return rb_str_delete_bang(1, &src, str);
8833 e1 = rb_enc_check(str, src);
8834 e2 = rb_enc_check(str, repl);
8839 enc = rb_enc_check(src, repl);
8841 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8842 if (RSTRING_LEN(src) > 1 &&
8843 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8844 trsrc.p + l < trsrc.pend) {
8848 trrepl.p = RSTRING_PTR(repl);
8849 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8850 trsrc.gen = trrepl.gen = 0;
8851 trsrc.now = trrepl.now = 0;
8852 trsrc.max = trrepl.max = 0;
8855 for (i=0; i<256; i++) {
8858 while ((c = trnext(&trsrc, enc)) != errc) {
8863 if (!hash) hash = rb_hash_new();
8867 while ((c = trnext(&trrepl, enc)) != errc)
8870 for (i=0; i<256; i++) {
8871 if (trans[i] != errc) {
8879 for (i=0; i<256; i++) {
8882 while ((c = trnext(&trsrc, enc)) != errc) {
8883 r = trnext(&trrepl, enc);
8884 if (r == errc) r = trrepl.now;
8887 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8890 if (!hash) hash = rb_hash_new();
8898 str_modify_keep_cr(str);
8899 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8900 termlen = rb_enc_mbminlen(enc);
8903 long offset, max = RSTRING_LEN(str);
8904 unsigned int save = -1;
8905 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8910 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8913 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8916 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8918 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8927 if (cflag) c = last;
8930 else if (cflag) c = errc;
8936 if (c != (
unsigned int)-1) {
8942 tlen = rb_enc_codelen(c, enc);
8948 if (enc != e1) may_modify = 1;
8950 if ((offset = t - buf) + tlen > max) {
8951 size_t MAYBE_UNUSED(old) = max + termlen;
8952 max = offset + tlen + (send - s);
8953 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8956 rb_enc_mbcput(c, t, enc);
8957 if (may_modify && memcmp(s, t, tlen) != 0) {
8963 if (!STR_EMBED_P(str)) {
8964 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8966 TERM_FILL((
char *)t, termlen);
8967 RSTRING(str)->as.heap.ptr = (
char *)buf;
8968 STR_SET_LEN(str, t - buf);
8969 STR_SET_NOEMBED(str);
8970 RSTRING(str)->as.heap.aux.capa = max;
8974 c = (
unsigned char)*s;
8975 if (trans[c] != errc) {
8992 long offset, max = (long)((send - s) * 1.2);
8993 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8998 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
9001 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
9004 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
9006 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
9014 if (cflag) c = last;
9017 else if (cflag) c = errc;
9021 c = cflag ? last : errc;
9024 tlen = rb_enc_codelen(c, enc);
9029 if (enc != e1) may_modify = 1;
9031 if ((offset = t - buf) + tlen > max) {
9032 size_t MAYBE_UNUSED(old) = max + termlen;
9033 max = offset + tlen + (long)((send - s) * 1.2);
9034 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
9038 rb_enc_mbcput(c, t, enc);
9039 if (may_modify && memcmp(s, t, tlen) != 0) {
9047 if (!STR_EMBED_P(str)) {
9048 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
9050 TERM_FILL((
char *)t, termlen);
9051 RSTRING(str)->as.heap.ptr = (
char *)buf;
9052 STR_SET_LEN(str, t - buf);
9053 STR_SET_NOEMBED(str);
9054 RSTRING(str)->as.heap.aux.capa = max;
9060 rb_enc_associate(str, enc);
9079 return tr_trans(str, src, repl, 0);
9126 tr_trans(str, src, repl, 0);
/* Number of entries needed to index a table by any unsigned char value. */
9130#define TR_TABLE_MAX (UCHAR_MAX+1)
/*
 * Full table size: one slot per byte value plus one trailing slot at index
 * TR_TABLE_MAX.  tr_setup_table stores its negation flag (cflag) in that
 * slot, and tr_find consults it for code points >= TR_TABLE_MAX.
 */
9131#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
9133tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
9136 const unsigned int errc = -1;
9137 char buf[TR_TABLE_MAX];
9140 VALUE table = 0, ptable = 0;
9141 int i, l, cflag = 0;
9143 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
9144 tr.gen =
tr.now =
tr.max = 0;
9146 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
9151 for (i=0; i<TR_TABLE_MAX; i++) {
9154 stable[TR_TABLE_MAX] = cflag;
9156 else if (stable[TR_TABLE_MAX] && !cflag) {
9157 stable[TR_TABLE_MAX] = 0;
9159 for (i=0; i<TR_TABLE_MAX; i++) {
9163 while ((c = trnext(&
tr, enc)) != errc) {
9164 if (c < TR_TABLE_MAX) {
9165 buf[(
unsigned char)c] = !cflag;
9170 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
9173 table = ptable ? ptable : rb_hash_new();
9177 table = rb_hash_new();
9182 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
9183 rb_hash_aset(table, key,
Qtrue);
9187 for (i=0; i<TR_TABLE_MAX; i++) {
9188 stable[i] = stable[i] && buf[i];
9190 if (!table && !cflag) {
9197tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
9199 if (c < TR_TABLE_MAX) {
9200 return table[c] != 0;
9206 if (!
NIL_P(rb_hash_lookup(del, v)) &&
9207 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
9211 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
9214 return table[TR_TABLE_MAX] ? TRUE : FALSE;
9228rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
9230 char squeez[TR_TABLE_SIZE];
9233 VALUE del = 0, nodel = 0;
9235 int i, ascompat, cr;
9237 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
9239 for (i=0; i<argc; i++) {
9243 enc = rb_enc_check(str, s);
9244 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
9247 str_modify_keep_cr(str);
9248 ascompat = rb_enc_asciicompat(enc);
9249 s = t = RSTRING_PTR(str);
9256 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9267 c = rb_enc_codepoint_len(s, send, &clen, enc);
9269 if (tr_find(c, squeez, del, nodel)) {
9273 if (t != s) rb_enc_mbcput(c, t, enc);
9280 TERM_FILL(t, TERM_LEN(str));
9281 STR_SET_LEN(str, t - RSTRING_PTR(str));
9284 if (modify)
return str;
9304rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
9307 rb_str_delete_bang(argc, argv, str);
9321rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
9323 char squeez[TR_TABLE_SIZE];
9325 VALUE del = 0, nodel = 0;
9326 unsigned char *s, *send, *t;
9328 int ascompat, singlebyte = single_byte_optimizable(str);
9332 enc = STR_ENC_GET(str);
9335 for (i=0; i<argc; i++) {
9339 enc = rb_enc_check(str, s);
9340 if (singlebyte && !single_byte_optimizable(s))
9342 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
9346 str_modify_keep_cr(str);
9347 s = t = (
unsigned char *)RSTRING_PTR(str);
9348 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
9351 ascompat = rb_enc_asciicompat(enc);
9355 unsigned int c = *s++;
9356 if (c != save || (argc > 0 && !squeez[c])) {
9366 if (ascompat && (c = *s) < 0x80) {
9367 if (c != save || (argc > 0 && !squeez[c])) {
9373 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
9375 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
9376 if (t != s) rb_enc_mbcput(c, t, enc);
9385 TERM_FILL((
char *)t, TERM_LEN(str));
9386 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
9387 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
9391 if (modify)
return str;
9414rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
9417 rb_str_squeeze_bang(argc, argv, str);
9435 return tr_trans(str, src, repl, 1);
9458 tr_trans(str, src, repl, 1);
9487rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9489 char table[TR_TABLE_SIZE];
9491 VALUE del = 0, nodel = 0, tstr;
9501 enc = rb_enc_check(str, tstr);
9504 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9505 (ptstr = RSTRING_PTR(tstr),
9506 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9507 !is_broken_string(str)) {
9509 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9511 s = RSTRING_PTR(str);
9512 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9515 if (*(
unsigned char*)s++ == c) n++;
9521 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9522 for (i=1; i<argc; i++) {
9525 enc = rb_enc_check(str, tstr);
9526 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9529 s = RSTRING_PTR(str);
9530 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9532 ascompat = rb_enc_asciicompat(enc);
9536 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9544 c = rb_enc_codepoint_len(s, send, &clen, enc);
9545 if (tr_find(c, table, del, nodel)) {
9556rb_fs_check(
VALUE val)
9560 if (
NIL_P(val))
return 0;
9565static const char isspacetable[256] = {
9566 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9568 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9580 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9584#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9587split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9589 if (empty_count >= 0 &&
len == 0) {
9590 return empty_count + 1;
9592 if (empty_count > 0) {
9597 }
while (--empty_count > 0);
9601 rb_yield(str_new_empty_String(str));
9602 }
while (--empty_count > 0);
9616 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9620literal_split_pattern(
VALUE spat, split_type_t default_type)
9628 return SPLIT_TYPE_CHARS;
9630 else if (rb_enc_asciicompat(enc)) {
9631 if (
len == 1 && ptr[0] ==
' ') {
9632 return SPLIT_TYPE_AWK;
9637 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9638 return SPLIT_TYPE_AWK;
9641 return default_type;
9654rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9659 split_type_t split_type;
9660 long beg, end, i = 0, empty_count = -1;
9665 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9667 if (lim <= 0) limit =
Qnil;
9668 else if (lim == 1) {
9669 if (RSTRING_LEN(str) == 0)
9680 if (
NIL_P(limit) && !lim) empty_count = 0;
9682 enc = STR_ENC_GET(str);
9683 split_type = SPLIT_TYPE_REGEXP;
9685 spat = get_pat_quoted(spat, 0);
9687 else if (
NIL_P(spat = rb_fs)) {
9688 split_type = SPLIT_TYPE_AWK;
9690 else if (!(spat = rb_fs_check(spat))) {
9691 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9696 if (split_type != SPLIT_TYPE_AWK) {
9701 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9702 if (split_type == SPLIT_TYPE_AWK) {
9704 split_type = SPLIT_TYPE_STRING;
9709 mustnot_broken(spat);
9710 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9718#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
9721 char *ptr = RSTRING_PTR(str);
9723 if (split_type == SPLIT_TYPE_AWK) {
9730 if (is_ascii_string(str)) {
9731 while (ptr < eptr) {
9732 c = (
unsigned char)*ptr++;
9734 if (ascii_isspace(c)) {
9740 if (!
NIL_P(limit) && lim <= i)
break;
9743 else if (ascii_isspace(c)) {
9744 SPLIT_STR(beg, end-beg);
9747 if (!
NIL_P(limit)) ++i;
9755 while (ptr < eptr) {
9758 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9767 if (!
NIL_P(limit) && lim <= i)
break;
9771 SPLIT_STR(beg, end-beg);
9774 if (!
NIL_P(limit)) ++i;
9782 else if (split_type == SPLIT_TYPE_STRING) {
9783 char *str_start = ptr;
9784 char *substr_start = ptr;
9785 char *sptr = RSTRING_PTR(spat);
9786 long slen = RSTRING_LEN(spat);
9789 mustnot_broken(str);
9790 enc = rb_enc_check(str, spat);
9791 while (ptr < eptr &&
9792 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9795 if (t != ptr + end) {
9799 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9802 if (!
NIL_P(limit) && lim <= ++i)
break;
9804 beg = ptr - str_start;
9806 else if (split_type == SPLIT_TYPE_CHARS) {
9807 char *str_start = ptr;
9811 mustnot_broken(str);
9812 enc = rb_enc_get(str);
9813 while (ptr < eptr &&
9814 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9815 SPLIT_STR(ptr - str_start, n);
9817 if (!
NIL_P(limit) && lim <= ++i)
break;
9819 beg = ptr - str_start;
9823 long len = RSTRING_LEN(str);
9831 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9836 if (start == end && BEG(0) == END(0)) {
9841 else if (last_null == 1) {
9842 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9849 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9855 SPLIT_STR(beg, end-beg);
9856 beg = start = END(0);
9860 for (idx=1; idx < regs->num_regs; idx++) {
9861 if (BEG(idx) == -1)
continue;
9862 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9864 if (!
NIL_P(limit) && lim <= ++i)
break;
9866 if (match) rb_match_unbusy(match);
9868 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9869 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9872 return result ? result : str;
9882 return rb_str_split_m(1, &sep, str);
9885#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9900#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9903chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9905 const char *prev = rb_enc_prev_char(p, e, e, enc);
9908 prev = rb_enc_prev_char(p, e, e, enc);
9909 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9921 RSTRING_LEN(rs) != 1 ||
9922 RSTRING_PTR(rs)[0] !=
'\n')) {
9928#define rb_rs get_rs()
9935 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9936 long pos,
len, rslen;
9942 static ID keywords[1];
9947 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9951 if (!ENUM_ELEM(ary, str)) {
9959 if (!RSTRING_LEN(str))
goto end;
9961 ptr = subptr = RSTRING_PTR(str);
9963 len = RSTRING_LEN(str);
9965 rslen = RSTRING_LEN(rs);
9968 enc = rb_enc_get(str);
9970 enc = rb_enc_check(str, rs);
9975 const char *eol = NULL;
9977 while (subend < pend) {
9978 long chomp_rslen = 0;
9980 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9982 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9984 if (eol == subend)
break;
9988 chomp_rslen = -rslen;
9992 if (!subptr) subptr = subend;
9996 }
while (subend < pend);
9998 if (rslen == 0) chomp_rslen = 0;
10000 subend - subptr + (chomp ? chomp_rslen : rslen));
10001 if (ENUM_ELEM(ary, line)) {
10002 str_mod_check(str, ptr,
len);
10004 subptr = eol = NULL;
10009 rsptr = RSTRING_PTR(rs);
10010 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
10019 rsptr = RSTRING_PTR(rs);
10020 rslen = RSTRING_LEN(rs);
10023 while (subptr < pend) {
10024 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
10025 if (pos < 0)
break;
10026 hit = subptr + pos;
10028 if (hit != adjusted) {
10032 subend = hit += rslen;
10035 subend = chomp_newline(subptr, subend, enc);
10042 if (ENUM_ELEM(ary, line)) {
10043 str_mod_check(str, ptr,
len);
10048 if (subptr != pend) {
10051 pend = chomp_newline(subptr, pend, enc);
10053 else if (pend - subptr >= rslen &&
10054 memcmp(pend - rslen, rsptr, rslen) == 0) {
10059 ENUM_ELEM(ary, line);
10080rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
10083 return rb_str_enumerate_lines(argc, argv, str, 0);
10096rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
10098 VALUE ary = WANTARRAY(
"lines", 0);
10099 return rb_str_enumerate_lines(argc, argv, str, ary);
10105 return LONG2FIX(RSTRING_LEN(str));
10113 for (i=0; i<RSTRING_LEN(str); i++) {
10114 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
10132rb_str_each_byte(
VALUE str)
10135 return rb_str_enumerate_bytes(str, 0);
10147rb_str_bytes(
VALUE str)
10149 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
10150 return rb_str_enumerate_bytes(str, ary);
10168 ptr = RSTRING_PTR(str);
10169 len = RSTRING_LEN(str);
10170 enc = rb_enc_get(str);
10173 for (i = 0; i <
len; i += n) {
10174 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
10179 for (i = 0; i <
len; i += n) {
10180 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
10201rb_str_each_char(
VALUE str)
10204 return rb_str_enumerate_chars(str, 0);
10216rb_str_chars(
VALUE str)
10219 return rb_str_enumerate_chars(str, ary);
10223rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
10228 const char *ptr, *end;
10231 if (single_byte_optimizable(str))
10232 return rb_str_enumerate_bytes(str, ary);
10235 ptr = RSTRING_PTR(str);
10237 enc = STR_ENC_GET(str);
10239 while (ptr < end) {
10240 c = rb_enc_codepoint_len(ptr, end, &n, enc);
10261rb_str_each_codepoint(
VALUE str)
10264 return rb_str_enumerate_codepoints(str, 0);
10276rb_str_codepoints(
VALUE str)
10279 return rb_str_enumerate_codepoints(str, ary);
10285 int encidx = rb_enc_to_index(enc);
10287 const OnigUChar source_ascii[] =
"\\X";
10288 const OnigUChar *source = source_ascii;
10289 size_t source_len =
sizeof(source_ascii) - 1;
10292#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
10293#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
10294#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
10295#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
10296#define CASE_UTF(e) \
10297 case ENCINDEX_UTF_##e: { \
10298 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
10299 source = source_UTF_##e; \
10300 source_len = sizeof(source_UTF_##e); \
10303 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
10311 regex_t *reg_grapheme_cluster;
10313 int r = onig_new(®_grapheme_cluster, source, source + source_len,
10314 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
10316 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
10317 onig_error_code_to_str(message, r, &einfo);
10318 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
10321 return reg_grapheme_cluster;
10327 int encidx = rb_enc_to_index(enc);
10328 static regex_t *reg_grapheme_cluster_utf8 = NULL;
10330 if (encidx == rb_utf8_encindex()) {
10331 if (!reg_grapheme_cluster_utf8) {
10332 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
10335 return reg_grapheme_cluster_utf8;
10344 size_t grapheme_cluster_count = 0;
10346 const char *ptr, *end;
10348 if (!rb_enc_unicode_p(enc)) {
10352 bool cached_reg_grapheme_cluster =
true;
10353 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
10354 if (!reg_grapheme_cluster) {
10355 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
10356 cached_reg_grapheme_cluster =
false;
10359 ptr = RSTRING_PTR(str);
10362 while (ptr < end) {
10363 OnigPosition
len = onig_match(reg_grapheme_cluster,
10364 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
10365 (
const OnigUChar *)ptr, NULL, 0);
10366 if (
len <= 0)
break;
10367 grapheme_cluster_count++;
10371 if (!cached_reg_grapheme_cluster) {
10372 onig_free(reg_grapheme_cluster);
10375 return SIZET2NUM(grapheme_cluster_count);
10379rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
10383 const char *ptr0, *ptr, *end;
10385 if (!rb_enc_unicode_p(enc)) {
10386 return rb_str_enumerate_chars(str, ary);
10391 bool cached_reg_grapheme_cluster =
true;
10392 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
10393 if (!reg_grapheme_cluster) {
10394 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
10395 cached_reg_grapheme_cluster =
false;
10398 ptr0 = ptr = RSTRING_PTR(str);
10401 while (ptr < end) {
10402 OnigPosition
len = onig_match(reg_grapheme_cluster,
10403 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
10404 (
const OnigUChar *)ptr, NULL, 0);
10405 if (
len <= 0)
break;
10410 if (!cached_reg_grapheme_cluster) {
10411 onig_free(reg_grapheme_cluster);
10431rb_str_each_grapheme_cluster(
VALUE str)
10434 return rb_str_enumerate_grapheme_clusters(str, 0);
10446rb_str_grapheme_clusters(
VALUE str)
10449 return rb_str_enumerate_grapheme_clusters(str, ary);
10453chopped_length(
VALUE str)
10456 const char *p, *p2, *beg, *end;
10458 beg = RSTRING_PTR(str);
10459 end = beg + RSTRING_LEN(str);
10460 if (beg >= end)
return 0;
10461 p = rb_enc_prev_char(beg, end, end, enc);
10463 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10464 p2 = rb_enc_prev_char(beg, p, end, enc);
10465 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10481rb_str_chop_bang(
VALUE str)
10483 str_modify_keep_cr(str);
10484 if (RSTRING_LEN(str) > 0) {
10486 len = chopped_length(str);
10487 STR_SET_LEN(str,
len);
10488 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10507rb_str_chop(
VALUE str)
10513smart_chomp(
VALUE str,
const char *e,
const char *p)
10516 if (rb_enc_mbminlen(enc) > 1) {
10521 pp = e - rb_enc_mbminlen(enc);
10524 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10532 if (--e > p && *(e-1) ==
'\r') {
10549 char *pp, *e, *rsptr;
10551 char *
const p = RSTRING_PTR(str);
10552 long len = RSTRING_LEN(str);
10554 if (
len == 0)
return 0;
10557 return smart_chomp(str, e, p);
10560 enc = rb_enc_get(str);
10563 if (rb_enc_mbminlen(enc) > 1) {
10568 pp -= rb_enc_mbminlen(enc);
10571 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10578 while (e > p && *(e-1) ==
'\n') {
10580 if (e > p && *(e-1) ==
'\r')
10586 if (rslen >
len)
return len;
10588 enc = rb_enc_get(rs);
10589 newline = rsptr[rslen-1];
10590 if (rslen == rb_enc_mbminlen(enc)) {
10592 if (newline ==
'\n')
10593 return smart_chomp(str, e, p);
10597 return smart_chomp(str, e, p);
10601 enc = rb_enc_check(str, rs);
10602 if (is_broken_string(rs)) {
10606 if (p[
len-1] == newline &&
10608 memcmp(rsptr, pp, rslen) == 0)) {
10609 if (at_char_boundary(p, pp, e, enc))
10610 return len - rslen;
10622chomp_rs(
int argc,
const VALUE *argv)
10626 VALUE rs = argv[0];
10638 long olen = RSTRING_LEN(str);
10639 long len = chompped_length(str, rs);
10640 if (
len >= olen)
return Qnil;
10641 str_modify_keep_cr(str);
10642 STR_SET_LEN(str,
len);
10643 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10660rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10663 str_modifiable(str);
10664 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10665 rs = chomp_rs(argc, argv);
10667 return rb_str_chomp_string(str, rs);
10680rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10682 VALUE rs = chomp_rs(argc, argv);
10690 const char *
const start = s;
10692 if (!s || s >= e)
return 0;
10695 if (single_byte_optimizable(str)) {
10696 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10701 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10721rb_str_lstrip_bang(
VALUE str)
10725 long olen, loffset;
10727 str_modify_keep_cr(str);
10728 enc = STR_ENC_GET(str);
10730 loffset = lstrip_offset(str, start, start+olen, enc);
10732 long len = olen-loffset;
10733 s = start + loffset;
10734 memmove(start, s,
len);
10735 STR_SET_LEN(str,
len);
10736 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10759rb_str_lstrip(
VALUE str)
10764 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10765 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10774 rb_str_check_dummy_enc(enc);
10778 if (!s || s >= e)
return 0;
10782 if (single_byte_optimizable(str)) {
10784 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10789 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10809rb_str_rstrip_bang(
VALUE str)
10813 long olen, roffset;
10815 str_modify_keep_cr(str);
10816 enc = STR_ENC_GET(str);
10818 roffset = rstrip_offset(str, start, start+olen, enc);
10820 long len = olen - roffset;
10822 STR_SET_LEN(str,
len);
10823 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10846rb_str_rstrip(
VALUE str)
10850 long olen, roffset;
10852 enc = STR_ENC_GET(str);
10854 roffset = rstrip_offset(str, start, start+olen, enc);
10856 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10872rb_str_strip_bang(
VALUE str)
10875 long olen, loffset, roffset;
10878 str_modify_keep_cr(str);
10879 enc = STR_ENC_GET(str);
10881 loffset = lstrip_offset(str, start, start+olen, enc);
10882 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10884 if (loffset > 0 || roffset > 0) {
10885 long len = olen-roffset;
10888 memmove(start, start + loffset,
len);
10890 STR_SET_LEN(str,
len);
10891 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10914rb_str_strip(
VALUE str)
10917 long olen, loffset, roffset;
10921 loffset = lstrip_offset(str, start, start+olen, enc);
10922 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10924 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10929scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10932 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10938 end = pos + RSTRING_LEN(pat);
10952 if (RSTRING_LEN(str) > end)
10953 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10962 if (!regs || regs->num_regs == 1) {
10968 for (
int i = 1; i < regs->num_regs; i++) {
11029 long last = -1, prev = 0;
11030 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
11032 pat = get_pat_quoted(pat, 1);
11033 mustnot_broken(str);
11037 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
11042 if (last >= 0) rb_pat_search(pat, str, last, 1);
11047 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
11051 str_mod_check(str, p,
len);
11053 if (last >= 0) rb_pat_search(pat, str, last, 1);
11077rb_str_hex(
VALUE str)
11079 return rb_str_to_inum(str, 16, FALSE);
11104rb_str_oct(
VALUE str)
11106 return rb_str_to_inum(str, -8, FALSE);
11109#ifndef HAVE_CRYPT_R
11114 rb_nativethread_lock_t lock;
11115} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
11118crypt_mutex_initialize(
void)
11189# define CRYPT_END() ALLOCV_END(databuf)
11191 extern char *crypt(
const char *,
const char *);
11192# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
11195 const char *s, *saltp;
11198 char salt_8bit_clean[3];
11202 mustnot_wchar(str);
11203 mustnot_wchar(salt);
11205 saltp = RSTRING_PTR(salt);
11206 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
11207 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
11211 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
11212 salt_8bit_clean[0] = saltp[0] & 0x7f;
11213 salt_8bit_clean[1] = saltp[1] & 0x7f;
11214 salt_8bit_clean[2] =
'\0';
11215 saltp = salt_8bit_clean;
11220# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
11221 data->initialized = 0;
11223 res = crypt_r(s, saltp, data);
11225 crypt_mutex_initialize();
11227 res = crypt(s, saltp);
11268 char *ptr, *p, *pend;
11271 unsigned long sum0 = 0;
11276 ptr = p = RSTRING_PTR(str);
11277 len = RSTRING_LEN(str);
11283 str_mod_check(str, ptr,
len);
11286 sum0 += (
unsigned char)*p;
11297 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11298 sum0 &= (((
unsigned long)1)<<bits)-1;
11318rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11322 long width,
len, flen = 1, fclen = 1;
11325 const char *f =
" ";
11326 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11328 int singlebyte = 1, cr;
11332 enc = STR_ENC_GET(str);
11333 termlen = rb_enc_mbminlen(enc);
11337 enc = rb_enc_check(str, pad);
11338 f = RSTRING_PTR(pad);
11339 flen = RSTRING_LEN(pad);
11340 fclen = str_strlen(pad, enc);
11341 singlebyte = single_byte_optimizable(pad);
11342 if (flen == 0 || fclen == 0) {
11343 rb_raise(rb_eArgError,
"zero width padding");
11346 len = str_strlen(str, enc);
11347 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11349 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11353 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11354 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11356 size = RSTRING_LEN(str);
11357 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11358 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11359 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11360 rb_raise(rb_eArgError,
"argument too big");
11364 p = RSTRING_PTR(res);
11366 memset(p, *f, llen);
11370 while (llen >= fclen) {
11376 memcpy(p, f, llen2);
11380 memcpy(p, RSTRING_PTR(str), size);
11383 memset(p, *f, rlen);
11387 while (rlen >= fclen) {
11393 memcpy(p, f, rlen2);
11397 TERM_FILL(p, termlen);
11398 STR_SET_LEN(res, p-RSTRING_PTR(res));
11421rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11423 return rb_str_justify(argc, argv, str,
'l');
11437rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11439 return rb_str_justify(argc, argv, str,
'r');
11454rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11456 return rb_str_justify(argc, argv, str,
'c');
11472 sep = get_pat_quoted(sep, 0);
11484 pos = rb_str_index(str, sep, 0);
11485 if (pos < 0)
goto failed;
11490 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11493 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11507 long pos = RSTRING_LEN(str);
11509 sep = get_pat_quoted(sep, 0);
11522 pos = rb_str_rindex(str, sep, pos);
11531 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11533 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11545rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11549 for (i=0; i<argc; i++) {
11550 VALUE tmp = argv[i];
11552 if (rb_reg_start_with_p(tmp, str))
11556 const char *p, *s, *e;
11561 enc = rb_enc_check(str, tmp);
11562 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11563 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11564 p = RSTRING_PTR(str);
11567 if (!at_char_right_boundary(p, s, e, enc))
11569 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11585rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11589 for (i=0; i<argc; i++) {
11590 VALUE tmp = argv[i];
11591 const char *p, *s, *e;
11596 enc = rb_enc_check(str, tmp);
11597 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11598 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11599 p = RSTRING_PTR(str);
11602 if (!at_char_boundary(p, s, e, enc))
11604 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11620deleted_prefix_length(
VALUE str,
VALUE prefix)
11622 const char *strptr, *prefixptr;
11623 long olen, prefixlen;
11628 if (!is_broken_string(prefix) ||
11629 !rb_enc_asciicompat(enc) ||
11630 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11631 enc = rb_enc_check(str, prefix);
11635 prefixlen = RSTRING_LEN(prefix);
11636 if (prefixlen <= 0)
return 0;
11637 olen = RSTRING_LEN(str);
11638 if (olen < prefixlen)
return 0;
11639 strptr = RSTRING_PTR(str);
11640 prefixptr = RSTRING_PTR(prefix);
11641 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11642 if (is_broken_string(prefix)) {
11643 if (!is_broken_string(str)) {
11647 const char *strend = strptr + olen;
11648 const char *after_prefix = strptr + prefixlen;
11649 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11669rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11672 str_modify_keep_cr(str);
11674 prefixlen = deleted_prefix_length(str, prefix);
11675 if (prefixlen <= 0)
return Qnil;
11689rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11693 prefixlen = deleted_prefix_length(str, prefix);
11694 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11696 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11709deleted_suffix_length(
VALUE str,
VALUE suffix)
11711 const char *strptr, *suffixptr;
11712 long olen, suffixlen;
11716 if (is_broken_string(suffix))
return 0;
11717 enc = rb_enc_check(str, suffix);
11720 suffixlen = RSTRING_LEN(suffix);
11721 if (suffixlen <= 0)
return 0;
11722 olen = RSTRING_LEN(str);
11723 if (olen < suffixlen)
return 0;
11724 strptr = RSTRING_PTR(str);
11725 suffixptr = RSTRING_PTR(suffix);
11726 const char *strend = strptr + olen;
11727 const char *before_suffix = strend - suffixlen;
11728 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11729 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11744rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11746 long olen, suffixlen,
len;
11747 str_modifiable(str);
11749 suffixlen = deleted_suffix_length(str, suffix);
11750 if (suffixlen <= 0)
return Qnil;
11752 olen = RSTRING_LEN(str);
11753 str_modify_keep_cr(str);
11754 len = olen - suffixlen;
11755 STR_SET_LEN(str,
len);
11756 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11772rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11776 suffixlen = deleted_suffix_length(str, suffix);
11777 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11779 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11786 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11794 val = rb_fs_check(val);
11797 "value of %"PRIsVALUE
" must be String or Regexp",
11801 rb_warn_deprecated(
"'$;'", NULL);
11818 str_modifiable(str);
11821 int idx = rb_enc_to_index(encoding);
11828 rb_enc_associate_index(str, idx);
11852 if (STR_EMBED_P(str)) {
11853 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11858 str_replace_shared_without_enc(str2, str);
11860 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11893rb_str_valid_encoding_p(
VALUE str)
11913rb_str_is_ascii_only_p(
VALUE str)
11923 static const char ellipsis[] =
"...";
11924 const long ellipsislen =
sizeof(ellipsis) - 1;
11926 const long blen = RSTRING_LEN(str);
11927 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11928 VALUE estr, ret = 0;
11931 if (
len * rb_enc_mbminlen(enc) >= blen ||
11935 else if (
len <= ellipsislen ||
11937 if (rb_enc_asciicompat(enc)) {
11939 rb_enc_associate(ret, enc);
11946 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11951 rb_enc_from_encoding(enc), 0,
Qnil);
11964 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11970 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11989 if (enc == STR_ENC_GET(str)) {
11994 return enc_str_scrub(enc, str, repl, cr);
12002 const char *rep, *p, *e, *p1, *sp;
12008 rb_raise(rb_eArgError,
"both of block and replacement given");
12015 if (!
NIL_P(repl)) {
12016 repl = str_compat_and_valid(repl, enc);
12019 if (rb_enc_dummy_p(enc)) {
12022 encidx = rb_enc_to_index(enc);
12024#define DEFAULT_REPLACE_CHAR(str) do { \
12025 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
12026 rep = replace; replen = (int)sizeof(replace); \
12029 slen = RSTRING_LEN(str);
12030 p = RSTRING_PTR(str);
12035 if (rb_enc_asciicompat(enc)) {
12041 else if (!
NIL_P(repl)) {
12042 rep = RSTRING_PTR(repl);
12043 replen = RSTRING_LEN(repl);
12046 else if (encidx == rb_utf8_encindex()) {
12047 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
12051 DEFAULT_REPLACE_CHAR(
"?");
12056 p = search_nonascii(p, e);
12061 int ret = rb_enc_precise_mbclen(p, e, enc);
12080 if (e - p < clen) clen = e - p;
12087 for (; clen > 1; clen--) {
12088 ret = rb_enc_precise_mbclen(q, q + clen, enc);
12099 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
12100 str_mod_check(str, sp, slen);
12101 repl = str_compat_and_valid(repl, enc);
12108 p = search_nonascii(p, e);
12134 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
12135 str_mod_check(str, sp, slen);
12136 repl = str_compat_and_valid(repl, enc);
12145 long mbminlen = rb_enc_mbminlen(enc);
12149 else if (!
NIL_P(repl)) {
12150 rep = RSTRING_PTR(repl);
12151 replen = RSTRING_LEN(repl);
12153 else if (encidx == ENCINDEX_UTF_16BE) {
12154 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
12156 else if (encidx == ENCINDEX_UTF_16LE) {
12157 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
12159 else if (encidx == ENCINDEX_UTF_32BE) {
12160 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
12162 else if (encidx == ENCINDEX_UTF_32LE) {
12163 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
12166 DEFAULT_REPLACE_CHAR(
"?");
12170 int ret = rb_enc_precise_mbclen(p, e, enc);
12183 if (e - p < clen) clen = e - p;
12184 if (clen <= mbminlen * 2) {
12189 for (; clen > mbminlen; clen-=mbminlen) {
12190 ret = rb_enc_precise_mbclen(q, q + clen, enc);
12200 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
12201 str_mod_check(str, sp, slen);
12202 repl = str_compat_and_valid(repl, enc);
12227 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
12228 str_mod_check(str, sp, slen);
12229 repl = str_compat_and_valid(repl, enc);
12265str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
12273static ID id_normalize;
12274static ID id_normalized_p;
12275static VALUE mUnicodeNormalize;
12278unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12280 static int UnicodeNormalizeRequired = 0;
12283 if (!UnicodeNormalizeRequired) {
12284 rb_require(
"unicode_normalize/normalize.rb");
12285 UnicodeNormalizeRequired = 1;
12289 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12326rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12328 return unicode_normalize_common(argc, argv, str, id_normalize);
12342rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12344 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12371rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12373 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12505#define sym_equal rb_obj_equal
12508sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12512 int c = rb_enc_precise_mbclen(s, send, enc);
12516 c = rb_enc_mbc_to_codepoint(s, send, enc);
12524rb_str_symname_p(
VALUE sym)
12529 rb_encoding *resenc = rb_default_internal_encoding();
12531 if (resenc == NULL) resenc = rb_default_external_encoding();
12532 enc = STR_ENC_GET(sym);
12533 ptr = RSTRING_PTR(sym);
12534 len = RSTRING_LEN(sym);
12535 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12543rb_str_quote_unprintable(
VALUE str)
12551 resenc = rb_default_internal_encoding();
12552 if (resenc == NULL) resenc = rb_default_external_encoding();
12553 enc = STR_ENC_GET(str);
12554 ptr = RSTRING_PTR(str);
12555 len = RSTRING_LEN(str);
12556 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12557 !sym_printable(ptr, ptr +
len, enc)) {
12558 return rb_str_escape(str);
12564rb_id_quote_unprintable(
ID id)
12566 VALUE str = rb_id2str(
id);
12567 if (!rb_str_symname_p(str)) {
12568 return rb_str_escape(str);
12586sym_inspect(
VALUE sym)
12593 if (!rb_str_symname_p(str)) {
12595 len = RSTRING_LEN(str);
12596 rb_str_resize(str,
len + 1);
12597 dest = RSTRING_PTR(str);
12598 memmove(dest + 1, dest,
len);
12602 VALUE orig_str = str;
12604 len = RSTRING_LEN(orig_str);
12605 str = rb_enc_str_new(0,
len + 1, enc);
12608 ptr = RSTRING_PTR(orig_str);
12609 dest = RSTRING_PTR(str);
12610 memcpy(dest + 1, ptr,
len);
12630rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12635 rb_raise(rb_eArgError,
"no receiver given");
12732 return rb_str_match(
rb_sym2str(sym), other);
12747sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12749 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12762sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12764 return rb_str_match_m_p(argc, argv, sym);
12782 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12793sym_length(
VALUE sym)
12807sym_empty(
VALUE sym)
12841sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12857sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12873sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12887sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12889 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12902sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12904 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12916sym_encoding(
VALUE sym)
12922string_for_symbol(
VALUE name)
12927 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12941 name = string_for_symbol(name);
12942 return rb_intern_str(name);
12951 name = string_for_symbol(name);
12975 return rb_fstring(str);
12982 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12994 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12995 rb_enc_autoload(enc);
12999 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
13005 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
13006 rb_enc_autoload(enc);
13010 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
13021rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
13026 if (RB_LIKELY(code >= 0 && code < 0xff)) {
13027 rb_str_buf_cat_byte(str, (
char) code);
13041 for (
unsigned int i = 0; i < fstring_table->capacity; i++) {
13042 VALUE str = fstring_table->entries[i].str;
13043 if (!str)
continue;
13211 rb_gc_register_address(&rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that is not affected by RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RUBY_ATOMIC_VALUE_CAS(var, oldval, newval)
Identical to RUBY_ATOMIC_CAS, except it expects its arguments are VALUE.
#define RUBY_ATOMIC_VALUE_SET(var, val)
Identical to RUBY_ATOMIC_SET, except it expects its arguments are VALUE.
std::atomic< unsigned > rb_atomic_t
Type that is eligible for atomic operations.
#define RUBY_ATOMIC_FETCH_ADD(var, val)
Atomically replaces the value pointed by var with the result of addition of val to the old value of v...
#define RUBY_ATOMIC_VALUE_EXCHANGE(var, val)
Identical to RUBY_ATOMIC_EXCHANGE, except it expects its arguments are VALUE.
#define RUBY_ATOMIC_DEC(var)
Atomically decrements the value pointed by var.
#define RUBY_ATOMIC_LOAD(var)
Atomic load.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
st_index_t rb_ivar_count(VALUE obj)
Number of instance variables defined on an object.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
#define TypedData_Make_Struct(klass, type, data_type, sval)
Identical to TypedData_Wrap_Struct, except it allocates a new data region internally instead of takin...
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@52 as
String's specific fields.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
long len
Length of the string, not including terminating NUL character.
struct RString::@52::@54 embed
Embedded contents.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
struct RString::@52::@53 heap
Strings that use separated memory region for contents use this pattern.
union RString::@52::@53::@55 aux
Auxiliary info.
This is the struct that holds necessary info for a struct.
const char * wrap_struct_name
Name of structs of this kind.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.