14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/encoding.h"
32#include "internal/error.h"
33#include "internal/gc.h"
34#include "internal/hash.h"
35#include "internal/numeric.h"
36#include "internal/object.h"
37#include "internal/proc.h"
38#include "internal/re.h"
39#include "internal/sanitizers.h"
40#include "internal/string.h"
41#include "internal/transcode.h"
47#include "ruby_assert.h"
50#if defined HAVE_CRYPT_R
51# if defined HAVE_CRYPT_H
54#elif !defined HAVE_CRYPT
55# include "missing/crypt.h"
56# define HAVE_CRYPT_R 1
59#define BEG(no) (regs->beg[(no)])
60#define END(no) (regs->end[(no)])
63#undef rb_usascii_str_new
67#undef rb_usascii_str_new_cstr
68#undef rb_utf8_str_new_cstr
69#undef rb_enc_str_new_cstr
70#undef rb_external_str_new_cstr
71#undef rb_locale_str_new_cstr
72#undef rb_str_dup_frozen
73#undef rb_str_buf_new_cstr
127#define RUBY_MAX_CHAR_LEN 16
128#define STR_PRECOMPUTED_HASH FL_USER4
129#define STR_SHARED_ROOT FL_USER5
130#define STR_BORROWED FL_USER6
131#define STR_TMPLOCK FL_USER7
132#define STR_NOFREE FL_USER18
133#define STR_FAKESTR FL_USER19
135#define STR_SET_NOEMBED(str) do {\
136 FL_SET((str), STR_NOEMBED);\
137 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
139#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
141#define STR_SET_LEN(str, n) do { \
142 RSTRING(str)->len = (n); \
146str_encindex_fastpath(
int encindex)
150 case ENCINDEX_ASCII_8BIT:
152 case ENCINDEX_US_ASCII:
160str_enc_fastpath(
VALUE str)
165#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
166#define TERM_FILL(ptr, termlen) do {\
167 char *const term_fill_ptr = (ptr);\
168 const int term_fill_len = (termlen);\
169 *term_fill_ptr = '\0';\
170 if (UNLIKELY(term_fill_len > 1))\
171 memset(term_fill_ptr, 0, term_fill_len);\
174#define RESIZE_CAPA(str,capacity) do {\
175 const int termlen = TERM_LEN(str);\
176 RESIZE_CAPA_TERM(str,capacity,termlen);\
178#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
179 if (STR_EMBED_P(str)) {\
180 if (str_embed_capa(str) < capacity + termlen) {\
181 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
182 const long tlen = RSTRING_LEN(str);\
183 memcpy(tmp, RSTRING_PTR(str), tlen);\
184 RSTRING(str)->as.heap.ptr = tmp;\
185 RSTRING(str)->len = tlen;\
186 STR_SET_NOEMBED(str);\
187 RSTRING(str)->as.heap.aux.capa = (capacity);\
191 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
192 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
193 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
194 RSTRING(str)->as.heap.aux.capa = (capacity);\
198#define STR_SET_SHARED(str, shared_str) do { \
199 if (!FL_TEST(str, STR_FAKESTR)) { \
200 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
201 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
202 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
203 FL_SET((str), STR_SHARED); \
204 FL_SET((shared_str), STR_SHARED_ROOT); \
205 if (RBASIC_CLASS((shared_str)) == 0) \
206 FL_SET_RAW((shared_str), STR_BORROWED); \
210#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
211#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
214#define STR_ENC_GET(str) get_encoding(str)
216#if !defined SHARABLE_MIDDLE_SUBSTRING
217# define SHARABLE_MIDDLE_SUBSTRING 0
219#if !SHARABLE_MIDDLE_SUBSTRING
220#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
222#define SHARABLE_SUBSTRING_P(beg, len, end) 1
227str_embed_capa(
VALUE str)
229 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
233rb_str_reembeddable_p(
VALUE str)
235 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
239rb_str_embed_size(
long capa)
245rb_str_size_as_embedded(
VALUE str)
248 if (STR_EMBED_P(str)) {
249 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
253 else if (rb_str_reembeddable_p(str)) {
254 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
257 real_size =
sizeof(
struct RString);
261 real_size +=
sizeof(st_index_t);
268STR_EMBEDDABLE_P(
long len,
long termlen)
270 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
275static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
276static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
278static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
279static inline void str_modifiable(
VALUE str);
284str_make_independent(
VALUE str)
286 long len = RSTRING_LEN(str);
287 int termlen = TERM_LEN(str);
288 str_make_independent_expand((str),
len, 0L, termlen);
291static inline int str_dependent_p(
VALUE str);
294rb_str_make_independent(
VALUE str)
296 if (str_dependent_p(str)) {
297 str_make_independent(str);
302rb_str_make_embedded(
VALUE str)
307 char *buf =
RSTRING(str)->as.heap.ptr;
311 STR_SET_LEN(str,
len);
314 memcpy(RSTRING_PTR(str), buf,
len);
318 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
322rb_debug_rstring_null_ptr(
const char *func)
324 fprintf(stderr,
"%s is returning NULL!! "
325 "SIGSEGV is highly expected to follow immediately.\n"
326 "If you could reproduce, attach your debugger here, "
327 "and look at the passed string.\n",
332static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
335get_encoding(
VALUE str)
341mustnot_broken(
VALUE str)
343 if (is_broken_string(str)) {
344 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
349mustnot_wchar(
VALUE str)
352 if (rb_enc_mbminlen(enc) > 1) {
353 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
359static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
361#if SIZEOF_LONG == SIZEOF_VOIDP
362#define PRECOMPUTED_FAKESTR_HASH 1
366#ifdef PRECOMPUTED_FAKESTR_HASH
368fstring_hash(
VALUE str)
373 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
383#define fstring_hash rb_str_hash
386#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
388static inline st_index_t
389str_do_hash(
VALUE str)
391 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
393 if (e && !is_ascii_string(str)) {
400str_store_precomputed_hash(
VALUE str, st_index_t hash)
406 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
407 size_t free_bytes = str_embed_capa(str) - used_bytes;
411 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
413 FL_SET(str, STR_PRECOMPUTED_HASH);
420 bool force_precompute_hash;
432 long len = RSTRING_LEN(str);
433 long capa =
len +
sizeof(st_index_t);
434 int term_len = TERM_LEN(str);
436 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
438 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
439 STR_SET_LEN(new_str, RSTRING_LEN(str));
441 rb_enc_copy(new_str, str);
442 str_store_precomputed_hash(new_str, str_do_hash(str));
446 rb_enc_copy(new_str, str);
447#ifdef PRECOMPUTED_FAKESTR_HASH
448 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
449 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
463 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
466 if (STR_SHARED_P(str)) {
468 str_make_independent(str);
471 if (!BARE_STRING_P(str)) {
477 RBASIC(str)->flags |= RSTRING_FSTR;
497 if (
FL_TEST(str, RSTRING_FSTR))
500 bare = BARE_STRING_P(str);
502 if (STR_EMBED_P(str)) {
507 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
514 rb_str_resize(str, RSTRING_LEN(str));
516 fstr = register_fstring(str,
false,
false);
519 str_replace_shared_without_enc(str, fstr);
526#define FSTRING_TABLE_EMPTY Qfalse
527#define FSTRING_TABLE_TOMBSTONE Qtrue
528#define FSTRING_TABLE_MOVED Qundef
537 unsigned int capacity;
538 unsigned int deleted_entries;
543fstring_table_free(
void *ptr)
546 xfree(table->entries);
550fstring_table_size(
const void *ptr)
561 .dfree = fstring_table_free,
562 .dsize = fstring_table_size,
564 .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE
568static VALUE fstring_table_obj;
571new_fstring_table(
int capacity)
576 table->capacity = capacity;
583Init_fstring_table(
void)
585 fstring_table_obj = new_fstring_table(8192);
586 rb_gc_register_address(&fstring_table_obj);
600 RUBY_ASSERT((table->capacity & (table->capacity - 1)) == 0);
601 probe->mask = table->capacity - 1;
602 probe->idx = hash_code & probe->mask;
609 probe->idx = (probe->idx + 1) & probe->mask;
626 RUBY_ASSERT((table->capacity & (table->capacity - 1)) == 0);
628 probe->mask = table->capacity - 1;
629 probe->idx = hash_code & probe->mask;
637 probe->idx = (probe->idx + probe->d) & probe->mask;
642#define RUBY_ATOMIC_VALUE_LOAD(x) (VALUE)(RUBY_ATOMIC_PTR_LOAD(x))
648 int idx = fstring_table_probe_start(&probe, table, hash_code);
652 VALUE candidate = entry->str;
657 if (candidate == FSTRING_TABLE_EMPTY) {
664 entry->hash = hash_code;
668 idx = fstring_table_probe_next(&probe);
674fstring_try_resize(
VALUE old_table_obj)
679 if (RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj) != old_table_obj) {
687 int expected_count =
RUBY_ATOMIC_LOAD(old_table->count) - old_table->deleted_entries;
690 int old_capacity = old_table->capacity;
691 int new_capacity = old_capacity * 2;
692 if (new_capacity > expected_count * 8) {
693 new_capacity = old_capacity / 2;
695 else if (new_capacity > expected_count * 4) {
696 new_capacity = old_capacity;
700 VALUE new_table_obj = new_fstring_table(new_capacity);
703 for (
int i = 0; i < old_capacity; i++) {
707 if (val == FSTRING_TABLE_EMPTY)
continue;
708 if (val == FSTRING_TABLE_TOMBSTONE)
continue;
709 if (rb_objspace_garbage_object_p(val))
continue;
711 VALUE hash_code = RUBY_ATOMIC_VALUE_LOAD(entry->hash);
712 if (hash_code == 0) {
715 hash_code = fstring_hash(val);
718 fstring_insert_on_resize(new_table, hash_code, val);
722 fprintf(stderr,
"resized: %p(%i) -> %p(%i) (count: %i->%i)\n", old_table, old_table->capacity, new_table, new_table->capacity, old_table->count, new_table->count);
736 bool inserting =
false;
742 table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
744 table = RTYPEDDATA_GET_DATA(table_obj);
745 idx = fstring_table_probe_start(&probe, table, hash_code);
749 VALUE candidate = RUBY_ATOMIC_VALUE_LOAD(entry->str);
751 if (candidate == FSTRING_TABLE_EMPTY) {
755 value = build_fstring(value, arg);
762 if (UNLIKELY(prev_count > table->capacity / 2)) {
763 fstring_try_resize(table_obj);
768 if (found == FSTRING_TABLE_EMPTY) {
785 else if (candidate == FSTRING_TABLE_TOMBSTONE) {
788 else if (candidate == FSTRING_TABLE_MOVED) {
796 VALUE candidate_hash = RUBY_ATOMIC_VALUE_LOAD(entry->hash);
797 if ((candidate_hash == hash_code || candidate_hash == 0) && !fstring_cmp(candidate, value)) {
799 if (UNLIKELY(rb_objspace_garbage_object_p(candidate))) {
813 idx = fstring_table_probe_next(&probe);
823 VALUE table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
828 int idx = fstring_table_probe_start(&probe, table, hash_code);
832 VALUE candidate = entry->str;
837 if (candidate == FSTRING_TABLE_EMPTY) {
841 else if (candidate == value) {
843 entry->str = FSTRING_TABLE_TOMBSTONE;
844 table->deleted_entries++;
848 idx = fstring_table_probe_next(&probe);
853register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
857 .force_precompute_hash = force_precompute_hash
860#if SIZEOF_VOIDP == SIZEOF_LONG
864 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
868 VALUE hash_code = fstring_hash(str);
869 VALUE result = fstring_find_or_insert(hash_code, str, &args);
871 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
882rb_fstring_foreach_with_replace(st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
887 VALUE table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
894 for (
unsigned int i = 0; i < table->capacity; i++) {
895 VALUE key = table->entries[i].str;
896 if(key == FSTRING_TABLE_EMPTY)
continue;
897 if(key == FSTRING_TABLE_TOMBSTONE)
continue;
899 enum st_retval retval;
900 retval = (*func)(key, key, arg, 0);
902 if (retval == ST_REPLACE && replace) {
903 st_data_t value = key;
904 retval = (*replace)(&key, &value, arg, TRUE);
905 table->entries[i].str = key;
912 rb_bug(
"unsupported");
916 table->entries[i].str = FSTRING_TABLE_TOMBSTONE;
923rb_obj_is_fstring_table(
VALUE obj)
927 return obj == fstring_table_obj;
931rb_gc_free_fstring(
VALUE obj)
936 VALUE str_hash = fstring_hash(obj);
937 fstring_delete(str_hash, obj);
939 RB_DEBUG_COUNTER_INC(obj_str_fstr);
945setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
960 return (
VALUE)fake_str;
969 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
978rb_fstring_new(
const char *ptr,
long len)
981 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
988 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
992rb_fstring_cstr(
const char *
ptr)
994 return rb_fstring_new(
ptr, strlen(
ptr));
1001 const char *aptr, *bptr;
1008 return (alen != blen ||
1010 memcmp(aptr, bptr, alen) != 0);
1014single_byte_optimizable(
VALUE str)
1018 case ENCINDEX_ASCII_8BIT:
1019 case ENCINDEX_US_ASCII:
1021 case ENCINDEX_UTF_8:
1041static inline const char *
1042search_nonascii(
const char *p,
const char *e)
1044 const uintptr_t *s, *t;
1046#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
1047# if SIZEOF_UINTPTR_T == 8
1048# define NONASCII_MASK UINT64_C(0x8080808080808080)
1049# elif SIZEOF_UINTPTR_T == 4
1050# define NONASCII_MASK UINT32_C(0x80808080)
1052# error "don't know what to do."
1055# if SIZEOF_UINTPTR_T == 8
1056# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
1057# elif SIZEOF_UINTPTR_T == 4
1058# define NONASCII_MASK 0x80808080UL
1060# error "don't know what to do."
1064 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
1065#if !UNALIGNED_WORD_ACCESS
1066 if ((uintptr_t)p % SIZEOF_VOIDP) {
1067 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
1072 case 7:
if (p[-7]&0x80)
return p-7;
1073 case 6:
if (p[-6]&0x80)
return p-6;
1074 case 5:
if (p[-5]&0x80)
return p-5;
1075 case 4:
if (p[-4]&0x80)
return p-4;
1077 case 3:
if (p[-3]&0x80)
return p-3;
1078 case 2:
if (p[-2]&0x80)
return p-2;
1079 case 1:
if (p[-1]&0x80)
return p-1;
1084#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
1085#define aligned_ptr(value) \
1086 __builtin_assume_aligned((value), sizeof(uintptr_t))
1088#define aligned_ptr(value) (uintptr_t *)(value)
1091 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
1094 if (*s & NONASCII_MASK) {
1095#ifdef WORDS_BIGENDIAN
1096 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
1098 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
1102 p = (
const char *)s;
1108 case 7:
if (e[-7]&0x80)
return e-7;
1109 case 6:
if (e[-6]&0x80)
return e-6;
1110 case 5:
if (e[-5]&0x80)
return e-5;
1111 case 4:
if (e[-4]&0x80)
return e-4;
1113 case 3:
if (e[-3]&0x80)
return e-3;
1114 case 2:
if (e[-2]&0x80)
return e-2;
1115 case 1:
if (e[-1]&0x80)
return e-1;
1116 case 0:
return NULL;
1123 const char *e = p +
len;
1125 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
1127 p = search_nonascii(p, e);
1131 if (rb_enc_asciicompat(enc)) {
1132 p = search_nonascii(p, e);
1135 int ret = rb_enc_precise_mbclen(p, e, enc);
1139 p = search_nonascii(p, e);
1145 int ret = rb_enc_precise_mbclen(p, e, enc);
1161 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
1164 p = search_nonascii(p, e);
1168 else if (rb_enc_asciicompat(enc)) {
1169 p = search_nonascii(p, e);
1175 int ret = rb_enc_precise_mbclen(p, e, enc);
1182 p = search_nonascii(p, e);
1188 int ret = rb_enc_precise_mbclen(p, e, enc);
1213 rb_enc_set_index(str1, rb_enc_get_index(str2));
1221rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
1226 str_enc_copy(dest, src);
1227 if (RSTRING_LEN(dest) == 0) {
1228 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
1239 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
1240 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
1251rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
1253 str_enc_copy(dest, src);
1260 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
1266 return enc_coderange_scan(str, enc);
1275 cr = enc_coderange_scan(str, get_encoding(str));
1282rb_enc_str_asciicompat(
VALUE str)
1285 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
1293 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
1302str_mod_check(
VALUE s,
const char *p,
long len)
1304 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
1310str_capacity(
VALUE str,
const int termlen)
1312 if (STR_EMBED_P(str)) {
1313 return str_embed_capa(str) - termlen;
1315 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
1319 return RSTRING(str)->as.heap.aux.capa;
1326 return str_capacity(str, TERM_LEN(str));
1330must_not_null(
const char *
ptr)
1333 rb_raise(rb_eArgError,
"NULL pointer given");
1338str_alloc_embed(
VALUE klass,
size_t capa)
1340 size_t size = rb_str_embed_size(
capa);
1344 NEWOBJ_OF(str,
struct RString, klass,
1351str_alloc_heap(
VALUE klass)
1353 NEWOBJ_OF(str,
struct RString, klass,
1360empty_str_alloc(
VALUE klass)
1362 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1363 VALUE str = str_alloc_embed(klass, 0);
1364 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1375 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1379 enc = rb_ascii8bit_encoding();
1382 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1384 int termlen = rb_enc_mbminlen(enc);
1386 if (STR_EMBEDDABLE_P(
len, termlen)) {
1387 str = str_alloc_embed(klass,
len + termlen);
1393 str = str_alloc_heap(klass);
1399 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1402 rb_enc_raw_set(str, enc);
1405 memcpy(RSTRING_PTR(str),
ptr,
len);
1408 STR_SET_LEN(str,
len);
1409 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1416 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1451 __msan_unpoison_string(
ptr);
1471 if (rb_enc_mbminlen(enc) != 1) {
1472 rb_raise(rb_eArgError,
"wchar encoding given");
1474 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1478str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1483 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1487 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1490 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1491 str = str_alloc_heap(klass);
1495 RBASIC(str)->flags |= STR_NOFREE;
1496 rb_enc_associate_index(str, encindex);
1525static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1527 int ecflags,
VALUE ecopts);
1532 int encidx = rb_enc_to_index(enc);
1533 if (rb_enc_get_index(str) == encidx)
1534 return is_ascii_string(str);
1545 if (!to)
return str;
1546 if (!from) from = rb_enc_get(str);
1547 if (from == to)
return str;
1548 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1549 rb_is_ascii8bit_enc(to)) {
1550 if (STR_ENC_GET(str) != to) {
1552 rb_enc_associate(str, to);
1559 from, to, ecflags, ecopts);
1560 if (
NIL_P(newstr)) {
1568rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1573 olen = RSTRING_LEN(newstr);
1574 if (ofs < -olen || olen < ofs)
1576 if (ofs < 0) ofs += olen;
1578 STR_SET_LEN(newstr, ofs);
1582 rb_str_modify(newstr);
1583 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1591 STR_SET_LEN(str, 0);
1592 rb_enc_associate(str, enc);
1598str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1600 int ecflags,
VALUE ecopts)
1605 VALUE econv_wrapper;
1606 const unsigned char *start, *sp;
1607 unsigned char *dest, *dp;
1608 size_t converted_output = (size_t)ofs;
1613 RBASIC_CLEAR_CLASS(econv_wrapper);
1615 if (!ec)
return Qnil;
1618 sp = (
unsigned char*)
ptr;
1620 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1621 (dp = dest + converted_output),
1625 size_t converted_input = sp - start;
1626 size_t rest =
len - converted_input;
1627 converted_output = dp - dest;
1629 if (converted_input && converted_output &&
1630 rest < (LONG_MAX / converted_output)) {
1631 rest = (rest * converted_output) / converted_input;
1636 olen += rest < 2 ? 2 : rest;
1637 rb_str_resize(newstr, olen);
1644 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1646 rb_enc_associate(newstr, to);
1665 const int eidx = rb_enc_to_index(eenc);
1668 return rb_enc_str_new(
ptr,
len, eenc);
1672 if ((eidx == rb_ascii8bit_encindex()) ||
1673 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1677 ienc = rb_default_internal_encoding();
1678 if (!ienc || eenc == ienc) {
1679 return rb_enc_str_new(
ptr,
len, eenc);
1683 if ((eidx == rb_ascii8bit_encindex()) ||
1684 (eidx == rb_usascii_encindex()) ||
1685 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1686 return rb_enc_str_new(
ptr,
len, ienc);
1689 str = rb_enc_str_new(NULL, 0, ienc);
1692 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1693 rb_str_initialize(str,
ptr,
len, eenc);
1701 int eidx = rb_enc_to_index(eenc);
1702 if (eidx == rb_usascii_encindex() &&
1703 !is_ascii_string(str)) {
1704 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1707 rb_enc_associate_index(str, eidx);
1766str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1768 const int termlen = TERM_LEN(str);
1773 if (str_embed_capa(str2) >=
len + termlen) {
1774 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1775 STR_SET_EMBED(str2);
1776 memcpy(ptr2, RSTRING_PTR(str),
len);
1777 TERM_FILL(ptr2+
len, termlen);
1781 if (STR_SHARED_P(str)) {
1782 root =
RSTRING(str)->as.heap.aux.shared;
1791 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1793 rb_fatal(
"about to free a possible shared root");
1795 char *ptr2 = STR_HEAP_PTR(str2);
1797 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1800 FL_SET(str2, STR_NOEMBED);
1802 STR_SET_SHARED(str2, root);
1805 STR_SET_LEN(str2,
len);
1813 str_replace_shared_without_enc(str2, str);
1814 rb_enc_cr_str_exact_copy(str2, str);
1821 return str_replace_shared(str_alloc_heap(klass), str);
1838rb_str_new_frozen_String(
VALUE orig)
1846rb_str_frozen_bare_string(
VALUE orig)
1848 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1853rb_str_tmp_frozen_acquire(
VALUE orig)
1856 return str_new_frozen_buffer(0, orig, FALSE);
1860rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1862 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1863 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1865 VALUE str = str_alloc_heap(0);
1868 FL_SET(str, STR_SHARED_ROOT);
1870 size_t capa = str_capacity(orig, TERM_LEN(orig));
1876 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1877 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1884 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1885 RBASIC(orig)->flags &= ~STR_NOFREE;
1886 STR_SET_SHARED(orig, str);
1896rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1901 if (STR_EMBED_P(tmp)) {
1910 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1914 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1915 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1920 STR_SET_LEN(tmp, 0);
1928 return str_new_frozen_buffer(klass, orig, TRUE);
1937 VALUE str = str_alloc_heap(klass);
1938 STR_SET_LEN(str, RSTRING_LEN(orig));
1939 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1940 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1941 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1942 RBASIC(orig)->flags &= ~STR_NOFREE;
1943 STR_SET_SHARED(orig, str);
1950str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1954 long len = RSTRING_LEN(orig);
1955 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1956 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1958 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1959 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1965 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1966 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1972 if ((ofs > 0) || (rest > 0) ||
1975 str = str_new_shared(klass,
shared);
1977 RSTRING(str)->as.heap.ptr += ofs;
1978 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1986 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1987 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1989 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1990 STR_SET_LEN(str, RSTRING_LEN(orig));
1995 str = heap_str_make_shared(klass, orig);
1999 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
2011str_new_empty_String(
VALUE str)
2014 rb_enc_copy(v, str);
2018#define STR_BUF_MIN_SIZE 63
2023 if (STR_EMBEDDABLE_P(
capa, 1)) {
2031 RSTRING(str)->as.heap.ptr[0] =
'\0';
2051 return str_new(0, 0,
len);
2057 if (STR_EMBED_P(str)) {
2058 RB_DEBUG_COUNTER_INC(obj_str_embed);
2060 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
2061 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
2062 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
2065 RB_DEBUG_COUNTER_INC(obj_str_ptr);
2066 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2071rb_str_memsize(
VALUE str)
2073 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
2074 return STR_HEAP_SIZE(str);
2084 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2087static inline void str_discard(
VALUE str);
2088static void str_shared_replace(
VALUE str,
VALUE str2);
2093 if (str != str2) str_shared_replace(str, str2);
2104 enc = STR_ENC_GET(str2);
2107 termlen = rb_enc_mbminlen(enc);
2109 STR_SET_LEN(str, RSTRING_LEN(str2));
2111 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
2113 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
2114 rb_enc_associate(str, enc);
2118 if (STR_EMBED_P(str2)) {
2120 long len = RSTRING_LEN(str2);
2123 char *new_ptr =
ALLOC_N(
char,
len + termlen);
2124 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
2125 RSTRING(str2)->as.heap.ptr = new_ptr;
2126 STR_SET_LEN(str2,
len);
2128 STR_SET_NOEMBED(str2);
2131 STR_SET_NOEMBED(str);
2133 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
2135 if (
FL_TEST(str2, STR_SHARED)) {
2137 STR_SET_SHARED(str,
shared);
2140 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
2144 STR_SET_EMBED(str2);
2145 RSTRING_PTR(str2)[0] = 0;
2146 STR_SET_LEN(str2, 0);
2147 rb_enc_associate(str, enc);
2161 return rb_obj_as_string_result(str, obj);
2177 len = RSTRING_LEN(str2);
2178 if (STR_SHARED_P(str2)) {
2181 STR_SET_NOEMBED(str);
2182 STR_SET_LEN(str,
len);
2183 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
2184 STR_SET_SHARED(str,
shared);
2185 rb_enc_cr_str_exact_copy(str, str2);
2188 str_replace_shared(str, str2);
2197 size_t size = rb_str_embed_size(
capa);
2201 NEWOBJ_OF(str,
struct RString, klass,
2210 NEWOBJ_OF(str,
struct RString, klass,
2221 encidx = rb_enc_get_index(str);
2222 flags &= ~ENCODING_MASK;
2225 if (encidx) rb_enc_associate_index(dup, encidx);
2235 long len = RSTRING_LEN(str);
2240 STR_SET_LEN(dup, RSTRING_LEN(str));
2241 return str_duplicate_setup_encoding(str, dup, flags);
2250 root =
RSTRING(str)->as.heap.aux.shared;
2252 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
2253 root = str = str_new_frozen(klass, str);
2259 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
2260 FL_SET(root, STR_SHARED_ROOT);
2262 flags |= RSTRING_NOEMBED | STR_SHARED;
2264 STR_SET_LEN(dup, RSTRING_LEN(str));
2265 return str_duplicate_setup_encoding(str, dup, flags);
2271 if (STR_EMBED_P(str)) {
2272 return str_duplicate_setup_embed(klass, str, dup);
2275 return str_duplicate_setup_heap(klass, str, dup);
2283 if (STR_EMBED_P(str)) {
2284 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
2287 dup = str_alloc_heap(klass);
2290 return str_duplicate_setup(klass, str, dup);
2301rb_str_dup_m(
VALUE str)
2303 if (LIKELY(BARE_STRING_P(str))) {
2314 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2321 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2325 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2326 str_duplicate_setup_embed(klass, str, new_str);
2329 new_str = ec_str_alloc_heap(ec, klass);
2330 str_duplicate_setup_heap(klass, str, new_str);
2339rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2341 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2343 rb_ivar_set(str, id_debug_created_info, rb_ary_freeze(debug_info));
2360 static ID keyword_ids[2];
2361 VALUE orig, opt, venc, vcapa;
2366 if (!keyword_ids[0]) {
2367 keyword_ids[0] = rb_id_encoding();
2368 CONST_ID(keyword_ids[1],
"capacity");
2376 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2377 enc = rb_to_encoding(venc);
2379 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2382 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2384 if (
capa < STR_BUF_MIN_SIZE) {
2385 capa = STR_BUF_MIN_SIZE;
2389 len = RSTRING_LEN(orig);
2393 if (orig == str) n = 0;
2395 str_modifiable(str);
2396 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2398 const size_t size = (size_t)
capa + termlen;
2399 const char *
const old_ptr = RSTRING_PTR(str);
2400 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2401 char *new_ptr =
ALLOC_N(
char, size);
2402 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2403 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2405 RSTRING(str)->as.heap.ptr = new_ptr;
2407 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2408 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2409 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2411 STR_SET_LEN(str,
len);
2414 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2415 rb_enc_cr_str_exact_copy(str, orig);
2417 FL_SET(str, STR_NOEMBED);
2424 rb_enc_associate(str, enc);
2436rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2442 static ID keyword_ids[2];
2452 keyword_ids[0] = rb_id_encoding();
2453 CONST_ID(keyword_ids[1],
"capacity");
2455 encoding = kwargs[0];
2456 capacity = kwargs[1];
2465 if (UNDEF_P(encoding)) {
2467 encoding = rb_obj_encoding(orig);
2471 if (!UNDEF_P(encoding)) {
2472 enc = rb_to_encoding(encoding);
2476 if (UNDEF_P(capacity)) {
2478 VALUE empty_str = str_new(klass,
"", 0);
2480 rb_enc_associate(empty_str, enc);
2484 VALUE copy = str_duplicate(klass, orig);
2485 rb_enc_associate(copy, enc);
2498 if (orig_capa >
capa) {
2503 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2504 STR_SET_LEN(str, 0);
2515#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2530static inline uintptr_t
2531count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2536 d = (d>>6) | (~d>>7);
2537 d &= NONASCII_MASK >> 7;
2540#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2542 return rb_popcount_intptr(d);
2546# if SIZEOF_VOIDP == 8
2555enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2561 long diff = (long)(e - p);
2562 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2567 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2568 const uintptr_t *s, *t;
2569 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2570 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2571 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2572 while (p < (
const char *)s) {
2573 if (is_utf8_lead_byte(*p))
len++;
2577 len += count_utf8_lead_bytes_with_word(s);
2580 p = (
const char *)s;
2583 if (is_utf8_lead_byte(*p))
len++;
2589 else if (rb_enc_asciicompat(enc)) {
2594 q = search_nonascii(p, e);
2600 p += rb_enc_fast_mbclen(p, e, enc);
2607 q = search_nonascii(p, e);
2613 p += rb_enc_mbclen(p, e, enc);
2620 for (c=0; p<e; c++) {
2621 p += rb_enc_mbclen(p, e, enc);
2636rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2644 long diff = (long)(e - p);
2645 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2647 else if (rb_enc_asciicompat(enc)) {
2651 q = search_nonascii(p, e);
2659 ret = rb_enc_precise_mbclen(p, e, enc);
2674 for (c=0; p<e; c++) {
2675 ret = rb_enc_precise_mbclen(p, e, enc);
2682 if (p + rb_enc_mbminlen(enc) <= e)
2683 p += rb_enc_mbminlen(enc);
2699 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2700 if (!enc) enc = STR_ENC_GET(str);
2701 p = RSTRING_PTR(str);
2706 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2711 return enc_strlen(p, e, enc, cr);
2718 return str_strlen(str, NULL);
2732 return LONG2NUM(str_strlen(str, NULL));
2744rb_str_bytesize(
VALUE str)
2762rb_str_empty(
VALUE str)
2764 return RBOOL(RSTRING_LEN(str) == 0);
2782 char *ptr1, *ptr2, *ptr3;
2787 enc = rb_enc_check_str(str1, str2);
2790 termlen = rb_enc_mbminlen(enc);
2791 if (len1 > LONG_MAX - len2) {
2792 rb_raise(rb_eArgError,
"string size too big");
2794 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2795 ptr3 = RSTRING_PTR(str3);
2796 memcpy(ptr3, ptr1, len1);
2797 memcpy(ptr3+len1, ptr2, len2);
2798 TERM_FILL(&ptr3[len1+len2], termlen);
2814 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2817 int enc1 = rb_enc_get_index(str1);
2818 int enc2 = rb_enc_get_index(str2);
2823 else if (enc2 < 0) {
2826 else if (enc1 != enc2) {
2829 else if (len1 > LONG_MAX - len2) {
2862 rb_enc_copy(str2, str);
2867 rb_raise(rb_eArgError,
"negative argument");
2869 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2870 if (STR_EMBEDDABLE_P(
len, 1)) {
2872 memset(RSTRING_PTR(str2), 0,
len + 1);
2879 STR_SET_LEN(str2,
len);
2880 rb_enc_copy(str2, str);
2883 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2884 rb_raise(rb_eArgError,
"argument too big");
2887 len *= RSTRING_LEN(str);
2888 termlen = TERM_LEN(str);
2890 ptr2 = RSTRING_PTR(str2);
2892 n = RSTRING_LEN(str);
2893 memcpy(ptr2, RSTRING_PTR(str), n);
2894 while (n <=
len/2) {
2895 memcpy(ptr2 + n, ptr2, n);
2898 memcpy(ptr2 + n, ptr2,
len-n);
2900 STR_SET_LEN(str2,
len);
2901 TERM_FILL(&ptr2[
len], termlen);
2902 rb_enc_cr_str_copy_for_substr(str2, str);
2928 VALUE tmp = rb_check_array_type(arg);
2937rb_check_lockedtmp(
VALUE str)
2939 if (
FL_TEST(str, STR_TMPLOCK)) {
2946#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2948str_modifiable(
VALUE str)
2952 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2953 if (CHILLED_STRING_P(str)) {
2954 CHILLED_STRING_MUTATED(str);
2956 rb_check_lockedtmp(str);
2957 rb_check_frozen(str);
2962str_dependent_p(
VALUE str)
2964 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2974#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2976str_independent(
VALUE str)
2980 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2981 str_modifiable(str);
2982 return !str_dependent_p(str);
2988str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2998 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
3003 STR_SET_LEN(str,
len);
3008 oldptr = RSTRING_PTR(str);
3010 memcpy(
ptr, oldptr,
len);
3012 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
3015 STR_SET_NOEMBED(str);
3016 FL_UNSET(str, STR_SHARED|STR_NOFREE);
3017 TERM_FILL(
ptr +
len, termlen);
3019 STR_SET_LEN(str,
len);
3026 if (!str_independent(str))
3027 str_make_independent(str);
3036 int termlen = TERM_LEN(str);
3037 long len = RSTRING_LEN(str);
3040 rb_raise(rb_eArgError,
"negative expanding string size");
3042 if (expand >= LONG_MAX -
len) {
3043 rb_raise(rb_eArgError,
"string size too big");
3046 if (!str_independent(str)) {
3047 str_make_independent_expand(str,
len, expand, termlen);
3049 else if (expand > 0) {
3050 RESIZE_CAPA_TERM(str,
len + expand, termlen);
3057str_modify_keep_cr(
VALUE str)
3059 if (!str_independent(str))
3060 str_make_independent(str);
3067str_discard(
VALUE str)
3069 str_modifiable(str);
3070 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
3071 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
3072 RSTRING(str)->as.heap.ptr = 0;
3073 STR_SET_LEN(str, 0);
3080 int encindex = rb_enc_get_index(str);
3082 if (RB_UNLIKELY(encindex == -1)) {
3086 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
3091 if (!rb_enc_asciicompat(enc)) {
3113 return RSTRING_PTR(str);
3117zero_filled(
const char *s,
int n)
3119 for (; n > 0; --n) {
3126str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
3128 const char *e = s +
len;
3130 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
3131 if (zero_filled(s, minlen))
return s;
3137str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
3142 if (str_dependent_p(str)) {
3143 if (!zero_filled(s +
len, termlen))
3144 str_make_independent_expand(str,
len, 0L, termlen);
3147 TERM_FILL(s +
len, termlen);
3150 return RSTRING_PTR(str);
3154rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
3156 long capa = str_capacity(str, oldtermlen) + oldtermlen;
3157 long len = RSTRING_LEN(str);
3161 rb_check_lockedtmp(str);
3162 str_make_independent_expand(str,
len, 0L, termlen);
3164 else if (str_dependent_p(str)) {
3165 if (termlen > oldtermlen)
3166 str_make_independent_expand(str,
len, 0L, termlen);
3169 if (!STR_EMBED_P(str)) {
3174 if (termlen > oldtermlen) {
3175 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
3183str_null_check(
VALUE str,
int *w)
3185 char *s = RSTRING_PTR(str);
3186 long len = RSTRING_LEN(str);
3188 const int minlen = rb_enc_mbminlen(enc);
3192 if (str_null_char(s,
len, minlen, enc)) {
3195 return str_fill_term(str, s,
len, minlen);
3198 if (!s || memchr(s, 0,
len)) {
3202 s = str_fill_term(str, s,
len, minlen);
3208rb_str_to_cstr(
VALUE str)
3211 return str_null_check(str, &w);
3219 char *s = str_null_check(str, &w);
3222 rb_raise(rb_eArgError,
"string contains null char");
3224 rb_raise(rb_eArgError,
"string contains null byte");
3230rb_str_fill_terminator(
VALUE str,
const int newminlen)
3232 char *s = RSTRING_PTR(str);
3233 long len = RSTRING_LEN(str);
3234 return str_fill_term(str, s,
len, newminlen);
3240 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
3266str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
3275 else if (rb_enc_asciicompat(enc)) {
3276 const char *p2, *e2;
3279 while (p < e && 0 < nth) {
3286 p2 = search_nonascii(p, e2);
3295 n = rb_enc_mbclen(p, e, enc);
3306 while (p < e && nth--) {
3307 p += rb_enc_mbclen(p, e, enc);
3318 return str_nth_len(p, e, &nth, enc);
3322str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3327 p = str_nth_len(p, e, &nth, enc);
3336str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3338 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3339 if (!pp)
return e - p;
3346 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3347 STR_ENC_GET(str), single_byte_optimizable(str));
3352str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3355 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3356 const uintptr_t *s, *t;
3357 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3358 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3359 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3360 while (p < (
const char *)s) {
3361 if (is_utf8_lead_byte(*p)) nth--;
3365 nth -= count_utf8_lead_bytes_with_word(s);
3367 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3371 if (is_utf8_lead_byte(*p)) {
3372 if (nth == 0)
break;
3382str_utf8_offset(
const char *p,
const char *e,
long nth)
3384 const char *pp = str_utf8_nth(p, e, &nth);
3393 if (single_byte_optimizable(str) || pos < 0)
3396 char *p = RSTRING_PTR(str);
3397 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3402str_subseq(
VALUE str,
long beg,
long len)
3410 const int termlen = TERM_LEN(str);
3411 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3418 if (str_embed_capa(str2) >=
len + termlen) {
3419 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3420 STR_SET_EMBED(str2);
3421 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3422 TERM_FILL(ptr2+
len, termlen);
3424 STR_SET_LEN(str2,
len);
3428 str_replace_shared(str2, str);
3431 RSTRING(str2)->as.heap.ptr += beg;
3432 if (RSTRING_LEN(str2) >
len) {
3433 STR_SET_LEN(str2,
len);
3443 VALUE str2 = str_subseq(str, beg,
len);
3444 rb_enc_cr_str_copy_for_substr(str2, str);
3453 const long blen = RSTRING_LEN(str);
3455 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3457 if (
len < 0)
return 0;
3458 if (beg < 0 && -beg < 0)
return 0;
3462 if (single_byte_optimizable(str)) {
3463 if (beg > blen)
return 0;
3466 if (beg < 0)
return 0;
3468 if (
len > blen - beg)
3470 if (
len < 0)
return 0;
3475 if (
len > -beg)
len = -beg;
3479 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3482 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3488 slen = str_strlen(str, enc);
3490 if (beg < 0)
return 0;
3492 if (
len == 0)
goto end;
3495 else if (beg > 0 && beg > blen) {
3499 if (beg > str_strlen(str, enc))
return 0;
3504 enc == rb_utf8_encoding()) {
3505 p = str_utf8_nth(s, e, &beg);
3506 if (beg > 0)
return 0;
3507 len = str_utf8_offset(p, e,
len);
3513 p = s + beg * char_sz;
3517 else if (
len * char_sz > e - p)
3522 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3523 if (beg > 0)
return 0;
3527 len = str_offset(p, e,
len, enc, 0);
3535static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3540 return str_substr(str, beg,
len, TRUE);
3550str_substr(
VALUE str,
long beg,
long len,
int empty)
3554 if (!p)
return Qnil;
3555 if (!
len && !empty)
return Qnil;
3557 beg = p - RSTRING_PTR(str);
3559 VALUE str2 = str_subseq(str, beg,
len);
3560 rb_enc_cr_str_copy_for_substr(str2, str);
3568 if (CHILLED_STRING_P(str)) {
3573 rb_str_resize(str, RSTRING_LEN(str));
3589 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3619str_uminus(
VALUE str)
3624 return rb_fstring(str);
3628#define rb_str_dup_frozen rb_str_new_frozen
3633 if (
FL_TEST(str, STR_TMPLOCK)) {
3636 FL_SET(str, STR_TMPLOCK);
3643 if (!
FL_TEST(str, STR_TMPLOCK)) {
3663 const int termlen = TERM_LEN(str);
3665 str_modifiable(str);
3666 if (STR_SHARED_P(str)) {
3669 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3670 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3681 else if (
len > RSTRING_LEN(str)) {
3685 const char *
const new_end = RSTRING_PTR(str) +
len;
3695 else if (
len < RSTRING_LEN(str)) {
3703 STR_SET_LEN(str,
len);
3704 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3711 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3714 int independent = str_independent(str);
3715 long slen = RSTRING_LEN(str);
3716 const int termlen = TERM_LEN(str);
3718 if (slen >
len || (termlen != 1 && slen <
len)) {
3724 if (STR_EMBED_P(str)) {
3725 if (
len == slen)
return str;
3726 if (str_embed_capa(str) >=
len + termlen) {
3727 STR_SET_LEN(str,
len);
3731 str_make_independent_expand(str, slen,
len - slen, termlen);
3733 else if (str_embed_capa(str) >=
len + termlen) {
3734 char *
ptr = STR_HEAP_PTR(str);
3736 if (slen >
len) slen =
len;
3739 STR_SET_LEN(str,
len);
3740 if (independent) ruby_xfree(
ptr);
3743 else if (!independent) {
3744 if (
len == slen)
return str;
3745 str_make_independent_expand(str, slen,
len - slen, termlen);
3749 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3750 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3753 else if (
len == slen)
return str;
3754 STR_SET_LEN(str,
len);
3761str_ensure_available_capa(
VALUE str,
long len)
3763 str_modify_keep_cr(str);
3765 const int termlen = TERM_LEN(str);
3766 long olen = RSTRING_LEN(str);
3768 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3769 rb_raise(rb_eArgError,
"string sizes too big");
3772 long total = olen +
len;
3773 long capa = str_capacity(str, termlen);
3776 if (total >= LONG_MAX / 2) {
3779 while (total >
capa) {
3782 RESIZE_CAPA_TERM(str,
capa, termlen);
3787str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3790 str_modify_keep_cr(str);
3795 if (
len == 0)
return 0;
3797 long total, olen,
off = -1;
3799 const int termlen = TERM_LEN(str);
3802 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3806 long capa = str_capacity(str, termlen);
3808 if (olen > LONG_MAX -
len) {
3809 rb_raise(rb_eArgError,
"string sizes too big");
3813 if (total >= LONG_MAX / 2) {
3816 while (total >
capa) {
3819 RESIZE_CAPA_TERM(str,
capa, termlen);
3820 sptr = RSTRING_PTR(str);
3825 memcpy(sptr + olen,
ptr,
len);
3826 STR_SET_LEN(str, total);
3827 TERM_FILL(sptr + total, termlen);
3832#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3833#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3838 if (
len == 0)
return str;
3840 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3842 return str_buf_cat(str,
ptr,
len);
3853rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3858 if (UNLIKELY(!str_independent(str))) {
3859 str_make_independent(str);
3862 long string_length = -1;
3863 const int null_terminator_length = 1;
3868 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3869 rb_raise(rb_eArgError,
"string sizes too big");
3872 long string_capacity = str_capacity(str, null_terminator_length);
3878 if (LIKELY(string_capacity >= string_length + 1)) {
3880 sptr[string_length] = byte;
3881 STR_SET_LEN(str, string_length + 1);
3882 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3886 str_buf_cat(str, (
char *)&
byte, 1);
3902 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3913rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3914 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3923 if (str_encindex == ptr_encindex) {
3925 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3929 str_enc = rb_enc_from_index(str_encindex);
3930 ptr_enc = rb_enc_from_index(ptr_encindex);
3931 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3934 if (RSTRING_LEN(str) == 0) {
3937 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3943 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3952 *ptr_cr_ret = ptr_cr;
3954 if (str_encindex != ptr_encindex &&
3957 str_enc = rb_enc_from_index(str_encindex);
3958 ptr_enc = rb_enc_from_index(ptr_encindex);
3963 res_encindex = str_encindex;
3968 res_encindex = str_encindex;
3972 res_encindex = ptr_encindex;
3977 res_encindex = str_encindex;
3984 res_encindex = str_encindex;
3990 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3992 str_buf_cat(str,
ptr,
len);
3998 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
4005 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
4015 if (rb_enc_asciicompat(enc)) {
4016 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
4022 unsigned int c = (
unsigned char)*
ptr;
4023 int len = rb_enc_codelen(c, enc);
4024 rb_enc_mbcput(c, buf, enc);
4025 rb_enc_cr_str_buf_cat(str, buf,
len,
4038 if (str_enc_fastpath(str)) {
4042 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
4048 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
4059 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
4075rb_str_concat_literals(
size_t num,
const VALUE *strary)
4079 unsigned long len = 1;
4084 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
4086 str_enc_copy_direct(str, strary[0]);
4088 for (i = s; i < num; ++i) {
4089 const VALUE v = strary[i];
4093 if (encidx != ENCINDEX_US_ASCII) {
4095 rb_enc_set_index(str, encidx);
4120rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
4122 str_modifiable(str);
4127 else if (argc > 1) {
4130 rb_enc_copy(arg_str, str);
4131 for (i = 0; i < argc; i++) {
4164rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
4166 long needed_capacity = 0;
4170 for (
int index = 0; index < argc; index++) {
4171 VALUE obj = argv[index];
4179 needed_capacity += RSTRING_LEN(obj);
4184 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
4191 str_ensure_available_capa(str, needed_capacity);
4194 for (
int index = 0; index < argc; index++) {
4195 VALUE obj = argv[index];
4200 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
4201 char byte = (char)(
NUM2INT(obj) & 0xFF);
4215 rb_bug(
"append_as_bytes arguments should have been validated");
4219 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
4220 TERM_FILL(sptr, TERM_LEN(str));
4225 for (
int index = 0; index < argc; index++) {
4226 VALUE obj = argv[index];
4243 rb_bug(
"append_as_bytes arguments should have been validated");
4317 if (rb_num_to_uint(str2, &code) == 0) {
4330 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4333 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4336 long pos = RSTRING_LEN(str1);
4341 switch (
len = rb_enc_codelen(code, enc)) {
4342 case ONIGERR_INVALID_CODE_POINT_VALUE:
4343 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4345 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4351 rb_enc_mbcput(code, buf, enc);
4352 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4353 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4355 rb_str_resize(str1, pos+
len);
4356 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4369rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4371 int encidx = rb_enc_to_index(enc);
4373 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4378 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4379 return ENCINDEX_ASCII_8BIT;
4402rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4404 str_modifiable(str);
4409 else if (argc > 1) {
4412 rb_enc_copy(arg_str, str);
4413 for (i = 0; i < argc; i++) {
4426 st_index_t precomputed_hash;
4427 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4429 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4430 return precomputed_hash;
4433 return str_do_hash(str);
4440 const char *ptr1, *ptr2;
4443 return (len1 != len2 ||
4445 memcmp(ptr1, ptr2, len1) != 0);
4459rb_str_hash_m(
VALUE str)
4465#define lesser(a,b) (((a)>(b))?(b):(a))
4473 if (RSTRING_LEN(str1) == 0)
return TRUE;
4474 if (RSTRING_LEN(str2) == 0)
return TRUE;
4477 if (idx1 == idx2)
return TRUE;
4482 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4486 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4496 const char *ptr1, *ptr2;
4499 if (str1 == str2)
return 0;
4502 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4511 if (len1 > len2)
return 1;
4514 if (retval > 0)
return 1;
4541 if (str1 == str2)
return Qtrue;
4548 return rb_str_eql_internal(str1, str2);
4572 if (str1 == str2)
return Qtrue;
4574 return rb_str_eql_internal(str1, str2);
4605 return rb_invcmp(str1, str2);
4647 return str_casecmp(str1, s);
4655 const char *p1, *p1end, *p2, *p2end;
4657 enc = rb_enc_compatible(str1, str2);
4662 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4663 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4664 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4665 while (p1 < p1end && p2 < p2end) {
4667 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4668 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4670 return INT2FIX(c1 < c2 ? -1 : 1);
4677 while (p1 < p1end && p2 < p2end) {
4678 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4679 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4681 if (0 <= c1 && 0 <= c2) {
4685 return INT2FIX(c1 < c2 ? -1 : 1);
4689 l1 = rb_enc_mbclen(p1, p1end, enc);
4690 l2 = rb_enc_mbclen(p2, p2end, enc);
4691 len = l1 < l2 ? l1 : l2;
4692 r = memcmp(p1, p2,
len);
4694 return INT2FIX(r < 0 ? -1 : 1);
4696 return INT2FIX(l1 < l2 ? -1 : 1);
4702 if (RSTRING_LEN(str1) == RSTRING_LEN(str2))
return INT2FIX(0);
4703 if (RSTRING_LEN(str1) > RSTRING_LEN(str2))
return INT2FIX(1);
4737 return str_casecmp_p(str1, s);
4744 VALUE folded_str1, folded_str2;
4745 VALUE fold_opt = sym_fold;
4747 enc = rb_enc_compatible(str1, str2);
4752 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4753 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4755 return rb_str_eql(folded_str1, folded_str2);
4759strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4760 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4762 const char *search_start = str_ptr;
4763 long pos, search_len = str_len - offset;
4767 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4768 if (pos < 0)
return pos;
4770 if (t == search_start + pos)
break;
4771 search_len -= t - search_start;
4772 if (search_len <= 0)
return -1;
4773 offset += t - search_start;
4776 return pos + offset;
4780#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4781#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4784rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4786 const char *str_ptr, *str_ptr_end, *sub_ptr;
4787 long str_len, sub_len;
4790 enc = rb_enc_check(str, sub);
4791 if (is_broken_string(sub))
return -1;
4793 str_ptr = RSTRING_PTR(str);
4795 str_len = RSTRING_LEN(str);
4796 sub_ptr = RSTRING_PTR(sub);
4797 sub_len = RSTRING_LEN(sub);
4799 if (str_len < sub_len)
return -1;
4802 long str_len_char, sub_len_char;
4803 int single_byte = single_byte_optimizable(str);
4804 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4805 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4807 offset += str_len_char;
4808 if (offset < 0)
return -1;
4810 if (str_len_char - offset < sub_len_char)
return -1;
4811 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4814 if (sub_len == 0)
return offset;
4817 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4831rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4838 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4839 long slen = str_strlen(str, enc);
4841 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4853 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4854 enc, single_byte_optimizable(str));
4865 pos = rb_str_index(str, sub, pos);
4879str_ensure_byte_pos(
VALUE str,
long pos)
4881 if (!single_byte_optimizable(str)) {
4882 const char *s = RSTRING_PTR(str);
4884 const char *p = s + pos;
4885 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4887 "offset %ld does not land on character boundary", pos);
4934rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4940 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4941 long slen = RSTRING_LEN(str);
4943 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4954 str_ensure_byte_pos(str, pos);
4966 pos = rb_str_byteindex(str, sub, pos);
4967 if (pos >= 0)
return LONG2NUM(pos);
4974memrchr(
const char *search_str,
int chr,
long search_len)
4976 const char *ptr = search_str + search_len;
4977 while (ptr > search_str) {
4978 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4988 char *hit, *adjusted;
4990 long slen, searchlen;
4993 sbeg = RSTRING_PTR(str);
4994 slen = RSTRING_LEN(sub);
4995 if (slen == 0)
return s - sbeg;
4997 t = RSTRING_PTR(sub);
4999 searchlen = s - sbeg + 1;
5001 if (memcmp(s, t, slen) == 0) {
5006 hit = memrchr(sbeg, c, searchlen);
5009 if (hit != adjusted) {
5010 searchlen = adjusted - sbeg;
5013 if (memcmp(hit, t, slen) == 0)
5015 searchlen = adjusted - sbeg;
5016 }
while (searchlen > 0);
5030 enc = rb_enc_check(str, sub);
5031 if (is_broken_string(sub))
return -1;
5032 singlebyte = single_byte_optimizable(str);
5033 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
5034 slen = str_strlen(sub, enc);
5037 if (
len < slen)
return -1;
5038 if (
len - pos < slen) pos =
len - slen;
5039 if (
len == 0)
return pos;
5041 sbeg = RSTRING_PTR(str);
5044 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
5050 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
5051 return str_rindex(str, sub, s, enc);
5112rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
5117 long pos,
len = str_strlen(str, enc);
5119 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5121 if (pos < 0 && (pos +=
len) < 0) {
5127 if (pos >
len) pos =
len;
5135 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
5136 enc, single_byte_optimizable(str));
5147 pos = rb_str_rindex(str, sub, pos);
5157rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
5163 enc = rb_enc_check(str, sub);
5164 if (is_broken_string(sub))
return -1;
5165 len = RSTRING_LEN(str);
5166 slen = RSTRING_LEN(sub);
5169 if (
len < slen)
return -1;
5170 if (
len - pos < slen) pos =
len - slen;
5171 if (
len == 0)
return pos;
5173 sbeg = RSTRING_PTR(str);
5176 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
5183 return str_rindex(str, sub, s, enc);
5248rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
5252 long pos,
len = RSTRING_LEN(str);
5254 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5256 if (pos < 0 && (pos +=
len) < 0) {
5262 if (pos >
len) pos =
len;
5268 str_ensure_byte_pos(str, pos);
5280 pos = rb_str_byterindex(str, sub, pos);
5281 if (pos >= 0)
return LONG2NUM(pos);
5317 switch (OBJ_BUILTIN_TYPE(y)) {
5369rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5376 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5408rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5412 re = get_pat(argv[0]);
5413 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5422static enum neighbor_char
5428 if (rb_enc_mbminlen(enc) > 1) {
5430 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5432 return NEIGHBOR_NOT_CHAR;
5434 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5436 if (!l)
return NEIGHBOR_NOT_CHAR;
5437 if (l !=
len)
return NEIGHBOR_WRAPPED;
5438 rb_enc_mbcput(c, p, enc);
5439 r = rb_enc_precise_mbclen(p, p +
len, enc);
5441 return NEIGHBOR_NOT_CHAR;
5443 return NEIGHBOR_FOUND;
5446 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5449 return NEIGHBOR_WRAPPED;
5450 ++((
unsigned char*)p)[i];
5451 l = rb_enc_precise_mbclen(p, p+
len, enc);
5455 return NEIGHBOR_FOUND;
5458 memset(p+l, 0xff,
len-l);
5464 for (len2 =
len-1; 0 < len2; len2--) {
5465 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5469 memset(p+len2+1, 0xff,
len-(len2+1));
5474static enum neighbor_char
5479 if (rb_enc_mbminlen(enc) > 1) {
5481 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5483 return NEIGHBOR_NOT_CHAR;
5485 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5486 if (!c)
return NEIGHBOR_NOT_CHAR;
5489 if (!l)
return NEIGHBOR_NOT_CHAR;
5490 if (l !=
len)
return NEIGHBOR_WRAPPED;
5491 rb_enc_mbcput(c, p, enc);
5492 r = rb_enc_precise_mbclen(p, p +
len, enc);
5494 return NEIGHBOR_NOT_CHAR;
5496 return NEIGHBOR_FOUND;
5499 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5502 return NEIGHBOR_WRAPPED;
5503 --((
unsigned char*)p)[i];
5504 l = rb_enc_precise_mbclen(p, p+
len, enc);
5508 return NEIGHBOR_FOUND;
5511 memset(p+l, 0,
len-l);
5517 for (len2 =
len-1; 0 < len2; len2--) {
5518 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5522 memset(p+len2+1, 0,
len-(len2+1));
5536static enum neighbor_char
5537enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5539 enum neighbor_char ret;
5543 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5547 const int max_gaps = 1;
5549 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5551 ctype = ONIGENC_CTYPE_DIGIT;
5553 ctype = ONIGENC_CTYPE_ALPHA;
5555 return NEIGHBOR_NOT_CHAR;
5558 for (
try = 0;
try <= max_gaps; ++
try) {
5559 ret = enc_succ_char(p,
len, enc);
5560 if (ret == NEIGHBOR_FOUND) {
5561 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5563 return NEIGHBOR_FOUND;
5570 ret = enc_pred_char(p,
len, enc);
5571 if (ret == NEIGHBOR_FOUND) {
5572 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5585 return NEIGHBOR_NOT_CHAR;
5588 if (ctype != ONIGENC_CTYPE_DIGIT) {
5590 return NEIGHBOR_WRAPPED;
5594 enc_succ_char(carry,
len, enc);
5595 return NEIGHBOR_WRAPPED;
5663 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5664 rb_enc_cr_str_copy_for_substr(str, orig);
5665 return str_succ(str);
5672 char *sbeg, *s, *e, *last_alnum = 0;
5673 int found_alnum = 0;
5675 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5676 long carry_pos = 0, carry_len = 1;
5677 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5679 slen = RSTRING_LEN(str);
5680 if (slen == 0)
return str;
5682 enc = STR_ENC_GET(str);
5683 sbeg = RSTRING_PTR(str);
5684 s = e = sbeg + slen;
5686 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5687 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5693 l = rb_enc_precise_mbclen(s, e, enc);
5694 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5695 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5696 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5698 case NEIGHBOR_NOT_CHAR:
5700 case NEIGHBOR_FOUND:
5702 case NEIGHBOR_WRAPPED:
5707 carry_pos = s - sbeg;
5712 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5713 enum neighbor_char neighbor;
5714 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5715 l = rb_enc_precise_mbclen(s, e, enc);
5716 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5717 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5719 neighbor = enc_succ_char(tmp, l, enc);
5721 case NEIGHBOR_FOUND:
5725 case NEIGHBOR_WRAPPED:
5728 case NEIGHBOR_NOT_CHAR:
5731 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5733 enc_succ_char(s, l, enc);
5735 if (!rb_enc_asciicompat(enc)) {
5736 MEMCPY(carry, s,
char, l);
5739 carry_pos = s - sbeg;
5743 RESIZE_CAPA(str, slen + carry_len);
5744 sbeg = RSTRING_PTR(str);
5745 s = sbeg + carry_pos;
5746 memmove(s + carry_len, s, slen - carry_pos);
5747 memmove(s, carry, carry_len);
5749 STR_SET_LEN(str, slen);
5750 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5764rb_str_succ_bang(
VALUE str)
5772all_digits_p(
const char *s,
long len)
5826 VALUE end, exclusive;
5830 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5836 VALUE current, after_end;
5843 enc = rb_enc_check(beg, end);
5844 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5846 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5847 char c = RSTRING_PTR(beg)[0];
5848 char e = RSTRING_PTR(end)[0];
5850 if (c > e || (excl && c == e))
return beg;
5852 VALUE str = rb_enc_str_new(&c, 1, enc);
5854 if ((*each)(str, arg))
break;
5855 if (!excl && c == e)
break;
5857 if (excl && c == e)
break;
5862 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5863 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5864 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5869 b = rb_str_to_inum(beg, 10, FALSE);
5870 e = rb_str_to_inum(end, 10, FALSE);
5877 if (excl && bi == ei)
break;
5878 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5883 ID op = excl ?
'<' : idLE;
5884 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5889 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5890 b = rb_funcallv(b, succ, 0, 0);
5897 if (n > 0 || (excl && n == 0))
return beg;
5899 after_end = rb_funcallv(end, succ, 0, 0);
5904 next = rb_funcallv(current, succ, 0, 0);
5905 if ((*each)(current, arg))
break;
5906 if (
NIL_P(next))
break;
5910 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5925 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5926 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5927 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5929 b = rb_str_to_inum(beg, 10, FALSE);
5935 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5943 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5944 b = rb_funcallv(b, succ, 0, 0);
5950 VALUE next = rb_funcallv(current, succ, 0, 0);
5951 if ((*each)(current, arg))
break;
5954 if (RSTRING_LEN(current) == 0)
5965 if (!
rb_equal(str, *argp))
return 0;
5979 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5980 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5981 rb_enc_asciicompat(STR_ENC_GET(val))) {
5982 const char *bp = RSTRING_PTR(beg);
5983 const char *ep = RSTRING_PTR(end);
5984 const char *vp = RSTRING_PTR(val);
5985 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5986 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5994 if (b <= v && v < e)
return Qtrue;
5995 return RBOOL(!
RTEST(exclusive) && v == e);
6002 all_digits_p(bp, RSTRING_LEN(beg)) &&
6003 all_digits_p(ep, RSTRING_LEN(end))) {
6008 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
6010 return RBOOL(
NIL_P(val));
6033 return rb_str_subpat(str, indx,
INT2FIX(0));
6036 if (rb_str_index(str, indx, 0) != -1)
6042 long beg,
len = str_strlen(str, NULL);
6054 return str_substr(str, idx, 1, FALSE);
6073rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
6077 return rb_str_subpat(str, argv[0], argv[1]);
6080 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
6084 return rb_str_aref(str, argv[0]);
6090 char *ptr = RSTRING_PTR(str);
6091 long olen = RSTRING_LEN(str), nlen;
6093 str_modifiable(str);
6094 if (
len > olen)
len = olen;
6096 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
6098 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
6100 ptr =
RSTRING(str)->as.embed.ary;
6101 memmove(ptr, oldptr +
len, nlen);
6102 if (fl == STR_NOEMBED)
xfree(oldptr);
6105 if (!STR_SHARED_P(str)) {
6107 rb_enc_cr_str_exact_copy(shared, str);
6112 STR_SET_LEN(str, nlen);
6114 if (!SHARABLE_MIDDLE_SUBSTRING) {
6115 TERM_FILL(ptr + nlen, TERM_LEN(str));
6122rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
6128 if (beg == 0 && vlen == 0) {
6133 str_modify_keep_cr(str);
6137 RESIZE_CAPA(str, slen + vlen -
len);
6138 sptr = RSTRING_PTR(str);
6147 memmove(sptr + beg + vlen,
6149 slen - (beg +
len));
6151 if (vlen < beg &&
len < 0) {
6155 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
6158 STR_SET_LEN(str, slen);
6159 TERM_FILL(&sptr[slen], TERM_LEN(str));
6166 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
6175 int singlebyte = single_byte_optimizable(str);
6181 enc = rb_enc_check(str, val);
6182 slen = str_strlen(str, enc);
6184 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
6193 if (
len > slen - beg) {
6196 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
6201 beg = p - RSTRING_PTR(str);
6203 rb_str_update_0(str, beg,
len, val);
6204 rb_enc_associate(str, enc);
6215 long start, end,
len;
6225 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
6229 nth += regs->num_regs;
6239 enc = rb_enc_check_str(str, val);
6240 rb_str_update_0(str, start,
len, val);
6241 rb_enc_associate(str, enc);
6249 switch (
TYPE(indx)) {
6251 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
6255 beg = rb_str_index(str, indx, 0);
6309rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
6313 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
6321 return rb_str_aset(str, argv[0], argv[1]);
6381rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6389 str_modify_keep_cr(str);
6397 if ((nth += regs->num_regs) <= 0)
return Qnil;
6399 else if (nth >= regs->num_regs)
return Qnil;
6401 len = END(nth) - beg;
6404 else if (argc == 2) {
6413 beg = p - RSTRING_PTR(str);
6417 beg = rb_str_index(str, indx, 0);
6418 if (beg == -1)
return Qnil;
6419 len = RSTRING_LEN(indx);
6431 beg = p - RSTRING_PTR(str);
6440 beg = p - RSTRING_PTR(str);
6444 rb_enc_cr_str_copy_for_substr(result, str);
6452 char *sptr = RSTRING_PTR(str);
6453 long slen = RSTRING_LEN(str);
6454 if (beg +
len > slen)
6458 slen - (beg +
len));
6460 STR_SET_LEN(str, slen);
6461 TERM_FILL(&sptr[slen], TERM_LEN(str));
6472 switch (OBJ_BUILTIN_TYPE(pat)) {
6491get_pat_quoted(
VALUE pat,
int check)
6495 switch (OBJ_BUILTIN_TYPE(pat)) {
6509 if (check && is_broken_string(pat)) {
6516rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6519 pos = rb_str_byteindex(str, pat, pos);
6520 if (set_backref_str) {
6522 str = rb_str_new_frozen_String(str);
6523 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6525 *match = match_data;
6535 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6540rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6542 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6561rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6575 hash = rb_check_hash_type(argv[1]);
6581 pat = get_pat_quoted(argv[0], 1);
6583 str_modifiable(str);
6584 beg = rb_pat_search(pat, str, 0, 1);
6598 end0 = beg0 + RSTRING_LEN(pat);
6607 if (iter || !
NIL_P(hash)) {
6608 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6614 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6617 str_mod_check(str, p,
len);
6618 rb_check_frozen(str);
6624 enc = rb_enc_compatible(str, repl);
6627 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6631 rb_enc_inspect_name(str_enc),
6632 rb_enc_inspect_name(STR_ENC_GET(repl)));
6634 enc = STR_ENC_GET(repl);
6637 rb_enc_associate(str, enc);
6647 rlen = RSTRING_LEN(repl);
6648 len = RSTRING_LEN(str);
6650 RESIZE_CAPA(str,
len + rlen - plen);
6652 p = RSTRING_PTR(str);
6654 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6656 rp = RSTRING_PTR(repl);
6657 memmove(p + beg0, rp, rlen);
6659 STR_SET_LEN(str,
len);
6660 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6689 rb_str_sub_bang(argc, argv, str);
6694str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6697 long beg, beg0, end0;
6698 long offset, blen, slen,
len, last;
6699 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6701 int need_backref_str = -1;
6711 hash = rb_check_hash_type(argv[1]);
6715 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6723 rb_error_arity(argc, 1, 2);
6726 pat = get_pat_quoted(argv[0], 1);
6727 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6730 if (bang)
return Qnil;
6735 blen = RSTRING_LEN(str) + 30;
6737 sp = RSTRING_PTR(str);
6738 slen = RSTRING_LEN(str);
6740 str_enc = STR_ENC_GET(str);
6741 rb_enc_associate(dest, str_enc);
6748 end0 = beg0 + RSTRING_LEN(pat);
6764 if (mode == FAST_MAP) {
6773 val = rb_hash_aref(hash, key);
6776 str_mod_check(str, sp, slen);
6781 else if (need_backref_str) {
6783 if (need_backref_str < 0) {
6784 need_backref_str = val != repl;
6791 len = beg0 - offset;
6805 if (RSTRING_LEN(str) <= end0)
break;
6806 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6808 offset = end0 +
len;
6810 cp = RSTRING_PTR(str) + offset;
6811 if (offset > RSTRING_LEN(str))
break;
6814 if (mode != FAST_MAP && mode != STR) {
6817 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6822 if (RSTRING_LEN(str) > offset) {
6825 rb_pat_search0(pat, str, last, 1, &match);
6827 str_shared_replace(str, dest);
6855rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6857 str_modify_keep_cr(str);
6858 return str_gsub(argc, argv, str, 1);
6881 return str_gsub(argc, argv, str, 0);
6899 str_modifiable(str);
6900 if (str == str2)
return str;
6904 return str_replace(str, str2);
6919rb_str_clear(
VALUE str)
6923 STR_SET_LEN(str, 0);
6924 RSTRING_PTR(str)[0] = 0;
6925 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6944rb_str_chr(
VALUE str)
6968 pos += RSTRING_LEN(str);
6969 if (pos < 0 || RSTRING_LEN(str) <= pos)
6972 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6991 long len = RSTRING_LEN(str);
6992 char *
ptr, *head, *left = 0;
6996 if (pos < -
len ||
len <= pos)
7003 char byte = (char)(
NUM2INT(w) & 0xFF);
7005 if (!str_independent(str))
7006 str_make_independent(str);
7007 enc = STR_ENC_GET(str);
7008 head = RSTRING_PTR(str);
7010 if (!STR_EMBED_P(str)) {
7017 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
7025 width = rb_enc_precise_mbclen(left, head+
len, enc);
7027 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
7043str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
7045 long n = RSTRING_LEN(str);
7047 if (beg > n ||
len < 0)
return Qnil;
7050 if (beg < 0)
return Qnil;
7055 if (!empty)
return Qnil;
7059 VALUE str2 = str_subseq(str, beg,
len);
7061 str_enc_copy_direct(str2, str);
7063 if (RSTRING_LEN(str2) == 0) {
7064 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
7098 long beg,
len = RSTRING_LEN(str);
7106 return str_byte_substr(str, beg,
len, TRUE);
7111 return str_byte_substr(str, idx, 1, FALSE);
7158rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
7163 return str_byte_substr(str, beg,
len, TRUE);
7166 return str_byte_aref(str, argv[0]);
7170str_check_beg_len(
VALUE str,
long *beg,
long *
len)
7172 long end, slen = RSTRING_LEN(str);
7175 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
7184 if (*
len > slen - *beg) {
7188 str_ensure_byte_pos(str, *beg);
7189 str_ensure_byte_pos(str, end);
7214rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
7216 long beg,
len, vbeg, vlen;
7221 if (!(argc == 2 || argc == 3 || argc == 5)) {
7222 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
7226 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
7227 rb_builtin_class_name(argv[0]));
7234 vlen = RSTRING_LEN(val);
7239 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
7240 rb_builtin_class_name(argv[2]));
7252 vlen = RSTRING_LEN(val);
7260 str_check_beg_len(str, &beg, &
len);
7261 str_check_beg_len(val, &vbeg, &vlen);
7262 str_modify_keep_cr(str);
7265 rb_enc_associate(str, rb_enc_check(str, val));
7268 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
7286rb_str_reverse(
VALUE str)
7293 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
7294 enc = STR_ENC_GET(str);
7300 if (RSTRING_LEN(str) > 1) {
7301 if (single_byte_optimizable(str)) {
7308 int clen = rb_enc_fast_mbclen(s, e, enc);
7316 cr = rb_enc_asciicompat(enc) ?
7319 int clen = rb_enc_mbclen(s, e, enc);
7328 STR_SET_LEN(rev, RSTRING_LEN(str));
7329 str_enc_copy_direct(rev, str);
7349rb_str_reverse_bang(
VALUE str)
7351 if (RSTRING_LEN(str) > 1) {
7352 if (single_byte_optimizable(str)) {
7355 str_modify_keep_cr(str);
7356 s = RSTRING_PTR(str);
7365 str_shared_replace(str, rb_str_reverse(str));
7369 str_modify_keep_cr(str);
7394 i = rb_str_index(str, arg, 0);
7396 return RBOOL(i != -1);
7438 rb_raise(rb_eArgError,
"invalid radix %d", base);
7440 return rb_str_to_inum(str, base, FALSE);
7464rb_str_to_f(
VALUE str)
7479rb_str_to_s(
VALUE str)
7491 char s[RUBY_MAX_CHAR_LEN];
7492 int n = rb_enc_codelen(c, enc);
7494 rb_enc_mbcput(c, s, enc);
7499#define CHAR_ESC_LEN 13
7502rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7504 char buf[CHAR_ESC_LEN + 1];
7512 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7514 else if (c < 0x10000) {
7515 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7518 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7523 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7526 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7529 l = (int)strlen(buf);
7535ruby_escaped_char(
int c)
7538 case '\0':
return "\\0";
7539 case '\n':
return "\\n";
7540 case '\r':
return "\\r";
7541 case '\t':
return "\\t";
7542 case '\f':
return "\\f";
7543 case '\013':
return "\\v";
7544 case '\010':
return "\\b";
7545 case '\007':
return "\\a";
7546 case '\033':
return "\\e";
7547 case '\x7f':
return "\\c?";
7553rb_str_escape(
VALUE str)
7557 const char *p = RSTRING_PTR(str);
7559 const char *prev = p;
7560 char buf[CHAR_ESC_LEN + 1];
7562 int unicode_p = rb_enc_unicode_p(enc);
7563 int asciicompat = rb_enc_asciicompat(enc);
7568 int n = rb_enc_precise_mbclen(p, pend, enc);
7570 if (p > prev) str_buf_cat(result, prev, p - prev);
7571 n = rb_enc_mbminlen(enc);
7573 n = (int)(pend - p);
7575 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7576 str_buf_cat(result, buf, strlen(buf));
7582 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7584 cc = ruby_escaped_char(c);
7586 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7587 str_buf_cat(result, cc, strlen(cc));
7590 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7593 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7594 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7598 if (p > prev) str_buf_cat(result, prev, p - prev);
7622 const char *p, *pend, *prev;
7623 char buf[CHAR_ESC_LEN + 1];
7625 rb_encoding *resenc = rb_default_internal_encoding();
7626 int unicode_p = rb_enc_unicode_p(enc);
7627 int asciicompat = rb_enc_asciicompat(enc);
7629 if (resenc == NULL) resenc = rb_default_external_encoding();
7630 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7631 rb_enc_associate(result, resenc);
7632 str_buf_cat2(result,
"\"");
7640 n = rb_enc_precise_mbclen(p, pend, enc);
7642 if (p > prev) str_buf_cat(result, prev, p - prev);
7643 n = rb_enc_mbminlen(enc);
7645 n = (int)(pend - p);
7647 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7648 str_buf_cat(result, buf, strlen(buf));
7654 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7656 if ((asciicompat || unicode_p) &&
7657 (c ==
'"'|| c ==
'\\' ||
7662 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7663 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7664 str_buf_cat2(result,
"\\");
7665 if (asciicompat || enc == resenc) {
7671 case '\n': cc =
'n';
break;
7672 case '\r': cc =
'r';
break;
7673 case '\t': cc =
't';
break;
7674 case '\f': cc =
'f';
break;
7675 case '\013': cc =
'v';
break;
7676 case '\010': cc =
'b';
break;
7677 case '\007': cc =
'a';
break;
7678 case 033: cc =
'e';
break;
7679 default: cc = 0;
break;
7682 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7685 str_buf_cat(result, buf, 2);
7698 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7702 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7703 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7708 if (p > prev) str_buf_cat(result, prev, p - prev);
7709 str_buf_cat2(result,
"\"");
7714#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7734 int encidx = rb_enc_get_index(str);
7737 const char *p, *pend;
7740 int u8 = (encidx == rb_utf8_encindex());
7741 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7744 if (!rb_enc_asciicompat(enc)) {
7746 len += strlen(enc->name);
7749 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7752 unsigned char c = *p++;
7755 case '"':
case '\\':
7756 case '\n':
case '\r':
7757 case '\t':
case '\f':
7758 case '\013':
case '\010':
case '\007':
case '\033':
7763 clen = IS_EVSTR(p, pend) ? 2 : 1;
7771 if (u8 && c > 0x7F) {
7772 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7774 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7777 else if (cc <= 0xFFFFF)
7790 if (clen > LONG_MAX -
len) {
7797 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7798 q = RSTRING_PTR(result); qend = q +
len + 1;
7802 unsigned char c = *p++;
7804 if (c ==
'"' || c ==
'\\') {
7808 else if (c ==
'#') {
7809 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7812 else if (c ==
'\n') {
7816 else if (c ==
'\r') {
7820 else if (c ==
'\t') {
7824 else if (c ==
'\f') {
7828 else if (c ==
'\013') {
7832 else if (c ==
'\010') {
7836 else if (c ==
'\007') {
7840 else if (c ==
'\033') {
7850 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7852 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7855 snprintf(q, qend-q,
"u%04X", cc);
7857 snprintf(q, qend-q,
"u{%X}", cc);
7862 snprintf(q, qend-q,
"x%02X", c);
7868 if (!rb_enc_asciicompat(enc)) {
7869 snprintf(q, qend-q, nonascii_suffix, enc->name);
7870 encidx = rb_ascii8bit_encindex();
7873 rb_enc_associate_index(result, encidx);
7879unescape_ascii(
unsigned int c)
7903undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7905 const char *s = *ss;
7909 unsigned char buf[6];
7927 *buf = unescape_ascii(*s);
7939 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7940 if (*penc != enc_utf8) {
7942 rb_enc_associate(undumped, enc_utf8);
7959 if (hexlen == 0 || hexlen > 6) {
7965 if (0xd800 <= c && c <= 0xdfff) {
7968 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7978 if (0xd800 <= c && c <= 0xdfff) {
7981 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
8009static VALUE rb_str_is_ascii_only_p(
VALUE str);
8027str_undump(
VALUE str)
8029 const char *s = RSTRING_PTR(str);
8032 VALUE undumped = rb_enc_str_new(s, 0L, enc);
8034 bool binary =
false;
8038 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
8041 if (!str_null_check(str, &w)) {
8044 if (RSTRING_LEN(str) < 2)
goto invalid_format;
8045 if (*s !=
'"')
goto invalid_format;
8063 static const char force_encoding_suffix[] =
".force_encoding(\"";
8064 static const char dup_suffix[] =
".dup";
8065 const char *encname;
8070 size =
sizeof(dup_suffix) - 1;
8071 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
8073 size =
sizeof(force_encoding_suffix) - 1;
8074 if (s_end - s <= size)
goto invalid_format;
8075 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
8079 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
8083 s = memchr(s,
'"', s_end-s);
8085 if (!s)
goto invalid_format;
8086 if (s_end - s != 2)
goto invalid_format;
8087 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
8089 encidx = rb_enc_find_index2(encname, (
long)size);
8093 rb_enc_associate_index(undumped, encidx);
8103 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
8114 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
8120 if (rb_enc_dummy_p(enc)) {
8127str_true_enc(
VALUE str)
8130 rb_str_check_dummy_enc(enc);
8134static OnigCaseFoldType
8135check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
8140 rb_raise(rb_eArgError,
"too many options");
8141 if (argv[0]==sym_turkic) {
8142 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
8144 if (argv[1]==sym_lithuanian)
8145 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
8147 rb_raise(rb_eArgError,
"invalid second option");
8150 else if (argv[0]==sym_lithuanian) {
8151 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
8153 if (argv[1]==sym_turkic)
8154 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
8156 rb_raise(rb_eArgError,
"invalid second option");
8160 rb_raise(rb_eArgError,
"too many options");
8161 else if (argv[0]==sym_ascii)
8162 flags |= ONIGENC_CASE_ASCII_ONLY;
8163 else if (argv[0]==sym_fold) {
8164 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
8165 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
8167 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
8170 rb_raise(rb_eArgError,
"invalid option");
8177 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
8183#define CASE_MAPPING_ADDITIONAL_LENGTH 20
8184#ifndef CASEMAP_DEBUG
8185# define CASEMAP_DEBUG 0
8193 OnigUChar space[FLEX_ARY_LEN];
8197mapping_buffer_free(
void *p)
8201 while (current_buffer) {
8202 previous_buffer = current_buffer;
8203 current_buffer = current_buffer->next;
8204 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
8210 {0, mapping_buffer_free,},
8211 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
8219 const OnigUChar *source_current, *source_end;
8220 int target_length = 0;
8221 VALUE buffer_anchor;
8224 size_t buffer_count = 0;
8225 int buffer_length_or_invalid;
8227 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
8229 source_current = (OnigUChar*)RSTRING_PTR(source);
8234 while (source_current < source_end) {
8236 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
8237 if (CASEMAP_DEBUG) {
8238 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
8241 *pre_buffer = current_buffer;
8242 pre_buffer = ¤t_buffer->next;
8243 current_buffer->next = NULL;
8244 current_buffer->capa =
capa;
8245 buffer_length_or_invalid = enc->case_map(flags,
8246 &source_current, source_end,
8247 current_buffer->space,
8248 current_buffer->space+current_buffer->capa,
8250 if (buffer_length_or_invalid < 0) {
8251 current_buffer =
DATA_PTR(buffer_anchor);
8253 mapping_buffer_free(current_buffer);
8254 rb_raise(rb_eArgError,
"input string invalid");
8256 target_length += current_buffer->used = buffer_length_or_invalid;
8258 if (CASEMAP_DEBUG) {
8259 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
8262 if (buffer_count==1) {
8263 target =
rb_str_new((
const char*)current_buffer->space, target_length);
8266 char *target_current;
8269 target_current = RSTRING_PTR(target);
8270 current_buffer =
DATA_PTR(buffer_anchor);
8271 while (current_buffer) {
8272 memcpy(target_current, current_buffer->space, current_buffer->used);
8273 target_current += current_buffer->used;
8274 current_buffer = current_buffer->next;
8277 current_buffer =
DATA_PTR(buffer_anchor);
8279 mapping_buffer_free(current_buffer);
8284 str_enc_copy_direct(target, source);
8293 const OnigUChar *source_current, *source_end;
8294 OnigUChar *target_current, *target_end;
8295 long old_length = RSTRING_LEN(source);
8296 int length_or_invalid;
8298 if (old_length == 0)
return Qnil;
8300 source_current = (OnigUChar*)RSTRING_PTR(source);
8302 if (source == target) {
8303 target_current = (OnigUChar*)source_current;
8304 target_end = (OnigUChar*)source_end;
8307 target_current = (OnigUChar*)RSTRING_PTR(target);
8311 length_or_invalid = onigenc_ascii_only_case_map(flags,
8312 &source_current, source_end,
8313 target_current, target_end, enc);
8314 if (length_or_invalid < 0)
8315 rb_raise(rb_eArgError,
"input string invalid");
8316 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
8317 fprintf(stderr,
"problem with rb_str_ascii_casemap"
8318 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
8319 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
8320 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
8323 str_enc_copy(target, source);
8329upcase_single(
VALUE str)
8331 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8332 bool modified =
false;
8335 unsigned int c = *(
unsigned char*)s;
8337 if (
'a' <= c && c <=
'z') {
8338 *s =
'A' + (c -
'a');
8366rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
8369 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8371 flags = check_case_options(argc, argv, flags);
8372 str_modify_keep_cr(str);
8373 enc = str_true_enc(str);
8374 if (case_option_single_p(flags, enc, str)) {
8375 if (upcase_single(str))
8376 flags |= ONIGENC_CASE_MODIFIED;
8378 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8379 rb_str_ascii_casemap(str, str, &flags, enc);
8381 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8383 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8405rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
8408 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8411 flags = check_case_options(argc, argv, flags);
8412 enc = str_true_enc(str);
8413 if (case_option_single_p(flags, enc, str)) {
8414 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8415 str_enc_copy_direct(ret, str);
8418 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8420 rb_str_ascii_casemap(str, ret, &flags, enc);
8423 ret = rb_str_casemap(str, &flags, enc);
8430downcase_single(
VALUE str)
8432 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8433 bool modified =
false;
8436 unsigned int c = *(
unsigned char*)s;
8438 if (
'A' <= c && c <=
'Z') {
8439 *s =
'a' + (c -
'A');
8468rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8471 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8473 flags = check_case_options(argc, argv, flags);
8474 str_modify_keep_cr(str);
8475 enc = str_true_enc(str);
8476 if (case_option_single_p(flags, enc, str)) {
8477 if (downcase_single(str))
8478 flags |= ONIGENC_CASE_MODIFIED;
8480 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8481 rb_str_ascii_casemap(str, str, &flags, enc);
8483 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8485 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8507rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8510 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8513 flags = check_case_options(argc, argv, flags);
8514 enc = str_true_enc(str);
8515 if (case_option_single_p(flags, enc, str)) {
8516 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8517 str_enc_copy_direct(ret, str);
8518 downcase_single(ret);
8520 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8522 rb_str_ascii_casemap(str, ret, &flags, enc);
8525 ret = rb_str_casemap(str, &flags, enc);
8553rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8556 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8558 flags = check_case_options(argc, argv, flags);
8559 str_modify_keep_cr(str);
8560 enc = str_true_enc(str);
8561 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8562 if (flags&ONIGENC_CASE_ASCII_ONLY)
8563 rb_str_ascii_casemap(str, str, &flags, enc);
8565 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8567 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8591rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8594 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8597 flags = check_case_options(argc, argv, flags);
8598 enc = str_true_enc(str);
8599 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8600 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8602 rb_str_ascii_casemap(str, ret, &flags, enc);
8605 ret = rb_str_casemap(str, &flags, enc);
8632rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8635 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8637 flags = check_case_options(argc, argv, flags);
8638 str_modify_keep_cr(str);
8639 enc = str_true_enc(str);
8640 if (flags&ONIGENC_CASE_ASCII_ONLY)
8641 rb_str_ascii_casemap(str, str, &flags, enc);
8643 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8645 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8669rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8672 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8675 flags = check_case_options(argc, argv, flags);
8676 enc = str_true_enc(str);
8677 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8678 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8680 rb_str_ascii_casemap(str, ret, &flags, enc);
8683 ret = rb_str_casemap(str, &flags, enc);
8688typedef unsigned char *USTR;
8692 unsigned int now, max;
8704 if (t->p == t->pend)
return -1;
8705 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8708 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8710 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8712 if (t->p < t->pend) {
8713 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8716 if (t->now < 0x80 && c < 0x80) {
8717 rb_raise(rb_eArgError,
8718 "invalid range \"%c-%c\" in string transliteration",
8722 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8726 else if (t->now < c) {
8735 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8736 if (t->now == t->max) {
8741 if (t->now < t->max) {
8757 const unsigned int errc = -1;
8758 unsigned int trans[256];
8760 struct tr trsrc, trrepl;
8762 unsigned int c, c0, last = 0;
8763 int modify = 0, i, l;
8764 unsigned char *s, *send;
8766 int singlebyte = single_byte_optimizable(str);
8770#define CHECK_IF_ASCII(c) \
8771 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8772 (cr = ENC_CODERANGE_VALID) : 0)
8776 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8777 if (RSTRING_LEN(repl) == 0) {
8778 return rb_str_delete_bang(1, &src, str);
8782 e1 = rb_enc_check(str, src);
8783 e2 = rb_enc_check(str, repl);
8788 enc = rb_enc_check(src, repl);
8790 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8791 if (RSTRING_LEN(src) > 1 &&
8792 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8793 trsrc.p + l < trsrc.pend) {
8797 trrepl.p = RSTRING_PTR(repl);
8798 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8799 trsrc.gen = trrepl.gen = 0;
8800 trsrc.now = trrepl.now = 0;
8801 trsrc.max = trrepl.max = 0;
8804 for (i=0; i<256; i++) {
8807 while ((c = trnext(&trsrc, enc)) != errc) {
8812 if (!hash) hash = rb_hash_new();
8816 while ((c = trnext(&trrepl, enc)) != errc)
8819 for (i=0; i<256; i++) {
8820 if (trans[i] != errc) {
8828 for (i=0; i<256; i++) {
8831 while ((c = trnext(&trsrc, enc)) != errc) {
8832 r = trnext(&trrepl, enc);
8833 if (r == errc) r = trrepl.now;
8836 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8839 if (!hash) hash = rb_hash_new();
8847 str_modify_keep_cr(str);
8848 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8849 termlen = rb_enc_mbminlen(enc);
8852 long offset, max = RSTRING_LEN(str);
8853 unsigned int save = -1;
8854 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8859 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8862 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8865 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8867 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8876 if (cflag) c = last;
8879 else if (cflag) c = errc;
8885 if (c != (
unsigned int)-1) {
8891 tlen = rb_enc_codelen(c, enc);
8897 if (enc != e1) may_modify = 1;
8899 if ((offset = t - buf) + tlen > max) {
8900 size_t MAYBE_UNUSED(old) = max + termlen;
8901 max = offset + tlen + (send - s);
8902 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8905 rb_enc_mbcput(c, t, enc);
8906 if (may_modify && memcmp(s, t, tlen) != 0) {
8912 if (!STR_EMBED_P(str)) {
8913 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8915 TERM_FILL((
char *)t, termlen);
8916 RSTRING(str)->as.heap.ptr = (
char *)buf;
8917 STR_SET_LEN(str, t - buf);
8918 STR_SET_NOEMBED(str);
8919 RSTRING(str)->as.heap.aux.capa = max;
8923 c = (
unsigned char)*s;
8924 if (trans[c] != errc) {
8941 long offset, max = (long)((send - s) * 1.2);
8942 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8947 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8950 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8953 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8955 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8963 if (cflag) c = last;
8966 else if (cflag) c = errc;
8970 c = cflag ? last : errc;
8973 tlen = rb_enc_codelen(c, enc);
8978 if (enc != e1) may_modify = 1;
8980 if ((offset = t - buf) + tlen > max) {
8981 size_t MAYBE_UNUSED(old) = max + termlen;
8982 max = offset + tlen + (long)((send - s) * 1.2);
8983 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8987 rb_enc_mbcput(c, t, enc);
8988 if (may_modify && memcmp(s, t, tlen) != 0) {
8996 if (!STR_EMBED_P(str)) {
8997 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8999 TERM_FILL((
char *)t, termlen);
9000 RSTRING(str)->as.heap.ptr = (
char *)buf;
9001 STR_SET_LEN(str, t - buf);
9002 STR_SET_NOEMBED(str);
9003 RSTRING(str)->as.heap.aux.capa = max;
9009 rb_enc_associate(str, enc);
9028 return tr_trans(str, src, repl, 0);
9075 tr_trans(str, src, repl, 0);
9079#define TR_TABLE_MAX (UCHAR_MAX+1)
9080#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
9082tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
9085 const unsigned int errc = -1;
9086 char buf[TR_TABLE_MAX];
9089 VALUE table = 0, ptable = 0;
9090 int i, l, cflag = 0;
9092 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
9093 tr.gen =
tr.now =
tr.max = 0;
9095 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
9100 for (i=0; i<TR_TABLE_MAX; i++) {
9103 stable[TR_TABLE_MAX] = cflag;
9105 else if (stable[TR_TABLE_MAX] && !cflag) {
9106 stable[TR_TABLE_MAX] = 0;
9108 for (i=0; i<TR_TABLE_MAX; i++) {
9112 while ((c = trnext(&
tr, enc)) != errc) {
9113 if (c < TR_TABLE_MAX) {
9114 buf[(
unsigned char)c] = !cflag;
9119 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
9122 table = ptable ? ptable : rb_hash_new();
9126 table = rb_hash_new();
9131 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
9132 rb_hash_aset(table, key,
Qtrue);
9136 for (i=0; i<TR_TABLE_MAX; i++) {
9137 stable[i] = stable[i] && buf[i];
9139 if (!table && !cflag) {
9146tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
9148 if (c < TR_TABLE_MAX) {
9149 return table[c] != 0;
9155 if (!
NIL_P(rb_hash_lookup(del, v)) &&
9156 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
9160 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
9163 return table[TR_TABLE_MAX] ? TRUE : FALSE;
9177rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
9179 char squeez[TR_TABLE_SIZE];
9182 VALUE del = 0, nodel = 0;
9184 int i, ascompat, cr;
9186 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
9188 for (i=0; i<argc; i++) {
9192 enc = rb_enc_check(str, s);
9193 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
9196 str_modify_keep_cr(str);
9197 ascompat = rb_enc_asciicompat(enc);
9198 s = t = RSTRING_PTR(str);
9205 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9216 c = rb_enc_codepoint_len(s, send, &clen, enc);
9218 if (tr_find(c, squeez, del, nodel)) {
9222 if (t != s) rb_enc_mbcput(c, t, enc);
9229 TERM_FILL(t, TERM_LEN(str));
9230 STR_SET_LEN(str, t - RSTRING_PTR(str));
9233 if (modify)
return str;
9253rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
9256 rb_str_delete_bang(argc, argv, str);
9270rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
9272 char squeez[TR_TABLE_SIZE];
9274 VALUE del = 0, nodel = 0;
9275 unsigned char *s, *send, *t;
9277 int ascompat, singlebyte = single_byte_optimizable(str);
9281 enc = STR_ENC_GET(str);
9284 for (i=0; i<argc; i++) {
9288 enc = rb_enc_check(str, s);
9289 if (singlebyte && !single_byte_optimizable(s))
9291 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
9295 str_modify_keep_cr(str);
9296 s = t = (
unsigned char *)RSTRING_PTR(str);
9297 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
9300 ascompat = rb_enc_asciicompat(enc);
9304 unsigned int c = *s++;
9305 if (c != save || (argc > 0 && !squeez[c])) {
9315 if (ascompat && (c = *s) < 0x80) {
9316 if (c != save || (argc > 0 && !squeez[c])) {
9322 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
9324 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
9325 if (t != s) rb_enc_mbcput(c, t, enc);
9334 TERM_FILL((
char *)t, TERM_LEN(str));
9335 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
9336 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
9340 if (modify)
return str;
9363rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
9366 rb_str_squeeze_bang(argc, argv, str);
9384 return tr_trans(str, src, repl, 1);
9407 tr_trans(str, src, repl, 1);
9436rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9438 char table[TR_TABLE_SIZE];
9440 VALUE del = 0, nodel = 0, tstr;
9450 enc = rb_enc_check(str, tstr);
9453 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9454 (ptstr = RSTRING_PTR(tstr),
9455 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9456 !is_broken_string(str)) {
9458 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9460 s = RSTRING_PTR(str);
9461 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9464 if (*(
unsigned char*)s++ == c) n++;
9470 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9471 for (i=1; i<argc; i++) {
9474 enc = rb_enc_check(str, tstr);
9475 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9478 s = RSTRING_PTR(str);
9479 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9481 ascompat = rb_enc_asciicompat(enc);
9485 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9493 c = rb_enc_codepoint_len(s, send, &clen, enc);
9494 if (tr_find(c, table, del, nodel)) {
9505rb_fs_check(
VALUE val)
9509 if (
NIL_P(val))
return 0;
9514static const char isspacetable[256] = {
9515 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9517 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Table lookup of a single byte in isspacetable.  The argument is cast
 * to unsigned char first, so a negative char value cannot produce a
 * negative index into the 256-entry table.  In the table rows visible
 * above, the nonzero entries are indexes 9 through 13 and index 32.
 */
9533#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9536split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9538 if (empty_count >= 0 &&
len == 0) {
9539 return empty_count + 1;
9541 if (empty_count > 0) {
9545 rb_ary_push(result, str_new_empty_String(str));
9546 }
while (--empty_count > 0);
9550 rb_yield(str_new_empty_String(str));
9551 }
while (--empty_count > 0);
9556 rb_ary_push(result, str);
9565 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9569literal_split_pattern(
VALUE spat, split_type_t default_type)
9577 return SPLIT_TYPE_CHARS;
9579 else if (rb_enc_asciicompat(enc)) {
9580 if (
len == 1 && ptr[0] ==
' ') {
9581 return SPLIT_TYPE_AWK;
9586 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9587 return SPLIT_TYPE_AWK;
9590 return default_type;
9603rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9608 split_type_t split_type;
9609 long beg, end, i = 0, empty_count = -1;
9614 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9616 if (lim <= 0) limit =
Qnil;
9617 else if (lim == 1) {
9618 if (RSTRING_LEN(str) == 0)
9629 if (
NIL_P(limit) && !lim) empty_count = 0;
9631 enc = STR_ENC_GET(str);
9632 split_type = SPLIT_TYPE_REGEXP;
9634 spat = get_pat_quoted(spat, 0);
9636 else if (
NIL_P(spat = rb_fs)) {
9637 split_type = SPLIT_TYPE_AWK;
9639 else if (!(spat = rb_fs_check(spat))) {
9640 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9645 if (split_type != SPLIT_TYPE_AWK) {
9650 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9651 if (split_type == SPLIT_TYPE_AWK) {
9653 split_type = SPLIT_TYPE_STRING;
9658 mustnot_broken(spat);
9659 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Shorthand for one split_string() call.  It is not hygienic: the
 * expansion reads the identifiers `result`, `str` and `empty_count`
 * from the scope where the macro is used, and assigns the value
 * returned by split_string() back to `empty_count`.
 */
9667#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
9670 char *ptr = RSTRING_PTR(str);
9672 if (split_type == SPLIT_TYPE_AWK) {
9677 if (result) result = rb_ary_new();
9679 if (is_ascii_string(str)) {
9680 while (ptr < eptr) {
9681 c = (
unsigned char)*ptr++;
9683 if (ascii_isspace(c)) {
9689 if (!
NIL_P(limit) && lim <= i)
break;
9692 else if (ascii_isspace(c)) {
9693 SPLIT_STR(beg, end-beg);
9696 if (!
NIL_P(limit)) ++i;
9704 while (ptr < eptr) {
9707 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9716 if (!
NIL_P(limit) && lim <= i)
break;
9720 SPLIT_STR(beg, end-beg);
9723 if (!
NIL_P(limit)) ++i;
9731 else if (split_type == SPLIT_TYPE_STRING) {
9732 char *str_start = ptr;
9733 char *substr_start = ptr;
9734 char *sptr = RSTRING_PTR(spat);
9735 long slen = RSTRING_LEN(spat);
9737 if (result) result = rb_ary_new();
9738 mustnot_broken(str);
9739 enc = rb_enc_check(str, spat);
9740 while (ptr < eptr &&
9741 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9744 if (t != ptr + end) {
9748 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9751 if (!
NIL_P(limit) && lim <= ++i)
break;
9753 beg = ptr - str_start;
9755 else if (split_type == SPLIT_TYPE_CHARS) {
9756 char *str_start = ptr;
9759 if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
9760 mustnot_broken(str);
9761 enc = rb_enc_get(str);
9762 while (ptr < eptr &&
9763 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9764 SPLIT_STR(ptr - str_start, n);
9766 if (!
NIL_P(limit) && lim <= ++i)
break;
9768 beg = ptr - str_start;
9771 if (result) result = rb_ary_new();
9772 long len = RSTRING_LEN(str);
9780 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9785 if (start == end && BEG(0) == END(0)) {
9790 else if (last_null == 1) {
9791 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9798 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9804 SPLIT_STR(beg, end-beg);
9805 beg = start = END(0);
9809 for (idx=1; idx < regs->num_regs; idx++) {
9810 if (BEG(idx) == -1)
continue;
9811 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9813 if (!
NIL_P(limit) && lim <= ++i)
break;
9815 if (match) rb_match_unbusy(match);
9817 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9818 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9821 return result ? result : str;
9831 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to a new array created with rb_ary_new_capa(size) when
 * rb_block_given_p() is false, and to 0 when a block was given.
 * The first parameter `m` does not appear in the expansion.
 */
9834#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9840 rb_ary_push(ary, e);
/* Alias that forwards both arguments unchanged to enumerator_element(). */
9849#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9852chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9854 const char *prev = rb_enc_prev_char(p, e, e, enc);
9857 prev = rb_enc_prev_char(p, e, e, enc);
9858 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9870 RSTRING_LEN(rs) != 1 ||
9871 RSTRING_PTR(rs)[0] !=
'\n')) {
/*
 * From this point on, every use of the identifier rb_rs expands to a
 * call of get_rs() rather than naming a variable directly.
 */
9877#define rb_rs get_rs()
9884 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9885 long pos,
len, rslen;
9891 static ID keywords[1];
9896 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9900 if (!ENUM_ELEM(ary, str)) {
9908 if (!RSTRING_LEN(str))
goto end;
9910 ptr = subptr = RSTRING_PTR(str);
9912 len = RSTRING_LEN(str);
9914 rslen = RSTRING_LEN(rs);
9917 enc = rb_enc_get(str);
9919 enc = rb_enc_check(str, rs);
9924 const char *eol = NULL;
9926 while (subend < pend) {
9927 long chomp_rslen = 0;
9929 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9931 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9933 if (eol == subend)
break;
9937 chomp_rslen = -rslen;
9941 if (!subptr) subptr = subend;
9945 }
while (subend < pend);
9947 if (rslen == 0) chomp_rslen = 0;
9949 subend - subptr + (chomp ? chomp_rslen : rslen));
9950 if (ENUM_ELEM(ary, line)) {
9951 str_mod_check(str, ptr,
len);
9953 subptr = eol = NULL;
9958 rsptr = RSTRING_PTR(rs);
9959 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9968 rsptr = RSTRING_PTR(rs);
9969 rslen = RSTRING_LEN(rs);
9972 while (subptr < pend) {
9973 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9977 if (hit != adjusted) {
9981 subend = hit += rslen;
9984 subend = chomp_newline(subptr, subend, enc);
9991 if (ENUM_ELEM(ary, line)) {
9992 str_mod_check(str, ptr,
len);
9997 if (subptr != pend) {
10000 pend = chomp_newline(subptr, pend, enc);
10002 else if (pend - subptr >= rslen &&
10003 memcmp(pend - rslen, rsptr, rslen) == 0) {
10008 ENUM_ELEM(ary, line);
10029rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
10032 return rb_str_enumerate_lines(argc, argv, str, 0);
10045rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
10047 VALUE ary = WANTARRAY(
"lines", 0);
10048 return rb_str_enumerate_lines(argc, argv, str, ary);
10054 return LONG2FIX(RSTRING_LEN(str));
10062 for (i=0; i<RSTRING_LEN(str); i++) {
10063 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
10081rb_str_each_byte(
VALUE str)
10084 return rb_str_enumerate_bytes(str, 0);
10096rb_str_bytes(
VALUE str)
10098 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
10099 return rb_str_enumerate_bytes(str, ary);
10117 ptr = RSTRING_PTR(str);
10118 len = RSTRING_LEN(str);
10119 enc = rb_enc_get(str);
10122 for (i = 0; i <
len; i += n) {
10123 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
10128 for (i = 0; i <
len; i += n) {
10129 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
10150rb_str_each_char(
VALUE str)
10153 return rb_str_enumerate_chars(str, 0);
10165rb_str_chars(
VALUE str)
10168 return rb_str_enumerate_chars(str, ary);
10172rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
10177 const char *ptr, *end;
10180 if (single_byte_optimizable(str))
10181 return rb_str_enumerate_bytes(str, ary);
10184 ptr = RSTRING_PTR(str);
10186 enc = STR_ENC_GET(str);
10188 while (ptr < end) {
10189 c = rb_enc_codepoint_len(ptr, end, &n, enc);
10210rb_str_each_codepoint(
VALUE str)
10213 return rb_str_enumerate_codepoints(str, 0);
10225rb_str_codepoints(
VALUE str)
10228 return rb_str_enumerate_codepoints(str, ary);
10234 int encidx = rb_enc_to_index(enc);
10236 const OnigUChar source_ascii[] =
"\\X";
10237 const OnigUChar *source = source_ascii;
10238 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Each macro expands to a comma-separated list of OnigUChar values
 * taken from x, suitable for use inside an array initializer.
 *   CHARS_16BE: two bytes, (x)>>8 first, then x.
 *   CHARS_16LE: two bytes, x first, then (x)>>8.
 *   CHARS_32BE: four bytes, the 16BE pair of (x)>>16 followed by the
 *               16BE pair of x.
 *   CHARS_32LE: four bytes, the 16LE pair of x followed by the 16LE
 *               pair of (x)>>16.
 * The (OnigUChar) casts are what reduce each shifted value to a byte.
 */
10241#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
10242#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
10243#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
10244#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
10245#define CASE_UTF(e) \
10246 case ENCINDEX_UTF_##e: { \
10247 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
10248 source = source_UTF_##e; \
10249 source_len = sizeof(source_UTF_##e); \
10252 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
10260 regex_t *reg_grapheme_cluster;
10262 int r = onig_new(®_grapheme_cluster, source, source + source_len,
10263 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
10265 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
10266 onig_error_code_to_str(message, r, &einfo);
10267 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
10270 return reg_grapheme_cluster;
10276 int encidx = rb_enc_to_index(enc);
10277 static regex_t *reg_grapheme_cluster_utf8 = NULL;
10279 if (encidx == rb_utf8_encindex()) {
10280 if (!reg_grapheme_cluster_utf8) {
10281 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
10284 return reg_grapheme_cluster_utf8;
10293 size_t grapheme_cluster_count = 0;
10295 const char *ptr, *end;
10297 if (!rb_enc_unicode_p(enc)) {
10301 bool cached_reg_grapheme_cluster =
true;
10302 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
10303 if (!reg_grapheme_cluster) {
10304 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
10305 cached_reg_grapheme_cluster =
false;
10308 ptr = RSTRING_PTR(str);
10311 while (ptr < end) {
10312 OnigPosition
len = onig_match(reg_grapheme_cluster,
10313 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
10314 (
const OnigUChar *)ptr, NULL, 0);
10315 if (
len <= 0)
break;
10316 grapheme_cluster_count++;
10320 if (!cached_reg_grapheme_cluster) {
10321 onig_free(reg_grapheme_cluster);
10324 return SIZET2NUM(grapheme_cluster_count);
10328rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
10332 const char *ptr0, *ptr, *end;
10334 if (!rb_enc_unicode_p(enc)) {
10335 return rb_str_enumerate_chars(str, ary);
10340 bool cached_reg_grapheme_cluster =
true;
10341 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
10342 if (!reg_grapheme_cluster) {
10343 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
10344 cached_reg_grapheme_cluster =
false;
10347 ptr0 = ptr = RSTRING_PTR(str);
10350 while (ptr < end) {
10351 OnigPosition
len = onig_match(reg_grapheme_cluster,
10352 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
10353 (
const OnigUChar *)ptr, NULL, 0);
10354 if (
len <= 0)
break;
10359 if (!cached_reg_grapheme_cluster) {
10360 onig_free(reg_grapheme_cluster);
10380rb_str_each_grapheme_cluster(
VALUE str)
10383 return rb_str_enumerate_grapheme_clusters(str, 0);
10395rb_str_grapheme_clusters(
VALUE str)
10398 return rb_str_enumerate_grapheme_clusters(str, ary);
10402chopped_length(
VALUE str)
10405 const char *p, *p2, *beg, *end;
10407 beg = RSTRING_PTR(str);
10408 end = beg + RSTRING_LEN(str);
10409 if (beg >= end)
return 0;
10410 p = rb_enc_prev_char(beg, end, end, enc);
10412 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10413 p2 = rb_enc_prev_char(beg, p, end, enc);
10414 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10430rb_str_chop_bang(
VALUE str)
10432 str_modify_keep_cr(str);
10433 if (RSTRING_LEN(str) > 0) {
10435 len = chopped_length(str);
10436 STR_SET_LEN(str,
len);
10437 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10456rb_str_chop(
VALUE str)
10462smart_chomp(
VALUE str,
const char *e,
const char *p)
10465 if (rb_enc_mbminlen(enc) > 1) {
10470 pp = e - rb_enc_mbminlen(enc);
10473 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10481 if (--e > p && *(e-1) ==
'\r') {
10498 char *pp, *e, *rsptr;
10500 char *
const p = RSTRING_PTR(str);
10501 long len = RSTRING_LEN(str);
10503 if (
len == 0)
return 0;
10506 return smart_chomp(str, e, p);
10509 enc = rb_enc_get(str);
10512 if (rb_enc_mbminlen(enc) > 1) {
10517 pp -= rb_enc_mbminlen(enc);
10520 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10527 while (e > p && *(e-1) ==
'\n') {
10529 if (e > p && *(e-1) ==
'\r')
10535 if (rslen >
len)
return len;
10537 enc = rb_enc_get(rs);
10538 newline = rsptr[rslen-1];
10539 if (rslen == rb_enc_mbminlen(enc)) {
10541 if (newline ==
'\n')
10542 return smart_chomp(str, e, p);
10546 return smart_chomp(str, e, p);
10550 enc = rb_enc_check(str, rs);
10551 if (is_broken_string(rs)) {
10555 if (p[
len-1] == newline &&
10557 memcmp(rsptr, pp, rslen) == 0)) {
10558 if (at_char_boundary(p, pp, e, enc))
10559 return len - rslen;
10571chomp_rs(
int argc,
const VALUE *argv)
10575 VALUE rs = argv[0];
10587 long olen = RSTRING_LEN(str);
10588 long len = chompped_length(str, rs);
10589 if (
len >= olen)
return Qnil;
10590 str_modify_keep_cr(str);
10591 STR_SET_LEN(str,
len);
10592 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10609rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10612 str_modifiable(str);
10613 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10614 rs = chomp_rs(argc, argv);
10616 return rb_str_chomp_string(str, rs);
10629rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10631 VALUE rs = chomp_rs(argc, argv);
10639 const char *
const start = s;
10641 if (!s || s >= e)
return 0;
10644 if (single_byte_optimizable(str)) {
10645 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10650 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10670rb_str_lstrip_bang(
VALUE str)
10674 long olen, loffset;
10676 str_modify_keep_cr(str);
10677 enc = STR_ENC_GET(str);
10679 loffset = lstrip_offset(str, start, start+olen, enc);
10681 long len = olen-loffset;
10682 s = start + loffset;
10683 memmove(start, s,
len);
10684 STR_SET_LEN(str,
len);
10685 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10708rb_str_lstrip(
VALUE str)
10713 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10714 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10723 rb_str_check_dummy_enc(enc);
10727 if (!s || s >= e)
return 0;
10731 if (single_byte_optimizable(str)) {
10733 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10738 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10758rb_str_rstrip_bang(
VALUE str)
10762 long olen, roffset;
10764 str_modify_keep_cr(str);
10765 enc = STR_ENC_GET(str);
10767 roffset = rstrip_offset(str, start, start+olen, enc);
10769 long len = olen - roffset;
10771 STR_SET_LEN(str,
len);
10772 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10795rb_str_rstrip(
VALUE str)
10799 long olen, roffset;
10801 enc = STR_ENC_GET(str);
10803 roffset = rstrip_offset(str, start, start+olen, enc);
10805 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10821rb_str_strip_bang(
VALUE str)
10824 long olen, loffset, roffset;
10827 str_modify_keep_cr(str);
10828 enc = STR_ENC_GET(str);
10830 loffset = lstrip_offset(str, start, start+olen, enc);
10831 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10833 if (loffset > 0 || roffset > 0) {
10834 long len = olen-roffset;
10837 memmove(start, start + loffset,
len);
10839 STR_SET_LEN(str,
len);
10840 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10863rb_str_strip(
VALUE str)
10866 long olen, loffset, roffset;
10870 loffset = lstrip_offset(str, start, start+olen, enc);
10871 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10873 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10878scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10881 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10887 end = pos + RSTRING_LEN(pat);
10901 if (RSTRING_LEN(str) > end)
10902 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10911 if (!regs || regs->num_regs == 1) {
10917 for (
int i = 1; i < regs->num_regs; i++) {
10923 rb_ary_push(result, s);
10978 long last = -1, prev = 0;
10979 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10981 pat = get_pat_quoted(pat, 1);
10982 mustnot_broken(str);
10984 VALUE ary = rb_ary_new();
10986 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10989 rb_ary_push(ary, result);
10991 if (last >= 0) rb_pat_search(pat, str, last, 1);
10996 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
11000 str_mod_check(str, p,
len);
11002 if (last >= 0) rb_pat_search(pat, str, last, 1);
11026rb_str_hex(
VALUE str)
11028 return rb_str_to_inum(str, 16, FALSE);
11053rb_str_oct(
VALUE str)
11055 return rb_str_to_inum(str, -8, FALSE);
11058#ifndef HAVE_CRYPT_R
11063 rb_nativethread_lock_t lock;
11064} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
11067crypt_mutex_initialize(
void)
11138# define CRYPT_END() ALLOCV_END(databuf)
11140 extern char *crypt(
const char *,
const char *);
11141# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
11144 const char *s, *saltp;
11147 char salt_8bit_clean[3];
11151 mustnot_wchar(str);
11152 mustnot_wchar(salt);
11154 saltp = RSTRING_PTR(salt);
11155 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
11156 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
11160 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
11161 salt_8bit_clean[0] = saltp[0] & 0x7f;
11162 salt_8bit_clean[1] = saltp[1] & 0x7f;
11163 salt_8bit_clean[2] =
'\0';
11164 saltp = salt_8bit_clean;
11169# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
11170 data->initialized = 0;
11172 res = crypt_r(s, saltp, data);
11174 crypt_mutex_initialize();
11176 res = crypt(s, saltp);
11217 char *ptr, *p, *pend;
11220 unsigned long sum0 = 0;
11225 ptr = p = RSTRING_PTR(str);
11226 len = RSTRING_LEN(str);
11232 str_mod_check(str, ptr,
len);
11235 sum0 += (
unsigned char)*p;
11246 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11247 sum0 &= (((
unsigned long)1)<<bits)-1;
11267rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11271 long width,
len, flen = 1, fclen = 1;
11274 const char *f =
" ";
11275 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11277 int singlebyte = 1, cr;
11281 enc = STR_ENC_GET(str);
11282 termlen = rb_enc_mbminlen(enc);
11286 enc = rb_enc_check(str, pad);
11287 f = RSTRING_PTR(pad);
11288 flen = RSTRING_LEN(pad);
11289 fclen = str_strlen(pad, enc);
11290 singlebyte = single_byte_optimizable(pad);
11291 if (flen == 0 || fclen == 0) {
11292 rb_raise(rb_eArgError,
"zero width padding");
11295 len = str_strlen(str, enc);
11296 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11298 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11302 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11303 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11305 size = RSTRING_LEN(str);
11306 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11307 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11308 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11309 rb_raise(rb_eArgError,
"argument too big");
11313 p = RSTRING_PTR(res);
11315 memset(p, *f, llen);
11319 while (llen >= fclen) {
11325 memcpy(p, f, llen2);
11329 memcpy(p, RSTRING_PTR(str), size);
11332 memset(p, *f, rlen);
11336 while (rlen >= fclen) {
11342 memcpy(p, f, rlen2);
11346 TERM_FILL(p, termlen);
11347 STR_SET_LEN(res, p-RSTRING_PTR(res));
11370rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11372 return rb_str_justify(argc, argv, str,
'l');
11386rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11388 return rb_str_justify(argc, argv, str,
'r');
11403rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11405 return rb_str_justify(argc, argv, str,
'c');
11421 sep = get_pat_quoted(sep, 0);
11433 pos = rb_str_index(str, sep, 0);
11434 if (pos < 0)
goto failed;
11439 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11442 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11456 long pos = RSTRING_LEN(str);
11458 sep = get_pat_quoted(sep, 0);
11471 pos = rb_str_rindex(str, sep, pos);
11480 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11482 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11494rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11498 for (i=0; i<argc; i++) {
11499 VALUE tmp = argv[i];
11501 if (rb_reg_start_with_p(tmp, str))
11505 const char *p, *s, *e;
11510 enc = rb_enc_check(str, tmp);
11511 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11512 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11513 p = RSTRING_PTR(str);
11516 if (!at_char_right_boundary(p, s, e, enc))
11518 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11534rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11538 for (i=0; i<argc; i++) {
11539 VALUE tmp = argv[i];
11540 const char *p, *s, *e;
11545 enc = rb_enc_check(str, tmp);
11546 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11547 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11548 p = RSTRING_PTR(str);
11551 if (!at_char_boundary(p, s, e, enc))
11553 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11569deleted_prefix_length(
VALUE str,
VALUE prefix)
11571 const char *strptr, *prefixptr;
11572 long olen, prefixlen;
11577 if (!is_broken_string(prefix) ||
11578 !rb_enc_asciicompat(enc) ||
11579 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11580 enc = rb_enc_check(str, prefix);
11584 prefixlen = RSTRING_LEN(prefix);
11585 if (prefixlen <= 0)
return 0;
11586 olen = RSTRING_LEN(str);
11587 if (olen < prefixlen)
return 0;
11588 strptr = RSTRING_PTR(str);
11589 prefixptr = RSTRING_PTR(prefix);
11590 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11591 if (is_broken_string(prefix)) {
11592 if (!is_broken_string(str)) {
11596 const char *strend = strptr + olen;
11597 const char *after_prefix = strptr + prefixlen;
11598 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11618rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11621 str_modify_keep_cr(str);
11623 prefixlen = deleted_prefix_length(str, prefix);
11624 if (prefixlen <= 0)
return Qnil;
11638rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11642 prefixlen = deleted_prefix_length(str, prefix);
11643 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11645 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11658deleted_suffix_length(
VALUE str,
VALUE suffix)
11660 const char *strptr, *suffixptr;
11661 long olen, suffixlen;
11665 if (is_broken_string(suffix))
return 0;
11666 enc = rb_enc_check(str, suffix);
11669 suffixlen = RSTRING_LEN(suffix);
11670 if (suffixlen <= 0)
return 0;
11671 olen = RSTRING_LEN(str);
11672 if (olen < suffixlen)
return 0;
11673 strptr = RSTRING_PTR(str);
11674 suffixptr = RSTRING_PTR(suffix);
11675 const char *strend = strptr + olen;
11676 const char *before_suffix = strend - suffixlen;
11677 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11678 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11693rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11695 long olen, suffixlen,
len;
11696 str_modifiable(str);
11698 suffixlen = deleted_suffix_length(str, suffix);
11699 if (suffixlen <= 0)
return Qnil;
11701 olen = RSTRING_LEN(str);
11702 str_modify_keep_cr(str);
11703 len = olen - suffixlen;
11704 STR_SET_LEN(str,
len);
11705 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11721rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11725 suffixlen = deleted_suffix_length(str, suffix);
11726 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11728 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11735 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11743 val = rb_fs_check(val);
11746 "value of %"PRIsVALUE
" must be String or Regexp",
11750 rb_warn_deprecated(
"'$;'", NULL);
11767 str_modifiable(str);
11770 int idx = rb_enc_to_index(encoding);
11777 rb_enc_associate_index(str, idx);
11801 if (STR_EMBED_P(str)) {
11802 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11807 str_replace_shared_without_enc(str2, str);
11809 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11842rb_str_valid_encoding_p(
VALUE str)
11862rb_str_is_ascii_only_p(
VALUE str)
11872 static const char ellipsis[] =
"...";
11873 const long ellipsislen =
sizeof(ellipsis) - 1;
11875 const long blen = RSTRING_LEN(str);
11876 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11877 VALUE estr, ret = 0;
11880 if (
len * rb_enc_mbminlen(enc) >= blen ||
11884 else if (
len <= ellipsislen ||
11886 if (rb_enc_asciicompat(enc)) {
11888 rb_enc_associate(ret, enc);
11895 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11900 rb_enc_from_encoding(enc), 0,
Qnil);
11913 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11919 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11938 if (enc == STR_ENC_GET(str)) {
11943 return enc_str_scrub(enc, str, repl, cr);
11951 const char *rep, *p, *e, *p1, *sp;
11957 rb_raise(rb_eArgError,
"both of block and replacement given");
11964 if (!
NIL_P(repl)) {
11965 repl = str_compat_and_valid(repl, enc);
11968 if (rb_enc_dummy_p(enc)) {
11971 encidx = rb_enc_to_index(enc);
11973#define DEFAULT_REPLACE_CHAR(str) do { \
11974 static const char replace[sizeof(str)-1] = str; \
11975 rep = replace; replen = (int)sizeof(replace); \
11978 slen = RSTRING_LEN(str);
11979 p = RSTRING_PTR(str);
11984 if (rb_enc_asciicompat(enc)) {
11990 else if (!
NIL_P(repl)) {
11991 rep = RSTRING_PTR(repl);
11992 replen = RSTRING_LEN(repl);
11995 else if (encidx == rb_utf8_encindex()) {
11996 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
12000 DEFAULT_REPLACE_CHAR(
"?");
12005 p = search_nonascii(p, e);
12010 int ret = rb_enc_precise_mbclen(p, e, enc);
12029 if (e - p < clen) clen = e - p;
12036 for (; clen > 1; clen--) {
12037 ret = rb_enc_precise_mbclen(q, q + clen, enc);
12048 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
12049 str_mod_check(str, sp, slen);
12050 repl = str_compat_and_valid(repl, enc);
12057 p = search_nonascii(p, e);
12083 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
12084 str_mod_check(str, sp, slen);
12085 repl = str_compat_and_valid(repl, enc);
12094 long mbminlen = rb_enc_mbminlen(enc);
12098 else if (!
NIL_P(repl)) {
12099 rep = RSTRING_PTR(repl);
12100 replen = RSTRING_LEN(repl);
12102 else if (encidx == ENCINDEX_UTF_16BE) {
12103 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
12105 else if (encidx == ENCINDEX_UTF_16LE) {
12106 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
12108 else if (encidx == ENCINDEX_UTF_32BE) {
12109 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
12111 else if (encidx == ENCINDEX_UTF_32LE) {
12112 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
12115 DEFAULT_REPLACE_CHAR(
"?");
12119 int ret = rb_enc_precise_mbclen(p, e, enc);
12132 if (e - p < clen) clen = e - p;
12133 if (clen <= mbminlen * 2) {
12138 for (; clen > mbminlen; clen-=mbminlen) {
12139 ret = rb_enc_precise_mbclen(q, q + clen, enc);
12149 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
12150 str_mod_check(str, sp, slen);
12151 repl = str_compat_and_valid(repl, enc);
12176 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
12177 str_mod_check(str, sp, slen);
12178 repl = str_compat_and_valid(repl, enc);
12214str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
/*
 * File-scope state used by the unicode_normalize family below.
 * id_normalize and id_normalized_p are the method IDs that the public
 * entry points pass to unicode_normalize_common(), which invokes the
 * given ID on mUnicodeNormalize via rb_funcallv().
 * NOTE(review): where these three are initialized is not visible in
 * this part of the file -- confirm before relying on them early.
 */
12222static ID id_normalize;
12223static ID id_normalized_p;
12224static VALUE mUnicodeNormalize;
12227unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12229 static int UnicodeNormalizeRequired = 0;
12232 if (!UnicodeNormalizeRequired) {
12233 rb_require(
"unicode_normalize/normalize.rb");
12234 UnicodeNormalizeRequired = 1;
12238 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12275rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12277 return unicode_normalize_common(argc, argv, str, id_normalize);
12291rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12293 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12320rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12322 return unicode_normalize_common(argc, argv, str, id_normalized_p);
/* sym_equal is a plain alias: every use expands to rb_obj_equal. */
12454#define sym_equal rb_obj_equal
12457sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12461 int c = rb_enc_precise_mbclen(s, send, enc);
12465 c = rb_enc_mbc_to_codepoint(s, send, enc);
12473rb_str_symname_p(
VALUE sym)
12478 rb_encoding *resenc = rb_default_internal_encoding();
12480 if (resenc == NULL) resenc = rb_default_external_encoding();
12481 enc = STR_ENC_GET(sym);
12482 ptr = RSTRING_PTR(sym);
12483 len = RSTRING_LEN(sym);
12484 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12492rb_str_quote_unprintable(
VALUE str)
12500 resenc = rb_default_internal_encoding();
12501 if (resenc == NULL) resenc = rb_default_external_encoding();
12502 enc = STR_ENC_GET(str);
12503 ptr = RSTRING_PTR(str);
12504 len = RSTRING_LEN(str);
12505 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12506 !sym_printable(ptr, ptr +
len, enc)) {
12507 return rb_str_escape(str);
12513rb_id_quote_unprintable(
ID id)
12515 VALUE str = rb_id2str(
id);
12516 if (!rb_str_symname_p(str)) {
12517 return rb_str_escape(str);
12535sym_inspect(
VALUE sym)
12542 if (!rb_str_symname_p(str)) {
12544 len = RSTRING_LEN(str);
12545 rb_str_resize(str,
len + 1);
12546 dest = RSTRING_PTR(str);
12547 memmove(dest + 1, dest,
len);
12551 VALUE orig_str = str;
12553 len = RSTRING_LEN(orig_str);
12554 str = rb_enc_str_new(0,
len + 1, enc);
12557 ptr = RSTRING_PTR(orig_str);
12558 dest = RSTRING_PTR(str);
12559 memcpy(dest + 1, ptr,
len);
12579rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12584 rb_raise(rb_eArgError,
"no receiver given");
12681 return rb_str_match(
rb_sym2str(sym), other);
12696sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12698 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12711sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12713 return rb_str_match_m_p(argc, argv, sym);
12731 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12742sym_length(
VALUE sym)
12756sym_empty(
VALUE sym)
12790sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12806sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12822sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12836sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12838 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12851sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12853 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12865sym_encoding(
VALUE sym)
12871string_for_symbol(
VALUE name)
12876 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12890 name = string_for_symbol(name);
12891 return rb_intern_str(name);
12900 name = string_for_symbol(name);
12924 return rb_fstring(str);
12931 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12943 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12944 rb_enc_autoload(enc);
12948 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12954 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12955 rb_enc_autoload(enc);
12959 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12970rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12975 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12976 rb_str_buf_cat_byte(str, (
char) code);
12990 for (
unsigned int i = 0; i < fstring_table->capacity; i++) {
12991 VALUE str = fstring_table->entries[i].str;
12992 if (!str)
continue;
13160 rb_gc_register_address(&rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not depend on RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RUBY_ATOMIC_VALUE_CAS(var, oldval, newval)
Identical to RUBY_ATOMIC_CAS, except it expects its arguments are VALUE.
#define RUBY_ATOMIC_VALUE_SET(var, val)
Identical to RUBY_ATOMIC_SET, except it expects its arguments are VALUE.
std::atomic< unsigned > rb_atomic_t
Type that is eligible for atomic operations.
#define RUBY_ATOMIC_FETCH_ADD(var, val)
Atomically replaces the value pointed by var with the result of addition of val to the old value of v...
#define RUBY_ATOMIC_VALUE_EXCHANGE(var, val)
Identical to RUBY_ATOMIC_EXCHANGE, except it expects its arguments are VALUE.
#define RUBY_ATOMIC_DEC(var)
Atomically decrements the value pointed by var.
#define RUBY_ATOMIC_LOAD(var)
Atomic load.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references, according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method, if any.
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
#define TypedData_Make_Struct(klass, type, data_type, sval)
Identical to TypedData_Wrap_Struct, except it allocates a new data region internally instead of takin...
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@52 as
String's specific fields.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
long len
Length of the string, not including terminating NUL character.
struct RString::@52::@54 embed
Embedded contents.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
struct RString::@52::@53 heap
Strings that use separated memory region for contents use this pattern.
union RString::@52::@53::@55 aux
Auxiliary info.
This is the struct that holds necessary info for a struct.
const char * wrap_struct_name
Name of structs of this kind.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predicate failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.