14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/encoding.h"
32#include "internal/error.h"
33#include "internal/gc.h"
34#include "internal/hash.h"
35#include "internal/numeric.h"
36#include "internal/object.h"
37#include "internal/proc.h"
38#include "internal/re.h"
39#include "internal/sanitizers.h"
40#include "internal/string.h"
41#include "internal/transcode.h"
47#include "ruby_assert.h"
52#if defined HAVE_CRYPT_R
53# if defined HAVE_CRYPT_H
56#elif !defined HAVE_CRYPT
57# include "missing/crypt.h"
58# define HAVE_CRYPT_R 1
/* Shorthand for element `no` of the beg[] / end[] arrays reached through a
 * variable named `regs`, which must be in scope wherever these are used. */
61#define BEG(no) (regs->beg[(no)])
62#define END(no) (regs->end[(no)])
65#undef rb_usascii_str_new
69#undef rb_usascii_str_new_cstr
70#undef rb_utf8_str_new_cstr
71#undef rb_enc_str_new_cstr
72#undef rb_external_str_new_cstr
73#undef rb_locale_str_new_cstr
74#undef rb_str_dup_frozen
75#undef rb_str_buf_new_cstr
/*
 * RUBY_MAX_CHAR_LEN: NOTE(review): not referenced in the visible part of this
 *   file; presumably the maximum byte length of one character -- confirm.
 *
 * String-private flag bits, each mapped onto one of the generic FL_USERn bits:
 *   STR_PRECOMPUTED_HASH - set by str_store_precomputed_hash() after it has
 *                          copied a hash value in behind the terminator.
 *   STR_SHARED_ROOT      - set by STR_SET_SHARED() on the string whose buffer
 *                          another string points into.
 *   STR_BORROWED         - additionally set by STR_SET_SHARED() on a shared
 *                          root whose RBASIC_CLASS() is 0.
 *   STR_TMPLOCK          - NOTE(review): no user is visible here; confirm
 *                          its meaning against the rest of the file.
 *   STR_NOFREE           - set by str_new_static(); rb_str_memsize() does not
 *                          count the buffer of a string carrying it.
 *   STR_FAKESTR          - tested by STR_SET_SHARED(), whose visible body only
 *                          runs when the flag is clear.
 */
129#define RUBY_MAX_CHAR_LEN 16
130#define STR_PRECOMPUTED_HASH FL_USER4
131#define STR_SHARED_ROOT FL_USER5
132#define STR_BORROWED FL_USER6
133#define STR_TMPLOCK FL_USER7
134#define STR_NOFREE FL_USER18
135#define STR_FAKESTR FL_USER19
/*
 * STR_SET_NOEMBED(str): sets STR_NOEMBED and clears the STR_SHARED,
 *   STR_SHARED_ROOT and STR_BORROWED bits.
 * STR_SET_EMBED(str): clears STR_NOEMBED, STR_SHARED and STR_NOFREE.
 * STR_SET_LEN(str, n): stores n into RSTRING(str)->len.
 * These only touch flags / the length field; they do not move any bytes.
 */
137#define STR_SET_NOEMBED(str) do {\
138 FL_SET((str), STR_NOEMBED);\
139 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
141#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
143#define STR_SET_LEN(str, n) do { \
144 RSTRING(str)->len = (n); \
148str_encindex_fastpath(
int encindex)
152 case ENCINDEX_ASCII_8BIT:
154 case ENCINDEX_US_ASCII:
162str_enc_fastpath(
VALUE str)
/*
 * TERM_LEN(str): terminator width in bytes for str -- 1 when
 *   str_enc_fastpath() accepts the string, otherwise the minimum character
 *   length of the string's encoding.
 * TERM_FILL(ptr, termlen): writes a terminator at ptr. The first byte is
 *   always zeroed; memset() covers the whole width only when it is wider than
 *   one byte. Each argument is evaluated once (captured in term_fill_* locals).
 */
167#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
168#define TERM_FILL(ptr, termlen) do {\
169 char *const term_fill_ptr = (ptr);\
170 const int term_fill_len = (termlen);\
171 *term_fill_ptr = '\0';\
172 if (UNLIKELY(term_fill_len > 1))\
173 memset(term_fill_ptr, 0, term_fill_len);\
/*
 * RESIZE_CAPA(str, capacity): RESIZE_CAPA_TERM with TERM_LEN(str) as the
 *   terminator width.
 * RESIZE_CAPA_TERM(str, capacity, termlen): make room for `capacity` bytes
 *   plus a terminator of `termlen` bytes.
 *   - Embedded string whose slot is smaller than capacity + termlen: the
 *     current bytes are copied into a fresh ALLOC_N buffer, the string is
 *     switched to the heap representation (STR_SET_NOEMBED) and the new
 *     capacity is recorded in as.heap.aux.capa.
 *   - The other visible path asserts that the string is not STR_SHARED,
 *     resizes the heap buffer with SIZED_REALLOC_N and records the capacity.
 * NOTE(review): `capacity` and `termlen` appear unparenthesized in the
 * slot-size comparison, and the arguments are evaluated more than once, so
 * callers should pass simple, side-effect-free expressions.
 */
176#define RESIZE_CAPA(str,capacity) do {\
177 const int termlen = TERM_LEN(str);\
178 RESIZE_CAPA_TERM(str,capacity,termlen);\
180#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
181 if (STR_EMBED_P(str)) {\
182 if (str_embed_capa(str) < capacity + termlen) {\
183 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
184 const long tlen = RSTRING_LEN(str);\
185 memcpy(tmp, RSTRING_PTR(str), tlen);\
186 RSTRING(str)->as.heap.ptr = tmp;\
187 RSTRING(str)->len = tlen;\
188 STR_SET_NOEMBED(str);\
189 RSTRING(str)->as.heap.aux.capa = (capacity);\
193 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
194 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
195 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
196 RSTRING(str)->as.heap.aux.capa = (capacity);\
/*
 * STR_SET_SHARED(str, shared_str): record that str points into the buffer of
 * shared_str. The visible body runs only when str does not carry STR_FAKESTR.
 * It asserts that str's pointer lies within
 *   [RSTRING_PTR(shared_str), RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)],
 * stores shared_str into as.heap.aux.shared through RB_OBJ_WRITE, sets
 * STR_SHARED on str and STR_SHARED_ROOT on shared_str, and additionally flags
 * shared_str as STR_BORROWED when its class is 0.
 */
200#define STR_SET_SHARED(str, shared_str) do { \
201 if (!FL_TEST(str, STR_FAKESTR)) { \
202 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
203 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
204 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
205 FL_SET((str), STR_SHARED); \
206 FL_SET((shared_str), STR_SHARED_ROOT); \
207 if (RBASIC_CLASS((shared_str)) == 0) \
208 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Raw as.heap.ptr field of str (no check that str is actually heap-backed). */
212#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Heap buffer size in bytes: recorded capacity plus the terminator width. */
213#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias for get_encoding(). */
216#define STR_ENC_GET(str) get_encoding(str)
/*
 * SHARABLE_MIDDLE_SUBSTRING defaults to 0 when not supplied by the build.
 * With it disabled, SHARABLE_SUBSTRING_P(beg, len, end) holds only for a
 * substring that ends exactly at `end` (beg + len == end); with it enabled the
 * predicate is always 1.
 */
218#if !defined SHARABLE_MIDDLE_SUBSTRING
219# define SHARABLE_MIDDLE_SUBSTRING 0
221#if !SHARABLE_MIDDLE_SUBSTRING
222#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
224#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/*
 * Number of bytes available for embedded contents of str: the GC slot size of
 * the object minus the offset of as.embed.ary inside struct RString.
 */
229str_embed_capa(
VALUE str)
231 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
/*
 * True when str carries none of STR_NOFREE, STR_SHARED_ROOT or STR_SHARED.
 */
235rb_str_reembeddable_p(
VALUE str)
237 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
241rb_str_embed_size(
long capa)
/*
 * Computes real_size, the size str would need as an embedded object:
 *   - already embedded: rb_str_embed_size(len) plus the terminator width;
 *   - re-embeddable heap string: rb_str_embed_size(capa) plus the terminator;
 *   - otherwise: just sizeof(struct RString).
 * sizeof(st_index_t) is then added under a condition on a line not shown here
 * (NOTE(review): presumably when a precomputed hash is stored -- confirm).
 */
247rb_str_size_as_embedded(
VALUE str)
250 if (STR_EMBED_P(str)) {
251 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
255 else if (rb_str_reembeddable_p(str)) {
256 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
259 real_size =
sizeof(
struct RString);
263 real_size +=
sizeof(st_index_t);
/*
 * Whether len content bytes plus a termlen-byte terminator can be allocated
 * as an embedded string: asks the GC whether rb_str_embed_size(len + termlen)
 * is an allocatable size.
 */
270STR_EMBEDDABLE_P(
long len,
long termlen)
272 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
277static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
278static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
280static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
281static inline void str_modifiable(
VALUE str);
/*
 * Convenience wrapper: calls str_make_independent_expand() with the string's
 * current length, no extra room (expand = 0) and its own terminator width.
 */
286str_make_independent(
VALUE str)
288 long len = RSTRING_LEN(str);
289 int termlen = TERM_LEN(str);
290 str_make_independent_expand((str),
len, 0L, termlen);
293static inline int str_dependent_p(
VALUE str);
/*
 * Calls str_make_independent() only when str_dependent_p() reports that str
 * is dependent; otherwise does nothing in the visible code.
 */
296rb_str_make_independent(
VALUE str)
298 if (str_dependent_p(str)) {
299 str_make_independent(str);
/*
 * Moves the contents of a heap-backed string into its embedded area.
 * Visible steps: remember the heap pointer in buf, set the length, copy len
 * bytes from buf to RSTRING_PTR(str), then write the terminator right after
 * them in as.embed.ary.
 * NOTE(review): `len`, the switch to the embedded representation and the
 * release of buf are on lines not shown here -- confirm against the full file.
 */
304rb_str_make_embedded(
VALUE str)
309 char *buf =
RSTRING(str)->as.heap.ptr;
313 STR_SET_LEN(str,
len);
316 memcpy(RSTRING_PTR(str), buf,
len);
320 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
324rb_debug_rstring_null_ptr(
const char *func)
326 fprintf(stderr,
"%s is returning NULL!! "
327 "SIGSEGV is highly expected to follow immediately.\n"
328 "If you could reproduce, attach your debugger here, "
329 "and look at the passed string.\n",
334static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
337get_encoding(
VALUE str)
/*
 * Raises ArgumentError ("invalid byte sequence in <encoding name>") when
 * is_broken_string() reports str as broken.
 */
343mustnot_broken(
VALUE str)
345 if (is_broken_string(str)) {
346 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
/*
 * Raises ArgumentError ("wide char encoding: <name>") when the minimum
 * character length of `enc` exceeds one byte.
 * NOTE(review): `enc` is assigned on a line not shown; presumably it is the
 * encoding of str -- confirm.
 */
351mustnot_wchar(
VALUE str)
354 if (rb_enc_mbminlen(enc) > 1) {
355 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
361static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
/*
 * PRECOMPUTED_FAKESTR_HASH is enabled only when long and void * have the same
 * size. In that configuration fstring_hash() is a real function which, on the
 * visible path, takes the hash from the as.heap.aux.capa field (the field
 * register_fstring() fills with str_do_hash()); the condition selecting that
 * path is on lines not shown. Without the option, fstring_hash is simply an
 * alias for rb_str_hash.
 */
363#if SIZEOF_LONG == SIZEOF_VOIDP
364#define PRECOMPUTED_FAKESTR_HASH 1
368#ifdef PRECOMPUTED_FAKESTR_HASH
370fstring_hash(
VALUE str)
375 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
385#define fstring_hash rb_str_hash
389BARE_STRING_P(
VALUE str)
399static inline st_index_t
400str_do_hash(
VALUE str)
402 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
404 if (e && !is_ascii_string(str)) {
/*
 * Stores `hash` inside the string object itself: the value is copied to the
 * bytes immediately following the terminator (RSTRING_END(str) + TERM_LEN(str))
 * and STR_PRECOMPUTED_HASH is set.
 * used_bytes / free_bytes measure how much of the embed slot is taken by the
 * contents plus terminator and how much is left; how free_bytes is checked is
 * on lines not shown.
 */
411str_store_precomputed_hash(
VALUE str, st_index_t hash)
417 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
418 size_t free_bytes = str_embed_capa(str) - used_bytes;
422 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
424 FL_SET(str, STR_PRECOMPUTED_HASH);
431 bool force_precompute_hash;
443 long len = RSTRING_LEN(str);
444 long capa =
len +
sizeof(st_index_t);
445 int term_len = TERM_LEN(str);
447 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
449 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
450 STR_SET_LEN(new_str, RSTRING_LEN(str));
452 rb_enc_copy(new_str, str);
453 str_store_precomputed_hash(new_str, str_do_hash(str));
457 rb_enc_copy(new_str, str);
458#ifdef PRECOMPUTED_FAKESTR_HASH
459 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
460 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
474 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
477 if (STR_SHARED_P(str)) {
479 str_make_independent(str);
482 if (!BARE_STRING_P(str)) {
488 RBASIC(str)->flags |= RSTRING_FSTR;
508 if (
FL_TEST(str, RSTRING_FSTR))
511 bare = BARE_STRING_P(str);
513 if (STR_EMBED_P(str)) {
518 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
525 rb_str_resize(str, RSTRING_LEN(str));
527 fstr = register_fstring(str,
false,
false);
530 str_replace_shared_without_enc(str, fstr);
/*
 * Sentinel VALUEs compared against / stored into the `str` slot of a table
 * entry in place of a real string:
 *   EMPTY     - probing code in this file stops or inserts when it sees it;
 *   TOMBSTONE - written when an entry is deleted; skipped during iteration
 *               and resize;
 *   MOVED     - checked for by the find-or-insert loop.
 */
537#define FSTRING_TABLE_EMPTY Qfalse
538#define FSTRING_TABLE_TOMBSTONE Qtrue
539#define FSTRING_TABLE_MOVED Qundef
548 unsigned int capacity;
549 unsigned int deleted_entries;
554fstring_table_free(
void *ptr)
557 xfree(table->entries);
561fstring_table_size(
const void *ptr)
572 .dfree = fstring_table_free,
573 .dsize = fstring_table_size,
575 .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE
579static VALUE fstring_table_obj;
582new_fstring_table(
int capacity)
587 table->capacity = capacity;
/*
 * Creates the global fstring table with an initial capacity of 8192 entries
 * and registers the variable holding it with the GC via
 * rb_gc_register_address().
 */
594Init_fstring_table(
void)
596 fstring_table_obj = new_fstring_table(8192);
597 rb_gc_register_address(&fstring_table_obj);
611 RUBY_ASSERT((table->capacity & (table->capacity - 1)) == 0);
612 probe->mask = table->capacity - 1;
613 probe->idx = hash_code & probe->mask;
620 probe->idx = (probe->idx + 1) & probe->mask;
637 RUBY_ASSERT((table->capacity & (table->capacity - 1)) == 0);
639 probe->mask = table->capacity - 1;
640 probe->idx = hash_code & probe->mask;
648 probe->idx = (probe->idx + probe->d) & probe->mask;
/* Loads x with RUBY_ATOMIC_PTR_LOAD and casts the result to VALUE. */
653#define RUBY_ATOMIC_VALUE_LOAD(x) (VALUE)(RUBY_ATOMIC_PTR_LOAD(x))
659 int idx = fstring_table_probe_start(&probe, table, hash_code);
663 VALUE candidate = entry->str;
668 if (candidate == FSTRING_TABLE_EMPTY) {
675 entry->hash = hash_code;
679 idx = fstring_table_probe_next(&probe);
/*
 * Builds a replacement for the fstring table old_table_obj.
 *
 * The first visible check compares the current global table with
 * old_table_obj (the action taken on mismatch is on a line not shown;
 * NOTE(review): presumably an early exit because someone else resized).
 *
 * Capacity policy, with expected_count = count - deleted_entries:
 *   - start from twice the old capacity;
 *   - if that exceeds 8 * expected_count, halve the old capacity instead;
 *   - else if it exceeds 4 * expected_count, keep the old capacity.
 *
 * Every old slot is then visited; EMPTY and TOMBSTONE slots and strings the
 * GC reports as garbage are skipped. For the rest the stored hash is reused,
 * or recomputed with fstring_hash() when the stored value is 0, and the
 * string is inserted into the new table.
 *
 * NOTE(review): the diagnostic fprintf passes table pointers to %p without a
 * cast to void * and prints `capacity` (declared unsigned int) with %i.
 */
685fstring_try_resize_without_locking(
VALUE old_table_obj)
688 if (RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj) != old_table_obj) {
696 int expected_count =
RUBY_ATOMIC_LOAD(old_table->count) - old_table->deleted_entries;
699 int old_capacity = old_table->capacity;
700 int new_capacity = old_capacity * 2;
701 if (new_capacity > expected_count * 8) {
702 new_capacity = old_capacity / 2;
704 else if (new_capacity > expected_count * 4) {
705 new_capacity = old_capacity;
709 VALUE new_table_obj = new_fstring_table(new_capacity);
712 for (
int i = 0; i < old_capacity; i++) {
716 if (val == FSTRING_TABLE_EMPTY)
continue;
717 if (val == FSTRING_TABLE_TOMBSTONE)
continue;
718 if (rb_objspace_garbage_object_p(val))
continue;
720 VALUE hash_code = RUBY_ATOMIC_VALUE_LOAD(entry->hash);
721 if (hash_code == 0) {
724 hash_code = fstring_hash(val);
727 fstring_insert_on_resize(new_table, hash_code, val);
731 fprintf(stderr,
"resized: %p(%i) -> %p(%i) (count: %i->%i)\n", old_table, old_table->capacity, new_table, new_table->capacity, old_table->count, new_table->count);
741fstring_try_resize(
VALUE old_table_obj)
744 fstring_try_resize_without_locking(old_table_obj);
752 bool inserting =
false;
758 table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
760 table = RTYPEDDATA_GET_DATA(table_obj);
761 idx = fstring_table_probe_start(&probe, table, hash_code);
765 VALUE candidate = RUBY_ATOMIC_VALUE_LOAD(entry->str);
767 if (candidate == FSTRING_TABLE_EMPTY) {
771 value = build_fstring(value, arg);
778 if (UNLIKELY(prev_count > table->capacity / 2)) {
779 fstring_try_resize(table_obj);
784 if (found == FSTRING_TABLE_EMPTY) {
801 else if (candidate == FSTRING_TABLE_TOMBSTONE) {
804 else if (candidate == FSTRING_TABLE_MOVED) {
811 VALUE candidate_hash = RUBY_ATOMIC_VALUE_LOAD(entry->hash);
812 if ((candidate_hash == hash_code || candidate_hash == 0) && !fstring_cmp(candidate, value)) {
814 if (UNLIKELY(rb_objspace_garbage_object_p(candidate))) {
828 idx = fstring_table_probe_next(&probe);
838 VALUE table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
843 int idx = fstring_table_probe_start(&probe, table, hash_code);
847 VALUE candidate = entry->str;
852 if (candidate == FSTRING_TABLE_EMPTY) {
856 else if (candidate == value) {
858 entry->str = FSTRING_TABLE_TOMBSTONE;
859 table->deleted_entries++;
863 idx = fstring_table_probe_next(&probe);
/*
 * Looks str up in the fstring table, inserting it when absent, and yields the
 * table's entry (`result`).
 *
 * force_precompute_hash is forwarded to the insertion path through `args`.
 * When pointers and longs have the same size, the visible code stores
 * str_do_hash(str) into as.heap.aux.capa before hashing, which is the field
 * fstring_hash() reads back (the guard around that store is on lines not
 * shown). The final assertion checks that the result is not an object the GC
 * already considers garbage.
 */
868register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
872 .force_precompute_hash = force_precompute_hash
875#if SIZEOF_VOIDP == SIZEOF_LONG
879 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
883 VALUE hash_code = fstring_hash(str);
884 VALUE result = fstring_find_or_insert(hash_code, str, &args);
886 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
/*
 * Walks every slot of the current fstring table.
 *
 * EMPTY and TOMBSTONE slots are skipped. For each remaining string, func is
 * called with the string as both key and value. When func answers ST_REPLACE
 * and a replace callback was supplied, replace is invoked with pointers to the
 * key/value and the (possibly updated) key is written back into the slot.
 * Depending on the resulting return code -- the dispatch itself is on lines
 * not shown -- the code either aborts via rb_bug("unsupported") or turns the
 * slot into a TOMBSTONE.
 */
896rb_fstring_foreach_with_replace(st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
901 VALUE table_obj = RUBY_ATOMIC_VALUE_LOAD(fstring_table_obj);
908 for (
unsigned int i = 0; i < table->capacity; i++) {
909 VALUE key = table->entries[i].str;
910 if(key == FSTRING_TABLE_EMPTY)
continue;
911 if(key == FSTRING_TABLE_TOMBSTONE)
continue;
913 enum st_retval retval;
914 retval = (*func)(key, key, arg, 0);
916 if (retval == ST_REPLACE && replace) {
917 st_data_t value = key;
918 retval = (*replace)(&key, &value, arg, TRUE);
919 table->entries[i].str = key;
926 rb_bug(
"unsupported");
930 table->entries[i].str = FSTRING_TABLE_TOMBSTONE;
/* True when obj is the object currently stored in fstring_table_obj. */
937rb_obj_is_fstring_table(
VALUE obj)
941 return obj == fstring_table_obj;
/*
 * Removes obj from the fstring table: computes its table hash with
 * fstring_hash(), deletes the matching entry and bumps the obj_str_fstr debug
 * counter.
 */
945rb_gc_free_fstring(
VALUE obj)
950 VALUE str_hash = fstring_hash(obj);
951 fstring_delete(str_hash, obj);
953 RB_DEBUG_COUNTER_INC(obj_str_fstr);
959setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
962 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
975 return (
VALUE)fake_str;
984 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
/*
 * Registers the len bytes at ptr as an fstring. The bytes are wrapped in a
 * fake string (fake_str, declared on a line not shown) tagged US-ASCII and
 * passed to register_fstring() with copy = false and
 * force_precompute_hash = false.
 */
993rb_fstring_new(
const char *ptr,
long len)
996 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
1003 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
/*
 * rb_fstring_new() for a NUL-terminated C string: the length is taken with
 * strlen(), so ptr must not be NULL.
 */
1007rb_fstring_cstr(
const char *
ptr)
1009 return rb_fstring_new(
ptr, strlen(
ptr));
1016 const char *aptr, *bptr;
1023 return (alen != blen ||
1025 memcmp(aptr, bptr, alen) != 0);
1029single_byte_optimizable(
VALUE str)
1033 case ENCINDEX_ASCII_8BIT:
1034 case ENCINDEX_US_ASCII:
1036 case ENCINDEX_UTF_8:
/*
 * Returns a pointer to the first byte in [p, e) whose high bit (0x80) is set,
 * or NULL when the trailing-byte switch falls through to `case 0`.
 *
 * NONASCII_MASK is a uintptr_t-wide constant with 0x80 in every byte (built
 * from UINT64_C/UINT32_C under C99, from unsigned long literals otherwise).
 *
 * Visible structure:
 *   1. If unaligned word access is unavailable and p is not word aligned, the
 *      leading bytes up to the next boundary are tested one at a time.
 *   2. Whole words are tested against NONASCII_MASK; on a hit the offending
 *      byte is located by counting leading (big-endian) or trailing
 *      (little-endian) zero bits and dividing by 8.
 *   3. The remaining tail bytes are tested one at a time, indexed back from e.
 * The loop headers and the switch expressions themselves are on lines not
 * shown here.
 */
1056static inline const char *
1057search_nonascii(
const char *p,
const char *e)
1059 const uintptr_t *s, *t;
1061#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
1062# if SIZEOF_UINTPTR_T == 8
1063# define NONASCII_MASK UINT64_C(0x8080808080808080)
1064# elif SIZEOF_UINTPTR_T == 4
1065# define NONASCII_MASK UINT32_C(0x80808080)
1067# error "don't know what to do."
1070# if SIZEOF_UINTPTR_T == 8
1071# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
1072# elif SIZEOF_UINTPTR_T == 4
1073# define NONASCII_MASK 0x80808080UL
1075# error "don't know what to do."
1079 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
1080#if !UNALIGNED_WORD_ACCESS
1081 if ((uintptr_t)p % SIZEOF_VOIDP) {
/* l = number of bytes from p up to the next word boundary */
1082 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
1087 case 7:
if (p[-7]&0x80)
return p-7;
1088 case 6:
if (p[-6]&0x80)
return p-6;
1089 case 5:
if (p[-5]&0x80)
return p-5;
1090 case 4:
if (p[-4]&0x80)
return p-4;
1092 case 3:
if (p[-3]&0x80)
return p-3;
1093 case 2:
if (p[-2]&0x80)
return p-2;
1094 case 1:
if (p[-1]&0x80)
return p-1;
1099#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
1100#define aligned_ptr(value) \
1101 __builtin_assume_aligned((value), sizeof(uintptr_t))
1103#define aligned_ptr(value) (uintptr_t *)(value)
/* t = start of the last position from which a full word still ends at or before e */
1106 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
1109 if (*s & NONASCII_MASK) {
1110#ifdef WORDS_BIGENDIAN
1111 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
1113 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
1117 p = (
const char *)s;
1123 case 7:
if (e[-7]&0x80)
return e-7;
1124 case 6:
if (e[-6]&0x80)
return e-6;
1125 case 5:
if (e[-5]&0x80)
return e-5;
1126 case 4:
if (e[-4]&0x80)
return e-4;
1128 case 3:
if (e[-3]&0x80)
return e-3;
1129 case 2:
if (e[-2]&0x80)
return e-2;
1130 case 1:
if (e[-1]&0x80)
return e-1;
1131 case 0:
return NULL;
1138 const char *e = p +
len;
1140 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
1142 p = search_nonascii(p, e);
1146 if (rb_enc_asciicompat(enc)) {
1147 p = search_nonascii(p, e);
1150 int ret = rb_enc_precise_mbclen(p, e, enc);
1154 p = search_nonascii(p, e);
1160 int ret = rb_enc_precise_mbclen(p, e, enc);
1176 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
1179 p = search_nonascii(p, e);
1183 else if (rb_enc_asciicompat(enc)) {
1184 p = search_nonascii(p, e);
1190 int ret = rb_enc_precise_mbclen(p, e, enc);
1197 p = search_nonascii(p, e);
1203 int ret = rb_enc_precise_mbclen(p, e, enc);
1228 rb_enc_set_index(str1, rb_enc_get_index(str2));
1236rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
1241 str_enc_copy(dest, src);
1242 if (RSTRING_LEN(dest) == 0) {
1243 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
1254 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
1255 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
1266rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
1268 str_enc_copy(dest, src);
1275 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
1281 return enc_coderange_scan(str, enc);
1290 cr = enc_coderange_scan(str, get_encoding(str));
1297rb_enc_str_asciicompat(
VALUE str)
1300 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
1308 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
1317str_mod_check(
VALUE s,
const char *p,
long len)
1319 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
/*
 * Capacity of str in content bytes, given its terminator width:
 *   - embedded string: embed slot capacity minus termlen;
 *   - STR_SHARED or STR_NOFREE string: value on a line not shown
 *     (NOTE(review): presumably the current length -- confirm);
 *   - otherwise: the recorded heap capacity.
 */
1325str_capacity(
VALUE str,
const int termlen)
1327 if (STR_EMBED_P(str)) {
1328 return str_embed_capa(str) - termlen;
1330 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
1334 return RSTRING(str)->as.heap.aux.capa;
1341 return str_capacity(str, TERM_LEN(str));
1345must_not_null(
const char *
ptr)
1348 rb_raise(rb_eArgError,
"NULL pointer given");
1353str_alloc_embed(
VALUE klass,
size_t capa)
1355 size_t size = rb_str_embed_size(
capa);
1359 NEWOBJ_OF(str,
struct RString, klass,
1366str_alloc_heap(
VALUE klass)
1368 NEWOBJ_OF(str,
struct RString, klass,
/*
 * Allocates an empty embedded string of class klass: fires the DTrace string
 * creation hook with size 0, allocates with a requested capacity of 0 and
 * zero-fills the whole embedded area of the resulting slot.
 */
1375empty_str_alloc(
VALUE klass)
1377 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1378 VALUE str = str_alloc_embed(klass, 0);
1379 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1390 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1394 enc = rb_ascii8bit_encoding();
1397 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1399 int termlen = rb_enc_mbminlen(enc);
1401 if (STR_EMBEDDABLE_P(
len, termlen)) {
1402 str = str_alloc_embed(klass,
len + termlen);
1408 str = str_alloc_heap(klass);
1414 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1417 rb_enc_raw_set(str, enc);
1420 memcpy(RSTRING_PTR(str),
ptr,
len);
1423 STR_SET_LEN(str,
len);
1424 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1431 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1466 __msan_unpoison_string(
ptr);
1486 if (rb_enc_mbminlen(enc) != 1) {
1487 rb_raise(rb_eArgError,
"wchar encoding given");
1489 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1493str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1498 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1502 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1505 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1506 str = str_alloc_heap(klass);
1510 RBASIC(str)->flags |= STR_NOFREE;
1511 rb_enc_associate_index(str, encindex);
1540static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1542 int ecflags,
VALUE ecopts);
1547 int encidx = rb_enc_to_index(enc);
1548 if (rb_enc_get_index(str) == encidx)
1549 return is_ascii_string(str);
1560 if (!to)
return str;
1561 if (!from) from = rb_enc_get(str);
1562 if (from == to)
return str;
1563 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1564 rb_is_ascii8bit_enc(to)) {
1565 if (STR_ENC_GET(str) != to) {
1567 rb_enc_associate(str, to);
1574 from, to, ecflags, ecopts);
1575 if (
NIL_P(newstr)) {
/*
 * Validates `ofs` against the current length of newstr and then delegates to
 * str_cat_conv_enc_opts().
 *
 * ofs must lie within [-olen, olen]; what happens otherwise is on a line not
 * shown. A negative ofs counts from the end (olen is added). The string is
 * truncated to ofs bytes and made modifiable with rb_str_modify() before the
 * conversion is appended.
 */
1583rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1588 olen = RSTRING_LEN(newstr);
1589 if (ofs < -olen || olen < ofs)
1591 if (ofs < 0) ofs += olen;
1593 STR_SET_LEN(newstr, ofs);
1597 rb_str_modify(newstr);
1598 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1606 STR_SET_LEN(str, 0);
1607 rb_enc_associate(str, enc);
1613str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1615 int ecflags,
VALUE ecopts)
1620 VALUE econv_wrapper;
1621 const unsigned char *start, *sp;
1622 unsigned char *dest, *dp;
1623 size_t converted_output = (size_t)ofs;
1628 RBASIC_CLEAR_CLASS(econv_wrapper);
1630 if (!ec)
return Qnil;
1633 sp = (
unsigned char*)
ptr;
1635 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1636 (dp = dest + converted_output),
1640 size_t converted_input = sp - start;
1641 size_t rest =
len - converted_input;
1642 converted_output = dp - dest;
1644 if (converted_input && converted_output &&
1645 rest < (LONG_MAX / converted_output)) {
1646 rest = (rest * converted_output) / converted_input;
1651 olen += rest < 2 ? 2 : rest;
1652 rb_str_resize(newstr, olen);
1659 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1661 rb_enc_associate(newstr, to);
1680 const int eidx = rb_enc_to_index(eenc);
1683 return rb_enc_str_new(
ptr,
len, eenc);
1687 if ((eidx == rb_ascii8bit_encindex()) ||
1688 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1692 ienc = rb_default_internal_encoding();
1693 if (!ienc || eenc == ienc) {
1694 return rb_enc_str_new(
ptr,
len, eenc);
1698 if ((eidx == rb_ascii8bit_encindex()) ||
1699 (eidx == rb_usascii_encindex()) ||
1700 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1701 return rb_enc_str_new(
ptr,
len, ienc);
1704 str = rb_enc_str_new(NULL, 0, ienc);
1707 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1708 rb_str_initialize(str,
ptr,
len, eenc);
1716 int eidx = rb_enc_to_index(eenc);
1717 if (eidx == rb_usascii_encindex() &&
1718 !is_ascii_string(str)) {
1719 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1722 rb_enc_associate_index(str, eidx);
/*
 * Makes str2 hold the same bytes as str without touching str2's encoding.
 *
 * If the contents plus terminator fit in str2's embed slot they are copied in
 * and str2 becomes embedded. Otherwise str2 is pointed at a shared root: when
 * str is itself STR_SHARED its existing root is reused (the alternative is on
 * lines not shown). Before re-pointing, a str2 that owns a heap buffer
 * (not embedded, neither STR_SHARED nor STR_NOFREE) has that buffer released
 * with ruby_sized_xfree(); the rb_fatal() guards a case whose condition is not
 * visible here. Finally str2 is marked STR_NOEMBED, linked to the root with
 * STR_SET_SHARED and given the new length.
 */
1781str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1783 const int termlen = TERM_LEN(str);
1788 if (str_embed_capa(str2) >=
len + termlen) {
1789 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1790 STR_SET_EMBED(str2);
1791 memcpy(ptr2, RSTRING_PTR(str),
len);
1792 TERM_FILL(ptr2+
len, termlen);
1796 if (STR_SHARED_P(str)) {
1797 root =
RSTRING(str)->as.heap.aux.shared;
1806 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1808 rb_fatal(
"about to free a possible shared root");
1810 char *ptr2 = STR_HEAP_PTR(str2);
1812 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1815 FL_SET(str2, STR_NOEMBED);
1817 STR_SET_SHARED(str2, root);
1820 STR_SET_LEN(str2,
len);
1828 str_replace_shared_without_enc(str2, str);
1829 rb_enc_cr_str_exact_copy(str2, str);
1836 return str_replace_shared(str_alloc_heap(klass), str);
1853rb_str_new_frozen_String(
VALUE orig)
/*
 * Fast path: when orig satisfies BARE_STRING_P() and is already frozen it is
 * returned as is. The fallback for other strings is on lines not shown.
 */
1861rb_str_frozen_bare_string(
VALUE orig)
1863 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
/*
 * Returns str_new_frozen_buffer() for orig with klass 0 and without copying
 * the encoding (copy_encoding = FALSE).
 */
1868rb_str_tmp_frozen_acquire(
VALUE orig)
1871 return str_new_frozen_buffer(0, orig, FALSE);
/*
 * Like rb_str_tmp_frozen_acquire(), but the result is never embedded.
 *
 *   - orig itself is returned when it is frozen, heap-backed and not
 *     re-embeddable.
 *   - A shared orig whose root is heap-backed goes through
 *     rb_str_tmp_frozen_acquire().
 *   - Otherwise a class-less heap string is allocated and flagged as a shared
 *     root. If orig is embedded or takes part in sharing, a fresh buffer of
 *     capa + terminator bytes is allocated for it (the copy is on lines not
 *     shown). On the other visible path STR_NOFREE is transferred from orig to
 *     the new string and orig is made to share the new string's buffer.
 */
1875rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1877 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1878 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1880 VALUE str = str_alloc_heap(0);
1883 FL_SET(str, STR_SHARED_ROOT);
1885 size_t capa = str_capacity(orig, TERM_LEN(orig));
1891 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1892 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1899 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1900 RBASIC(orig)->flags &= ~STR_NOFREE;
1901 STR_SET_SHARED(orig, str);
1911rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1916 if (STR_EMBED_P(tmp)) {
1925 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1929 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1930 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1935 STR_SET_LEN(tmp, 0);
1943 return str_new_frozen_buffer(klass, orig, TRUE);
1952 VALUE str = str_alloc_heap(klass);
1953 STR_SET_LEN(str, RSTRING_LEN(orig));
1954 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1955 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1956 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1957 RBASIC(orig)->flags &= ~STR_NOFREE;
1958 STR_SET_SHARED(orig, str);
/*
 * Produces a string of class klass holding the bytes of orig.
 *
 * copy_encoding selects whether the result takes orig's encoding and
 * terminator width, or ASCII-8BIT with a one-byte terminator.
 *
 * Visible cases:
 *   - orig is embedded, or its bytes would fit embedded: a fresh copy is made
 *     with str_enc_new().
 *   - orig points into a shared buffer (`shared`, assigned on a line not
 *     shown): ofs/rest measure how far orig's window is from the start/end of
 *     that buffer; one path builds a new shared string and narrows it to the
 *     same window by advancing the pointer by ofs and shrinking the length by
 *     ofs + rest.
 *   - orig's bytes fit an embedded allocation with its own terminator width:
 *     they are copied into a newly allocated embedded string.
 *   - otherwise heap_str_make_shared() is used.
 * When copy_encoding is set, encoding and code range are copied from orig at
 * the end.
 */
1965str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1969 long len = RSTRING_LEN(orig);
1970 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1971 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1973 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1974 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1980 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1981 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1987 if ((ofs > 0) || (rest > 0) ||
1990 str = str_new_shared(klass,
shared);
1992 RSTRING(str)->as.heap.ptr += ofs;
1993 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
2001 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
2002 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
2004 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
2005 STR_SET_LEN(str, RSTRING_LEN(orig));
2010 str = heap_str_make_shared(klass, orig);
2014 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
2026str_new_empty_String(
VALUE str)
2029 rb_enc_copy(v, str);
/* Lower bound applied to a requested capacity: code later in this file raises
 * any smaller `capa` to this value. */
2033#define STR_BUF_MIN_SIZE 63
2038 if (STR_EMBEDDABLE_P(
capa, 1)) {
2046 RSTRING(str)->as.heap.ptr[0] =
'\0';
2066 return str_new(0, 0,
len);
2072 if (STR_EMBED_P(str)) {
2073 RB_DEBUG_COUNTER_INC(obj_str_embed);
2075 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
2076 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
2077 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
2080 RB_DEBUG_COUNTER_INC(obj_str_ptr);
2081 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/*
 * Reports the heap buffer size (capacity plus terminator) for a string that
 * is STR_NOEMBED and neither STR_SHARED nor STR_NOFREE. The value returned for
 * every other string is on a line not shown.
 */
2086rb_str_memsize(
VALUE str)
2088 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
2089 return STR_HEAP_SIZE(str);
2099 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2102static inline void str_discard(
VALUE str);
2103static void str_shared_replace(
VALUE str,
VALUE str2);
2108 if (str != str2) str_shared_replace(str, str2);
2119 enc = STR_ENC_GET(str2);
2122 termlen = rb_enc_mbminlen(enc);
2124 STR_SET_LEN(str, RSTRING_LEN(str2));
2126 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
2128 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
2129 rb_enc_associate(str, enc);
2133 if (STR_EMBED_P(str2)) {
2135 long len = RSTRING_LEN(str2);
2138 char *new_ptr =
ALLOC_N(
char,
len + termlen);
2139 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
2140 RSTRING(str2)->as.heap.ptr = new_ptr;
2141 STR_SET_LEN(str2,
len);
2143 STR_SET_NOEMBED(str2);
2146 STR_SET_NOEMBED(str);
2148 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
2150 if (
FL_TEST(str2, STR_SHARED)) {
2152 STR_SET_SHARED(str,
shared);
2155 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
2159 STR_SET_EMBED(str2);
2160 RSTRING_PTR(str2)[0] = 0;
2161 STR_SET_LEN(str2, 0);
2162 rb_enc_associate(str, enc);
2176 return rb_obj_as_string_result(str, obj);
2192 len = RSTRING_LEN(str2);
2193 if (STR_SHARED_P(str2)) {
2196 STR_SET_NOEMBED(str);
2197 STR_SET_LEN(str,
len);
2198 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
2199 STR_SET_SHARED(str,
shared);
2200 rb_enc_cr_str_exact_copy(str, str2);
2203 str_replace_shared(str, str2);
2212 size_t size = rb_str_embed_size(
capa);
2216 NEWOBJ_OF(str,
struct RString, klass,
2225 NEWOBJ_OF(str,
struct RString, klass,
2236 encidx = rb_enc_get_index(str);
2237 flags &= ~ENCODING_MASK;
2240 if (encidx) rb_enc_associate_index(dup, encidx);
2250 long len = RSTRING_LEN(str);
2255 STR_SET_LEN(dup, RSTRING_LEN(str));
2256 return str_duplicate_setup_encoding(str, dup, flags);
2265 root =
RSTRING(str)->as.heap.aux.shared;
2267 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
2268 root = str = str_new_frozen(klass, str);
2274 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
2275 FL_SET(root, STR_SHARED_ROOT);
2277 flags |= RSTRING_NOEMBED | STR_SHARED;
2279 STR_SET_LEN(dup, RSTRING_LEN(str));
2280 return str_duplicate_setup_encoding(str, dup, flags);
2286 if (STR_EMBED_P(str)) {
2287 return str_duplicate_setup_embed(klass, str, dup);
2290 return str_duplicate_setup_heap(klass, str, dup);
2298 if (STR_EMBED_P(str)) {
2299 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
2302 dup = str_alloc_heap(klass);
2305 return str_duplicate_setup(klass, str, dup);
2316rb_str_dup_m(
VALUE str)
2318 if (LIKELY(BARE_STRING_P(str))) {
2329 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2336 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2340 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2341 str_duplicate_setup_embed(klass, str, new_str);
2344 new_str = ec_str_alloc_heap(ec, klass);
2345 str_duplicate_setup_heap(klass, str, new_str);
2354rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2356 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2380 static ID keyword_ids[2];
2381 VALUE orig, opt, venc, vcapa;
2386 if (!keyword_ids[0]) {
2387 keyword_ids[0] = rb_id_encoding();
2388 CONST_ID(keyword_ids[1],
"capacity");
2396 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2397 enc = rb_to_encoding(venc);
2399 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2402 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2404 if (
capa < STR_BUF_MIN_SIZE) {
2405 capa = STR_BUF_MIN_SIZE;
2409 len = RSTRING_LEN(orig);
2413 if (orig == str) n = 0;
2415 str_modifiable(str);
2416 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2418 const size_t size = (size_t)
capa + termlen;
2419 const char *
const old_ptr = RSTRING_PTR(str);
2420 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2421 char *new_ptr =
ALLOC_N(
char, size);
2422 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2423 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2425 RSTRING(str)->as.heap.ptr = new_ptr;
2427 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2428 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2429 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2431 STR_SET_LEN(str,
len);
2434 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2435 rb_enc_cr_str_exact_copy(str, orig);
2437 FL_SET(str, STR_NOEMBED);
2444 rb_enc_associate(str, enc);
2456rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2462 static ID keyword_ids[2];
2472 keyword_ids[0] = rb_id_encoding();
2473 CONST_ID(keyword_ids[1],
"capacity");
2475 encoding = kwargs[0];
2476 capacity = kwargs[1];
2485 if (UNDEF_P(encoding)) {
2487 encoding = rb_obj_encoding(orig);
2491 if (!UNDEF_P(encoding)) {
2492 enc = rb_to_encoding(encoding);
2496 if (UNDEF_P(capacity)) {
2498 VALUE empty_str = str_new(klass,
"", 0);
2500 rb_enc_associate(empty_str, enc);
2504 VALUE copy = str_duplicate(klass, orig);
2505 rb_enc_associate(copy, enc);
2518 if (orig_capa >
capa) {
2523 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2524 STR_SET_LEN(str, 0);
/* True for every byte whose top two bits are not 10, i.e. any byte that is
 * not a UTF-8 continuation byte (plain ASCII bytes count as lead bytes). */
2535#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
/*
 * Counts, for one machine word, the bytes that are not UTF-8 continuation
 * bytes. `d` is assigned on a line not shown (NOTE(review): presumably *s).
 *
 * (d>>6) | (~d>>7) leaves, in the lowest bit of each byte, 1 when bit 6 is set
 * or bit 7 is clear -- the complement of the 10xxxxxx pattern. Masking with
 * NONASCII_MASK >> 7 keeps just that lowest bit per byte, so the population
 * count of d is the number of such bytes. rb_popcount_intptr() is used when
 * the compiler offers a popcount builtin and POPCNT is enabled; the fallback
 * is on lines not shown.
 */
2550static inline uintptr_t
2551count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2556 d = (d>>6) | (~d>>7);
2557 d &= NONASCII_MASK >> 7;
2560#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2562 return rb_popcount_intptr(d);
2566# if SIZEOF_VOIDP == 8
/*
 * Counts the characters in the byte range [p, e) for encoding enc; cr is the
 * caller's code-range hint. The conditions selecting each strategy are mostly
 * on lines not shown; the visible strategies are:
 *
 *   1. Fixed-width style: byte length divided by the minimum character
 *      length, rounded up when there is a remainder.
 *   2. UTF-8 lead-byte counting: for ranges longer than two machine words,
 *      bytes before the first aligned word are tested individually, aligned
 *      words go through count_utf8_lead_bytes_with_word(), and leftover bytes
 *      are again tested individually.
 *   3. ASCII-compatible encodings: search_nonascii() finds the next non-ASCII
 *      byte and multibyte characters are stepped over with
 *      rb_enc_fast_mbclen() in one variant and rb_enc_mbclen() in the other.
 *   4. Generic: step with rb_enc_mbclen() and count the iterations.
 */
2575enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2581 long diff = (long)(e - p);
2582 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2587 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2588 const uintptr_t *s, *t;
2589 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
/* s = p rounded up, t = e rounded down, to a word boundary */
2590 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2591 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2592 while (p < (
const char *)s) {
2593 if (is_utf8_lead_byte(*p))
len++;
2597 len += count_utf8_lead_bytes_with_word(s);
2600 p = (
const char *)s;
2603 if (is_utf8_lead_byte(*p))
len++;
2609 else if (rb_enc_asciicompat(enc)) {
2614 q = search_nonascii(p, e);
2620 p += rb_enc_fast_mbclen(p, e, enc);
2627 q = search_nonascii(p, e);
2633 p += rb_enc_mbclen(p, e, enc);
2640 for (c=0; p<e; c++) {
2641 p += rb_enc_mbclen(p, e, enc);
2656rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2664 long diff = (long)(e - p);
2665 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2667 else if (rb_enc_asciicompat(enc)) {
2671 q = search_nonascii(p, e);
2679 ret = rb_enc_precise_mbclen(p, e, enc);
2694 for (c=0; p<e; c++) {
2695 ret = rb_enc_precise_mbclen(p, e, enc);
2702 if (p + rb_enc_mbminlen(enc) <= e)
2703 p += rb_enc_mbminlen(enc);
2719 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2720 if (!enc) enc = STR_ENC_GET(str);
2721 p = RSTRING_PTR(str);
2726 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2731 return enc_strlen(p, e, enc, cr);
2738 return str_strlen(str, NULL);
2752 return LONG2NUM(str_strlen(str, NULL));
2764rb_str_bytesize(
VALUE str)
2782rb_str_empty(
VALUE str)
2784 return RBOOL(RSTRING_LEN(str) == 0);
2803 char *ptr1, *ptr2, *ptr3;
2808 enc = rb_enc_check_str(str1, str2);
2811 termlen = rb_enc_mbminlen(enc);
2812 if (len1 > LONG_MAX - len2) {
2813 rb_raise(rb_eArgError,
"string size too big");
2815 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2816 ptr3 = RSTRING_PTR(str3);
2817 memcpy(ptr3, ptr1, len1);
2818 memcpy(ptr3+len1, ptr2, len2);
2819 TERM_FILL(&ptr3[len1+len2], termlen);
2835 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2838 int enc1 = rb_enc_get_index(str1);
2839 int enc2 = rb_enc_get_index(str2);
2844 else if (enc2 < 0) {
2847 else if (enc1 != enc2) {
2850 else if (len1 > LONG_MAX - len2) {
2884 rb_enc_copy(str2, str);
2889 rb_raise(rb_eArgError,
"negative argument");
2891 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2892 if (STR_EMBEDDABLE_P(
len, 1)) {
2894 memset(RSTRING_PTR(str2), 0,
len + 1);
2901 STR_SET_LEN(str2,
len);
2902 rb_enc_copy(str2, str);
2905 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2906 rb_raise(rb_eArgError,
"argument too big");
2909 len *= RSTRING_LEN(str);
2910 termlen = TERM_LEN(str);
2912 ptr2 = RSTRING_PTR(str2);
2914 n = RSTRING_LEN(str);
2915 memcpy(ptr2, RSTRING_PTR(str), n);
2916 while (n <=
len/2) {
2917 memcpy(ptr2 + n, ptr2, n);
2920 memcpy(ptr2 + n, ptr2,
len-n);
2922 STR_SET_LEN(str2,
len);
2923 TERM_FILL(&ptr2[
len], termlen);
2924 rb_enc_cr_str_copy_for_substr(str2, str);
2961rb_check_lockedtmp(
VALUE str)
2963 if (
FL_TEST(str, STR_TMPLOCK)) {
2970#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2972str_modifiable(
VALUE str)
2976 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2977 if (CHILLED_STRING_P(str)) {
2978 CHILLED_STRING_MUTATED(str);
2980 rb_check_lockedtmp(str);
2981 rb_check_frozen(str);
2986str_dependent_p(
VALUE str)
2988 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2998#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
3000str_independent(
VALUE str)
3004 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
3005 str_modifiable(str);
3006 return !str_dependent_p(str);
3012str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
3022 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
3027 STR_SET_LEN(str,
len);
3032 oldptr = RSTRING_PTR(str);
3034 memcpy(
ptr, oldptr,
len);
3036 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
3039 STR_SET_NOEMBED(str);
3040 FL_UNSET(str, STR_SHARED|STR_NOFREE);
3041 TERM_FILL(
ptr +
len, termlen);
3043 STR_SET_LEN(str,
len);
3050 if (!str_independent(str))
3051 str_make_independent(str);
3060 int termlen = TERM_LEN(str);
3061 long len = RSTRING_LEN(str);
3064 rb_raise(rb_eArgError,
"negative expanding string size");
3066 if (expand >= LONG_MAX -
len) {
3067 rb_raise(rb_eArgError,
"string size too big");
3070 if (!str_independent(str)) {
3071 str_make_independent_expand(str,
len, expand, termlen);
3073 else if (expand > 0) {
3074 RESIZE_CAPA_TERM(str,
len + expand, termlen);
3081str_modify_keep_cr(
VALUE str)
3083 if (!str_independent(str))
3084 str_make_independent(str);
3091str_discard(
VALUE str)
3093 str_modifiable(str);
3094 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
3095 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
3096 RSTRING(str)->as.heap.ptr = 0;
3097 STR_SET_LEN(str, 0);
3104 int encindex = rb_enc_get_index(str);
3106 if (RB_UNLIKELY(encindex == -1)) {
3110 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
3115 if (!rb_enc_asciicompat(enc)) {
3137 return RSTRING_PTR(str);
3141zero_filled(
const char *s,
int n)
3143 for (; n > 0; --n) {
3150str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
3152 const char *e = s +
len;
3154 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
3155 if (zero_filled(s, minlen))
return s;
3161str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
3166 if (str_dependent_p(str)) {
3167 if (!zero_filled(s +
len, termlen))
3168 str_make_independent_expand(str,
len, 0L, termlen);
3171 TERM_FILL(s +
len, termlen);
3174 return RSTRING_PTR(str);
3178rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
3180 long capa = str_capacity(str, oldtermlen) + oldtermlen;
3181 long len = RSTRING_LEN(str);
3185 rb_check_lockedtmp(str);
3186 str_make_independent_expand(str,
len, 0L, termlen);
3188 else if (str_dependent_p(str)) {
3189 if (termlen > oldtermlen)
3190 str_make_independent_expand(str,
len, 0L, termlen);
3193 if (!STR_EMBED_P(str)) {
3198 if (termlen > oldtermlen) {
3199 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
3207str_null_check(
VALUE str,
int *w)
3209 char *s = RSTRING_PTR(str);
3210 long len = RSTRING_LEN(str);
3212 const int minlen = rb_enc_mbminlen(enc);
3216 if (str_null_char(s,
len, minlen, enc)) {
3219 return str_fill_term(str, s,
len, minlen);
3222 if (!s || memchr(s, 0,
len)) {
3226 s = str_fill_term(str, s,
len, minlen);
3232rb_str_to_cstr(
VALUE str)
3235 return str_null_check(str, &w);
3243 char *s = str_null_check(str, &w);
3246 rb_raise(rb_eArgError,
"string contains null char");
3248 rb_raise(rb_eArgError,
"string contains null byte");
3254rb_str_fill_terminator(
VALUE str,
const int newminlen)
3256 char *s = RSTRING_PTR(str);
3257 long len = RSTRING_LEN(str);
3258 return str_fill_term(str, s,
len, newminlen);
3264 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
3290str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
3299 else if (rb_enc_asciicompat(enc)) {
3300 const char *p2, *e2;
3303 while (p < e && 0 < nth) {
3310 p2 = search_nonascii(p, e2);
3319 n = rb_enc_mbclen(p, e, enc);
3330 while (p < e && nth--) {
3331 p += rb_enc_mbclen(p, e, enc);
3342 return str_nth_len(p, e, &nth, enc);
3346str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3351 p = str_nth_len(p, e, &nth, enc);
3360str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3362 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3363 if (!pp)
return e - p;
3370 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3371 STR_ENC_GET(str), single_byte_optimizable(str));
3376str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3379 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3380 const uintptr_t *s, *t;
3381 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3382 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3383 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3384 while (p < (
const char *)s) {
3385 if (is_utf8_lead_byte(*p)) nth--;
3389 nth -= count_utf8_lead_bytes_with_word(s);
3391 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3395 if (is_utf8_lead_byte(*p)) {
3396 if (nth == 0)
break;
3406str_utf8_offset(
const char *p,
const char *e,
long nth)
3408 const char *pp = str_utf8_nth(p, e, &nth);
3417 if (single_byte_optimizable(str) || pos < 0)
3420 char *p = RSTRING_PTR(str);
3421 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3426str_subseq(
VALUE str,
long beg,
long len)
3434 const int termlen = TERM_LEN(str);
3435 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3442 if (str_embed_capa(str2) >=
len + termlen) {
3443 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3444 STR_SET_EMBED(str2);
3445 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3446 TERM_FILL(ptr2+
len, termlen);
3448 STR_SET_LEN(str2,
len);
3452 str_replace_shared(str2, str);
3455 RSTRING(str2)->as.heap.ptr += beg;
3456 if (RSTRING_LEN(str2) >
len) {
3457 STR_SET_LEN(str2,
len);
3467 VALUE str2 = str_subseq(str, beg,
len);
3468 rb_enc_cr_str_copy_for_substr(str2, str);
3477 const long blen = RSTRING_LEN(str);
3479 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3481 if (
len < 0)
return 0;
3482 if (beg < 0 && -beg < 0)
return 0;
3486 if (single_byte_optimizable(str)) {
3487 if (beg > blen)
return 0;
3490 if (beg < 0)
return 0;
3492 if (
len > blen - beg)
3494 if (
len < 0)
return 0;
3499 if (
len > -beg)
len = -beg;
3503 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3506 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3512 slen = str_strlen(str, enc);
3514 if (beg < 0)
return 0;
3516 if (
len == 0)
goto end;
3519 else if (beg > 0 && beg > blen) {
3523 if (beg > str_strlen(str, enc))
return 0;
3528 enc == rb_utf8_encoding()) {
3529 p = str_utf8_nth(s, e, &beg);
3530 if (beg > 0)
return 0;
3531 len = str_utf8_offset(p, e,
len);
3537 p = s + beg * char_sz;
3541 else if (
len * char_sz > e - p)
3546 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3547 if (beg > 0)
return 0;
3551 len = str_offset(p, e,
len, enc, 0);
3559static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3564 return str_substr(str, beg,
len, TRUE);
3574str_substr(
VALUE str,
long beg,
long len,
int empty)
3578 if (!p)
return Qnil;
3579 if (!
len && !empty)
return Qnil;
3581 beg = p - RSTRING_PTR(str);
3583 VALUE str2 = str_subseq(str, beg,
len);
3584 rb_enc_cr_str_copy_for_substr(str2, str);
3592 if (CHILLED_STRING_P(str)) {
3597 rb_str_resize(str, RSTRING_LEN(str));
3615 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3658str_uminus(
VALUE str)
3663 return rb_fstring(str);
3667#define rb_str_dup_frozen rb_str_new_frozen
3672 if (
FL_TEST(str, STR_TMPLOCK)) {
3675 FL_SET(str, STR_TMPLOCK);
3682 if (!
FL_TEST(str, STR_TMPLOCK)) {
3702 const int termlen = TERM_LEN(str);
3704 str_modifiable(str);
3705 if (STR_SHARED_P(str)) {
3708 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3709 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3720 else if (
len > RSTRING_LEN(str)) {
3724 const char *
const new_end = RSTRING_PTR(str) +
len;
3734 else if (
len < RSTRING_LEN(str)) {
3742 STR_SET_LEN(str,
len);
3743 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3750 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3753 int independent = str_independent(str);
3754 long slen = RSTRING_LEN(str);
3755 const int termlen = TERM_LEN(str);
3757 if (slen >
len || (termlen != 1 && slen <
len)) {
3763 if (STR_EMBED_P(str)) {
3764 if (
len == slen)
return str;
3765 if (str_embed_capa(str) >=
len + termlen) {
3766 STR_SET_LEN(str,
len);
3770 str_make_independent_expand(str, slen,
len - slen, termlen);
3772 else if (str_embed_capa(str) >=
len + termlen) {
3773 char *
ptr = STR_HEAP_PTR(str);
3775 if (slen >
len) slen =
len;
3778 STR_SET_LEN(str,
len);
3779 if (independent) ruby_xfree(
ptr);
3782 else if (!independent) {
3783 if (
len == slen)
return str;
3784 str_make_independent_expand(str, slen,
len - slen, termlen);
3788 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3789 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3792 else if (
len == slen)
return str;
3793 STR_SET_LEN(str,
len);
3800str_ensure_available_capa(
VALUE str,
long len)
3802 str_modify_keep_cr(str);
3804 const int termlen = TERM_LEN(str);
3805 long olen = RSTRING_LEN(str);
3807 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3808 rb_raise(rb_eArgError,
"string sizes too big");
3811 long total = olen +
len;
3812 long capa = str_capacity(str, termlen);
3815 if (total >= LONG_MAX / 2) {
3818 while (total >
capa) {
3821 RESIZE_CAPA_TERM(str,
capa, termlen);
3826str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3829 str_modify_keep_cr(str);
3834 if (
len == 0)
return 0;
3836 long total, olen,
off = -1;
3838 const int termlen = TERM_LEN(str);
3841 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3845 long capa = str_capacity(str, termlen);
3847 if (olen > LONG_MAX -
len) {
3848 rb_raise(rb_eArgError,
"string sizes too big");
3852 if (total >= LONG_MAX / 2) {
3855 while (total >
capa) {
3858 RESIZE_CAPA_TERM(str,
capa, termlen);
3859 sptr = RSTRING_PTR(str);
3864 memcpy(sptr + olen,
ptr,
len);
3865 STR_SET_LEN(str, total);
3866 TERM_FILL(sptr + total, termlen);
3871#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3872#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3877 if (
len == 0)
return str;
3879 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3881 return str_buf_cat(str,
ptr,
len);
3892rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3897 if (UNLIKELY(!str_independent(str))) {
3898 str_make_independent(str);
3901 long string_length = -1;
3902 const int null_terminator_length = 1;
3907 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3908 rb_raise(rb_eArgError,
"string sizes too big");
3911 long string_capacity = str_capacity(str, null_terminator_length);
3917 if (LIKELY(string_capacity >= string_length + 1)) {
3919 sptr[string_length] = byte;
3920 STR_SET_LEN(str, string_length + 1);
3921 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3925 str_buf_cat(str, (
char *)&
byte, 1);
3941 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3952rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3953 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3962 if (str_encindex == ptr_encindex) {
3964 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3968 str_enc = rb_enc_from_index(str_encindex);
3969 ptr_enc = rb_enc_from_index(ptr_encindex);
3970 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3973 if (RSTRING_LEN(str) == 0) {
3976 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3982 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3991 *ptr_cr_ret = ptr_cr;
3993 if (str_encindex != ptr_encindex &&
3996 str_enc = rb_enc_from_index(str_encindex);
3997 ptr_enc = rb_enc_from_index(ptr_encindex);
4002 res_encindex = str_encindex;
4007 res_encindex = str_encindex;
4011 res_encindex = ptr_encindex;
4016 res_encindex = str_encindex;
4023 res_encindex = str_encindex;
4029 rb_raise(rb_eArgError,
"negative string size (or size too big)");
4031 str_buf_cat(str,
ptr,
len);
4037 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
4044 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
4054 if (rb_enc_asciicompat(enc)) {
4055 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
4061 unsigned int c = (
unsigned char)*
ptr;
4062 int len = rb_enc_codelen(c, enc);
4063 rb_enc_mbcput(c, buf, enc);
4064 rb_enc_cr_str_buf_cat(str, buf,
len,
4077 if (str_enc_fastpath(str)) {
4081 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
4087 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
4098 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
4114rb_str_concat_literals(
size_t num,
const VALUE *strary)
4118 unsigned long len = 1;
4123 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
4125 str_enc_copy_direct(str, strary[0]);
4127 for (i = s; i < num; ++i) {
4128 const VALUE v = strary[i];
4132 if (encidx != ENCINDEX_US_ASCII) {
4134 rb_enc_set_index(str, encidx);
4159rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
4161 str_modifiable(str);
4166 else if (argc > 1) {
4169 rb_enc_copy(arg_str, str);
4170 for (i = 0; i < argc; i++) {
4205rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
4207 long needed_capacity = 0;
4211 for (
int index = 0; index < argc; index++) {
4212 VALUE obj = argv[index];
4220 needed_capacity += RSTRING_LEN(obj);
4225 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
4232 str_ensure_available_capa(str, needed_capacity);
4235 for (
int index = 0; index < argc; index++) {
4236 VALUE obj = argv[index];
4241 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
4242 char byte = (char)(
NUM2INT(obj) & 0xFF);
4256 rb_bug(
"append_as_bytes arguments should have been validated");
4260 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
4261 TERM_FILL(sptr, TERM_LEN(str));
4266 for (
int index = 0; index < argc; index++) {
4267 VALUE obj = argv[index];
4284 rb_bug(
"append_as_bytes arguments should have been validated");
4363 if (rb_num_to_uint(str2, &code) == 0) {
4376 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4379 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4382 long pos = RSTRING_LEN(str1);
4387 switch (
len = rb_enc_codelen(code, enc)) {
4388 case ONIGERR_INVALID_CODE_POINT_VALUE:
4389 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4391 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4397 rb_enc_mbcput(code, buf, enc);
4398 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4399 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4401 rb_str_resize(str1, pos+
len);
4402 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4415rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4417 int encidx = rb_enc_to_index(enc);
4419 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4424 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4425 return ENCINDEX_ASCII_8BIT;
4448rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4450 str_modifiable(str);
4455 else if (argc > 1) {
4458 rb_enc_copy(arg_str, str);
4459 for (i = 0; i < argc; i++) {
4472 st_index_t precomputed_hash;
4473 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4475 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4476 return precomputed_hash;
4479 return str_do_hash(str);
4486 const char *ptr1, *ptr2;
4489 return (len1 != len2 ||
4491 memcmp(ptr1, ptr2, len1) != 0);
4505rb_str_hash_m(
VALUE str)
4511#define lesser(a,b) (((a)>(b))?(b):(a))
4519 if (RSTRING_LEN(str1) == 0)
return TRUE;
4520 if (RSTRING_LEN(str2) == 0)
return TRUE;
4523 if (idx1 == idx2)
return TRUE;
4528 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4532 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4542 const char *ptr1, *ptr2;
4545 if (str1 == str2)
return 0;
4548 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4557 if (len1 > len2)
return 1;
4560 if (retval > 0)
return 1;
4594 if (str1 == str2)
return Qtrue;
4601 return rb_str_eql_internal(str1, str2);
4625 if (str1 == str2)
return Qtrue;
4627 return rb_str_eql_internal(str1, str2);
4659 return rb_invcmp(str1, str2);
4701 return str_casecmp(str1, s);
4709 const char *p1, *p1end, *p2, *p2end;
4711 enc = rb_enc_compatible(str1, str2);
4716 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4717 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4718 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4719 while (p1 < p1end && p2 < p2end) {
4721 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4722 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4724 return INT2FIX(c1 < c2 ? -1 : 1);
4731 while (p1 < p1end && p2 < p2end) {
4732 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4733 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4735 if (0 <= c1 && 0 <= c2) {
4739 return INT2FIX(c1 < c2 ? -1 : 1);
4743 l1 = rb_enc_mbclen(p1, p1end, enc);
4744 l2 = rb_enc_mbclen(p2, p2end, enc);
4745 len = l1 < l2 ? l1 : l2;
4746 r = memcmp(p1, p2,
len);
4748 return INT2FIX(r < 0 ? -1 : 1);
4750 return INT2FIX(l1 < l2 ? -1 : 1);
4756 if (RSTRING_LEN(str1) == RSTRING_LEN(str2))
return INT2FIX(0);
4757 if (RSTRING_LEN(str1) > RSTRING_LEN(str2))
return INT2FIX(1);
4791 return str_casecmp_p(str1, s);
4798 VALUE folded_str1, folded_str2;
4799 VALUE fold_opt = sym_fold;
4801 enc = rb_enc_compatible(str1, str2);
4806 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4807 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4809 return rb_str_eql(folded_str1, folded_str2);
4813strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4814 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4816 const char *search_start = str_ptr;
4817 long pos, search_len = str_len - offset;
4821 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4822 if (pos < 0)
return pos;
4824 if (t == search_start + pos)
break;
4825 search_len -= t - search_start;
4826 if (search_len <= 0)
return -1;
4827 offset += t - search_start;
4830 return pos + offset;
4834#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4835#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4838rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4840 const char *str_ptr, *str_ptr_end, *sub_ptr;
4841 long str_len, sub_len;
4844 enc = rb_enc_check(str, sub);
4845 if (is_broken_string(sub))
return -1;
4847 str_ptr = RSTRING_PTR(str);
4849 str_len = RSTRING_LEN(str);
4850 sub_ptr = RSTRING_PTR(sub);
4851 sub_len = RSTRING_LEN(sub);
4853 if (str_len < sub_len)
return -1;
4856 long str_len_char, sub_len_char;
4857 int single_byte = single_byte_optimizable(str);
4858 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4859 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4861 offset += str_len_char;
4862 if (offset < 0)
return -1;
4864 if (str_len_char - offset < sub_len_char)
return -1;
4865 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4868 if (sub_len == 0)
return offset;
4871 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4885rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4892 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4893 long slen = str_strlen(str, enc);
4895 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4907 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4908 enc, single_byte_optimizable(str));
4919 pos = rb_str_index(str, sub, pos);
4933str_ensure_byte_pos(
VALUE str,
long pos)
4935 if (!single_byte_optimizable(str)) {
4936 const char *s = RSTRING_PTR(str);
4938 const char *p = s + pos;
4939 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4941 "offset %ld does not land on character boundary", pos);
5014rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
5020 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5021 long slen = RSTRING_LEN(str);
5023 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
5034 str_ensure_byte_pos(str, pos);
5046 pos = rb_str_byteindex(str, sub, pos);
5047 if (pos >= 0)
return LONG2NUM(pos);
5054memrchr(
const char *search_str,
int chr,
long search_len)
5056 const char *ptr = search_str + search_len;
5057 while (ptr > search_str) {
5058 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
5068 char *hit, *adjusted;
5070 long slen, searchlen;
5073 sbeg = RSTRING_PTR(str);
5074 slen = RSTRING_LEN(sub);
5075 if (slen == 0)
return s - sbeg;
5077 t = RSTRING_PTR(sub);
5079 searchlen = s - sbeg + 1;
5081 if (memcmp(s, t, slen) == 0) {
5086 hit = memrchr(sbeg, c, searchlen);
5089 if (hit != adjusted) {
5090 searchlen = adjusted - sbeg;
5093 if (memcmp(hit, t, slen) == 0)
5095 searchlen = adjusted - sbeg;
5096 }
while (searchlen > 0);
5110 enc = rb_enc_check(str, sub);
5111 if (is_broken_string(sub))
return -1;
5112 singlebyte = single_byte_optimizable(str);
5113 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
5114 slen = str_strlen(sub, enc);
5117 if (
len < slen)
return -1;
5118 if (
len - pos < slen) pos =
len - slen;
5119 if (
len == 0)
return pos;
5121 sbeg = RSTRING_PTR(str);
5124 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
5130 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
5131 return str_rindex(str, sub, s, enc);
5192rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
5197 long pos,
len = str_strlen(str, enc);
5199 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5201 if (pos < 0 && (pos +=
len) < 0) {
5207 if (pos >
len) pos =
len;
5215 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
5216 enc, single_byte_optimizable(str));
5227 pos = rb_str_rindex(str, sub, pos);
5237rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
5243 enc = rb_enc_check(str, sub);
5244 if (is_broken_string(sub))
return -1;
5245 len = RSTRING_LEN(str);
5246 slen = RSTRING_LEN(sub);
5249 if (
len < slen)
return -1;
5250 if (
len - pos < slen) pos =
len - slen;
5251 if (
len == 0)
return pos;
5253 sbeg = RSTRING_PTR(str);
5256 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
5263 return str_rindex(str, sub, s, enc);
5328rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
5332 long pos,
len = RSTRING_LEN(str);
5334 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5336 if (pos < 0 && (pos +=
len) < 0) {
5342 if (pos >
len) pos =
len;
5348 str_ensure_byte_pos(str, pos);
5360 pos = rb_str_byterindex(str, sub, pos);
5361 if (pos >= 0)
return LONG2NUM(pos);
5400 switch (OBJ_BUILTIN_TYPE(y)) {
5452rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5459 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5491rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5495 re = get_pat(argv[0]);
5496 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5505static enum neighbor_char
5511 if (rb_enc_mbminlen(enc) > 1) {
5513 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5515 return NEIGHBOR_NOT_CHAR;
5517 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5519 if (!l)
return NEIGHBOR_NOT_CHAR;
5520 if (l !=
len)
return NEIGHBOR_WRAPPED;
5521 rb_enc_mbcput(c, p, enc);
5522 r = rb_enc_precise_mbclen(p, p +
len, enc);
5524 return NEIGHBOR_NOT_CHAR;
5526 return NEIGHBOR_FOUND;
5529 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5532 return NEIGHBOR_WRAPPED;
5533 ++((
unsigned char*)p)[i];
5534 l = rb_enc_precise_mbclen(p, p+
len, enc);
5538 return NEIGHBOR_FOUND;
5541 memset(p+l, 0xff,
len-l);
5547 for (len2 =
len-1; 0 < len2; len2--) {
5548 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5552 memset(p+len2+1, 0xff,
len-(len2+1));
5557static enum neighbor_char
5562 if (rb_enc_mbminlen(enc) > 1) {
5564 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5566 return NEIGHBOR_NOT_CHAR;
5568 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5569 if (!c)
return NEIGHBOR_NOT_CHAR;
5572 if (!l)
return NEIGHBOR_NOT_CHAR;
5573 if (l !=
len)
return NEIGHBOR_WRAPPED;
5574 rb_enc_mbcput(c, p, enc);
5575 r = rb_enc_precise_mbclen(p, p +
len, enc);
5577 return NEIGHBOR_NOT_CHAR;
5579 return NEIGHBOR_FOUND;
5582 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5585 return NEIGHBOR_WRAPPED;
5586 --((
unsigned char*)p)[i];
5587 l = rb_enc_precise_mbclen(p, p+
len, enc);
5591 return NEIGHBOR_FOUND;
5594 memset(p+l, 0,
len-l);
5600 for (len2 =
len-1; 0 < len2; len2--) {
5601 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5605 memset(p+len2+1, 0,
len-(len2+1));
5619static enum neighbor_char
5620enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5622 enum neighbor_char ret;
5626 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5630 const int max_gaps = 1;
5632 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5634 ctype = ONIGENC_CTYPE_DIGIT;
5636 ctype = ONIGENC_CTYPE_ALPHA;
5638 return NEIGHBOR_NOT_CHAR;
5641 for (
try = 0;
try <= max_gaps; ++
try) {
5642 ret = enc_succ_char(p,
len, enc);
5643 if (ret == NEIGHBOR_FOUND) {
5644 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5646 return NEIGHBOR_FOUND;
5653 ret = enc_pred_char(p,
len, enc);
5654 if (ret == NEIGHBOR_FOUND) {
5655 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5668 return NEIGHBOR_NOT_CHAR;
5671 if (ctype != ONIGENC_CTYPE_DIGIT) {
5673 return NEIGHBOR_WRAPPED;
5677 enc_succ_char(carry,
len, enc);
5678 return NEIGHBOR_WRAPPED;
5746 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5747 rb_enc_cr_str_copy_for_substr(str, orig);
5748 return str_succ(str);
5755 char *sbeg, *s, *e, *last_alnum = 0;
5756 int found_alnum = 0;
5758 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5759 long carry_pos = 0, carry_len = 1;
5760 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5762 slen = RSTRING_LEN(str);
5763 if (slen == 0)
return str;
5765 enc = STR_ENC_GET(str);
5766 sbeg = RSTRING_PTR(str);
5767 s = e = sbeg + slen;
5769 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5770 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5776 l = rb_enc_precise_mbclen(s, e, enc);
5777 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5778 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5779 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5781 case NEIGHBOR_NOT_CHAR:
5783 case NEIGHBOR_FOUND:
5785 case NEIGHBOR_WRAPPED:
5790 carry_pos = s - sbeg;
5795 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5796 enum neighbor_char neighbor;
5797 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5798 l = rb_enc_precise_mbclen(s, e, enc);
5799 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5800 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5802 neighbor = enc_succ_char(tmp, l, enc);
5804 case NEIGHBOR_FOUND:
5808 case NEIGHBOR_WRAPPED:
5811 case NEIGHBOR_NOT_CHAR:
5814 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5816 enc_succ_char(s, l, enc);
5818 if (!rb_enc_asciicompat(enc)) {
5819 MEMCPY(carry, s,
char, l);
5822 carry_pos = s - sbeg;
5826 RESIZE_CAPA(str, slen + carry_len);
5827 sbeg = RSTRING_PTR(str);
5828 s = sbeg + carry_pos;
5829 memmove(s + carry_len, s, slen - carry_pos);
5830 memmove(s, carry, carry_len);
5832 STR_SET_LEN(str, slen);
5833 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5847rb_str_succ_bang(
VALUE str)
5855all_digits_p(
const char *s,
long len)
5909 VALUE end, exclusive;
5913 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5919 VALUE current, after_end;
5926 enc = rb_enc_check(beg, end);
5927 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5929 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5930 char c = RSTRING_PTR(beg)[0];
5931 char e = RSTRING_PTR(end)[0];
5933 if (c > e || (excl && c == e))
return beg;
5935 VALUE str = rb_enc_str_new(&c, 1, enc);
5937 if ((*each)(str, arg))
break;
5938 if (!excl && c == e)
break;
5940 if (excl && c == e)
break;
5945 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5946 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5947 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5952 b = rb_str_to_inum(beg, 10, FALSE);
5953 e = rb_str_to_inum(end, 10, FALSE);
5960 if (excl && bi == ei)
break;
5961 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5966 ID op = excl ?
'<' : idLE;
5967 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5972 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5973 b = rb_funcallv(b, succ, 0, 0);
5980 if (n > 0 || (excl && n == 0))
return beg;
5982 after_end = rb_funcallv(end, succ, 0, 0);
5987 next = rb_funcallv(current, succ, 0, 0);
5988 if ((*each)(current, arg))
break;
5989 if (
NIL_P(next))
break;
5993 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
6008 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
6009 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
6010 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
6012 b = rb_str_to_inum(beg, 10, FALSE);
6018 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
6026 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
6027 b = rb_funcallv(b, succ, 0, 0);
6033 VALUE next = rb_funcallv(current, succ, 0, 0);
6034 if ((*each)(current, arg))
break;
6037 if (RSTRING_LEN(current) == 0)
6048 if (!
rb_equal(str, *argp))
return 0;
6062 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
6063 rb_enc_asciicompat(STR_ENC_GET(end)) &&
6064 rb_enc_asciicompat(STR_ENC_GET(val))) {
6065 const char *bp = RSTRING_PTR(beg);
6066 const char *ep = RSTRING_PTR(end);
6067 const char *vp = RSTRING_PTR(val);
6068 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
6069 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
6077 if (b <= v && v < e)
return Qtrue;
6078 return RBOOL(!
RTEST(exclusive) && v == e);
6085 all_digits_p(bp, RSTRING_LEN(beg)) &&
6086 all_digits_p(ep, RSTRING_LEN(end))) {
6091 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
6093 return RBOOL(
NIL_P(val));
6116 return rb_str_subpat(str, indx,
INT2FIX(0));
6119 if (rb_str_index(str, indx, 0) != -1)
6125 long beg,
len = str_strlen(str, NULL);
6137 return str_substr(str, idx, 1, FALSE);
6156rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
6160 return rb_str_subpat(str, argv[0], argv[1]);
6163 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
6167 return rb_str_aref(str, argv[0]);
6173 char *ptr = RSTRING_PTR(str);
6174 long olen = RSTRING_LEN(str), nlen;
6176 str_modifiable(str);
6177 if (
len > olen)
len = olen;
6179 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
6181 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
6183 ptr =
RSTRING(str)->as.embed.ary;
6184 memmove(ptr, oldptr +
len, nlen);
6185 if (fl == STR_NOEMBED)
xfree(oldptr);
6188 if (!STR_SHARED_P(str)) {
6190 rb_enc_cr_str_exact_copy(shared, str);
6195 STR_SET_LEN(str, nlen);
6197 if (!SHARABLE_MIDDLE_SUBSTRING) {
6198 TERM_FILL(ptr + nlen, TERM_LEN(str));
6205rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
6211 if (beg == 0 && vlen == 0) {
6216 str_modify_keep_cr(str);
6220 RESIZE_CAPA(str, slen + vlen -
len);
6221 sptr = RSTRING_PTR(str);
6230 memmove(sptr + beg + vlen,
6232 slen - (beg +
len));
6234 if (vlen < beg &&
len < 0) {
6238 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
6241 STR_SET_LEN(str, slen);
6242 TERM_FILL(&sptr[slen], TERM_LEN(str));
6249 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
6258 int singlebyte = single_byte_optimizable(str);
6264 enc = rb_enc_check(str, val);
6265 slen = str_strlen(str, enc);
6267 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
6276 if (
len > slen - beg) {
6279 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
6284 beg = p - RSTRING_PTR(str);
6286 rb_str_update_0(str, beg,
len, val);
6287 rb_enc_associate(str, enc);
6298 long start, end,
len;
6308 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
6312 nth += regs->num_regs;
6322 enc = rb_enc_check_str(str, val);
6323 rb_str_update_0(str, start,
len, val);
6324 rb_enc_associate(str, enc);
6332 switch (
TYPE(indx)) {
6334 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
6338 beg = rb_str_index(str, indx, 0);
6393rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
6397 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
6405 return rb_str_aset(str, argv[0], argv[1]);
6465rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6473 str_modify_keep_cr(str);
6481 if ((nth += regs->num_regs) <= 0)
return Qnil;
6483 else if (nth >= regs->num_regs)
return Qnil;
6485 len = END(nth) - beg;
6488 else if (argc == 2) {
6497 beg = p - RSTRING_PTR(str);
6501 beg = rb_str_index(str, indx, 0);
6502 if (beg == -1)
return Qnil;
6503 len = RSTRING_LEN(indx);
6515 beg = p - RSTRING_PTR(str);
6524 beg = p - RSTRING_PTR(str);
6528 rb_enc_cr_str_copy_for_substr(result, str);
6536 char *sptr = RSTRING_PTR(str);
6537 long slen = RSTRING_LEN(str);
6538 if (beg +
len > slen)
6542 slen - (beg +
len));
6544 STR_SET_LEN(str, slen);
6545 TERM_FILL(&sptr[slen], TERM_LEN(str));
6556 switch (OBJ_BUILTIN_TYPE(pat)) {
6575get_pat_quoted(
VALUE pat,
int check)
6579 switch (OBJ_BUILTIN_TYPE(pat)) {
6593 if (check && is_broken_string(pat)) {
6600rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6603 pos = rb_str_byteindex(str, pat, pos);
6604 if (set_backref_str) {
6606 str = rb_str_new_frozen_String(str);
6607 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6609 *match = match_data;
6619 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6624rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6626 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6645rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6659 hash = rb_check_hash_type(argv[1]);
6665 pat = get_pat_quoted(argv[0], 1);
6667 str_modifiable(str);
6668 beg = rb_pat_search(pat, str, 0, 1);
6682 end0 = beg0 + RSTRING_LEN(pat);
6691 if (iter || !
NIL_P(hash)) {
6692 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6698 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6701 str_mod_check(str, p,
len);
6702 rb_check_frozen(str);
6708 enc = rb_enc_compatible(str, repl);
6711 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6715 rb_enc_inspect_name(str_enc),
6716 rb_enc_inspect_name(STR_ENC_GET(repl)));
6718 enc = STR_ENC_GET(repl);
6721 rb_enc_associate(str, enc);
6731 rlen = RSTRING_LEN(repl);
6732 len = RSTRING_LEN(str);
6734 RESIZE_CAPA(str,
len + rlen - plen);
6736 p = RSTRING_PTR(str);
6738 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6740 rp = RSTRING_PTR(repl);
6741 memmove(p + beg0, rp, rlen);
6743 STR_SET_LEN(str,
len);
6744 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6773 rb_str_sub_bang(argc, argv, str);
6778str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6781 long beg, beg0, end0;
6782 long offset, blen, slen,
len, last;
6783 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6785 int need_backref_str = -1;
6795 hash = rb_check_hash_type(argv[1]);
6799 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6807 rb_error_arity(argc, 1, 2);
6810 pat = get_pat_quoted(argv[0], 1);
6811 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6814 if (bang)
return Qnil;
6819 blen = RSTRING_LEN(str) + 30;
6821 sp = RSTRING_PTR(str);
6822 slen = RSTRING_LEN(str);
6824 str_enc = STR_ENC_GET(str);
6825 rb_enc_associate(dest, str_enc);
6832 end0 = beg0 + RSTRING_LEN(pat);
6848 if (mode == FAST_MAP) {
6857 val = rb_hash_aref(hash, key);
6860 str_mod_check(str, sp, slen);
6865 else if (need_backref_str) {
6867 if (need_backref_str < 0) {
6868 need_backref_str = val != repl;
6875 len = beg0 - offset;
6889 if (RSTRING_LEN(str) <= end0)
break;
6890 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6892 offset = end0 +
len;
6894 cp = RSTRING_PTR(str) + offset;
6895 if (offset > RSTRING_LEN(str))
break;
6898 if (mode != FAST_MAP && mode != STR) {
6901 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6906 if (RSTRING_LEN(str) > offset) {
6909 rb_pat_search0(pat, str, last, 1, &match);
6911 str_shared_replace(str, dest);
6939rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6941 str_modify_keep_cr(str);
6942 return str_gsub(argc, argv, str, 1);
6965 return str_gsub(argc, argv, str, 0);
6983 str_modifiable(str);
6984 if (str == str2)
return str;
6988 return str_replace(str, str2);
7003rb_str_clear(
VALUE str)
7007 STR_SET_LEN(str, 0);
7008 RSTRING_PTR(str)[0] = 0;
7009 if (rb_enc_asciicompat(STR_ENC_GET(str)))
7028rb_str_chr(
VALUE str)
7052 pos += RSTRING_LEN(str);
7053 if (pos < 0 || RSTRING_LEN(str) <= pos)
7056 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
7075 long len = RSTRING_LEN(str);
7076 char *
ptr, *head, *left = 0;
7080 if (pos < -
len ||
len <= pos)
7087 char byte = (char)(
NUM2INT(w) & 0xFF);
7089 if (!str_independent(str))
7090 str_make_independent(str);
7091 enc = STR_ENC_GET(str);
7092 head = RSTRING_PTR(str);
7094 if (!STR_EMBED_P(str)) {
7101 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
7109 width = rb_enc_precise_mbclen(left, head+
len, enc);
7111 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
7127str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
7129 long n = RSTRING_LEN(str);
7131 if (beg > n ||
len < 0)
return Qnil;
7134 if (beg < 0)
return Qnil;
7139 if (!empty)
return Qnil;
7143 VALUE str2 = str_subseq(str, beg,
len);
7145 str_enc_copy_direct(str2, str);
7147 if (RSTRING_LEN(str2) == 0) {
7148 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
7182 long beg,
len = RSTRING_LEN(str);
7190 return str_byte_substr(str, beg,
len, TRUE);
7195 return str_byte_substr(str, idx, 1, FALSE);
7242rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
7247 return str_byte_substr(str, beg,
len, TRUE);
7250 return str_byte_aref(str, argv[0]);
7254str_check_beg_len(
VALUE str,
long *beg,
long *
len)
7256 long end, slen = RSTRING_LEN(str);
7259 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
7268 if (*
len > slen - *beg) {
7272 str_ensure_byte_pos(str, *beg);
7273 str_ensure_byte_pos(str, end);
7298rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
7300 long beg,
len, vbeg, vlen;
7305 if (!(argc == 2 || argc == 3 || argc == 5)) {
7306 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
7310 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
7311 rb_builtin_class_name(argv[0]));
7318 vlen = RSTRING_LEN(val);
7323 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
7324 rb_builtin_class_name(argv[2]));
7336 vlen = RSTRING_LEN(val);
7344 str_check_beg_len(str, &beg, &
len);
7345 str_check_beg_len(val, &vbeg, &vlen);
7346 str_modify_keep_cr(str);
7349 rb_enc_associate(str, rb_enc_check(str, val));
7352 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
7370rb_str_reverse(
VALUE str)
7377 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
7378 enc = STR_ENC_GET(str);
7384 if (RSTRING_LEN(str) > 1) {
7385 if (single_byte_optimizable(str)) {
7392 int clen = rb_enc_fast_mbclen(s, e, enc);
7400 cr = rb_enc_asciicompat(enc) ?
7403 int clen = rb_enc_mbclen(s, e, enc);
7412 STR_SET_LEN(rev, RSTRING_LEN(str));
7413 str_enc_copy_direct(rev, str);
7433rb_str_reverse_bang(
VALUE str)
7435 if (RSTRING_LEN(str) > 1) {
7436 if (single_byte_optimizable(str)) {
7439 str_modify_keep_cr(str);
7440 s = RSTRING_PTR(str);
7449 str_shared_replace(str, rb_str_reverse(str));
7453 str_modify_keep_cr(str);
7478 i = rb_str_index(str, arg, 0);
7480 return RBOOL(i != -1);
7522 rb_raise(rb_eArgError,
"invalid radix %d", base);
7524 return rb_str_to_inum(str, base, FALSE);
7548rb_str_to_f(
VALUE str)
7563rb_str_to_s(
VALUE str)
7575 char s[RUBY_MAX_CHAR_LEN];
7576 int n = rb_enc_codelen(c, enc);
7578 rb_enc_mbcput(c, s, enc);
7583#define CHAR_ESC_LEN 13
7586rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7588 char buf[CHAR_ESC_LEN + 1];
7596 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7598 else if (c < 0x10000) {
7599 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7602 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7607 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7610 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7613 l = (int)strlen(buf);
7619ruby_escaped_char(
int c)
7622 case '\0':
return "\\0";
7623 case '\n':
return "\\n";
7624 case '\r':
return "\\r";
7625 case '\t':
return "\\t";
7626 case '\f':
return "\\f";
7627 case '\013':
return "\\v";
7628 case '\010':
return "\\b";
7629 case '\007':
return "\\a";
7630 case '\033':
return "\\e";
7631 case '\x7f':
return "\\c?";
7637rb_str_escape(
VALUE str)
7641 const char *p = RSTRING_PTR(str);
7643 const char *prev = p;
7644 char buf[CHAR_ESC_LEN + 1];
7646 int unicode_p = rb_enc_unicode_p(enc);
7647 int asciicompat = rb_enc_asciicompat(enc);
7652 int n = rb_enc_precise_mbclen(p, pend, enc);
7654 if (p > prev) str_buf_cat(result, prev, p - prev);
7655 n = rb_enc_mbminlen(enc);
7657 n = (int)(pend - p);
7659 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7660 str_buf_cat(result, buf, strlen(buf));
7666 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7668 cc = ruby_escaped_char(c);
7670 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7671 str_buf_cat(result, cc, strlen(cc));
7674 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7677 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7678 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7682 if (p > prev) str_buf_cat(result, prev, p - prev);
7706 const char *p, *pend, *prev;
7707 char buf[CHAR_ESC_LEN + 1];
7709 rb_encoding *resenc = rb_default_internal_encoding();
7710 int unicode_p = rb_enc_unicode_p(enc);
7711 int asciicompat = rb_enc_asciicompat(enc);
7713 if (resenc == NULL) resenc = rb_default_external_encoding();
7714 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7715 rb_enc_associate(result, resenc);
7716 str_buf_cat2(result,
"\"");
7724 n = rb_enc_precise_mbclen(p, pend, enc);
7726 if (p > prev) str_buf_cat(result, prev, p - prev);
7727 n = rb_enc_mbminlen(enc);
7729 n = (int)(pend - p);
7731 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7732 str_buf_cat(result, buf, strlen(buf));
7738 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7740 if ((asciicompat || unicode_p) &&
7741 (c ==
'"'|| c ==
'\\' ||
7746 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7747 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7748 str_buf_cat2(result,
"\\");
7749 if (asciicompat || enc == resenc) {
7755 case '\n': cc =
'n';
break;
7756 case '\r': cc =
'r';
break;
7757 case '\t': cc =
't';
break;
7758 case '\f': cc =
'f';
break;
7759 case '\013': cc =
'v';
break;
7760 case '\010': cc =
'b';
break;
7761 case '\007': cc =
'a';
break;
7762 case 033: cc =
'e';
break;
7763 default: cc = 0;
break;
7766 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7769 str_buf_cat(result, buf, 2);
7782 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7786 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7787 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7792 if (p > prev) str_buf_cat(result, prev, p - prev);
7793 str_buf_cat2(result,
"\"");
7798#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7818 int encidx = rb_enc_get_index(str);
7821 const char *p, *pend;
7824 int u8 = (encidx == rb_utf8_encindex());
7825 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7828 if (!rb_enc_asciicompat(enc)) {
7830 len += strlen(enc->name);
7833 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7836 unsigned char c = *p++;
7839 case '"':
case '\\':
7840 case '\n':
case '\r':
7841 case '\t':
case '\f':
7842 case '\013':
case '\010':
case '\007':
case '\033':
7847 clen = IS_EVSTR(p, pend) ? 2 : 1;
7855 if (u8 && c > 0x7F) {
7856 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7858 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7861 else if (cc <= 0xFFFFF)
7874 if (clen > LONG_MAX -
len) {
7881 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7882 q = RSTRING_PTR(result); qend = q +
len + 1;
7886 unsigned char c = *p++;
7888 if (c ==
'"' || c ==
'\\') {
7892 else if (c ==
'#') {
7893 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7896 else if (c ==
'\n') {
7900 else if (c ==
'\r') {
7904 else if (c ==
'\t') {
7908 else if (c ==
'\f') {
7912 else if (c ==
'\013') {
7916 else if (c ==
'\010') {
7920 else if (c ==
'\007') {
7924 else if (c ==
'\033') {
7934 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7936 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7939 snprintf(q, qend-q,
"u%04X", cc);
7941 snprintf(q, qend-q,
"u{%X}", cc);
7946 snprintf(q, qend-q,
"x%02X", c);
7952 if (!rb_enc_asciicompat(enc)) {
7953 snprintf(q, qend-q, nonascii_suffix, enc->name);
7954 encidx = rb_ascii8bit_encindex();
7957 rb_enc_associate_index(result, encidx);
7963unescape_ascii(
unsigned int c)
7987undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7989 const char *s = *ss;
7993 unsigned char buf[6];
8011 *buf = unescape_ascii(*s);
8023 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
8024 if (*penc != enc_utf8) {
8026 rb_enc_associate(undumped, enc_utf8);
8043 if (hexlen == 0 || hexlen > 6) {
8049 if (0xd800 <= c && c <= 0xdfff) {
8052 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
8062 if (0xd800 <= c && c <= 0xdfff) {
8065 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
8093static VALUE rb_str_is_ascii_only_p(
VALUE str);
8111str_undump(
VALUE str)
8113 const char *s = RSTRING_PTR(str);
8116 VALUE undumped = rb_enc_str_new(s, 0L, enc);
8118 bool binary =
false;
8122 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
8125 if (!str_null_check(str, &w)) {
8128 if (RSTRING_LEN(str) < 2)
goto invalid_format;
8129 if (*s !=
'"')
goto invalid_format;
8147 static const char force_encoding_suffix[] =
".force_encoding(\"";
8148 static const char dup_suffix[] =
".dup";
8149 const char *encname;
8154 size =
sizeof(dup_suffix) - 1;
8155 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
8157 size =
sizeof(force_encoding_suffix) - 1;
8158 if (s_end - s <= size)
goto invalid_format;
8159 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
8163 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
8167 s = memchr(s,
'"', s_end-s);
8169 if (!s)
goto invalid_format;
8170 if (s_end - s != 2)
goto invalid_format;
8171 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
8173 encidx = rb_enc_find_index2(encname, (
long)size);
8177 rb_enc_associate_index(undumped, encidx);
8187 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
8198 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
8204 if (rb_enc_dummy_p(enc)) {
8211str_true_enc(
VALUE str)
8214 rb_str_check_dummy_enc(enc);
8218static OnigCaseFoldType
8219check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
8224 rb_raise(rb_eArgError,
"too many options");
8225 if (argv[0]==sym_turkic) {
8226 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
8228 if (argv[1]==sym_lithuanian)
8229 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
8231 rb_raise(rb_eArgError,
"invalid second option");
8234 else if (argv[0]==sym_lithuanian) {
8235 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
8237 if (argv[1]==sym_turkic)
8238 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
8240 rb_raise(rb_eArgError,
"invalid second option");
8244 rb_raise(rb_eArgError,
"too many options");
8245 else if (argv[0]==sym_ascii)
8246 flags |= ONIGENC_CASE_ASCII_ONLY;
8247 else if (argv[0]==sym_fold) {
8248 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
8249 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
8251 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
8254 rb_raise(rb_eArgError,
"invalid option");
8261 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
8267#define CASE_MAPPING_ADDITIONAL_LENGTH 20
8268#ifndef CASEMAP_DEBUG
8269# define CASEMAP_DEBUG 0
8277 OnigUChar space[FLEX_ARY_LEN];
8281mapping_buffer_free(
void *p)
8285 while (current_buffer) {
8286 previous_buffer = current_buffer;
8287 current_buffer = current_buffer->next;
8288 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
8294 {0, mapping_buffer_free,},
8295 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
8303 const OnigUChar *source_current, *source_end;
8304 int target_length = 0;
8305 VALUE buffer_anchor;
8308 size_t buffer_count = 0;
8309 int buffer_length_or_invalid;
8311 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
8313 source_current = (OnigUChar*)RSTRING_PTR(source);
8318 while (source_current < source_end) {
8320 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
8321 if (CASEMAP_DEBUG) {
8322 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
8325 *pre_buffer = current_buffer;
8326 pre_buffer = &current_buffer->next;
8327 current_buffer->next = NULL;
8328 current_buffer->capa =
capa;
8329 buffer_length_or_invalid = enc->case_map(flags,
8330 &source_current, source_end,
8331 current_buffer->space,
8332 current_buffer->space+current_buffer->capa,
8334 if (buffer_length_or_invalid < 0) {
8335 current_buffer =
DATA_PTR(buffer_anchor);
8337 mapping_buffer_free(current_buffer);
8338 rb_raise(rb_eArgError,
"input string invalid");
8340 target_length += current_buffer->used = buffer_length_or_invalid;
8342 if (CASEMAP_DEBUG) {
8343 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
8346 if (buffer_count==1) {
8347 target =
rb_str_new((
const char*)current_buffer->space, target_length);
8350 char *target_current;
8353 target_current = RSTRING_PTR(target);
8354 current_buffer =
DATA_PTR(buffer_anchor);
8355 while (current_buffer) {
8356 memcpy(target_current, current_buffer->space, current_buffer->used);
8357 target_current += current_buffer->used;
8358 current_buffer = current_buffer->next;
8361 current_buffer =
DATA_PTR(buffer_anchor);
8363 mapping_buffer_free(current_buffer);
8368 str_enc_copy_direct(target, source);
8377 const OnigUChar *source_current, *source_end;
8378 OnigUChar *target_current, *target_end;
8379 long old_length = RSTRING_LEN(source);
8380 int length_or_invalid;
8382 if (old_length == 0)
return Qnil;
8384 source_current = (OnigUChar*)RSTRING_PTR(source);
8386 if (source == target) {
8387 target_current = (OnigUChar*)source_current;
8388 target_end = (OnigUChar*)source_end;
8391 target_current = (OnigUChar*)RSTRING_PTR(target);
8395 length_or_invalid = onigenc_ascii_only_case_map(flags,
8396 &source_current, source_end,
8397 target_current, target_end, enc);
8398 if (length_or_invalid < 0)
8399 rb_raise(rb_eArgError,
"input string invalid");
8400 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
8401 fprintf(stderr,
"problem with rb_str_ascii_casemap"
8402 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
8403 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
8404 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
8407 str_enc_copy(target, source);
8413upcase_single(
VALUE str)
8415 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8416 bool modified =
false;
8419 unsigned int c = *(
unsigned char*)s;
8421 if (
'a' <= c && c <=
'z') {
8422 *s =
'A' + (c -
'a');
8450rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
8453 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8455 flags = check_case_options(argc, argv, flags);
8456 str_modify_keep_cr(str);
8457 enc = str_true_enc(str);
8458 if (case_option_single_p(flags, enc, str)) {
8459 if (upcase_single(str))
8460 flags |= ONIGENC_CASE_MODIFIED;
8462 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8463 rb_str_ascii_casemap(str, str, &flags, enc);
8465 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8467 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8489rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
8492 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8495 flags = check_case_options(argc, argv, flags);
8496 enc = str_true_enc(str);
8497 if (case_option_single_p(flags, enc, str)) {
8498 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8499 str_enc_copy_direct(ret, str);
8502 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8504 rb_str_ascii_casemap(str, ret, &flags, enc);
8507 ret = rb_str_casemap(str, &flags, enc);
8514downcase_single(
VALUE str)
8516 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8517 bool modified =
false;
8520 unsigned int c = *(
unsigned char*)s;
8522 if (
'A' <= c && c <=
'Z') {
8523 *s =
'a' + (c -
'A');
8552rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8555 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8557 flags = check_case_options(argc, argv, flags);
8558 str_modify_keep_cr(str);
8559 enc = str_true_enc(str);
8560 if (case_option_single_p(flags, enc, str)) {
8561 if (downcase_single(str))
8562 flags |= ONIGENC_CASE_MODIFIED;
8564 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8565 rb_str_ascii_casemap(str, str, &flags, enc);
8567 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8569 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8591rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8594 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8597 flags = check_case_options(argc, argv, flags);
8598 enc = str_true_enc(str);
8599 if (case_option_single_p(flags, enc, str)) {
8600 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8601 str_enc_copy_direct(ret, str);
8602 downcase_single(ret);
8604 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8606 rb_str_ascii_casemap(str, ret, &flags, enc);
8609 ret = rb_str_casemap(str, &flags, enc);
8637rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8640 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8642 flags = check_case_options(argc, argv, flags);
8643 str_modify_keep_cr(str);
8644 enc = str_true_enc(str);
8645 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8646 if (flags&ONIGENC_CASE_ASCII_ONLY)
8647 rb_str_ascii_casemap(str, str, &flags, enc);
8649 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8651 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8675rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8678 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8681 flags = check_case_options(argc, argv, flags);
8682 enc = str_true_enc(str);
8683 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8684 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8686 rb_str_ascii_casemap(str, ret, &flags, enc);
8689 ret = rb_str_casemap(str, &flags, enc);
8716rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8719 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8721 flags = check_case_options(argc, argv, flags);
8722 str_modify_keep_cr(str);
8723 enc = str_true_enc(str);
8724 if (flags&ONIGENC_CASE_ASCII_ONLY)
8725 rb_str_ascii_casemap(str, str, &flags, enc);
8727 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8729 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8753rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8756 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8759 flags = check_case_options(argc, argv, flags);
8760 enc = str_true_enc(str);
8761 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8762 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8764 rb_str_ascii_casemap(str, ret, &flags, enc);
8767 ret = rb_str_casemap(str, &flags, enc);
8772typedef unsigned char *USTR;
8776 unsigned int now, max;
8788 if (t->p == t->pend)
return -1;
8789 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8792 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8794 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8796 if (t->p < t->pend) {
8797 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8800 if (t->now < 0x80 && c < 0x80) {
8801 rb_raise(rb_eArgError,
8802 "invalid range \"%c-%c\" in string transliteration",
8806 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8810 else if (t->now < c) {
8819 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8820 if (t->now == t->max) {
8825 if (t->now < t->max) {
8841 const unsigned int errc = -1;
8842 unsigned int trans[256];
8844 struct tr trsrc, trrepl;
8846 unsigned int c, c0, last = 0;
8847 int modify = 0, i, l;
8848 unsigned char *s, *send;
8850 int singlebyte = single_byte_optimizable(str);
8854#define CHECK_IF_ASCII(c) \
8855 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8856 (cr = ENC_CODERANGE_VALID) : 0)
8860 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8861 if (RSTRING_LEN(repl) == 0) {
8862 return rb_str_delete_bang(1, &src, str);
8866 e1 = rb_enc_check(str, src);
8867 e2 = rb_enc_check(str, repl);
8872 enc = rb_enc_check(src, repl);
8874 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8875 if (RSTRING_LEN(src) > 1 &&
8876 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8877 trsrc.p + l < trsrc.pend) {
8881 trrepl.p = RSTRING_PTR(repl);
8882 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8883 trsrc.gen = trrepl.gen = 0;
8884 trsrc.now = trrepl.now = 0;
8885 trsrc.max = trrepl.max = 0;
8888 for (i=0; i<256; i++) {
8891 while ((c = trnext(&trsrc, enc)) != errc) {
8896 if (!hash) hash = rb_hash_new();
8900 while ((c = trnext(&trrepl, enc)) != errc)
8903 for (i=0; i<256; i++) {
8904 if (trans[i] != errc) {
8912 for (i=0; i<256; i++) {
8915 while ((c = trnext(&trsrc, enc)) != errc) {
8916 r = trnext(&trrepl, enc);
8917 if (r == errc) r = trrepl.now;
8920 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8923 if (!hash) hash = rb_hash_new();
8931 str_modify_keep_cr(str);
8932 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8933 termlen = rb_enc_mbminlen(enc);
8936 long offset, max = RSTRING_LEN(str);
8937 unsigned int save = -1;
8938 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8943 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8946 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8949 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8951 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8960 if (cflag) c = last;
8963 else if (cflag) c = errc;
8969 if (c != (
unsigned int)-1) {
8975 tlen = rb_enc_codelen(c, enc);
8981 if (enc != e1) may_modify = 1;
8983 if ((offset = t - buf) + tlen > max) {
8984 size_t MAYBE_UNUSED(old) = max + termlen;
8985 max = offset + tlen + (send - s);
8986 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8989 rb_enc_mbcput(c, t, enc);
8990 if (may_modify && memcmp(s, t, tlen) != 0) {
8996 if (!STR_EMBED_P(str)) {
8997 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8999 TERM_FILL((
char *)t, termlen);
9000 RSTRING(str)->as.heap.ptr = (
char *)buf;
9001 STR_SET_LEN(str, t - buf);
9002 STR_SET_NOEMBED(str);
9003 RSTRING(str)->as.heap.aux.capa = max;
9007 c = (
unsigned char)*s;
9008 if (trans[c] != errc) {
9025 long offset, max = (long)((send - s) * 1.2);
9026 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
9031 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
9034 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
9037 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
9039 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
9047 if (cflag) c = last;
9050 else if (cflag) c = errc;
9054 c = cflag ? last : errc;
9057 tlen = rb_enc_codelen(c, enc);
9062 if (enc != e1) may_modify = 1;
9064 if ((offset = t - buf) + tlen > max) {
9065 size_t MAYBE_UNUSED(old) = max + termlen;
9066 max = offset + tlen + (long)((send - s) * 1.2);
9067 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
9071 rb_enc_mbcput(c, t, enc);
9072 if (may_modify && memcmp(s, t, tlen) != 0) {
9080 if (!STR_EMBED_P(str)) {
9081 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
9083 TERM_FILL((
char *)t, termlen);
9084 RSTRING(str)->as.heap.ptr = (
char *)buf;
9085 STR_SET_LEN(str, t - buf);
9086 STR_SET_NOEMBED(str);
9087 RSTRING(str)->as.heap.aux.capa = max;
9093 rb_enc_associate(str, enc);
9112 return tr_trans(str, src, repl, 0);
9159 tr_trans(str, src, repl, 0);
9163#define TR_TABLE_MAX (UCHAR_MAX+1)
9164#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
9166tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
9169 const unsigned int errc = -1;
9170 char buf[TR_TABLE_MAX];
9173 VALUE table = 0, ptable = 0;
9174 int i, l, cflag = 0;
9176 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
9177 tr.gen =
tr.now =
tr.max = 0;
9179 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
9184 for (i=0; i<TR_TABLE_MAX; i++) {
9187 stable[TR_TABLE_MAX] = cflag;
9189 else if (stable[TR_TABLE_MAX] && !cflag) {
9190 stable[TR_TABLE_MAX] = 0;
9192 for (i=0; i<TR_TABLE_MAX; i++) {
9196 while ((c = trnext(&
tr, enc)) != errc) {
9197 if (c < TR_TABLE_MAX) {
9198 buf[(
unsigned char)c] = !cflag;
9203 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
9206 table = ptable ? ptable : rb_hash_new();
9210 table = rb_hash_new();
9215 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
9216 rb_hash_aset(table, key,
Qtrue);
9220 for (i=0; i<TR_TABLE_MAX; i++) {
9221 stable[i] = stable[i] && buf[i];
9223 if (!table && !cflag) {
9230tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
9232 if (c < TR_TABLE_MAX) {
9233 return table[c] != 0;
9239 if (!
NIL_P(rb_hash_lookup(del, v)) &&
9240 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
9244 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
9247 return table[TR_TABLE_MAX] ? TRUE : FALSE;
/*
 * In-place delete.  Builds a selection table from every argument with
 * tr_setup_table(), then rewrites `str` using a read cursor `s` and a write
 * cursor `t`, testing each character with tr_find().
 * Returns Qnil immediately for an empty string (or NULL buffer); returns
 * `str` when `modify` is set.
 * NOTE(review): lines are missing from this listing; the return value for
 * the unmodified case is not visible.
 */
9261rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
9263 char squeez[TR_TABLE_SIZE];
9266 VALUE del = 0, nodel = 0;
9268 int i, ascompat, cr;
9270 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
/* Each argument narrows the selection; i==0 marks the first specification. */
9272 for (i=0; i<argc; i++) {
9276 enc = rb_enc_check(str, s);
9277 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
9280 str_modify_keep_cr(str);
9281 ascompat = rb_enc_asciicompat(enc);
9282 s = t = RSTRING_PTR(str);
/* Bytes below 0x80 in an ASCII-compatible encoding are read directly;
 * otherwise the code point is decoded with rb_enc_codepoint_len(). */
9289 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9300 c = rb_enc_codepoint_len(s, send, &clen, enc);
9302 if (tr_find(c, squeez, del, nodel)) {
/* Only re-encode when the write cursor has fallen behind the read cursor. */
9306 if (t != s) rb_enc_mbcput(c, t, enc);
/* Re-terminate at the write cursor and set the length to what was kept. */
9313 TERM_FILL(t, TERM_LEN(str));
9314 STR_SET_LEN(str, t - RSTRING_PTR(str));
9317 if (modify)
return str;
/*
 * Delegates to rb_str_delete_bang() with the same arguments.
 * NOTE(review): `str` is presumably replaced by a duplicate on a line not
 * shown in this listing, and the return statement is also not visible.
 */
9337rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
9340 rb_str_delete_bang(argc, argv, str);
/*
 * In-place squeeze.  With arguments, a selection table is built through
 * tr_setup_table(); the string is then scanned with a read cursor `s` and a
 * write cursor `t`, comparing each character against the previous one
 * (`save`).  Returns Qnil for an empty string, and `str` when `modify` is
 * set.
 * NOTE(review): many lines are missing from this listing; the bodies of the
 * guarded branches are not visible.
 */
9354rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
9356 char squeez[TR_TABLE_SIZE];
9358 VALUE del = 0, nodel = 0;
9359 unsigned char *s, *send, *t;
9361 int ascompat, singlebyte = single_byte_optimizable(str);
9365 enc = STR_ENC_GET(str);
9368 for (i=0; i<argc; i++) {
9372 enc = rb_enc_check(str, s);
9373 if (singlebyte && !single_byte_optimizable(s))
9375 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
9379 str_modify_keep_cr(str);
9380 s = t = (
unsigned char *)RSTRING_PTR(str);
9381 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
9384 ascompat = rb_enc_asciicompat(enc);
/* Byte-at-a-time scan: the guard holds when the byte differs from the
 * previous one, or (with arguments) is not in the squeeze table. */
9388 unsigned int c = *s++;
9389 if (c != save || (argc > 0 && !squeez[c])) {
/* Multibyte-aware scan: bytes below 0x80 use the byte table directly,
 * other characters are decoded and tested through tr_find(). */
9399 if (ascompat && (c = *s) < 0x80) {
9400 if (c != save || (argc > 0 && !squeez[c])) {
9406 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
9408 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
9409 if (t != s) rb_enc_mbcput(c, t, enc);
/* Re-terminate at the write cursor; the length is only updated when the
 * scan actually shortened the string. */
9418 TERM_FILL((
char *)t, TERM_LEN(str));
9419 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
9420 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
9424 if (modify)
return str;
/*
 * Delegates to rb_str_squeeze_bang() with the same arguments.
 * NOTE(review): `str` is presumably replaced by a duplicate on a line not
 * shown in this listing, and the return statement is also not visible.
 */
9447rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
9450 rb_str_squeeze_bang(argc, argv, str);
9468 return tr_trans(str, src, repl, 1);
9491 tr_trans(str, src, repl, 1);
/*
 * Counts characters of `str` selected by the argument specifications.
 * A fast path handles a single one-byte specification in an
 * ASCII-compatible encoding by comparing raw bytes; otherwise a table is
 * built from all arguments with tr_setup_table() and each character is
 * tested with tr_find().  An empty string yields INT2FIX(0).
 * NOTE(review): lines are missing from this listing, including the final
 * return.
 */
9520rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9522 char table[TR_TABLE_SIZE];
9524 VALUE del = 0, nodel = 0, tstr;
9534 enc = rb_enc_check(str, tstr);
/* Fast path: one-byte spec, ASCII-compatible encoding, reverse match
 * allowed for that byte, and `str` not a broken string. */
9537 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9538 (ptstr = RSTRING_PTR(tstr),
9539 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9540 !is_broken_string(str)) {
9542 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9544 s = RSTRING_PTR(str);
9545 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9548 if (*(
unsigned char*)s++ == c) n++;
/* General path: first spec initialises the table (TRUE), later specs
 * narrow it (FALSE). */
9554 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9555 for (i=1; i<argc; i++) {
9558 enc = rb_enc_check(str, tstr);
9559 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9562 s = RSTRING_PTR(str);
9563 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9565 ascompat = rb_enc_asciicompat(enc);
9569 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9577 c = rb_enc_codepoint_len(s, send, &clen, enc);
9578 if (tr_find(c, table, del, nodel)) {
/*
 * Validates a field-separator value.  A nil `val` yields 0.
 * NOTE(review): the handling of non-nil values is not visible in this
 * listing.
 */
9589rb_fs_check(
VALUE val)
9593 if (
NIL_P(val))
return 0;
/*
 * 256-entry lookup table indexed by byte value: non-zero for bytes 9-13
 * (TAB, LF, VT, FF, CR) and 32 (space), zero for every other byte.
 */
9598static const char isspacetable[256] = {
9599 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9600 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9601 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9602 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9605 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9606 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9610 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9611 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9614 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Table lookup; the cast to unsigned char keeps the index in 0..255 even
 * when `c` is a (possibly negative) plain char. */
9617#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
/*
 * Emits one split field [beg, beg+len) of `str`.
 * While empty_count is non-negative, a zero-length field is not emitted but
 * only counted: the function returns empty_count + 1.  When a later field
 * arrives with empties pending (empty_count > 0), the two do/while loops
 * below flush them; one of them yields empty strings to the block.
 * NOTE(review): the other loop's body and the final return are not visible
 * in this listing -- presumably the array-append variant; confirm.
 */
9620split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9622 if (empty_count >= 0 &&
len == 0) {
9623 return empty_count + 1;
9625 if (empty_count > 0) {
9630 }
while (--empty_count > 0);
9634 rb_yield(str_new_empty_String(str));
9635 }
while (--empty_count > 0);
9649 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
/*
 * Classifies a literal separator string `spat` into a split_type_t.
 * Visible outcomes: SPLIT_TYPE_CHARS; SPLIT_TYPE_AWK when the separator is
 * exactly one space (checked bytewise for ASCII-compatible encodings, and
 * via rb_enc_ascget() otherwise); `default_type` for everything else.
 * NOTE(review): the condition leading to SPLIT_TYPE_CHARS is not visible in
 * this listing.
 */
9653literal_split_pattern(
VALUE spat, split_type_t default_type)
9661 return SPLIT_TYPE_CHARS;
9663 else if (rb_enc_asciicompat(enc)) {
9664 if (
len == 1 && ptr[0] ==
' ') {
9665 return SPLIT_TYPE_AWK;
/* Non-ASCII-compatible encodings: the single character must decode to ' '
 * and span the whole separator (len == l). */
9670 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9671 return SPLIT_TYPE_AWK;
9674 return default_type;
/*
 * Splits `str` by an optional separator and optional limit (scanned with
 * rb_scan_args "02").  The separator is classified into a split_type_t and
 * one of four strategies runs: AWK-style whitespace, literal string,
 * per-character, or the remaining case that works on match registers
 * (BEG/END).  Fields are emitted through the SPLIT_STR macro.  Returns
 * `result` when it is set, otherwise `str`.
 * NOTE(review): many lines of this function are missing from this listing;
 * comments describe only the visible statements.
 */
9687rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9692 split_type_t split_type;
9693 long beg, end, i = 0, empty_count = -1;
9698 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
/* A non-positive limit is treated like no limit for the NIL_P(limit)
 * checks below; `lim` keeps its numeric value. */
9700 if (lim <= 0) limit =
Qnil;
9701 else if (lim == 1) {
9702 if (RSTRING_LEN(str) == 0)
/* empty_count starts at -1 (counting disabled); it is switched to 0 only
 * when there is no limit and lim is zero. */
9713 if (
NIL_P(limit) && !lim) empty_count = 0;
9715 enc = STR_ENC_GET(str);
9716 split_type = SPLIT_TYPE_REGEXP;
9718 spat = get_pat_quoted(spat, 0);
/* No explicit separator: fall back to rb_fs; nil means AWK-style. */
9720 else if (
NIL_P(spat = rb_fs)) {
9721 split_type = SPLIT_TYPE_AWK;
9723 else if (!(spat = rb_fs_check(spat))) {
9724 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9729 if (split_type != SPLIT_TYPE_AWK) {
9734 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9735 if (split_type == SPLIT_TYPE_AWK) {
9737 split_type = SPLIT_TYPE_STRING;
9742 mustnot_broken(spat);
9743 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/* Emit one field, then verify via str_mod_check() that `str` still has the
 * same buffer pointer and length. */
9751#define SPLIT_STR(beg, len) ( \
9752 empty_count = split_string(result, str, beg, len, empty_count), \
9753 str_mod_check(str, str_start, str_len))
9756 char *ptr = RSTRING_PTR(str);
9757 char *
const str_start = ptr;
9758 const long str_len = RSTRING_LEN(str);
9759 char *
const eptr = str_start + str_len;
/* AWK-style: split on runs of whitespace.  ASCII strings are scanned
 * bytewise with ascii_isspace(); others decode code points. */
9760 if (split_type == SPLIT_TYPE_AWK) {
9767 if (is_ascii_string(str)) {
9768 while (ptr < eptr) {
9769 c = (
unsigned char)*ptr++;
9771 if (ascii_isspace(c)) {
9777 if (!
NIL_P(limit) && lim <= i)
break;
9780 else if (ascii_isspace(c)) {
9781 SPLIT_STR(beg, end-beg);
9784 if (!
NIL_P(limit)) ++i;
9792 while (ptr < eptr) {
9795 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9804 if (!
NIL_P(limit) && lim <= i)
break;
9808 SPLIT_STR(beg, end-beg);
9811 if (!
NIL_P(limit)) ++i;
/* Literal string separator: repeated rb_memsearch() from the cursor. */
9819 else if (split_type == SPLIT_TYPE_STRING) {
9820 char *substr_start = ptr;
9821 char *sptr = RSTRING_PTR(spat);
9822 long slen = RSTRING_LEN(spat);
9825 mustnot_broken(str);
9826 enc = rb_enc_check(str, spat);
9827 while (ptr < eptr &&
9828 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9831 if (t != ptr + end) {
9835 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
/* The separator is checked for modification as well. */
9836 str_mod_check(spat, sptr, slen);
9839 if (!
NIL_P(limit) && lim <= ++i)
break;
9841 beg = ptr - str_start;
/* Per-character split: one field per character of precise length n. */
9843 else if (split_type == SPLIT_TYPE_CHARS) {
9847 mustnot_broken(str);
9848 enc = rb_enc_get(str);
9849 while (ptr < eptr &&
9850 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9851 SPLIT_STR(ptr - str_start, n);
9853 if (!
NIL_P(limit) && lim <= ++i)
break;
9855 beg = ptr - str_start;
/* Remaining split type: works on match registers (BEG/END), including
 * zero-width matches and capture groups. */
9859 long len = RSTRING_LEN(str);
9867 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9872 if (start == end && BEG(0) == END(0)) {
9877 else if (last_null == 1) {
9878 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9885 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9891 SPLIT_STR(beg, end-beg);
9892 beg = start = END(0);
/* Each capture group that participated in the match is emitted as its own
 * field; groups with BEG(idx) == -1 are skipped. */
9896 for (idx=1; idx < regs->num_regs; idx++) {
9897 if (BEG(idx) == -1)
continue;
9898 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9900 if (!
NIL_P(limit) && lim <= ++i)
break;
9902 if (match) rb_match_unbusy(match);
/* Trailing field: emitted for a non-empty string when a limit is in
 * effect, when text remains after `beg`, or when lim is negative. */
9904 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9905 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9908 return result ? result : str;
9918 return rb_str_split_m(1, &sep, str);
/* WANTARRAY: when no block is given, allocate a result array with capacity
 * `size`; with a block, evaluates to 0.  The `m` argument is not used in
 * the expansion. */
9921#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
/* ENUM_ELEM: shorthand that forwards to enumerator_element(ary, e). */
9936#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
/*
 * Inspects the end of the range [p, e): steps back one character with
 * rb_enc_prev_char() and, on a second step back, tests for '\r'.
 * NOTE(review): the conditions between these steps and the return value
 * are not visible in this listing -- presumably it returns the end pointer
 * with a trailing "\n" / "\r\n" removed; confirm.
 */
9939chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9941 const char *prev = rb_enc_prev_char(p, e, e, enc);
9944 prev = rb_enc_prev_char(p, e, e, enc);
9945 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9957 RSTRING_LEN(rs) != 1 ||
9958 RSTRING_PTR(rs)[0] !=
'\n')) {
9964#define rb_rs get_rs()
9971 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9972 long pos,
len, rslen;
9978 static ID keywords[1];
9983 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9987 if (!ENUM_ELEM(ary, str)) {
9995 if (!RSTRING_LEN(str))
goto end;
9997 ptr = subptr = RSTRING_PTR(str);
9999 len = RSTRING_LEN(str);
10001 rslen = RSTRING_LEN(rs);
10004 enc = rb_enc_get(str);
10006 enc = rb_enc_check(str, rs);
10011 const char *eol = NULL;
10013 while (subend < pend) {
10014 long chomp_rslen = 0;
10016 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
10018 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
10020 if (eol == subend)
break;
10024 chomp_rslen = -rslen;
10028 if (!subptr) subptr = subend;
10032 }
while (subend < pend);
10033 if (!subptr)
break;
10034 if (rslen == 0) chomp_rslen = 0;
10036 subend - subptr + (chomp ? chomp_rslen : rslen));
10037 if (ENUM_ELEM(ary, line)) {
10038 str_mod_check(str, ptr,
len);
10040 subptr = eol = NULL;
10045 rsptr = RSTRING_PTR(rs);
10046 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
10055 rsptr = RSTRING_PTR(rs);
10056 rslen = RSTRING_LEN(rs);
10059 while (subptr < pend) {
10060 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
10061 if (pos < 0)
break;
10062 hit = subptr + pos;
10064 if (hit != adjusted) {
10068 subend = hit += rslen;
10071 subend = chomp_newline(subptr, subend, enc);
10078 if (ENUM_ELEM(ary, line)) {
10079 str_mod_check(str, ptr,
len);
10084 if (subptr != pend) {
10087 pend = chomp_newline(subptr, pend, enc);
10089 else if (pend - subptr >= rslen &&
10090 memcmp(pend - rslen, rsptr, rslen) == 0) {
10095 ENUM_ELEM(ary, line);
/* Forwards to rb_str_enumerate_lines() with 0 as the array argument (no
 * result array is passed). */
10116rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
10119 return rb_str_enumerate_lines(argc, argv, str, 0);
/* Forwards to rb_str_enumerate_lines(), passing the value of WANTARRAY
 * (a fresh array with capacity 0 when no block is given, otherwise 0). */
10132rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
10134 VALUE ary = WANTARRAY(
"lines", 0);
10135 return rb_str_enumerate_lines(argc, argv, str, ary);
10141 return LONG2FIX(RSTRING_LEN(str));
10149 for (i=0; i<RSTRING_LEN(str); i++) {
10150 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
/* Forwards to rb_str_enumerate_bytes() with 0 as the array argument. */
10168rb_str_each_byte(
VALUE str)
10171 return rb_str_enumerate_bytes(str, 0);
/* Forwards to rb_str_enumerate_bytes(); when no block is given the result
 * array is pre-sized to the byte length of `str`. */
10183rb_str_bytes(
VALUE str)
10185 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
10186 return rb_str_enumerate_bytes(str, ary);
10204 ptr = RSTRING_PTR(str);
10205 len = RSTRING_LEN(str);
10206 enc = rb_enc_get(str);
10209 for (i = 0; i <
len; i += n) {
10210 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
10215 for (i = 0; i <
len; i += n) {
10216 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
/* Forwards to rb_str_enumerate_chars() with 0 as the array argument. */
10237rb_str_each_char(
VALUE str)
10240 return rb_str_enumerate_chars(str, 0);
/* Forwards to rb_str_enumerate_chars() with `ary`.
 * NOTE(review): the declaration of `ary` is not visible in this listing. */
10252rb_str_chars(
VALUE str)
10255 return rb_str_enumerate_chars(str, ary);
/*
 * Enumerates the code points of `str`.  Strings for which
 * single_byte_optimizable() holds are handed to rb_str_enumerate_bytes();
 * otherwise each code point is decoded with rb_enc_codepoint_len() while
 * walking from ptr to end.
 * NOTE(review): how each decoded value is delivered and what is returned
 * are not visible in this listing.
 */
10259rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
10264 const char *ptr, *end;
10267 if (single_byte_optimizable(str))
10268 return rb_str_enumerate_bytes(str, ary);
10271 ptr = RSTRING_PTR(str);
10273 enc = STR_ENC_GET(str);
10275 while (ptr < end) {
10276 c = rb_enc_codepoint_len(ptr, end, &n, enc);
/* Forwards to rb_str_enumerate_codepoints() with 0 as the array argument. */
10297rb_str_each_codepoint(
VALUE str)
10300 return rb_str_enumerate_codepoints(str, 0);
/* Forwards to rb_str_enumerate_codepoints() with `ary`.
 * NOTE(review): the declaration of `ary` is not visible in this listing. */
10312rb_str_codepoints(
VALUE str)
10315 return rb_str_enumerate_codepoints(str, ary);
/*
 * Compiles the grapheme-cluster pattern "\X" for encoding `enc` and returns
 * the resulting regex_t pointer; compilation failure is fatal (rb_fatal).
 * The pattern source defaults to the ASCII bytes of "\X"; for UTF-16/32
 * encodings the CASE_UTF macro supplies the same two characters encoded in
 * the matching byte order.
 * NOTE(review): the function header is not visible in this listing; this is
 * presumably the body of get_reg_grapheme_cluster(), the name used by the
 * callers that store its result in a regex_t pointer.
 * Fix: the first argument of onig_new() had been mis-encoded ("&reg" turned
 * into the registered-trademark sign); it is restored to the address of
 * reg_grapheme_cluster, which onig_new() fills in.
 */
10321 int encidx = rb_enc_to_index(enc);
10323 const OnigUChar source_ascii[] =
"\\X";
10324 const OnigUChar *source = source_ascii;
10325 size_t source_len =
sizeof(source_ascii) - 1;
10328#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
10329#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
10330#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
10331#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
10332#define CASE_UTF(e) \
10333 case ENCINDEX_UTF_##e: { \
10334 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
10335 source = source_UTF_##e; \
10336 source_len = sizeof(source_UTF_##e); \
10339 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
10347 regex_t *reg_grapheme_cluster;
10349 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
10350 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
10352 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
10353 onig_error_code_to_str(message, r, &einfo);
10354 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
10357 return reg_grapheme_cluster;
10363 int encidx = rb_enc_to_index(enc);
10364 static regex_t *reg_grapheme_cluster_utf8 = NULL;
10366 if (encidx == rb_utf8_encindex()) {
10367 if (!reg_grapheme_cluster_utf8) {
10368 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
10371 return reg_grapheme_cluster_utf8;
10380 size_t grapheme_cluster_count = 0;
10382 const char *ptr, *end;
10384 if (!rb_enc_unicode_p(enc)) {
10388 bool cached_reg_grapheme_cluster =
true;
10389 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
10390 if (!reg_grapheme_cluster) {
10391 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
10392 cached_reg_grapheme_cluster =
false;
10395 ptr = RSTRING_PTR(str);
10398 while (ptr < end) {
10399 OnigPosition
len = onig_match(reg_grapheme_cluster,
10400 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
10401 (
const OnigUChar *)ptr, NULL, 0);
10402 if (
len <= 0)
break;
10403 grapheme_cluster_count++;
10407 if (!cached_reg_grapheme_cluster) {
10408 onig_free(reg_grapheme_cluster);
10411 return SIZET2NUM(grapheme_cluster_count);
/*
 * Enumerates grapheme clusters of `str`.  Non-Unicode encodings fall back
 * to rb_str_enumerate_chars().  Otherwise a compiled regex is obtained --
 * from get_cached_reg_grapheme_cluster() when available, else freshly built
 * with get_reg_grapheme_cluster() -- and onig_match() is applied repeatedly
 * at the cursor; the loop stops when a match length is <= 0.
 * NOTE(review): how each cluster is delivered and what is returned are not
 * visible in this listing.
 */
10415rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
10419 const char *ptr0, *ptr, *end;
10421 if (!rb_enc_unicode_p(enc)) {
10422 return rb_str_enumerate_chars(str, ary);
/* Track whether the regex came from the cache so that only an uncached
 * one is released with onig_free() afterwards. */
10427 bool cached_reg_grapheme_cluster =
true;
10428 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
10429 if (!reg_grapheme_cluster) {
10430 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
10431 cached_reg_grapheme_cluster =
false;
10434 ptr0 = ptr = RSTRING_PTR(str);
10437 while (ptr < end) {
10438 OnigPosition
len = onig_match(reg_grapheme_cluster,
10439 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
10440 (
const OnigUChar *)ptr, NULL, 0);
10441 if (
len <= 0)
break;
10446 if (!cached_reg_grapheme_cluster) {
10447 onig_free(reg_grapheme_cluster);
/* Forwards to rb_str_enumerate_grapheme_clusters() with 0 as the array
 * argument. */
10467rb_str_each_grapheme_cluster(
VALUE str)
10470 return rb_str_enumerate_grapheme_clusters(str, 0);
/* Forwards to rb_str_enumerate_grapheme_clusters() with `ary`.
 * NOTE(review): the declaration of `ary` is not visible in this listing. */
10482rb_str_grapheme_clusters(
VALUE str)
10485 return rb_str_enumerate_grapheme_clusters(str, ary);
/*
 * Locates the start of the last character of `str`; an empty string yields
 * 0.  If that character is '\n' (and is not at the very beginning) and the
 * character before it is '\r', the position moves back to the '\r' so the
 * pair is treated as one unit.
 * NOTE(review): the return statement is not visible in this listing --
 * presumably the offset of `p` from the start of the buffer; confirm.
 */
10489chopped_length(
VALUE str)
10492 const char *p, *p2, *beg, *end;
10494 beg = RSTRING_PTR(str);
10495 end = beg + RSTRING_LEN(str);
10496 if (beg >= end)
return 0;
10497 p = rb_enc_prev_char(beg, end, end, enc);
10499 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10500 p2 = rb_enc_prev_char(beg, p, end, enc);
10501 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
/*
 * In-place chop: for a non-empty string, sets the length to
 * chopped_length(str) and writes a fresh terminator at that position.
 * NOTE(review): the return values are not visible in this listing.
 */
10517rb_str_chop_bang(
VALUE str)
10519 str_modify_keep_cr(str);
10520 if (RSTRING_LEN(str) > 0) {
10522 len = chopped_length(str);
10523 STR_SET_LEN(str,
len);
10524 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10543rb_str_chop(
VALUE str)
/*
 * Examines the end of the buffer [p, e) of `str` for a trailing line ending.
 * Encodings whose minimum character length exceeds one byte step back by
 * that width and decode with rb_enc_ascget(); single-byte-minimum encodings
 * inspect raw bytes and check for '\r' before the last byte.
 * NOTE(review): the '\n' tests and the return value are not visible in this
 * listing.
 */
10549smart_chomp(
VALUE str,
const char *e,
const char *p)
10552 if (rb_enc_mbminlen(enc) > 1) {
10557 pp = e - rb_enc_mbminlen(enc);
10560 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10568 if (--e > p && *(e-1) ==
'\r') {
10585 char *pp, *e, *rsptr;
10587 char *
const p = RSTRING_PTR(str);
10588 long len = RSTRING_LEN(str);
10590 if (
len == 0)
return 0;
10593 return smart_chomp(str, e, p);
10596 enc = rb_enc_get(str);
10599 if (rb_enc_mbminlen(enc) > 1) {
10604 pp -= rb_enc_mbminlen(enc);
10607 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10614 while (e > p && *(e-1) ==
'\n') {
10616 if (e > p && *(e-1) ==
'\r')
10622 if (rslen >
len)
return len;
10624 enc = rb_enc_get(rs);
10625 newline = rsptr[rslen-1];
10626 if (rslen == rb_enc_mbminlen(enc)) {
10628 if (newline ==
'\n')
10629 return smart_chomp(str, e, p);
10633 return smart_chomp(str, e, p);
10637 enc = rb_enc_check(str, rs);
10638 if (is_broken_string(rs)) {
10642 if (p[
len-1] == newline &&
10644 memcmp(rsptr, pp, rslen) == 0)) {
10645 if (at_char_boundary(p, pp, e, enc))
10646 return len - rslen;
/*
 * Picks the record separator for chomp from the argument list; the visible
 * statement takes it from argv[0].
 * NOTE(review): the argc handling and any default are not visible in this
 * listing.
 */
10658chomp_rs(
int argc,
const VALUE *argv)
10662 VALUE rs = argv[0];
10674 long olen = RSTRING_LEN(str);
10675 long len = chompped_length(str, rs);
10676 if (
len >= olen)
return Qnil;
10677 str_modify_keep_cr(str);
10678 STR_SET_LEN(str,
len);
10679 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
/*
 * In-place chomp.  Checks that `str` is modifiable, returns Qnil early for
 * an empty string when fewer than two arguments were passed, then resolves
 * the separator with chomp_rs() and delegates to rb_str_chomp_string().
 * NOTE(review): a line between the chomp_rs() call and the return is
 * missing from this listing.
 */
10696rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10699 str_modifiable(str);
10700 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10701 rs = chomp_rs(argc, argv);
10703 return rb_str_chomp_string(str, rs);
/*
 * Non-destructive chomp; resolves the separator with chomp_rs().
 * NOTE(review): the rest of the body is not visible in this listing.
 */
10716rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10718 VALUE rs = chomp_rs(argc, argv);
10726 const char *
const start = s;
10728 if (!s || s >= e)
return 0;
10731 if (single_byte_optimizable(str)) {
10732 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10737 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
/*
 * In-place left strip.  lstrip_offset() gives the number of leading bytes to
 * drop; the remaining olen-loffset bytes are moved to the front of the
 * buffer, the length is updated and a terminator of the encoding's minimum
 * character width is written.
 * NOTE(review): the guard around the move and the return values are not
 * visible in this listing.
 */
10757rb_str_lstrip_bang(
VALUE str)
10761 long olen, loffset;
10763 str_modify_keep_cr(str);
10764 enc = STR_ENC_GET(str);
10766 loffset = lstrip_offset(str, start, start+olen, enc);
10768 long len = olen-loffset;
10769 s = start + loffset;
/* memmove because source and destination overlap within the same buffer. */
10770 memmove(start, s,
len);
10771 STR_SET_LEN(str,
len);
10772 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
/*
 * Non-destructive left strip.  When lstrip_offset() reports nothing to
 * strip (loffset <= 0), a plain duplicate of `str` is returned.
 * NOTE(review): the path that builds the stripped result is not visible in
 * this listing.
 */
10795rb_str_lstrip(
VALUE str)
10800 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10801 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10810 rb_str_check_dummy_enc(enc);
10814 if (!s || s >= e)
return 0;
10818 if (single_byte_optimizable(str)) {
10820 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10825 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
/*
 * In-place right strip.  rstrip_offset() gives the number of trailing bytes
 * to drop; the length becomes olen - roffset and a terminator of the
 * encoding's minimum character width is written at the new end.
 * NOTE(review): the guard around the truncation and the return values are
 * not visible in this listing.
 */
10845rb_str_rstrip_bang(
VALUE str)
10849 long olen, roffset;
10851 str_modify_keep_cr(str);
10852 enc = STR_ENC_GET(str);
10854 roffset = rstrip_offset(str, start, start+olen, enc);
10856 long len = olen - roffset;
10858 STR_SET_LEN(str,
len);
10859 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
/*
 * Non-destructive right strip.  When rstrip_offset() reports nothing to
 * strip (roffset <= 0), a plain duplicate of `str` is returned.
 * NOTE(review): the path that builds the stripped result is not visible in
 * this listing.
 */
10882rb_str_rstrip(
VALUE str)
10886 long olen, roffset;
10888 enc = STR_ENC_GET(str);
10890 roffset = rstrip_offset(str, start, start+olen, enc);
10892 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
/*
 * In-place strip of both ends.  The right offset is computed on the range
 * that starts after the leading bytes already found by lstrip_offset().
 * When either offset is positive the kept bytes are moved to the front, the
 * length is updated and a terminator is written.
 * NOTE(review): lines between `len = olen-roffset` and the memmove are
 * missing from this listing (presumably len is further reduced by loffset);
 * the return values are not visible either.
 */
10908rb_str_strip_bang(
VALUE str)
10911 long olen, loffset, roffset;
10914 str_modify_keep_cr(str);
10915 enc = STR_ENC_GET(str);
10917 loffset = lstrip_offset(str, start, start+olen, enc);
10918 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10920 if (loffset > 0 || roffset > 0) {
10921 long len = olen-roffset;
10924 memmove(start, start + loffset,
len);
10926 STR_SET_LEN(str,
len);
10927 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
/*
 * Non-destructive strip of both ends.  When neither lstrip_offset() nor
 * rstrip_offset() finds anything to remove, a plain duplicate of `str` is
 * returned.
 * NOTE(review): the path that builds the stripped result is not visible in
 * this listing.
 */
10950rb_str_strip(
VALUE str)
10953 long olen, loffset, roffset;
10957 loffset = lstrip_offset(str, start, start+olen, enc);
10958 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10960 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
/*
 * Performs one search of `pat` in `str` starting at *start (via
 * rb_pat_search) and advances *start for the next call.  The visible code
 * computes the match end for a string pattern as pos + RSTRING_LEN(pat),
 * steps *start past one character with rb_enc_fast_mbclen() in one branch,
 * and distinguishes matches without capture groups (num_regs == 1) from
 * those with groups, which are iterated from index 1.
 * NOTE(review): branch conditions and return values are not visible in this
 * listing.
 */
10965scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10968 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10974 end = pos + RSTRING_LEN(pat);
10988 if (RSTRING_LEN(str) > end)
10989 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10998 if (!regs || regs->num_regs == 1) {
11004 for (
int i = 1; i < regs->num_regs; i++) {
11065 long last = -1, prev = 0;
11066 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
11068 pat = get_pat_quoted(pat, 1);
11069 mustnot_broken(str);
11073 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
11078 if (last >= 0) rb_pat_search(pat, str, last, 1);
11083 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
11087 str_mod_check(str, p,
len);
11089 if (last >= 0) rb_pat_search(pat, str, last, 1);
/* Converts `str` to an integer via rb_str_to_inum() with base 16 and the
 * last argument FALSE. */
11113rb_str_hex(
VALUE str)
11115 return rb_str_to_inum(str, 16, FALSE);
/* Converts `str` to an integer via rb_str_to_inum() with base argument -8
 * and the last argument FALSE.
 * NOTE(review): the meaning of the negative base is defined by
 * rb_str_to_inum(), which is not visible here. */
11140rb_str_oct(
VALUE str)
11142 return rb_str_to_inum(str, -8, FALSE);
11145#ifndef HAVE_CRYPT_R
11150 rb_nativethread_lock_t lock;
11151} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
11154crypt_mutex_initialize(
void)
11225# define CRYPT_END() ALLOCV_END(databuf)
11227 extern char *crypt(
const char *,
const char *);
11228# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
11231 const char *s, *saltp;
11234 char salt_8bit_clean[3];
11238 mustnot_wchar(str);
11239 mustnot_wchar(salt);
11241 saltp = RSTRING_PTR(salt);
11242 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
11243 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
11247 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
11248 salt_8bit_clean[0] = saltp[0] & 0x7f;
11249 salt_8bit_clean[1] = saltp[1] & 0x7f;
11250 salt_8bit_clean[2] =
'\0';
11251 saltp = salt_8bit_clean;
11256# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
11257 data->initialized = 0;
11259 res = crypt_r(s, saltp, data);
11261 crypt_mutex_initialize();
11263 res = crypt(s, saltp);
11304 char *ptr, *p, *pend;
11307 unsigned long sum0 = 0;
11312 ptr = p = RSTRING_PTR(str);
11313 len = RSTRING_LEN(str);
11319 str_mod_check(str, ptr,
len);
11322 sum0 += (
unsigned char)*p;
11333 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11334 sum0 &= (((
unsigned long)1)<<bits)-1;
/*
 * Shared implementation of left/right/centre justification; `jflag` is 'l',
 * 'r' or anything else (treated as centre: half of the padding goes left).
 * The pad string defaults to a single space and may be replaced by a
 * caller-supplied `pad`; an empty pad raises ArgumentError
 * ("zero width padding").  If width is negative or the string is already at
 * least `width` characters long, a duplicate of `str` is returned.
 * Otherwise the result is assembled as left padding + str + right padding,
 * with overflow of the byte size rejected ("argument too big").
 * NOTE(review): lines are missing from this listing, including how `n`,
 * `rlen` and `res` are set and the final return.
 */
11354rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11358 long width,
len, flen = 1, fclen = 1;
11361 const char *f =
" ";
11362 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11364 int singlebyte = 1, cr;
11368 enc = STR_ENC_GET(str);
11369 termlen = rb_enc_mbminlen(enc);
/* Custom pad: flen is its byte length, fclen its character length. */
11373 enc = rb_enc_check(str, pad);
11374 f = RSTRING_PTR(pad);
11375 flen = RSTRING_LEN(pad);
11376 fclen = str_strlen(pad, enc);
11377 singlebyte = single_byte_optimizable(pad);
11378 if (flen == 0 || fclen == 0) {
11379 rb_raise(rb_eArgError,
"zero width padding");
11382 len = str_strlen(str, enc);
11383 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11385 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
/* Byte lengths of the partial pad repetition on each side. */
11389 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11390 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11392 size = RSTRING_LEN(str);
/* Overflow-checked computation of the total padding byte count: whole
 * repetitions * flen, plus the two partial pieces, plus room for str. */
11393 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11394 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11395 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11396 rb_raise(rb_eArgError,
"argument too big");
11400 p = RSTRING_PTR(res);
/* Left padding: memset for a one-byte pad, otherwise whole repetitions
 * followed by the partial piece. */
11402 memset(p, *f, llen);
11406 while (llen >= fclen) {
11412 memcpy(p, f, llen2);
11416 memcpy(p, RSTRING_PTR(str), size);
/* Right padding, same scheme as the left side. */
11419 memset(p, *f, rlen);
11423 while (rlen >= fclen) {
11429 memcpy(p, f, rlen2);
11433 TERM_FILL(p, termlen);
11434 STR_SET_LEN(res, p-RSTRING_PTR(res));
/* Left-justify: rb_str_justify() with jflag 'l' (all padding on the right,
 * since the left padding length is 0 for 'l'). */
11457rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11459 return rb_str_justify(argc, argv, str,
'l');
/* Right-justify: rb_str_justify() with jflag 'r'. */
11473rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11475 return rb_str_justify(argc, argv, str,
'r');
/* Centre: rb_str_justify() with jflag 'c' (neither 'l' nor 'r', so half of
 * the padding is placed on the left). */
11490rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11492 return rb_str_justify(argc, argv, str,
'c');
11508 sep = get_pat_quoted(sep, 0);
11520 pos = rb_str_index(str, sep, 0);
11521 if (pos < 0)
goto failed;
11526 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11529 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11543 long pos = RSTRING_LEN(str);
11545 sep = get_pat_quoted(sep, 0);
11558 pos = rb_str_rindex(str, sep, pos);
11567 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11569 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
/*
 * Tests whether `str` starts with any of the arguments.  Each argument is
 * tried in turn: one branch uses rb_reg_start_with_p(); the string branch
 * returns Qtrue for an empty argument, skips arguments longer than `str`,
 * requires the prefix to end on a character boundary and compares bytes
 * with memcmp().
 * NOTE(review): branch conditions and the remaining return statements are
 * not visible in this listing.
 */
11581rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11585 for (i=0; i<argc; i++) {
11586 VALUE tmp = argv[i];
11588 if (rb_reg_start_with_p(tmp, str))
11592 const char *p, *s, *e;
11597 enc = rb_enc_check(str, tmp);
11598 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11599 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11600 p = RSTRING_PTR(str);
11603 if (!at_char_right_boundary(p, s, e, enc))
11605 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
/*
 * Tests whether `str` ends with any of the arguments.  For each argument:
 * an empty one yields Qtrue, one longer than `str` is skipped, the
 * candidate suffix start `s` must lie on a character boundary, and the
 * bytes from `s` are compared with memcmp().
 * NOTE(review): how `s` and `e` are set and the remaining return statements
 * are not visible in this listing.
 */
11621rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11625 for (i=0; i<argc; i++) {
11626 VALUE tmp = argv[i];
11627 const char *p, *s, *e;
11632 enc = rb_enc_check(str, tmp);
11633 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11634 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11635 p = RSTRING_PTR(str);
11638 if (!at_char_boundary(p, s, e, enc))
11640 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
/*
 * Computes how many leading bytes of `str` would be removed for `prefix`;
 * 0 means "no match".  Returns 0 for an empty prefix, a prefix longer than
 * `str`, or a byte mismatch.  Encoding compatibility is enforced with
 * rb_enc_check() unless the prefix is a broken string and both encodings
 * are ASCII-compatible.  After a byte match, additional checks apply for
 * broken prefixes and for the character boundary right after the prefix.
 * NOTE(review): the outcomes of those final checks and the success return
 * are not visible in this listing.
 */
11656deleted_prefix_length(
VALUE str,
VALUE prefix)
11658 const char *strptr, *prefixptr;
11659 long olen, prefixlen;
11664 if (!is_broken_string(prefix) ||
11665 !rb_enc_asciicompat(enc) ||
11666 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11667 enc = rb_enc_check(str, prefix);
11671 prefixlen = RSTRING_LEN(prefix);
11672 if (prefixlen <= 0)
return 0;
11673 olen = RSTRING_LEN(str);
11674 if (olen < prefixlen)
return 0;
11675 strptr = RSTRING_PTR(str);
11676 prefixptr = RSTRING_PTR(prefix);
11677 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11678 if (is_broken_string(prefix)) {
11679 if (!is_broken_string(str)) {
11683 const char *strend = strptr + olen;
11684 const char *after_prefix = strptr + prefixlen;
11685 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
/*
 * In-place prefix removal.  Returns Qnil when deleted_prefix_length()
 * reports no match (<= 0).
 * NOTE(review): the actual removal and the success return are not visible
 * in this listing.
 */
11705rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11708 str_modify_keep_cr(str);
11710 prefixlen = deleted_prefix_length(str, prefix);
11711 if (prefixlen <= 0)
return Qnil;
/*
 * Non-destructive prefix removal.  With no match a duplicate of `str` is
 * returned; otherwise the result is the subsequence of `str` that starts
 * after the prefix and runs to the end.
 */
11725rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11729 prefixlen = deleted_prefix_length(str, prefix);
11730 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11732 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
/*
 * Computes how many trailing bytes of `str` would be removed for `suffix`;
 * 0 means "no match".  Returns 0 for a broken suffix, an empty suffix, a
 * suffix longer than `str`, a byte mismatch at the tail, or when the suffix
 * would start in the middle of a character.
 * NOTE(review): the success return is not visible in this listing.
 */
11745deleted_suffix_length(
VALUE str,
VALUE suffix)
11747 const char *strptr, *suffixptr;
11748 long olen, suffixlen;
11752 if (is_broken_string(suffix))
return 0;
11753 enc = rb_enc_check(str, suffix);
11756 suffixlen = RSTRING_LEN(suffix);
11757 if (suffixlen <= 0)
return 0;
11758 olen = RSTRING_LEN(str);
11759 if (olen < suffixlen)
return 0;
11760 strptr = RSTRING_PTR(str);
11761 suffixptr = RSTRING_PTR(suffix);
11762 const char *strend = strptr + olen;
11763 const char *before_suffix = strend - suffixlen;
11764 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11765 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
/*
 * In-place suffix removal.  Returns Qnil when deleted_suffix_length()
 * reports no match; otherwise truncates `str` by the suffix length and
 * writes a fresh terminator at the new end.
 * str_modifiable() is checked up front, while the modifying call
 * str_modify_keep_cr() happens only once a match is known.
 * NOTE(review): the success return is not visible in this listing.
 */
11780rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11782 long olen, suffixlen,
len;
11783 str_modifiable(str);
11785 suffixlen = deleted_suffix_length(str, suffix);
11786 if (suffixlen <= 0)
return Qnil;
11788 olen = RSTRING_LEN(str);
11789 str_modify_keep_cr(str);
11790 len = olen - suffixlen;
11791 STR_SET_LEN(str,
len);
11792 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
/*
 * Non-destructive suffix removal.  With no match a duplicate of `str` is
 * returned; otherwise the result is the leading subsequence of `str` that
 * excludes the suffix bytes.
 */
11808rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11812 suffixlen = deleted_suffix_length(str, suffix);
11813 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11815 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11822 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11830 val = rb_fs_check(val);
11833 "value of %"PRIsVALUE
" must be String or Regexp",
11837 rb_warn_deprecated(
"'$;'", NULL);
11854 str_modifiable(str);
11857 int idx = rb_enc_to_index(encoding);
11864 rb_enc_associate_index(str, idx);
11888 if (STR_EMBED_P(str)) {
11889 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11894 str_replace_shared_without_enc(str2, str);
11896 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11929rb_str_valid_encoding_p(
VALUE str)
11949rb_str_is_ascii_only_p(
VALUE str)
11959 static const char ellipsis[] =
"...";
11960 const long ellipsislen =
sizeof(ellipsis) - 1;
11962 const long blen = RSTRING_LEN(str);
11963 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11964 VALUE estr, ret = 0;
11967 if (
len * rb_enc_mbminlen(enc) >= blen ||
11971 else if (
len <= ellipsislen ||
11973 if (rb_enc_asciicompat(enc)) {
11975 rb_enc_associate(ret, enc);
11982 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11987 rb_enc_from_encoding(enc), 0,
Qnil);
12000 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
12006 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
12025 if (enc == STR_ENC_GET(str)) {
12030 return enc_str_scrub(enc, str, repl, cr);
12038 const char *rep, *p, *e, *p1, *sp;
12044 rb_raise(rb_eArgError,
"both of block and replacement given");
12051 if (!
NIL_P(repl)) {
12052 repl = str_compat_and_valid(repl, enc);
12055 if (rb_enc_dummy_p(enc)) {
12058 encidx = rb_enc_to_index(enc);
12060#define DEFAULT_REPLACE_CHAR(str) do { \
12061 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
12062 rep = replace; replen = (int)sizeof(replace); \
12065 slen = RSTRING_LEN(str);
12066 p = RSTRING_PTR(str);
12071 if (rb_enc_asciicompat(enc)) {
12077 else if (!
NIL_P(repl)) {
12078 rep = RSTRING_PTR(repl);
12079 replen = RSTRING_LEN(repl);
12082 else if (encidx == rb_utf8_encindex()) {
12083 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
12087 DEFAULT_REPLACE_CHAR(
"?");
12092 p = search_nonascii(p, e);
12097 int ret = rb_enc_precise_mbclen(p, e, enc);
12116 if (e - p < clen) clen = e - p;
12123 for (; clen > 1; clen--) {
12124 ret = rb_enc_precise_mbclen(q, q + clen, enc);
12135 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
12136 str_mod_check(str, sp, slen);
12137 repl = str_compat_and_valid(repl, enc);
12144 p = search_nonascii(p, e);
12170 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
12171 str_mod_check(str, sp, slen);
12172 repl = str_compat_and_valid(repl, enc);
12181 long mbminlen = rb_enc_mbminlen(enc);
12185 else if (!
NIL_P(repl)) {
12186 rep = RSTRING_PTR(repl);
12187 replen = RSTRING_LEN(repl);
12189 else if (encidx == ENCINDEX_UTF_16BE) {
12190 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
12192 else if (encidx == ENCINDEX_UTF_16LE) {
12193 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
12195 else if (encidx == ENCINDEX_UTF_32BE) {
12196 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
12198 else if (encidx == ENCINDEX_UTF_32LE) {
12199 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
12202 DEFAULT_REPLACE_CHAR(
"?");
12206 int ret = rb_enc_precise_mbclen(p, e, enc);
12219 if (e - p < clen) clen = e - p;
12220 if (clen <= mbminlen * 2) {
12225 for (; clen > mbminlen; clen-=mbminlen) {
12226 ret = rb_enc_precise_mbclen(q, q + clen, enc);
12236 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
12237 str_mod_check(str, sp, slen);
12238 repl = str_compat_and_valid(repl, enc);
12263 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
12264 str_mod_check(str, sp, slen);
12265 repl = str_compat_and_valid(repl, enc);
12301str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
/* File-scope state for the Unicode normalization entry points: the two IDs
 * are passed as the method to call on mUnicodeNormalize, which is the
 * receiver of rb_funcallv() in unicode_normalize_common().
 * NOTE(review): where these are initialised is not visible in this listing. */
12309static ID id_normalize;
12310static ID id_normalized_p;
12311static VALUE mUnicodeNormalize;
/*
 * Common dispatcher for the Unicode normalization methods.  On first use it
 * requires "unicode_normalize/normalize.rb" (guarded by a function-local
 * static flag), then calls method `id` on mUnicodeNormalize with argc+1
 * arguments taken from argv2.
 * NOTE(review): the construction of argv2 is not visible in this listing --
 * presumably `str` followed by argv; confirm.
 */
12314unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12316 static int UnicodeNormalizeRequired = 0;
12319 if (!UnicodeNormalizeRequired) {
12320 rb_require(
"unicode_normalize/normalize.rb");
12321 UnicodeNormalizeRequired = 1;
12325 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
/* Returns the result of unicode_normalize_common() called with
 * id_normalize. */
12362rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12364 return unicode_normalize_common(argc, argv, str, id_normalize);
/* In-place variant: computes the normalized form with
 * unicode_normalize_common() and replaces the contents of `str` with it via
 * rb_str_replace(), whose result is returned. */
12378rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12380 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
/* Returns the result of unicode_normalize_common() called with
 * id_normalized_p. */
12407rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12409 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12541#define sym_equal rb_obj_equal
/*
 * Scans the bytes [s, send) in encoding `enc`: measures a character with
 * rb_enc_precise_mbclen() and decodes it with rb_enc_mbc_to_codepoint().
 * NOTE(review): the loop, the per-character test and the return value are
 * not visible in this listing; callers negate the result to decide whether
 * a string needs escaping.
 */
12544sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12548 int c = rb_enc_precise_mbclen(s, send, enc);
12552 c = rb_enc_mbc_to_codepoint(s, send, enc);
12560rb_str_symname_p(
VALUE sym)
12565 rb_encoding *resenc = rb_default_internal_encoding();
12567 if (resenc == NULL) resenc = rb_default_external_encoding();
12568 enc = STR_ENC_GET(sym);
12569 ptr = RSTRING_PTR(sym);
12570 len = RSTRING_LEN(sym);
12571 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12579rb_str_quote_unprintable(
VALUE str)
12587 resenc = rb_default_internal_encoding();
12588 if (resenc == NULL) resenc = rb_default_external_encoding();
12589 enc = STR_ENC_GET(str);
12590 ptr = RSTRING_PTR(str);
12591 len = RSTRING_LEN(str);
12592 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12593 !sym_printable(ptr, ptr +
len, enc)) {
12594 return rb_str_escape(str);
12600rb_id_quote_unprintable(
ID id)
12602 VALUE str = rb_id2str(
id);
12603 if (!rb_str_symname_p(str)) {
12604 return rb_str_escape(str);
12622sym_inspect(
VALUE sym)
12629 if (!rb_str_symname_p(str)) {
12631 len = RSTRING_LEN(str);
12632 rb_str_resize(str,
len + 1);
12633 dest = RSTRING_PTR(str);
12634 memmove(dest + 1, dest,
len);
12638 VALUE orig_str = str;
12640 len = RSTRING_LEN(orig_str);
12641 str = rb_enc_str_new(0,
len + 1, enc);
12644 ptr = RSTRING_PTR(orig_str);
12645 dest = RSTRING_PTR(str);
12646 memcpy(dest + 1, ptr,
len);
12666rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12671 rb_raise(rb_eArgError,
"no receiver given");
12768 return rb_str_match(
rb_sym2str(sym), other);
12783sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12785 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12798sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12800 return rb_str_match_m_p(argc, argv, sym);
12818 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12829sym_length(
VALUE sym)
12843sym_empty(
VALUE sym)
12877sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12893sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12909sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12923sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12925 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12938sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12940 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12952sym_encoding(
VALUE sym)
12958string_for_symbol(
VALUE name)
12963 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12977 name = string_for_symbol(name);
12978 return rb_intern_str(name);
12987 name = string_for_symbol(name);
13011 return rb_fstring(str);
13018 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
13030 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
13031 rb_enc_autoload(enc);
13035 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
13041 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
13042 rb_enc_autoload(enc);
13046 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
13057rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
13062 if (RB_LIKELY(code >= 0 && code < 0xff)) {
13063 rb_str_buf_cat_byte(str, (
char) code);
13077 for (
unsigned int i = 0; i < fstring_table->capacity; i++) {
13078 VALUE str = fstring_table->entries[i].str;
13079 if (!str)
continue;
13247 rb_gc_register_address(&rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RUBY_ATOMIC_VALUE_CAS(var, oldval, newval)
Identical to RUBY_ATOMIC_CAS, except it expects its arguments to be VALUE.
#define RUBY_ATOMIC_VALUE_SET(var, val)
Identical to RUBY_ATOMIC_SET, except it expects its arguments to be VALUE.
std::atomic< unsigned > rb_atomic_t
Type that is eligible for atomic operations.
#define RUBY_ATOMIC_FETCH_ADD(var, val)
Atomically replaces the value pointed by var with the result of addition of val to the old value of v...
#define RUBY_ATOMIC_VALUE_EXCHANGE(var, val)
Identical to RUBY_ATOMIC_EXCHANGE, except it expects its arguments to be VALUE.
#define RUBY_ATOMIC_DEC(var)
Atomically decrements the value pointed by var.
#define RUBY_ATOMIC_LOAD(var)
Atomic load.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single bits of symbols that have ever interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
st_index_t rb_ivar_count(VALUE obj)
Number of instance variables defined on an object.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Define a function-backended global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
#define TypedData_Make_Struct(klass, type, data_type, sval)
Identical to TypedData_Wrap_Struct, except it allocates a new data region internally instead of takin...
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
union RString::@51::@52::@54 aux
Auxiliary info.
long len
Length of the string, not including terminating NUL character.
struct RString::@51::@53 embed
Embedded contents.
VALUE shared
Parent of the string.
union RString::@51 as
String's specific fields.
char * ptr
Pointer to the contents of the string.
struct RString::@51::@52 heap
Strings that use separated memory region for contents use this pattern.
This is the struct that holds necessary info for a struct.
const char * wrap_struct_name
Name of structs of this kind.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.