14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
/*
 * String-specific constants. Apart from RUBY_MAX_CHAR_LEN, every name below
 * aliases one of the generic FL_USERn object flag bits. Uses visible in this
 * file:
 *  - STR_PRECOMPUTED_HASH: set by str_store_precomputed_hash() after it
 *    copies a hash value to RSTRING_END(str) + TERM_LEN(str).
 *  - STR_SHARED_ROOT: set by STR_SET_SHARED() on the string that another
 *    string's aux.shared field is made to reference.
 *  - STR_BORROWED: set by STR_SET_SHARED() on such a root when its class
 *    is 0.
 *  - STR_TMPLOCK: tested by rb_check_lockedtmp().
 *  - STR_NOFREE: set by str_new_static(); rb_str_memsize() reports
 *    STR_HEAP_SIZE only when STR_NOEMBED is set and both STR_SHARED and
 *    STR_NOFREE are clear.
 *  - STR_FAKESTR: STR_SET_SHARED() does nothing for a string carrying it.
 */
/* NOTE(review): presumably the maximum byte length of one character -- confirm. */
131#define RUBY_MAX_CHAR_LEN 16
132#define STR_PRECOMPUTED_HASH FL_USER4
133#define STR_SHARED_ROOT FL_USER5
134#define STR_BORROWED FL_USER6
135#define STR_TMPLOCK FL_USER7
136#define STR_NOFREE FL_USER18
137#define STR_FAKESTR FL_USER19
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
/*
 * Number of terminator bytes for str: 1 when str_enc_fastpath(str) holds,
 * otherwise rb_enc_mbminlen() of the encoding looked up from the string's
 * encoding index. Note that str is evaluated more than once.
 */
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 FL_SET((shared_str), STR_SHARED_ROOT); \
209 if (RBASIC_CLASS((shared_str)) == 0) \
210 FL_SET_RAW((shared_str), STR_BORROWED); \
/* The as.heap.ptr field of str (an lvalue-style accessor, no embed check). */
214#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/*
 * as.heap.aux.capa plus TERM_LEN(str), as a size_t. This is the size passed
 * to ruby_sized_xfree() / SIZED_REALLOC_N() alongside the heap pointer
 * elsewhere in this file.
 */
215#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Shorthand for get_encoding(str). */
218#define STR_ENC_GET(str) get_encoding(str)
220#if !defined SHARABLE_MIDDLE_SUBSTRING
221# define SHARABLE_MIDDLE_SUBSTRING 0
223#if !SHARABLE_MIDDLE_SUBSTRING
224#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
226#define SHARABLE_SUBSTRING_P(beg, len, end) 1
231str_embed_capa(
VALUE str)
233 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
237rb_str_reembeddable_p(
VALUE str)
239 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
243rb_str_embed_size(
long capa,
long termlen)
251rb_str_size_as_embedded(
VALUE str)
254 if (STR_EMBED_P(str)) {
256 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
258 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
262 else if (rb_str_reembeddable_p(str)) {
264 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
266 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
269 real_size =
sizeof(
struct RString);
276STR_EMBEDDABLE_P(
long len,
long termlen)
278 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
/*
 * Forward declarations for static helpers whose definitions appear further
 * down in this file but which are called before that point.
 */
/* Called with copy_encoding FALSE by rb_str_tmp_frozen_acquire() and TRUE by the klass-taking frozen constructor. */
283static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
/* Its definition sets STR_NOFREE on the resulting string and associates encindex with it. */
284static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
/* str_make_independent() calls this with expand == 0 and termlen == TERM_LEN(str). */
286static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
/* Its definition checks STR_UNMODIFIABLE_MASK and then calls rb_check_lockedtmp() and rb_check_frozen(). */
287static inline void str_modifiable(
VALUE str);
292str_make_independent(
VALUE str)
294 long len = RSTRING_LEN(str);
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
/*
 * Forward declaration; defined later in this file. rb_str_make_independent()
 * calls str_make_independent() only when this returns non-zero.
 */
299static inline int str_dependent_p(
VALUE str);
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_LEN(str,
len);
322 memcpy(RSTRING_PTR(str), buf,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
330rb_debug_rstring_null_ptr(
const char *func)
332 fprintf(stderr,
"%s is returning NULL!! "
333 "SIGSEGV is highly expected to follow immediately.\n"
334 "If you could reproduce, attach your debugger here, "
335 "and look at the passed string.\n",
340static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
343get_encoding(
VALUE str)
349mustnot_broken(
VALUE str)
351 if (is_broken_string(str)) {
352 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
357mustnot_wchar(
VALUE str)
360 if (rb_enc_mbminlen(enc) > 1) {
361 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
/*
 * Forward declaration; defined later in this file, where it hands str to
 * rb_concurrent_set_find_or_insert() on the fstring table and passes
 * force_precompute_hash along in the argument struct.
 */
365static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
367#if SIZEOF_LONG == SIZEOF_VOIDP
368#define PRECOMPUTED_FAKESTR_HASH 1
373BARE_STRING_P(
VALUE str)
378static inline st_index_t
379str_do_hash(
VALUE str)
381 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
383 if (e && !is_ascii_string(str)) {
390str_store_precomputed_hash(
VALUE str, st_index_t hash)
396 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
397 size_t free_bytes = str_embed_capa(str) - used_bytes;
401 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
403 FL_SET(str, STR_PRECOMPUTED_HASH);
416 if (
FL_TEST(str, RSTRING_FSTR))
419 bare = BARE_STRING_P(str);
421 if (STR_EMBED_P(str)) {
426 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
433 rb_str_resize(str, RSTRING_LEN(str));
435 fstr = register_fstring(str,
false,
false);
438 str_replace_shared_without_enc(str, fstr);
445static VALUE fstring_table_obj;
448fstring_concurrent_set_hash(
VALUE str)
450#ifdef PRECOMPUTED_FAKESTR_HASH
454 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
471 const char *aptr, *bptr;
478 return (alen == blen &&
480 memcmp(aptr, bptr, alen) == 0);
485 bool force_precompute_hash;
489fstring_concurrent_set_create(
VALUE str,
void *data)
499 long len = RSTRING_LEN(str);
500 long capa =
len +
sizeof(st_index_t);
501 int term_len = TERM_LEN(str);
503 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
505 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
506 STR_SET_LEN(new_str, RSTRING_LEN(str));
508 rb_enc_copy(new_str, str);
509 str_store_precomputed_hash(new_str, str_do_hash(str));
513 rb_enc_copy(new_str, str);
514#ifdef PRECOMPUTED_FAKESTR_HASH
515 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
516 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
530 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
533 if (STR_SHARED_P(str)) {
535 str_make_independent(str);
538 if (!BARE_STRING_P(str)) {
544 RBASIC(str)->flags |= RSTRING_FSTR;
546 RB_OBJ_SET_SHAREABLE(str);
560 .hash = fstring_concurrent_set_hash,
561 .cmp = fstring_concurrent_set_cmp,
562 .create = fstring_concurrent_set_create,
567Init_fstring_table(
void)
569 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
570 rb_gc_register_address(&fstring_table_obj);
574register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
578 .force_precompute_hash = force_precompute_hash
581#if SIZEOF_VOIDP == SIZEOF_LONG
585 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
589 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
591 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
603rb_obj_is_fstring_table(
VALUE obj)
607 return obj == fstring_table_obj;
611rb_gc_free_fstring(
VALUE obj)
613 ASSERT_vm_locking_with_barrier();
619 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
621 RB_DEBUG_COUNTER_INC(obj_str_fstr);
627rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
629 if (fstring_table_obj) {
630 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
635setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
638 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
651 return (
VALUE)fake_str;
660 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
669rb_fstring_new(
const char *ptr,
long len)
671 struct RString fake_str = {RBASIC_INIT};
672 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
678 struct RString fake_str = {RBASIC_INIT};
679 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
683rb_fstring_cstr(
const char *
ptr)
685 return rb_fstring_new(
ptr, strlen(
ptr));
689single_byte_optimizable(
VALUE str)
693 case ENCINDEX_ASCII_8BIT:
694 case ENCINDEX_US_ASCII:
716static inline const char *
717search_nonascii(
const char *p,
const char *e)
721#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
722# if SIZEOF_UINTPTR_T == 8
723# define NONASCII_MASK UINT64_C(0x8080808080808080)
724# elif SIZEOF_UINTPTR_T == 4
725# define NONASCII_MASK UINT32_C(0x80808080)
727# error "don't know what to do."
730# if SIZEOF_UINTPTR_T == 8
731# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
732# elif SIZEOF_UINTPTR_T == 4
733# define NONASCII_MASK 0x80808080UL
735# error "don't know what to do."
739 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
740#if !UNALIGNED_WORD_ACCESS
741 if ((uintptr_t)p % SIZEOF_VOIDP) {
742 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
747 case 7:
if (p[-7]&0x80)
return p-7;
748 case 6:
if (p[-6]&0x80)
return p-6;
749 case 5:
if (p[-5]&0x80)
return p-5;
750 case 4:
if (p[-4]&0x80)
return p-4;
752 case 3:
if (p[-3]&0x80)
return p-3;
753 case 2:
if (p[-2]&0x80)
return p-2;
754 case 1:
if (p[-1]&0x80)
return p-1;
759#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
760#define aligned_ptr(value) \
761 __builtin_assume_aligned((value), sizeof(uintptr_t))
763#define aligned_ptr(value) (value)
766 t = (e - (SIZEOF_VOIDP-1));
768 for (;s < t; s +=
sizeof(uintptr_t)) {
770 memcpy(&word, s,
sizeof(word));
771 if (word & NONASCII_MASK) {
772#ifdef WORDS_BIGENDIAN
773 return (
const char *)s + (nlz_intptr(word&NONASCII_MASK)>>3);
775 return (
const char *)s + (ntz_intptr(word&NONASCII_MASK)>>3);
785 case 7:
if (e[-7]&0x80)
return e-7;
786 case 6:
if (e[-6]&0x80)
return e-6;
787 case 5:
if (e[-5]&0x80)
return e-5;
788 case 4:
if (e[-4]&0x80)
return e-4;
790 case 3:
if (e[-3]&0x80)
return e-3;
791 case 2:
if (e[-2]&0x80)
return e-2;
792 case 1:
if (e[-1]&0x80)
return e-1;
800 const char *e = p +
len;
802 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
804 p = search_nonascii(p, e);
808 if (rb_enc_asciicompat(enc)) {
809 p = search_nonascii(p, e);
812 int ret = rb_enc_precise_mbclen(p, e, enc);
816 p = search_nonascii(p, e);
822 int ret = rb_enc_precise_mbclen(p, e, enc);
838 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
841 p = search_nonascii(p, e);
845 else if (rb_enc_asciicompat(enc)) {
846 p = search_nonascii(p, e);
852 int ret = rb_enc_precise_mbclen(p, e, enc);
859 p = search_nonascii(p, e);
865 int ret = rb_enc_precise_mbclen(p, e, enc);
890 rb_enc_set_index(str1, rb_enc_get_index(str2));
898rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
903 str_enc_copy(dest, src);
904 if (RSTRING_LEN(dest) == 0) {
905 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
916 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
917 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
928rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
930 str_enc_copy(dest, src);
937 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
943 return enc_coderange_scan(str, enc);
952 cr = enc_coderange_scan(str, get_encoding(str));
959rb_enc_str_asciicompat(
VALUE str)
962 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
970 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
979str_mod_check(
VALUE s,
const char *p,
long len)
981 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
987str_capacity(
VALUE str,
const int termlen)
989 if (STR_EMBED_P(str)) {
990 return str_embed_capa(str) - termlen;
992 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
996 return RSTRING(str)->as.heap.aux.capa;
1003 return str_capacity(str, TERM_LEN(str));
1007must_not_null(
const char *
ptr)
1010 rb_raise(rb_eArgError,
"NULL pointer given");
1015str_alloc_embed(
VALUE klass,
size_t capa)
1017 size_t size = rb_str_embed_size(
capa, 0);
1021 NEWOBJ_OF(str,
struct RString, klass,
1025 str->as.embed.ary[0] = 0;
1031str_alloc_heap(
VALUE klass)
1033 NEWOBJ_OF(str,
struct RString, klass,
1037 str->as.heap.aux.capa = 0;
1038 str->as.heap.ptr = NULL;
1044empty_str_alloc(
VALUE klass)
1046 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1047 VALUE str = str_alloc_embed(klass, 0);
1048 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1059 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1063 enc = rb_ascii8bit_encoding();
1066 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1068 int termlen = rb_enc_mbminlen(enc);
1070 if (STR_EMBEDDABLE_P(
len, termlen)) {
1071 str = str_alloc_embed(klass,
len + termlen);
1077 str = str_alloc_heap(klass);
1083 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1086 rb_enc_raw_set(str, enc);
1089 memcpy(RSTRING_PTR(str),
ptr,
len);
1092 memset(RSTRING_PTR(str), 0,
len);
1095 STR_SET_LEN(str,
len);
1096 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1103 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1138 __msan_unpoison_string(
ptr);
1158 if (rb_enc_mbminlen(enc) != 1) {
1159 rb_raise(rb_eArgError,
"wchar encoding given");
1161 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1165str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1170 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1174 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1177 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1178 str = str_alloc_heap(klass);
1182 RBASIC(str)->flags |= STR_NOFREE;
1183 rb_enc_associate_index(str, encindex);
1212static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1214 int ecflags,
VALUE ecopts);
1219 int encidx = rb_enc_to_index(enc);
1220 if (rb_enc_get_index(str) == encidx)
1221 return is_ascii_string(str);
1232 if (!to)
return str;
1233 if (!from) from = rb_enc_get(str);
1234 if (from == to)
return str;
1235 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1236 rb_is_ascii8bit_enc(to)) {
1237 if (STR_ENC_GET(str) != to) {
1239 rb_enc_associate(str, to);
1246 from, to, ecflags, ecopts);
1247 if (
NIL_P(newstr)) {
1255rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1260 olen = RSTRING_LEN(newstr);
1261 if (ofs < -olen || olen < ofs)
1263 if (ofs < 0) ofs += olen;
1265 STR_SET_LEN(newstr, ofs);
1269 rb_str_modify(newstr);
1270 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1278 STR_SET_LEN(str, 0);
1279 rb_enc_associate(str, enc);
1285str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1287 int ecflags,
VALUE ecopts)
1292 VALUE econv_wrapper;
1293 const unsigned char *start, *sp;
1294 unsigned char *dest, *dp;
1295 size_t converted_output = (size_t)ofs;
1300 RBASIC_CLEAR_CLASS(econv_wrapper);
1302 if (!ec)
return Qnil;
1305 sp = (
unsigned char*)
ptr;
1307 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1308 (dp = dest + converted_output),
1312 size_t converted_input = sp - start;
1313 size_t rest =
len - converted_input;
1314 converted_output = dp - dest;
1316 if (converted_input && converted_output &&
1317 rest < (LONG_MAX / converted_output)) {
1318 rest = (rest * converted_output) / converted_input;
1323 olen += rest < 2 ? 2 : rest;
1324 rb_str_resize(newstr, olen);
1331 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1333 rb_enc_associate(newstr, to);
1352 const int eidx = rb_enc_to_index(eenc);
1355 return rb_enc_str_new(
ptr,
len, eenc);
1359 if ((eidx == rb_ascii8bit_encindex()) ||
1360 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1364 ienc = rb_default_internal_encoding();
1365 if (!ienc || eenc == ienc) {
1366 return rb_enc_str_new(
ptr,
len, eenc);
1370 if ((eidx == rb_ascii8bit_encindex()) ||
1371 (eidx == rb_usascii_encindex()) ||
1372 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1373 return rb_enc_str_new(
ptr,
len, ienc);
1376 str = rb_enc_str_new(NULL, 0, ienc);
1379 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1380 rb_str_initialize(str,
ptr,
len, eenc);
1388 int eidx = rb_enc_to_index(eenc);
1389 if (eidx == rb_usascii_encindex() &&
1390 !is_ascii_string(str)) {
1391 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1394 rb_enc_associate_index(str, eidx);
1453str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1455 const int termlen = TERM_LEN(str);
1460 if (str_embed_capa(str2) >=
len + termlen) {
1461 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1462 STR_SET_EMBED(str2);
1463 memcpy(ptr2, RSTRING_PTR(str),
len);
1464 TERM_FILL(ptr2+
len, termlen);
1468 if (STR_SHARED_P(str)) {
1469 root =
RSTRING(str)->as.heap.aux.shared;
1478 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1480 rb_fatal(
"about to free a possible shared root");
1482 char *ptr2 = STR_HEAP_PTR(str2);
1484 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1487 FL_SET(str2, STR_NOEMBED);
1489 STR_SET_SHARED(str2, root);
1492 STR_SET_LEN(str2,
len);
1500 str_replace_shared_without_enc(str2, str);
1501 rb_enc_cr_str_exact_copy(str2, str);
1508 return str_replace_shared(str_alloc_heap(klass), str);
1525rb_str_new_frozen_String(
VALUE orig)
1533rb_str_frozen_bare_string(
VALUE orig)
1535 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1540rb_str_tmp_frozen_acquire(
VALUE orig)
1543 return str_new_frozen_buffer(0, orig, FALSE);
1547rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1549 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1550 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1552 VALUE str = str_alloc_heap(0);
1555 FL_SET(str, STR_SHARED_ROOT);
1557 size_t capa = str_capacity(orig, TERM_LEN(orig));
1563 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1564 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1571 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1572 RBASIC(orig)->flags &= ~STR_NOFREE;
1573 STR_SET_SHARED(orig, str);
1575 RB_OBJ_SET_SHAREABLE(str);
1587rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1592 if (STR_EMBED_P(tmp)) {
1595 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1601 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1605 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1606 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1611 STR_SET_LEN(tmp, 0);
1619 return str_new_frozen_buffer(klass, orig, TRUE);
1629 VALUE str = str_alloc_heap(klass);
1630 STR_SET_LEN(str, RSTRING_LEN(orig));
1631 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1632 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1633 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1634 RBASIC(orig)->flags &= ~STR_NOFREE;
1635 STR_SET_SHARED(orig, str);
1642str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1646 long len = RSTRING_LEN(orig);
1647 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1648 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1650 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1651 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1657 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1658 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1664 if ((ofs > 0) || (rest > 0) ||
1667 str = str_new_shared(klass,
shared);
1669 RSTRING(str)->as.heap.ptr += ofs;
1670 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1678 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1679 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1681 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1682 STR_SET_LEN(str, RSTRING_LEN(orig));
1688 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1691 str = heap_str_make_shared(klass, orig);
1696 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1708str_new_empty_String(
VALUE str)
1711 rb_enc_copy(v, str);
/*
 * Minimum buffer capacity: where the `capacity` keyword is processed further
 * down, a requested capa below this value is raised to it.
 */
1715#define STR_BUF_MIN_SIZE 63
1720 if (STR_EMBEDDABLE_P(
capa, 1)) {
1728 RSTRING(str)->as.heap.ptr[0] =
'\0';
1748 return str_new(0, 0,
len);
1754 if (STR_EMBED_P(str)) {
1755 RB_DEBUG_COUNTER_INC(obj_str_embed);
1757 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1758 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1759 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1762 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1763 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1768rb_str_memsize(
VALUE str)
1770 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1771 return STR_HEAP_SIZE(str);
1781 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/*
 * Forward declarations. str_discard() is defined later in this file;
 * str_shared_replace() is invoked just below whenever str != str2.
 */
1784static inline void str_discard(
VALUE str);
1785static void str_shared_replace(
VALUE str,
VALUE str2);
1790 if (str != str2) str_shared_replace(str, str2);
1801 enc = STR_ENC_GET(str2);
1804 termlen = rb_enc_mbminlen(enc);
1806 STR_SET_LEN(str, RSTRING_LEN(str2));
1808 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1810 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1811 rb_enc_associate(str, enc);
1815 if (STR_EMBED_P(str2)) {
1817 long len = RSTRING_LEN(str2);
1820 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1821 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1822 RSTRING(str2)->as.heap.ptr = new_ptr;
1823 STR_SET_LEN(str2,
len);
1825 STR_SET_NOEMBED(str2);
1828 STR_SET_NOEMBED(str);
1830 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1832 if (
FL_TEST(str2, STR_SHARED)) {
1834 STR_SET_SHARED(str,
shared);
1837 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1841 STR_SET_EMBED(str2);
1842 RSTRING_PTR(str2)[0] = 0;
1843 STR_SET_LEN(str2, 0);
1844 rb_enc_associate(str, enc);
1858 return rb_obj_as_string_result(str, obj);
1874 len = RSTRING_LEN(str2);
1875 if (STR_SHARED_P(str2)) {
1878 STR_SET_NOEMBED(str);
1879 STR_SET_LEN(str,
len);
1880 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1881 STR_SET_SHARED(str,
shared);
1882 rb_enc_cr_str_exact_copy(str, str2);
1885 str_replace_shared(str, str2);
1894 size_t size = rb_str_embed_size(
capa, 0);
1898 NEWOBJ_OF(str,
struct RString, klass,
1909 NEWOBJ_OF(str,
struct RString, klass,
1912 str->as.heap.aux.capa = 0;
1913 str->as.heap.ptr = NULL;
1923 encidx = rb_enc_get_index(str);
1924 flags &= ~ENCODING_MASK;
1927 if (encidx) rb_enc_associate_index(dup, encidx);
1937 long len = RSTRING_LEN(str);
1942 STR_SET_LEN(dup, RSTRING_LEN(str));
1943 return str_duplicate_setup_encoding(str, dup, flags);
1952 root =
RSTRING(str)->as.heap.aux.shared;
1955 root = str = str_new_frozen(klass, str);
1961 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1962 FL_SET(root, STR_SHARED_ROOT);
1964 flags |= RSTRING_NOEMBED | STR_SHARED;
1966 STR_SET_LEN(dup, RSTRING_LEN(str));
1967 return str_duplicate_setup_encoding(str, dup, flags);
1973 if (STR_EMBED_P(str)) {
1974 return str_duplicate_setup_embed(klass, str, dup);
1977 return str_duplicate_setup_heap(klass, str, dup);
1985 if (STR_EMBED_P(str)) {
1986 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1989 dup = str_alloc_heap(klass);
1992 return str_duplicate_setup(klass, str, dup);
2003rb_str_dup_m(
VALUE str)
2005 if (LIKELY(BARE_STRING_P(str))) {
2016 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2023 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2027 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2028 str_duplicate_setup_embed(klass, str, new_str);
2031 new_str = ec_str_alloc_heap(ec, klass);
2032 str_duplicate_setup_heap(klass, str, new_str);
2041rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2043 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2067 static ID keyword_ids[2];
2068 VALUE orig, opt, venc, vcapa;
2073 if (!keyword_ids[0]) {
2074 keyword_ids[0] = rb_id_encoding();
2075 CONST_ID(keyword_ids[1],
"capacity");
2083 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2084 enc = rb_to_encoding(venc);
2086 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2089 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2091 if (
capa < STR_BUF_MIN_SIZE) {
2092 capa = STR_BUF_MIN_SIZE;
2096 len = RSTRING_LEN(orig);
2100 if (orig == str) n = 0;
2102 str_modifiable(str);
2103 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2105 const size_t size = (size_t)
capa + termlen;
2106 const char *
const old_ptr = RSTRING_PTR(str);
2107 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2108 char *new_ptr =
ALLOC_N(
char, size);
2109 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2110 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2112 RSTRING(str)->as.heap.ptr = new_ptr;
2114 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2115 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2116 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2118 STR_SET_LEN(str,
len);
2121 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2122 rb_enc_cr_str_exact_copy(str, orig);
2124 FL_SET(str, STR_NOEMBED);
2131 rb_enc_associate(str, enc);
2143rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2149 static ID keyword_ids[2];
2159 keyword_ids[0] = rb_id_encoding();
2160 CONST_ID(keyword_ids[1],
"capacity");
2162 encoding = kwargs[0];
2163 capacity = kwargs[1];
2172 if (UNDEF_P(encoding)) {
2174 encoding = rb_obj_encoding(orig);
2178 if (!UNDEF_P(encoding)) {
2179 enc = rb_to_encoding(encoding);
2183 if (UNDEF_P(capacity)) {
2185 VALUE empty_str = str_new(klass,
"", 0);
2187 rb_enc_associate(empty_str, enc);
2191 VALUE copy = str_duplicate(klass, orig);
2192 rb_enc_associate(copy, enc);
2205 if (orig_capa >
capa) {
2210 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2211 STR_SET_LEN(str, 0);
/*
 * Non-zero unless the two most significant bits of the low byte of c are
 * binary 10, i.e. unless (c & 0xC0) == 0x80. enc_strlen() increments its
 * length counter once for every byte that satisfies this test.
 */
2222#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2237static inline uintptr_t
2238count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2243 d = (d>>6) | (~d>>7);
2244 d &= NONASCII_MASK >> 7;
2247#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2249 return rb_popcount_intptr(d);
2253# if SIZEOF_VOIDP == 8
2262enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2268 long diff = (long)(e - p);
2269 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2274 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2275 const uintptr_t *s, *t;
2276 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2277 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2278 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2279 while (p < (
const char *)s) {
2280 if (is_utf8_lead_byte(*p))
len++;
2284 len += count_utf8_lead_bytes_with_word(s);
2287 p = (
const char *)s;
2290 if (is_utf8_lead_byte(*p))
len++;
2296 else if (rb_enc_asciicompat(enc)) {
2301 q = search_nonascii(p, e);
2307 p += rb_enc_fast_mbclen(p, e, enc);
2314 q = search_nonascii(p, e);
2320 p += rb_enc_mbclen(p, e, enc);
2327 for (c=0; p<e; c++) {
2328 p += rb_enc_mbclen(p, e, enc);
2343rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2351 long diff = (long)(e - p);
2352 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2354 else if (rb_enc_asciicompat(enc)) {
2358 q = search_nonascii(p, e);
2366 ret = rb_enc_precise_mbclen(p, e, enc);
2381 for (c=0; p<e; c++) {
2382 ret = rb_enc_precise_mbclen(p, e, enc);
2389 if (p + rb_enc_mbminlen(enc) <= e)
2390 p += rb_enc_mbminlen(enc);
2406 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2407 if (!enc) enc = STR_ENC_GET(str);
2408 p = RSTRING_PTR(str);
2413 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2418 return enc_strlen(p, e, enc, cr);
2425 return str_strlen(str, NULL);
2439 return LONG2NUM(str_strlen(str, NULL));
2451rb_str_bytesize(
VALUE str)
2470rb_str_empty(
VALUE str)
2472 return RBOOL(RSTRING_LEN(str) == 0);
2491 char *ptr1, *ptr2, *ptr3;
2496 enc = rb_enc_check_str(str1, str2);
2499 termlen = rb_enc_mbminlen(enc);
2500 if (len1 > LONG_MAX - len2) {
2501 rb_raise(rb_eArgError,
"string size too big");
2503 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2504 ptr3 = RSTRING_PTR(str3);
2505 memcpy(ptr3, ptr1, len1);
2506 memcpy(ptr3+len1, ptr2, len2);
2507 TERM_FILL(&ptr3[len1+len2], termlen);
2523 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2526 int enc1 = rb_enc_get_index(str1);
2527 int enc2 = rb_enc_get_index(str2);
2532 else if (enc2 < 0) {
2535 else if (enc1 != enc2) {
2538 else if (len1 > LONG_MAX - len2) {
2572 rb_enc_copy(str2, str);
2577 rb_raise(rb_eArgError,
"negative argument");
2579 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2580 if (STR_EMBEDDABLE_P(
len, 1)) {
2582 memset(RSTRING_PTR(str2), 0,
len + 1);
2589 STR_SET_LEN(str2,
len);
2590 rb_enc_copy(str2, str);
2593 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2594 rb_raise(rb_eArgError,
"argument too big");
2597 len *= RSTRING_LEN(str);
2598 termlen = TERM_LEN(str);
2600 ptr2 = RSTRING_PTR(str2);
2602 n = RSTRING_LEN(str);
2603 memcpy(ptr2, RSTRING_PTR(str), n);
2604 while (n <=
len/2) {
2605 memcpy(ptr2 + n, ptr2, n);
2608 memcpy(ptr2 + n, ptr2,
len-n);
2610 STR_SET_LEN(str2,
len);
2611 TERM_FILL(&ptr2[
len], termlen);
2612 rb_enc_cr_str_copy_for_substr(str2, str);
2649rb_check_lockedtmp(
VALUE str)
2651 if (
FL_TEST(str, STR_TMPLOCK)) {
/*
 * Union of FL_FREEZE, STR_TMPLOCK and STR_CHILLED. str_modifiable() tests
 * these bits with FL_ANY_RAW(); when any is set it handles the chilled case
 * via CHILLED_STRING_MUTATED() and then calls rb_check_lockedtmp() and
 * rb_check_frozen().
 */
2658#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2660str_modifiable(
VALUE str)
2664 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2665 if (CHILLED_STRING_P(str)) {
2666 CHILLED_STRING_MUTATED(str);
2668 rb_check_lockedtmp(str);
2669 rb_check_frozen(str);
2674str_dependent_p(
VALUE str)
2676 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/*
 * STR_UNMODIFIABLE_MASK plus STR_SHARED and STR_NOFREE. str_independent()
 * tests these bits with FL_ANY_RAW(); when any is set it calls
 * str_modifiable() and returns !str_dependent_p(str).
 */
2686#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2688str_independent(
VALUE str)
2692 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2693 str_modifiable(str);
2694 return !str_dependent_p(str);
/*
 * Gives `str` storage of its own, keeping `len` bytes of content.
 *
 * str     - string to detach
 * len     - number of content bytes to keep
 * expand  - extra room requested (its use is in lines not shown here)
 * termlen - terminator width in bytes
 *
 * NOTE(review): `capa`, `ptr` and `oldptr` are declared/assigned in lines
 * not shown here; presumably capa is len + expand and ptr is a fresh
 * allocation -- confirm.
 */
2700str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
/* Heap string whose content plus terminator fits the embed capacity. */
2710 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2715 STR_SET_LEN(str,
len);
/* Otherwise copy the first `len` bytes of the current buffer into `ptr`. */
2720 oldptr = RSTRING_PTR(str);
2722 memcpy(
ptr, oldptr,
len);
/* True only for a plain heap string: NOEMBED set, NOFREE and SHARED clear. */
2724 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
/* Mark as a heap string, drop the shared/no-free bits, terminate, set length. */
2727 STR_SET_NOEMBED(str);
2728 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2729 TERM_FILL(
ptr +
len, termlen);
2731 STR_SET_LEN(str,
len);
2738 if (!str_independent(str))
2739 str_make_independent(str);
2748 int termlen = TERM_LEN(str);
2749 long len = RSTRING_LEN(str);
2752 rb_raise(rb_eArgError,
"negative expanding string size");
2754 if (expand >= LONG_MAX -
len) {
2755 rb_raise(rb_eArgError,
"string size too big");
2758 if (!str_independent(str)) {
2759 str_make_independent_expand(str,
len, expand, termlen);
2761 else if (expand > 0) {
2762 RESIZE_CAPA_TERM(str,
len + expand, termlen);
/*
 * Prepares `str` for in-place modification: if str_independent() reports
 * false, str_make_independent() is called.
 * NOTE(review): going by the name, the cached code range is presumably left
 * untouched here -- confirm against the lines not shown.
 */
2769str_modify_keep_cr(
VALUE str)
2771 if (!str_independent(str))
2772 str_make_independent(str);
/*
 * Drops the contents of `str` after checking it may be modified.
 *
 * For a non-embedded string with neither STR_SHARED nor STR_NOFREE set, the
 * heap buffer is released with ruby_sized_xfree(), the heap pointer is
 * cleared and the length is set to 0.
 */
2779str_discard(
VALUE str)
2781 str_modifiable(str);
2782 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2783 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2784 RSTRING(str)->as.heap.ptr = 0;
2785 STR_SET_LEN(str, 0);
2792 int encindex = rb_enc_get_index(str);
2794 if (RB_UNLIKELY(encindex == -1)) {
2798 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2803 if (!rb_enc_asciicompat(enc)) {
2825 return RSTRING_PTR(str);
/*
 * Loops `n` times, counting `n` down to zero.
 * NOTE(review): the loop body and return statements are not shown here;
 * presumably it reports whether the `n` bytes at `s` are all zero (callers
 * use the result as a truth value) -- confirm.
 */
2829zero_filled(
const char *s,
int n)
2831 for (; n > 0; --n) {
/*
 * Scans the `len` bytes starting at `s`, advancing one character at a time
 * with rb_enc_mbclen(), and returns the first position for which
 * zero_filled(s, minlen) is true.
 *
 * The scan stops once fewer than `minlen` bytes remain before the end.
 * NOTE(review): the return value when nothing is found is not shown here;
 * presumably NULL -- confirm.
 */
2838str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2840 const char *e = s +
len;
2842 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2843 if (zero_filled(s, minlen))
return s;
/*
 * Makes sure `termlen` terminator bytes follow the `len` content bytes at `s`.
 *
 * For a dependent string (str_dependent_p), the buffer is detached with
 * str_make_independent_expand() only when the bytes at s + len are not
 * already zero. TERM_FILL writes the terminator at s + len.
 * NOTE(review): the branch structure around TERM_FILL and any other return
 * statement are not shown here -- confirm which path reaches the visible
 * `return RSTRING_PTR(str)`.
 */
2849str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2854 if (str_dependent_p(str)) {
2855 if (!zero_filled(s +
len, termlen))
2856 str_make_independent_expand(str,
len, 0L, termlen);
2859 TERM_FILL(s +
len, termlen);
2862 return RSTRING_PTR(str);
/*
 * Adjusts `str` when its terminator width changes from `oldtermlen` to
 * `termlen` bytes.
 *
 * `capa` is the current capacity plus the old terminator width.
 * NOTE(review): the condition of the first branch is not shown here;
 * presumably it tests whether content plus the new terminator still fits
 * in `capa` -- confirm.
 */
2866rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2868 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2869 long len = RSTRING_LEN(str);
/* First branch: check the temp lock, then detach into a new buffer. */
2873 rb_check_lockedtmp(str);
2874 str_make_independent_expand(str,
len, 0L, termlen);
/* Dependent string: detach only when the terminator gets wider. */
2876 else if (str_dependent_p(str)) {
2877 if (termlen > oldtermlen)
2878 str_make_independent_expand(str,
len, 0L, termlen);
2881 if (!STR_EMBED_P(str)) {
/* A wider terminator is rewritten at the end of the content. */
2886 if (termlen > oldtermlen) {
2887 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
/*
 * Looks for an embedded NUL in `str` and makes sure the string is
 * terminated.
 *
 * Two detection paths are visible: one calls str_null_char() with the
 * encoding's minimum character length, the other uses memchr() for a zero
 * byte (and also triggers when the data pointer is NULL). str_fill_term()
 * is called with `minlen` as the terminator width.
 * NOTE(review): what is stored through `w`, what is returned when a NUL is
 * found, and the condition selecting between the two paths are not shown
 * here -- confirm.
 */
2895str_null_check(
VALUE str,
int *w)
2897 char *s = RSTRING_PTR(str);
2898 long len = RSTRING_LEN(str);
2900 const int minlen = rb_enc_mbminlen(enc);
2904 if (str_null_char(s,
len, minlen, enc)) {
2907 return str_fill_term(str, s,
len, minlen);
2910 if (!s || memchr(s, 0,
len)) {
2914 s = str_fill_term(str, s,
len, minlen);
/*
 * Returns whatever str_null_check() returns for `str`.
 * `w` is a local declared in a line not shown here; its value is not used
 * by the visible code.
 */
2920rb_str_to_cstr(
VALUE str)
2923 return str_null_check(str, &w);
2931 char *s = str_null_check(str, &w);
2934 rb_raise(rb_eArgError,
"string contains null char");
2936 rb_raise(rb_eArgError,
"string contains null byte");
/*
 * Ensures `str` is followed by a terminator of `newminlen` bytes by passing
 * its current data pointer and length to str_fill_term(); returns that
 * function's result.
 */
2942rb_str_fill_terminator(
VALUE str,
const int newminlen)
2944 char *s = RSTRING_PTR(str);
2945 long len = RSTRING_LEN(str);
2946 return str_fill_term(str, s,
len, newminlen);
2952 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/*
 * Advances `p` by characters within [p, e) for encoding `enc`.
 *
 * NOTE(review): `nth` is a local, presumably loaded from *nthp with the
 * remainder written back before returning; the first branch of the
 * if/else chain is also not shown here -- confirm.
 */
2978str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
/* ASCII-compatible encodings: uses search_nonascii() alongside rb_enc_mbclen(). */
2987 else if (rb_enc_asciicompat(enc)) {
2988 const char *p2, *e2;
2991 while (p < e && 0 < nth) {
2998 p2 = search_nonascii(p, e2);
3007 n = rb_enc_mbclen(p, e, enc);
/* Remaining case: step one character at a time until nth runs out or e is reached. */
3018 while (p < e && nth--) {
3019 p += rb_enc_mbclen(p, e, enc);
3030 return str_nth_len(p, e, &nth, enc);
/*
 * Locates the `nth` character in [p, e).
 * The visible path delegates to str_nth_len().
 * NOTE(review): how `singlebyte` is used and the final return value are not
 * shown here; str_offset() checks the result for NULL -- confirm when NULL
 * is produced.
 */
3034str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3039 p = str_nth_len(p, e, &nth, enc);
/*
 * Converts character index `nth` into a byte offset relative to `p`.
 * When str_nth() returns NULL the whole span length (e - p) is returned.
 * NOTE(review): the return for the non-NULL case is not shown here;
 * presumably pp - p -- confirm.
 */
3048str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3050 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3051 if (!pp)
return e - p;
3058 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3059 STR_ENC_GET(str), single_byte_optimizable(str));
/*
 * Counts UTF-8 lead bytes to advance within [p, e).
 *
 * NOTE(review): `nth` is a local, presumably loaded from *nthp and written
 * back at the end; the return value is not shown here -- confirm.
 */
3064str_utf8_nth(
const char *p,
const char *e,
long *nthp)
/* Word-at-a-time path: taken only when both the span and nth exceed two pointer widths. */
3067 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3068 const uintptr_t *s, *t;
3069 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
/* s: p rounded up to a pointer-size boundary; t: e rounded down to one. */
3070 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3071 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
/* Bytes before the first aligned word are examined one by one. */
3072 while (p < (
const char *)s) {
3073 if (is_utf8_lead_byte(*p)) nth--;
/* Aligned words: subtract the lead-byte count of each word while at least SIZEOF_VOIDP characters are still wanted. */
3077 nth -= count_utf8_lead_bytes_with_word(s);
3079 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
/* Byte-wise section: break at a lead byte once nth has reached zero. */
3083 if (is_utf8_lead_byte(*p)) {
3084 if (nth == 0)
break;
/*
 * UTF-8 counterpart of str_offset(): calls str_utf8_nth() for `nth` within
 * [p, e).
 * NOTE(review): the return statement is not shown here; presumably the byte
 * distance pp - p -- confirm.
 */
3094str_utf8_offset(
const char *p,
const char *e,
long nth)
3096 const char *pp = str_utf8_nth(p, e, &nth);
3105 if (single_byte_optimizable(str) || pos < 0)
3108 char *p = RSTRING_PTR(str);
3109 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
/*
 * Builds a string `str2` holding `len` bytes of `str` starting at byte
 * offset `beg`.
 *
 * NOTE(review): `str2` is created in lines not shown here, and the exact
 * nesting of the branches below cannot be seen -- confirm.
 */
3114str_subseq(
VALUE str,
long beg,
long len)
3122 const int termlen = TERM_LEN(str);
3123 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
/* Small result: copy the bytes into str2's embedded array and terminate. */
3130 if (str_embed_capa(str2) >=
len + termlen) {
3131 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3132 STR_SET_EMBED(str2);
3133 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3134 TERM_FILL(ptr2+
len, termlen);
3136 STR_SET_LEN(str2,
len);
/* Sharing path: point str2 at str's data, move the pointer forward by beg, shorten to len if longer. */
3140 str_replace_shared(str2, str);
3143 RSTRING(str2)->as.heap.ptr += beg;
3144 if (RSTRING_LEN(str2) >
len) {
3145 STR_SET_LEN(str2,
len);
3155 VALUE str2 = str_subseq(str, beg,
len);
3156 rb_enc_cr_str_copy_for_substr(str2, str);
3165 const long blen = RSTRING_LEN(str);
3167 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3169 if (
len < 0)
return 0;
3170 if (beg < 0 && -beg < 0)
return 0;
3174 if (single_byte_optimizable(str)) {
3175 if (beg > blen)
return 0;
3178 if (beg < 0)
return 0;
3180 if (
len > blen - beg)
3182 if (
len < 0)
return 0;
3187 if (
len > -beg)
len = -beg;
3191 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3194 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3200 slen = str_strlen(str, enc);
3202 if (beg < 0)
return 0;
3204 if (
len == 0)
goto end;
3207 else if (beg > 0 && beg > blen) {
3211 if (beg > str_strlen(str, enc))
return 0;
3216 enc == rb_utf8_encoding()) {
3217 p = str_utf8_nth(s, e, &beg);
3218 if (beg > 0)
return 0;
3219 len = str_utf8_offset(p, e,
len);
3225 p = s + beg * char_sz;
3229 else if (
len * char_sz > e - p)
3234 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3235 if (beg > 0)
return 0;
3239 len = str_offset(p, e,
len, enc, 0);
/* Forward declaration: str_substr() is called before its definition further down. */
3247static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3252 return str_substr(str, beg,
len, TRUE);
/*
 * Returns a substring of `str`, or Qnil.
 *
 * Qnil is returned when the position pointer `p` is NULL, and also when
 * the resulting length is zero and `empty` is false. Otherwise the byte
 * offset of `p` is passed to str_subseq() and the encoding/code range
 * information is copied from `str`.
 * NOTE(review): `p` is computed in a line not shown here, presumably by
 * rb_str_subpos(), which may also adjust `len` -- confirm.
 */
3262str_substr(
VALUE str,
long beg,
long len,
int empty)
3266 if (!p)
return Qnil;
3267 if (!
len && !empty)
return Qnil;
3269 beg = p - RSTRING_PTR(str);
3271 VALUE str2 = str_subseq(str, beg,
len);
3272 rb_enc_cr_str_copy_for_substr(str2, str);
3280 if (CHILLED_STRING_P(str)) {
3285 rb_str_resize(str, RSTRING_LEN(str));
3303 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
/*
 * Returns rb_fstring(str).
 * NOTE(review): lines between the header and the return are not shown here;
 * any pre-processing of `str` should be confirmed there.
 */
3346str_uminus(
VALUE str)
3351 return rb_fstring(str);
/* From this point on, rb_str_dup_frozen expands to rb_str_new_frozen. */
3355#define rb_str_dup_frozen rb_str_new_frozen
3360 rb_check_frozen(str);
3361 if (
FL_TEST(str, STR_TMPLOCK)) {
3364 FL_SET(str, STR_TMPLOCK);
3371 rb_check_frozen(str);
3372 if (!
FL_TEST(str, STR_TMPLOCK)) {
3392 const int termlen = TERM_LEN(str);
3394 str_modifiable(str);
3395 if (STR_SHARED_P(str)) {
3398 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3399 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3410 else if (
len > RSTRING_LEN(str)) {
3414 const char *
const new_end = RSTRING_PTR(str) +
len;
3424 else if (
len < RSTRING_LEN(str)) {
3432 STR_SET_LEN(str,
len);
3433 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3440 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3443 int independent = str_independent(str);
3444 long slen = RSTRING_LEN(str);
3445 const int termlen = TERM_LEN(str);
3447 if (slen >
len || (termlen != 1 && slen <
len)) {
3453 if (STR_EMBED_P(str)) {
3454 if (
len == slen)
return str;
3455 if (str_embed_capa(str) >=
len + termlen) {
3456 STR_SET_LEN(str,
len);
3460 str_make_independent_expand(str, slen,
len - slen, termlen);
3462 else if (str_embed_capa(str) >=
len + termlen) {
3463 char *
ptr = STR_HEAP_PTR(str);
3465 if (slen >
len) slen =
len;
3468 STR_SET_LEN(str,
len);
3469 if (independent) ruby_xfree(
ptr);
3472 else if (!independent) {
3473 if (
len == slen)
return str;
3474 str_make_independent_expand(str, slen,
len - slen, termlen);
3478 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3479 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3482 else if (
len == slen)
return str;
3483 STR_SET_LEN(str,
len);
/*
 * Makes sure `str` has room for `len` more bytes beyond its current length.
 *
 * Raises ArgumentError ("string sizes too big") when olen + len would
 * overflow a long. When the required total exceeds the current capacity,
 * the buffer is resized through RESIZE_CAPA_TERM.
 * NOTE(review): how `capa` grows inside the while loop, and what the
 * `total >= LONG_MAX / 2` case assigns, are not shown here -- confirm.
 */
3490str_ensure_available_capa(
VALUE str,
long len)
3492 str_modify_keep_cr(str);
3494 const int termlen = TERM_LEN(str);
3495 long olen = RSTRING_LEN(str);
/* Overflow guard written as a subtraction so the addition below cannot wrap. */
3497 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3498 rb_raise(rb_eArgError,
"string sizes too big");
3501 long total = olen +
len;
3502 long capa = str_capacity(str, termlen);
3505 if (total >= LONG_MAX / 2) {
3508 while (total >
capa) {
3511 RESIZE_CAPA_TERM(str,
capa, termlen);
/*
 * Appends `len` bytes from `ptr` to `str`.
 *
 * str     - destination string
 * ptr     - source bytes
 * len     - number of bytes to append; the visible `len == 0` check
 *           returns 0 early
 * keep_cr - NOTE(review): its use is not shown here; the first visible
 *           statement calls str_modify_keep_cr(), presumably under a test
 *           of this flag -- confirm.
 *
 * Raises ArgumentError ("string sizes too big") when the combined length
 * would overflow a long.
 */
3516str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3519 str_modify_keep_cr(str);
3524 if (
len == 0)
return 0;
3526 long total, olen,
off = -1;
3528 const int termlen = TERM_LEN(str);
/*
 * Source lies inside str's own buffer.
 * NOTE(review): presumably `off` records the offset so `ptr` can be
 * recomputed after a resize moves the buffer -- confirm.
 */
3531 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3535 long capa = str_capacity(str, termlen);
3537 if (olen > LONG_MAX -
len) {
3538 rb_raise(rb_eArgError,
"string sizes too big");
3542 if (total >= LONG_MAX / 2) {
3545 while (total >
capa) {
/* The data pointer is re-read after resizing. */
3548 RESIZE_CAPA_TERM(str,
capa, termlen);
3549 sptr = RSTRING_PTR(str);
/* Copy the new bytes after the old content, then update length and terminator. */
3554 memcpy(sptr + olen,
ptr,
len);
3555 STR_SET_LEN(str, total);
3556 TERM_FILL(sptr + total, termlen);
/* Shorthands for str_buf_cat4() with keep_cr = false. */
3561#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
/* Same, with the length taken from rb_strlen_lit(ptr). */
3562#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3567 if (
len == 0)
return str;
3569 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3571 return str_buf_cat(str,
ptr,
len);
/*
 * Appends the single byte `byte` to `str`.
 *
 * A terminator width of 1 is used throughout.
 * NOTE(review): `string_length` starts at -1 and `sptr` is not declared in
 * the visible lines; both are presumably filled in by a line not shown
 * here -- confirm.
 */
3582rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
/* Detach a string that is not independent before writing into it. */
3587 if (UNLIKELY(!str_independent(str))) {
3588 str_make_independent(str);
3591 long string_length = -1;
3592 const int null_terminator_length = 1;
/* Overflow guard for string_length + 1. */
3597 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3598 rb_raise(rb_eArgError,
"string sizes too big");
3601 long string_capacity = str_capacity(str, null_terminator_length);
/* Enough room already: store the byte, bump the length, re-terminate. */
3607 if (LIKELY(string_capacity >= string_length + 1)) {
3609 sptr[string_length] = byte;
3610 STR_SET_LEN(str, string_length + 1);
3611 TERM_FILL(sptr + string_length + 1, null_terminator_length);
/* Otherwise go through the general append routine. */
3615 str_buf_cat(str, (
char *)&
byte, 1);
3631 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
/*
 * Appends `len` bytes at `ptr` (encoding index `ptr_encindex`, code range
 * `ptr_cr`) to `str`, choosing the encoding index of the result.
 *
 * ptr_cr_ret - receives the code range determined for the appended bytes
 *              (`*ptr_cr_ret = ptr_cr`).
 *
 * Raises ArgumentError ("negative string size (or size too big)"); the
 * guarding condition is not shown here.
 * NOTE(review): most branch conditions are not shown; the notes below
 * describe only the visible statements.
 */
3642rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3643 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
/* Same encoding on both sides: the code range may be computed by scanning ptr. */
3652 if (str_encindex == ptr_encindex) {
3654 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3658 str_enc = rb_enc_from_index(str_encindex);
3659 ptr_enc = rb_enc_from_index(ptr_encindex);
/* At least one side is not ASCII-compatible. */
3660 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
/* Empty receiver: the terminator width is switched to that of ptr's encoding. */
3663 if (RSTRING_LEN(str) == 0) {
3666 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3672 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3681 *ptr_cr_ret = ptr_cr;
3683 if (str_encindex != ptr_encindex &&
3686 str_enc = rb_enc_from_index(str_encindex);
3687 ptr_enc = rb_enc_from_index(ptr_encindex);
/* Result encoding: str's in every visible case but one, which takes ptr's. */
3692 res_encindex = str_encindex;
3697 res_encindex = str_encindex;
3701 res_encindex = ptr_encindex;
3706 res_encindex = str_encindex;
3713 res_encindex = str_encindex;
3719 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3721 str_buf_cat(str,
ptr,
len);
3727 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3734 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3744 if (rb_enc_asciicompat(enc)) {
3745 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3751 unsigned int c = (
unsigned char)*
ptr;
3752 int len = rb_enc_codelen(c, enc);
3753 rb_enc_mbcput(c, buf, enc);
3754 rb_enc_cr_str_buf_cat(str, buf,
len,
3767 if (str_enc_fastpath(str)) {
3771 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3777 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3788 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
/*
 * Concatenates the `num` strings in `strary` into a result string `str`.
 *
 * NOTE(review): `str`, `i`, `s` and `encidx` are declared in lines not
 * shown here -- confirm how the result is allocated and how `encidx` is
 * derived from each element.
 */
3804rb_str_concat_literals(
size_t num,
const VALUE *strary)
/* Total byte length of all pieces, starting from 1. */
3808 unsigned long len = 1;
3813 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
/* The result starts with the encoding of the first element. */
3815 str_enc_copy_direct(str, strary[0]);
3817 for (i = s; i < num; ++i) {
3818 const VALUE v = strary[i];
/* A non-US-ASCII index is applied to the result. */
3822 if (encidx != ENCINDEX_US_ASCII) {
3824 rb_enc_set_index(str, encidx);
/*
 * Method-style entry point taking `argc` arguments in `argv` and the
 * receiver `str`.
 *
 * The receiver is checked with str_modifiable() first. With more than one
 * argument, a temporary `arg_str` is given str's encoding before the
 * arguments are looped over.
 * NOTE(review): the single-argument branch, the loop body and the return
 * value are not shown here -- confirm.
 */
3837rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3839 str_modifiable(str);
3844 else if (argc > 1) {
3847 rb_enc_copy(arg_str, str);
3848 for (i = 0; i < argc; i++) {
/*
 * Appends each argument to `str` as raw bytes.
 *
 * The visible code makes three passes over argv. The first sums the space
 * needed; the error text shows that arguments other than String or Integer
 * are rejected there. Capacity is then reserved once with
 * str_ensure_available_capa().
 * NOTE(review): the type dispatch inside each loop, the declaration of
 * `sptr`, and the purpose of the third loop are not shown here -- confirm.
 */
3883rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3885 long needed_capacity = 0;
/* Pass 1: validate arguments and accumulate the required capacity. */
3889 for (
int index = 0; index < argc; index++) {
3890 VALUE obj = argv[index];
3898 needed_capacity += RSTRING_LEN(obj);
3903 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3910 str_ensure_available_capa(str, needed_capacity);
/* Pass 2: an Integer argument is masked to its low 8 bits, and the masked value is stored back into argv. */
3913 for (
int index = 0; index < argc; index++) {
3914 VALUE obj = argv[index];
3919 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3920 char byte = (char)(
NUM2INT(obj) & 0xFF);
/* Reaching rb_bug() means an argument escaped the validation in pass 1. */
3934 rb_bug(
"append_as_bytes arguments should have been validated");
/* Length grows by the total computed in pass 1; the terminator is written at sptr. */
3938 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3939 TERM_FILL(sptr, TERM_LEN(str));
/* Pass 3 over the same arguments. */
3944 for (
int index = 0; index < argc; index++) {
3945 VALUE obj = argv[index];
3962 rb_bug(
"append_as_bytes arguments should have been validated");
4041 if (rb_num_to_uint(str2, &code) == 0) {
4054 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4057 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4060 long pos = RSTRING_LEN(str1);
4065 switch (
len = rb_enc_codelen(code, enc)) {
4066 case ONIGERR_INVALID_CODE_POINT_VALUE:
4067 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4069 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4075 rb_enc_mbcput(code, buf, enc);
4076 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4077 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4079 rb_str_resize(str1, pos+
len);
4080 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
/*
 * Picks an encoding index for appending the byte value `code` to a string
 * of encoding `enc`.
 *
 * Only ASCII-8BIT and US-ASCII enter the visible branch. Within it, a
 * US-ASCII string receiving a value above 127 yields ENCINDEX_ASCII_8BIT.
 * NOTE(review): the other return values (including the one for any other
 * encoding) are not shown here -- confirm.
 */
4093rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4095 int encidx = rb_enc_to_index(enc);
4097 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4102 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4103 return ENCINDEX_ASCII_8BIT;
/*
 * Method-style entry point taking `argc` arguments in `argv` and the
 * receiver `str`; the visible lines have the same shape as
 * rb_str_concat_multi().
 *
 * The receiver is checked with str_modifiable() first. With more than one
 * argument, a temporary `arg_str` is given str's encoding before the
 * arguments are looped over.
 * NOTE(review): the single-argument branch, the loop body and the return
 * value are not shown here -- confirm.
 */
4125rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4127 str_modifiable(str);
4132 else if (argc > 1) {
4135 rb_enc_copy(arg_str, str);
4136 for (i = 0; i < argc; i++) {
4149 st_index_t precomputed_hash;
4150 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4152 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4153 return precomputed_hash;
4156 return str_do_hash(str);
4163 const char *ptr1, *ptr2;
4166 return (len1 != len2 ||
4168 memcmp(ptr1, ptr2, len1) != 0);
4180rb_str_hash_m(
VALUE str)
/* Evaluates to the smaller of a and b (b when they are equal). Each argument may be evaluated twice, so pass expressions without side effects. */
4186#define lesser(a,b) (((a)>(b))?(b):(a))
4194 if (RSTRING_LEN(str1) == 0)
return TRUE;
4195 if (RSTRING_LEN(str2) == 0)
return TRUE;
4198 if (idx1 == idx2)
return TRUE;
4203 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4207 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4217 const char *ptr1, *ptr2;
4220 if (str1 == str2)
return 0;
4223 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4232 if (len1 > len2)
return 1;
4235 if (retval > 0)
return 1;
4269 if (str1 == str2)
return Qtrue;
4276 return rb_str_eql_internal(str1, str2);
4290 if (str1 == str2)
return Qtrue;
4292 return rb_str_eql_internal(str1, str2);
4330 return rb_invcmp(str1, str2);
4372 return str_casecmp(str1, s);
4380 const char *p1, *p1end, *p2, *p2end;
4382 enc = rb_enc_compatible(str1, str2);
4387 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4388 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4389 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4390 while (p1 < p1end && p2 < p2end) {
4392 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4393 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4395 return INT2FIX(c1 < c2 ? -1 : 1);
4402 while (p1 < p1end && p2 < p2end) {
4403 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4404 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4406 if (0 <= c1 && 0 <= c2) {
4410 return INT2FIX(c1 < c2 ? -1 : 1);
4414 l1 = rb_enc_mbclen(p1, p1end, enc);
4415 l2 = rb_enc_mbclen(p2, p2end, enc);
4416 len = l1 < l2 ? l1 : l2;
4417 r = memcmp(p1, p2,
len);
4419 return INT2FIX(r < 0 ? -1 : 1);
4421 return INT2FIX(l1 < l2 ? -1 : 1);
4427 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4428 if (p1 == p1end)
return INT2FIX(-1);
4461 return str_casecmp_p(str1, s);
4468 VALUE folded_str1, folded_str2;
4469 VALUE fold_opt = sym_fold;
4471 enc = rb_enc_compatible(str1, str2);
4476 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4477 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4479 return rb_str_eql(folded_str1, folded_str2);
/*
 * Searches for the bytes `sub_ptr[0, sub_len)` inside a string buffer,
 * starting `offset` bytes in, and returns `pos + offset` on success.
 *
 * Returns the negative value from rb_memsearch() when nothing is found,
 * and -1 when the remaining search length drops to zero or below.
 * NOTE(review): `t` is computed in a line not shown here; presumably it is
 * the character boundary at or after the raw hit, so a hit landing in the
 * middle of a multibyte character is skipped and the search resumes from
 * `t` -- confirm. The update of `search_start` is likewise not shown.
 */
4483strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4484 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4486 const char *search_start = str_ptr;
4487 long pos, search_len = str_len - offset;
4491 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4492 if (pos < 0)
return pos;
/* Accept the hit only when it coincides with `t`. */
4494 if (t == search_start + pos)
break;
/* Otherwise shrink the window and shift the running offset by the distance skipped. */
4495 search_len -= t - search_start;
4496 if (search_len <= 0)
return -1;
4497 offset += t - search_start;
4500 return pos + offset;
/* rb_strseq_index() with in_byte = 0. */
4504#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
/* rb_strseq_index() with in_byte = 1. */
4505#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
/*
 * Finds `sub` in `str` starting at `offset`; returns the match position or
 * -1.
 *
 * in_byte - non-zero: lengths and `offset` are in bytes; zero: lengths are
 *           character counts and `offset` is converted to a byte offset
 *           with str_offset() before the search.
 *
 * Returns -1 when `sub` is a broken string, when `sub` is longer than
 * `str`, or when the remaining length after `offset` is shorter than
 * `sub`. An empty `sub` matches at `offset` itself.
 * NOTE(review): the conditions guarding `offset += str_len_char` and the
 * assignment of `str_ptr_end` are not shown here; presumably the addition
 * handles a negative offset -- confirm.
 */
4508rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4510 const char *str_ptr, *str_ptr_end, *sub_ptr;
4511 long str_len, sub_len;
4514 enc = rb_enc_check(str, sub);
4515 if (is_broken_string(sub))
return -1;
4517 str_ptr = RSTRING_PTR(str);
4519 str_len = RSTRING_LEN(str);
4520 sub_ptr = RSTRING_PTR(sub);
4521 sub_len = RSTRING_LEN(sub);
4523 if (str_len < sub_len)
return -1;
4526 long str_len_char, sub_len_char;
4527 int single_byte = single_byte_optimizable(str);
/* Byte length doubles as character length in byte mode or for single-byte-optimizable strings. */
4528 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4529 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4531 offset += str_len_char;
4532 if (offset < 0)
return -1;
4534 if (str_len_char - offset < sub_len_char)
return -1;
4535 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4538 if (sub_len == 0)
return offset;
4541 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
/*
 * Method-style entry point: one required argument (`sub`) and one optional
 * argument (`initpos`), per the "11" rb_scan_args spec.
 *
 * When a start position is given, a negative value is counted from the end
 * by adding the character length; the branch is taken when the position is
 * still negative after that, or when a non-negative position exceeds the
 * length.
 * NOTE(review): what that branch does, how `pos` is read from `initpos`,
 * the dispatch on the type of `sub` and the return value are not shown
 * here -- confirm.
 */
4554rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4561 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4562 long slen = str_strlen(str, enc);
4564 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
/* Character position converted to a byte offset. */
4576 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4577 enc, single_byte_optimizable(str));
4588 pos = rb_str_index(str, sub, pos);
/*
 * Verifies that byte offset `pos` in `str` falls on a character boundary.
 *
 * Skipped for single-byte-optimizable strings. When at_char_boundary()
 * fails, an error is reported with the message "offset %ld does not land
 * on character boundary".
 * NOTE(review): the raising call and the definition of `e` are not shown
 * here -- confirm the exception class.
 */
4602str_ensure_byte_pos(
VALUE str,
long pos)
4604 if (!single_byte_optimizable(str)) {
4605 const char *s = RSTRING_PTR(str);
4607 const char *p = s + pos;
4608 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4610 "offset %ld does not land on character boundary", pos);
/*
 * Byte-oriented variant of the index method: one required argument (`sub`)
 * and one optional argument (`initpos`), per the "11" rb_scan_args spec.
 *
 * A negative start position is counted from the end by adding the byte
 * length; the branch is taken when the position is still negative after
 * that, or when a non-negative position exceeds the length. The position
 * is checked with str_ensure_byte_pos() before searching. A non-negative
 * search result is returned as an Integer.
 * NOTE(review): what the out-of-range branch does and the value returned
 * when nothing is found are not shown here; presumably nil -- confirm.
 */
4683rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4689 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4690 long slen = RSTRING_LEN(str);
4692 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4703 str_ensure_byte_pos(str, pos);
4715 pos = rb_str_byteindex(str, sub, pos);
4716 if (pos >= 0)
return LONG2NUM(pos);
/*
 * Reverse byte search: walks backwards from search_str + search_len and
 * returns a pointer to the last byte equal to `chr`.
 *
 * Each byte is compared as unsigned char against `chr`.
 * NOTE(review): the return when no byte matches is not shown here;
 * presumably NULL -- confirm.
 */
4723memrchr(
const char *search_str,
int chr,
long search_len)
4725 const char *ptr = search_str + search_len;
4726 while (ptr > search_str) {
4727 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4737 char *hit, *adjusted;
4739 long slen, searchlen;
4742 sbeg = RSTRING_PTR(str);
4743 slen = RSTRING_LEN(sub);
4744 if (slen == 0)
return s - sbeg;
4746 t = RSTRING_PTR(sub);
4748 searchlen = s - sbeg + 1;
4750 if (memcmp(s, t, slen) == 0) {
4755 hit = memrchr(sbeg, c, searchlen);
4758 if (hit != adjusted) {
4759 searchlen = adjusted - sbeg;
4762 if (memcmp(hit, t, slen) == 0)
4764 searchlen = adjusted - sbeg;
4765 }
while (searchlen > 0);
4779 enc = rb_enc_check(str, sub);
4780 if (is_broken_string(sub))
return -1;
4781 singlebyte = single_byte_optimizable(str);
4782 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4783 slen = str_strlen(sub, enc);
4786 if (
len < slen)
return -1;
4787 if (
len - pos < slen) pos =
len - slen;
4788 if (
len == 0)
return pos;
4790 sbeg = RSTRING_PTR(str);
4793 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4799 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4800 return str_rindex(str, sub, s, enc);
/*
 * Method-style entry point for the reverse search: one required argument
 * (`sub`) and one optional argument (`initpos`), per the "11" rb_scan_args
 * spec.
 *
 * `len` is the character length of `str`. A negative start position is
 * counted from the end; a position beyond the end is clamped to `len`.
 * NOTE(review): the handling when the position is still negative, the
 * default position when none is given, the dispatch on the type of `sub`
 * and the return value are not shown here -- confirm.
 */
4812rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4817 long pos,
len = str_strlen(str, enc);
4819 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4821 if (pos < 0 && (pos +=
len) < 0) {
4827 if (pos >
len) pos =
len;
/* Character position converted to a byte offset. */
4835 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4836 enc, single_byte_optimizable(str));
4847 pos = rb_str_rindex(str, sub, pos);
/*
 * Reverse search for `sub` in `str` using byte lengths, starting no later
 * than byte position `pos`.
 *
 * Returns -1 when `sub` is a broken string or is longer than `str`. `pos`
 * is pulled back to len - slen when fewer than `slen` bytes remain after
 * it. An empty `str` returns `pos` directly. The general case delegates to
 * str_rindex().
 * NOTE(review): the condition guarding the memcmp() at the start of the
 * buffer, its result, and the computation of `s` are not shown here;
 * presumably the memcmp handles pos == 0 -- confirm.
 */
4857rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4863 enc = rb_enc_check(str, sub);
4864 if (is_broken_string(sub))
return -1;
4865 len = RSTRING_LEN(str);
4866 slen = RSTRING_LEN(sub);
4869 if (
len < slen)
return -1;
4870 if (
len - pos < slen) pos =
len - slen;
4871 if (
len == 0)
return pos;
4873 sbeg = RSTRING_PTR(str);
4876 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4883 return str_rindex(str, sub, s, enc);
/*
 * Byte-oriented variant of the reverse-index method: one required argument
 * (`sub`) and one optional argument (`initpos`), per the "11" rb_scan_args
 * spec.
 *
 * `len` is the byte length of `str`. A negative start position is counted
 * from the end; a position beyond the end is clamped to `len`. The
 * position is checked with str_ensure_byte_pos() before searching. A
 * non-negative result is returned as an Integer.
 * NOTE(review): the handling when the position is still negative and the
 * value returned when nothing is found are not shown here; presumably
 * nil -- confirm.
 */
4973rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4977 long pos,
len = RSTRING_LEN(str);
4979 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4981 if (pos < 0 && (pos +=
len) < 0) {
4987 if (pos >
len) pos =
len;
4993 str_ensure_byte_pos(str, pos);
5005 pos = rb_str_byterindex(str, sub, pos);
5006 if (pos >= 0)
return LONG2NUM(pos);
5045 switch (OBJ_BUILTIN_TYPE(y)) {
/*
 * Converts `re` to a pattern with get_pat() and calls its `match` method
 * with argc/argv, storing the outcome in `result`.
 * NOTE(review): how `re` is taken from the arguments, how argc/argv are
 * adjusted beforehand, and what happens with `result` afterwards are not
 * shown here -- confirm.
 */
5099rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5106 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
/*
 * Converts the first argument to a pattern with get_pat() and returns
 * rb_reg_match_p() for it against `str`. The start position is taken from
 * the second argument when present, otherwise 0.
 * NOTE(review): argument-count validation is not shown here -- confirm.
 */
5137rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5141 re = get_pat(argv[0]);
5142 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5151static enum neighbor_char
5157 if (rb_enc_mbminlen(enc) > 1) {
5159 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5161 return NEIGHBOR_NOT_CHAR;
5163 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5165 if (!l)
return NEIGHBOR_NOT_CHAR;
5166 if (l !=
len)
return NEIGHBOR_WRAPPED;
5167 rb_enc_mbcput(c, p, enc);
5168 r = rb_enc_precise_mbclen(p, p +
len, enc);
5170 return NEIGHBOR_NOT_CHAR;
5172 return NEIGHBOR_FOUND;
5175 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5178 return NEIGHBOR_WRAPPED;
5179 ++((
unsigned char*)p)[i];
5180 l = rb_enc_precise_mbclen(p, p+
len, enc);
5184 return NEIGHBOR_FOUND;
5187 memset(p+l, 0xff,
len-l);
5193 for (len2 =
len-1; 0 < len2; len2--) {
5194 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5198 memset(p+len2+1, 0xff,
len-(len2+1));
5203static enum neighbor_char
5208 if (rb_enc_mbminlen(enc) > 1) {
5210 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5212 return NEIGHBOR_NOT_CHAR;
5214 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5215 if (!c)
return NEIGHBOR_NOT_CHAR;
5218 if (!l)
return NEIGHBOR_NOT_CHAR;
5219 if (l !=
len)
return NEIGHBOR_WRAPPED;
5220 rb_enc_mbcput(c, p, enc);
5221 r = rb_enc_precise_mbclen(p, p +
len, enc);
5223 return NEIGHBOR_NOT_CHAR;
5225 return NEIGHBOR_FOUND;
5228 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5231 return NEIGHBOR_WRAPPED;
5232 --((
unsigned char*)p)[i];
5233 l = rb_enc_precise_mbclen(p, p+
len, enc);
5237 return NEIGHBOR_FOUND;
5240 memset(p+l, 0,
len-l);
5246 for (len2 =
len-1; 0 < len2; len2--) {
5247 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5251 memset(p+len2+1, 0,
len-(len2+1));
/*
 * Steps the `len`-byte character at `p` to a neighbouring character of the
 * same class (digit or alpha), reporting the outcome as a neighbor_char
 * value.
 *
 * p     - character to modify in place
 * len   - its byte length
 * enc   - its encoding
 * carry - buffer that receives a carry character on wrap-around
 *
 * NOTE(review): the tests that choose `ctype`, the checks applied to `c`
 * after each step, and the use of `save` are not shown here; the notes
 * below describe only the visible statements.
 */
5265static enum neighbor_char
5266enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5268 enum neighbor_char ret;
5272 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5276 const int max_gaps = 1;
/* Classify the current character; anything else is NEIGHBOR_NOT_CHAR. */
5278 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5280 ctype = ONIGENC_CTYPE_DIGIT;
5282 ctype = ONIGENC_CTYPE_ALPHA;
5284 return NEIGHBOR_NOT_CHAR;
/* Try the successor up to max_gaps + 1 times. */
5287 for (
try = 0;
try <= max_gaps; ++
try) {
5288 ret = enc_succ_char(p,
len, enc);
5289 if (ret == NEIGHBOR_FOUND) {
5290 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5292 return NEIGHBOR_FOUND;
/* The character is then stepped in the opposite direction with enc_pred_char(). */
5299 ret = enc_pred_char(p,
len, enc);
5300 if (ret == NEIGHBOR_FOUND) {
5301 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5314 return NEIGHBOR_NOT_CHAR;
/* Wrapped: non-digit classes return here; the remaining path also advances `carry` by one. */
5317 if (ctype != ONIGENC_CTYPE_DIGIT) {
5319 return NEIGHBOR_WRAPPED;
5323 enc_succ_char(carry,
len, enc);
5324 return NEIGHBOR_WRAPPED;
5342 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5343 rb_enc_cr_str_copy_for_substr(str, orig);
5344 return str_succ(str);
5351 char *sbeg, *s, *e, *last_alnum = 0;
5352 int found_alnum = 0;
5354 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5355 long carry_pos = 0, carry_len = 1;
5356 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5358 slen = RSTRING_LEN(str);
5359 if (slen == 0)
return str;
5361 enc = STR_ENC_GET(str);
5362 sbeg = RSTRING_PTR(str);
5363 s = e = sbeg + slen;
5365 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5366 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5372 l = rb_enc_precise_mbclen(s, e, enc);
5373 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5374 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5375 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5377 case NEIGHBOR_NOT_CHAR:
5379 case NEIGHBOR_FOUND:
5381 case NEIGHBOR_WRAPPED:
5386 carry_pos = s - sbeg;
5391 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5392 enum neighbor_char neighbor;
5393 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5394 l = rb_enc_precise_mbclen(s, e, enc);
5395 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5396 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5398 neighbor = enc_succ_char(tmp, l, enc);
5400 case NEIGHBOR_FOUND:
5404 case NEIGHBOR_WRAPPED:
5407 case NEIGHBOR_NOT_CHAR:
5410 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5412 enc_succ_char(s, l, enc);
5414 if (!rb_enc_asciicompat(enc)) {
5415 MEMCPY(carry, s,
char, l);
5418 carry_pos = s - sbeg;
5422 RESIZE_CAPA(str, slen + carry_len);
5423 sbeg = RSTRING_PTR(str);
5424 s = sbeg + carry_pos;
5425 memmove(s + carry_len, s, slen - carry_pos);
5426 memmove(s, carry, carry_len);
5428 STR_SET_LEN(str, slen);
5429 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5445rb_str_succ_bang(
VALUE str)
5453all_digits_p(
const char *s,
long len)
5481 VALUE end, exclusive;
5485 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5491 VALUE current, after_end;
5498 enc = rb_enc_check(beg, end);
5499 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5501 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5502 char c = RSTRING_PTR(beg)[0];
5503 char e = RSTRING_PTR(end)[0];
5505 if (c > e || (excl && c == e))
return beg;
5507 VALUE str = rb_enc_str_new(&c, 1, enc);
5509 if ((*each)(str, arg))
break;
5510 if (!excl && c == e)
break;
5512 if (excl && c == e)
break;
5517 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5518 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5519 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5524 b = rb_str_to_inum(beg, 10, FALSE);
5525 e = rb_str_to_inum(end, 10, FALSE);
5532 if (excl && bi == ei)
break;
5533 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5538 ID op = excl ?
'<' : idLE;
5539 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5544 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5545 b = rb_funcallv(b, succ, 0, 0);
5552 if (n > 0 || (excl && n == 0))
return beg;
5554 after_end = rb_funcallv(end, succ, 0, 0);
5559 next = rb_funcallv(current, succ, 0, 0);
5560 if ((*each)(current, arg))
break;
5561 if (
NIL_P(next))
break;
5565 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5580 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5581 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5582 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5584 b = rb_str_to_inum(beg, 10, FALSE);
5590 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5598 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5599 b = rb_funcallv(b, succ, 0, 0);
5605 VALUE next = rb_funcallv(current, succ, 0, 0);
5606 if ((*each)(current, arg))
break;
5609 if (RSTRING_LEN(current) == 0)
5620 if (!
rb_equal(str, *argp))
return 0;
5634 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5635 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5636 rb_enc_asciicompat(STR_ENC_GET(val))) {
5637 const char *bp = RSTRING_PTR(beg);
5638 const char *ep = RSTRING_PTR(end);
5639 const char *vp = RSTRING_PTR(val);
5640 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5641 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5649 if (b <= v && v < e)
return Qtrue;
5650 return RBOOL(!
RTEST(exclusive) && v == e);
5657 all_digits_p(bp, RSTRING_LEN(beg)) &&
5658 all_digits_p(ep, RSTRING_LEN(end))) {
5663 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5665 return RBOOL(
NIL_P(val));
5688 return rb_str_subpat(str, indx,
INT2FIX(0));
5691 if (rb_str_index(str, indx, 0) != -1)
5697 long beg,
len = str_strlen(str, NULL);
5709 return str_substr(str, idx, 1, FALSE);
/*
 * Method-style element reference with one or two arguments.
 *
 * Three outcomes are visible: rb_str_subpat() with both arguments,
 * rb_str_substr_two_fixnums() with both arguments, and rb_str_aref() with
 * the first argument only.
 * NOTE(review): the conditions choosing between them are not shown here;
 * presumably the two-argument case splits on whether argv[0] is a
 * Regexp -- confirm.
 */
5726rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5730 return rb_str_subpat(str, argv[0], argv[1]);
5733 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5737 return rb_str_aref(str, argv[0]);
5743 char *ptr = RSTRING_PTR(str);
5744 long olen = RSTRING_LEN(str), nlen;
5746 str_modifiable(str);
5747 if (
len > olen)
len = olen;
5749 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5751 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5753 ptr =
RSTRING(str)->as.embed.ary;
5754 memmove(ptr, oldptr +
len, nlen);
5755 if (fl == STR_NOEMBED)
xfree(oldptr);
5758 if (!STR_SHARED_P(str)) {
5760 rb_enc_cr_str_exact_copy(shared, str);
5765 STR_SET_LEN(str, nlen);
5767 if (!SHARABLE_MIDDLE_SUBSTRING) {
5768 TERM_FILL(ptr + nlen, TERM_LEN(str));
/*
 * Replaces `len` bytes of `str` at byte offset `beg` with `vlen` bytes
 * taken from `val` starting at `vbeg`.
 *
 * NOTE(review): `slen` and `sptr` are declared in lines not shown here, as
 * are the body of the `beg == 0 && vlen == 0` case, the conditions around
 * the resize and tail move, and the update of `slen` -- confirm.
 */
5775rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5781 if (beg == 0 && vlen == 0) {
5786 str_modify_keep_cr(str);
/* Resize to the new total, then re-read the data pointer. */
5790 RESIZE_CAPA(str, slen + vlen -
len);
5791 sptr = RSTRING_PTR(str);
/* Shift the tail that follows the replaced range to its new position. */
5800 memmove(sptr + beg + vlen,
5802 slen - (beg +
len));
5804 if (vlen < beg &&
len < 0) {
/* memmove is used for the copy from `val`. */
5808 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5811 STR_SET_LEN(str, slen);
5812 TERM_FILL(&sptr[slen], TERM_LEN(str));
5819 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5828 int singlebyte = single_byte_optimizable(str);
5834 enc = rb_enc_check(str, val);
5835 slen = str_strlen(str, enc);
5837 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5846 if (
len > slen - beg) {
5849 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5854 beg = p - RSTRING_PTR(str);
5856 rb_str_update_0(str, beg,
len, val);
5857 rb_enc_associate(str, enc);
5868 long start, end,
len;
5878 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5882 nth += regs->num_regs;
5892 enc = rb_enc_check_str(str, val);
5893 rb_str_update_0(str, start,
len, val);
5894 rb_enc_associate(str, enc);
5902 switch (
TYPE(indx)) {
5904 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5908 beg = rb_str_index(str, indx, 0);
/*
 * Method-style element assignment.
 *
 * Two outcomes are visible: rb_str_subpat_set() with three arguments, and
 * rb_str_aset() with two arguments, whose result is returned.
 * NOTE(review): the conditions choosing between them and any other
 * three-argument handling are not shown here -- confirm.
 */
5947rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5951 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5959 return rb_str_aset(str, argv[0], argv[1]);
/*
 * Method-style destructive slice on `str`.
 *
 * The visible lines compute a byte range (beg, len) in several ways
 * depending on the arguments, then shrink the string's length and rewrite
 * the terminator.
 * NOTE(review): the conditions selecting each way of computing (beg, len),
 * the creation of `result`, and the first argument of the tail move are
 * not shown here -- confirm.
 */
6011rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6019 str_modify_keep_cr(str);
/* Match-register case: a group number out of range yields Qnil. */
6027 if ((nth += regs->num_regs) <= 0)
return Qnil;
6029 else if (nth >= regs->num_regs)
return Qnil;
6031 len = END(nth) - beg;
6034 else if (argc == 2) {
6043 beg = p - RSTRING_PTR(str);
/* Substring-search case: no occurrence yields Qnil; otherwise the range spans the needle's byte length. */
6047 beg = rb_str_index(str, indx, 0);
6048 if (beg == -1)
return Qnil;
6049 len = RSTRING_LEN(indx);
6061 beg = p - RSTRING_PTR(str);
6070 beg = p - RSTRING_PTR(str);
6074 rb_enc_cr_str_copy_for_substr(result, str);
/* Removal from the receiver, followed by the length and terminator update. */
6082 char *sptr = RSTRING_PTR(str);
6083 long slen = RSTRING_LEN(str);
6084 if (beg +
len > slen)
6088 slen - (beg +
len));
6090 STR_SET_LEN(str, slen);
6091 TERM_FILL(&sptr[slen], TERM_LEN(str));
6102 switch (OBJ_BUILTIN_TYPE(pat)) {
6121get_pat_quoted(
VALUE pat,
int check)
6125 switch (OBJ_BUILTIN_TYPE(pat)) {
6139 if (check && is_broken_string(pat)) {
6146rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6149 pos = rb_str_byteindex(str, pat, pos);
6150 if (set_backref_str) {
6152 str = rb_str_new_frozen_String(str);
6153 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6155 *match = match_data;
6165 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6170rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6172 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6190rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6204 hash = rb_check_hash_type(argv[1]);
6210 pat = get_pat_quoted(argv[0], 1);
6212 str_modifiable(str);
6213 beg = rb_pat_search(pat, str, 0, 1);
6227 end0 = beg0 + RSTRING_LEN(pat);
6236 if (iter || !
NIL_P(hash)) {
6237 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6243 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6246 str_mod_check(str, p,
len);
6247 rb_check_frozen(str);
6253 enc = rb_enc_compatible(str, repl);
6256 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6260 rb_enc_inspect_name(str_enc),
6261 rb_enc_inspect_name(STR_ENC_GET(repl)));
6263 enc = STR_ENC_GET(repl);
6266 rb_enc_associate(str, enc);
6276 rlen = RSTRING_LEN(repl);
6277 len = RSTRING_LEN(str);
6279 RESIZE_CAPA(str,
len + rlen - plen);
6281 p = RSTRING_PTR(str);
6283 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6285 rp = RSTRING_PTR(repl);
6286 memmove(p + beg0, rp, rlen);
6288 STR_SET_LEN(str,
len);
6289 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6312 rb_str_sub_bang(argc, argv, str);
6317str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6320 long beg, beg0, end0;
6321 long offset, blen, slen,
len, last;
6322 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6324 int need_backref_str = -1;
6334 hash = rb_check_hash_type(argv[1]);
6338 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6346 rb_error_arity(argc, 1, 2);
6349 pat = get_pat_quoted(argv[0], 1);
6350 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6353 if (bang)
return Qnil;
6358 blen = RSTRING_LEN(str) + 30;
6360 sp = RSTRING_PTR(str);
6361 slen = RSTRING_LEN(str);
6363 str_enc = STR_ENC_GET(str);
6364 rb_enc_associate(dest, str_enc);
6371 end0 = beg0 + RSTRING_LEN(pat);
6385 struct RString fake_str = {RBASIC_INIT};
6387 if (mode == FAST_MAP) {
6396 val = rb_hash_aref(hash, key);
6399 str_mod_check(str, sp, slen);
6404 else if (need_backref_str) {
6406 if (need_backref_str < 0) {
6407 need_backref_str = val != repl;
6414 len = beg0 - offset;
6428 if (RSTRING_LEN(str) <= end0)
break;
6429 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6431 offset = end0 +
len;
6433 cp = RSTRING_PTR(str) + offset;
6434 if (offset > RSTRING_LEN(str))
break;
6437 if (mode != FAST_MAP && mode != STR) {
6440 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6445 if (RSTRING_LEN(str) > offset) {
6448 rb_pat_search0(pat, str, last, 1, &match);
6450 str_shared_replace(str, dest);
6475rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6477 str_modify_keep_cr(str);
6478 return str_gsub(argc, argv, str, 1);
6528 return str_gsub(argc, argv, str, 0);
6548 str_modifiable(str);
6549 if (str == str2)
return str;
6553 return str_replace(str, str2);
6570rb_str_clear(
VALUE str)
6574 STR_SET_LEN(str, 0);
6575 RSTRING_PTR(str)[0] = 0;
6576 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6592rb_str_chr(
VALUE str)
6610 pos += RSTRING_LEN(str);
6611 if (pos < 0 || RSTRING_LEN(str) <= pos)
6614 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6634 long len = RSTRING_LEN(str);
6635 char *
ptr, *head, *left = 0;
6639 if (pos < -
len ||
len <= pos)
6646 char byte = (char)(
NUM2INT(w) & 0xFF);
6648 if (!str_independent(str))
6649 str_make_independent(str);
6650 enc = STR_ENC_GET(str);
6651 head = RSTRING_PTR(str);
6653 if (!STR_EMBED_P(str)) {
6660 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6668 width = rb_enc_precise_mbclen(left, head+
len, enc);
6670 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6686str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6688 long n = RSTRING_LEN(str);
6690 if (beg > n ||
len < 0)
return Qnil;
6693 if (beg < 0)
return Qnil;
6698 if (!empty)
return Qnil;
6702 VALUE str2 = str_subseq(str, beg,
len);
6704 str_enc_copy_direct(str2, str);
6706 if (RSTRING_LEN(str2) == 0) {
6707 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6741 long beg,
len = RSTRING_LEN(str);
6749 return str_byte_substr(str, beg,
len, TRUE);
6754 return str_byte_substr(str, idx, 1, FALSE);
6766rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6771 return str_byte_substr(str, beg,
len, TRUE);
6774 return str_byte_aref(str, argv[0]);
6778str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6780 long end, slen = RSTRING_LEN(str);
6783 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6792 if (*
len > slen - *beg) {
6796 str_ensure_byte_pos(str, *beg);
6797 str_ensure_byte_pos(str, end);
6811rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6813 long beg,
len, vbeg, vlen;
6818 if (!(argc == 2 || argc == 3 || argc == 5)) {
6819 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6823 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6824 rb_builtin_class_name(argv[0]));
6831 vlen = RSTRING_LEN(val);
6836 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6837 rb_builtin_class_name(argv[2]));
6849 vlen = RSTRING_LEN(val);
6857 str_check_beg_len(str, &beg, &
len);
6858 str_check_beg_len(val, &vbeg, &vlen);
6859 str_modify_keep_cr(str);
6862 rb_enc_associate(str, rb_enc_check(str, val));
6865 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6887rb_str_reverse(
VALUE str)
6894 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6895 enc = STR_ENC_GET(str);
6901 if (RSTRING_LEN(str) > 1) {
6902 if (single_byte_optimizable(str)) {
6909 int clen = rb_enc_fast_mbclen(s, e, enc);
6917 cr = rb_enc_asciicompat(enc) ?
6920 int clen = rb_enc_mbclen(s, e, enc);
6929 STR_SET_LEN(rev, RSTRING_LEN(str));
6930 str_enc_copy_direct(rev, str);
6952rb_str_reverse_bang(
VALUE str)
6954 if (RSTRING_LEN(str) > 1) {
6955 if (single_byte_optimizable(str)) {
6958 str_modify_keep_cr(str);
6959 s = RSTRING_PTR(str);
6968 str_shared_replace(str, rb_str_reverse(str));
6972 str_modify_keep_cr(str);
7001 i = rb_str_index(str, arg, 0);
7003 return RBOOL(i != -1);
7047 rb_raise(rb_eArgError,
"invalid radix %d", base);
7049 return rb_str_to_inum(str, base, FALSE);
7074rb_str_to_f(
VALUE str)
7091rb_str_to_s(
VALUE str)
7103 char s[RUBY_MAX_CHAR_LEN];
7104 int n = rb_enc_codelen(c, enc);
7106 rb_enc_mbcput(c, s, enc);
7111#define CHAR_ESC_LEN 13
7114rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7116 char buf[CHAR_ESC_LEN + 1];
7124 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7126 else if (c < 0x10000) {
7127 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7130 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7135 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7138 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7141 l = (int)strlen(buf);
7147ruby_escaped_char(
int c)
7150 case '\0':
return "\\0";
7151 case '\n':
return "\\n";
7152 case '\r':
return "\\r";
7153 case '\t':
return "\\t";
7154 case '\f':
return "\\f";
7155 case '\013':
return "\\v";
7156 case '\010':
return "\\b";
7157 case '\007':
return "\\a";
7158 case '\033':
return "\\e";
7159 case '\x7f':
return "\\c?";
7165rb_str_escape(
VALUE str)
7169 const char *p = RSTRING_PTR(str);
7171 const char *prev = p;
7172 char buf[CHAR_ESC_LEN + 1];
7174 int unicode_p = rb_enc_unicode_p(enc);
7175 int asciicompat = rb_enc_asciicompat(enc);
7180 int n = rb_enc_precise_mbclen(p, pend, enc);
7182 if (p > prev) str_buf_cat(result, prev, p - prev);
7183 n = rb_enc_mbminlen(enc);
7185 n = (int)(pend - p);
7187 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7188 str_buf_cat(result, buf, strlen(buf));
7194 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7196 cc = ruby_escaped_char(c);
7198 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7199 str_buf_cat(result, cc, strlen(cc));
7202 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7205 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7206 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7210 if (p > prev) str_buf_cat(result, prev, p - prev);
7229 const char *p, *pend, *prev;
7230 char buf[CHAR_ESC_LEN + 1];
7232 rb_encoding *resenc = rb_default_internal_encoding();
7233 int unicode_p = rb_enc_unicode_p(enc);
7234 int asciicompat = rb_enc_asciicompat(enc);
7236 if (resenc == NULL) resenc = rb_default_external_encoding();
7237 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7238 rb_enc_associate(result, resenc);
7239 str_buf_cat2(result,
"\"");
7247 n = rb_enc_precise_mbclen(p, pend, enc);
7249 if (p > prev) str_buf_cat(result, prev, p - prev);
7250 n = rb_enc_mbminlen(enc);
7252 n = (int)(pend - p);
7254 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7255 str_buf_cat(result, buf, strlen(buf));
7261 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7263 if ((asciicompat || unicode_p) &&
7264 (c ==
'"'|| c ==
'\\' ||
7269 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7270 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7271 str_buf_cat2(result,
"\\");
7272 if (asciicompat || enc == resenc) {
7278 case '\n': cc =
'n';
break;
7279 case '\r': cc =
'r';
break;
7280 case '\t': cc =
't';
break;
7281 case '\f': cc =
'f';
break;
7282 case '\013': cc =
'v';
break;
7283 case '\010': cc =
'b';
break;
7284 case '\007': cc =
'a';
break;
7285 case 033: cc =
'e';
break;
7286 default: cc = 0;
break;
7289 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7292 str_buf_cat(result, buf, 2);
7305 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7309 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7310 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7315 if (p > prev) str_buf_cat(result, prev, p - prev);
7316 str_buf_cat2(result,
"\"");
7321#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7334 int encidx = rb_enc_get_index(str);
7337 const char *p, *pend;
7340 int u8 = (encidx == rb_utf8_encindex());
7341 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7344 if (!rb_enc_asciicompat(enc)) {
7346 len += strlen(enc->name);
7349 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7352 unsigned char c = *p++;
7355 case '"':
case '\\':
7356 case '\n':
case '\r':
7357 case '\t':
case '\f':
7358 case '\013':
case '\010':
case '\007':
case '\033':
7363 clen = IS_EVSTR(p, pend) ? 2 : 1;
7371 if (u8 && c > 0x7F) {
7372 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7374 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7377 else if (cc <= 0xFFFFF)
7390 if (clen > LONG_MAX -
len) {
7397 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7398 q = RSTRING_PTR(result); qend = q +
len + 1;
7402 unsigned char c = *p++;
7404 if (c ==
'"' || c ==
'\\') {
7408 else if (c ==
'#') {
7409 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7412 else if (c ==
'\n') {
7416 else if (c ==
'\r') {
7420 else if (c ==
'\t') {
7424 else if (c ==
'\f') {
7428 else if (c ==
'\013') {
7432 else if (c ==
'\010') {
7436 else if (c ==
'\007') {
7440 else if (c ==
'\033') {
7450 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7452 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7455 snprintf(q, qend-q,
"u%04X", cc);
7457 snprintf(q, qend-q,
"u{%X}", cc);
7462 snprintf(q, qend-q,
"x%02X", c);
7468 if (!rb_enc_asciicompat(enc)) {
7469 snprintf(q, qend-q, nonascii_suffix, enc->name);
7470 encidx = rb_ascii8bit_encindex();
7473 rb_enc_associate_index(result, encidx);
7479unescape_ascii(
unsigned int c)
7503undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7505 const char *s = *ss;
7509 unsigned char buf[6];
7527 *buf = unescape_ascii(*s);
7539 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7540 if (*penc != enc_utf8) {
7542 rb_enc_associate(undumped, enc_utf8);
7559 if (hexlen == 0 || hexlen > 6) {
7565 if (0xd800 <= c && c <= 0xdfff) {
7568 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7578 if (0xd800 <= c && c <= 0xdfff) {
7581 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7611static VALUE rb_str_is_ascii_only_p(
VALUE str);
7623str_undump(
VALUE str)
7625 const char *s = RSTRING_PTR(str);
7628 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7630 bool binary =
false;
7634 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7637 if (!str_null_check(str, &w)) {
7640 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7641 if (*s !=
'"')
goto invalid_format;
7659 static const char force_encoding_suffix[] =
".force_encoding(\"";
7660 static const char dup_suffix[] =
".dup";
7661 const char *encname;
7666 size =
sizeof(dup_suffix) - 1;
7667 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7669 size =
sizeof(force_encoding_suffix) - 1;
7670 if (s_end - s <= size)
goto invalid_format;
7671 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7675 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7679 s = memchr(s,
'"', s_end-s);
7681 if (!s)
goto invalid_format;
7682 if (s_end - s != 2)
goto invalid_format;
7683 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7685 encidx = rb_enc_find_index2(encname, (
long)size);
7689 rb_enc_associate_index(undumped, encidx);
7699 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7710 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7716 if (rb_enc_dummy_p(enc)) {
7723str_true_enc(
VALUE str)
7726 rb_str_check_dummy_enc(enc);
7730static OnigCaseFoldType
7731check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7736 rb_raise(rb_eArgError,
"too many options");
7737 if (argv[0]==sym_turkic) {
7738 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7740 if (argv[1]==sym_lithuanian)
7741 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7743 rb_raise(rb_eArgError,
"invalid second option");
7746 else if (argv[0]==sym_lithuanian) {
7747 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7749 if (argv[1]==sym_turkic)
7750 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7752 rb_raise(rb_eArgError,
"invalid second option");
7756 rb_raise(rb_eArgError,
"too many options");
7757 else if (argv[0]==sym_ascii)
7758 flags |= ONIGENC_CASE_ASCII_ONLY;
7759 else if (argv[0]==sym_fold) {
7760 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7761 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7763 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7766 rb_raise(rb_eArgError,
"invalid option");
7773 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
7779#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7780#ifndef CASEMAP_DEBUG
7781# define CASEMAP_DEBUG 0
7789 OnigUChar space[FLEX_ARY_LEN];
7793mapping_buffer_free(
void *p)
7797 while (current_buffer) {
7798 previous_buffer = current_buffer;
7799 current_buffer = current_buffer->next;
7800 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7806 {0, mapping_buffer_free,},
7807 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7815 const OnigUChar *source_current, *source_end;
7816 int target_length = 0;
7817 VALUE buffer_anchor;
7820 size_t buffer_count = 0;
7821 int buffer_length_or_invalid;
7823 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7825 source_current = (OnigUChar*)RSTRING_PTR(source);
7830 while (source_current < source_end) {
7832 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7833 if (CASEMAP_DEBUG) {
7834 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7837 *pre_buffer = current_buffer;
7838 pre_buffer = ¤t_buffer->next;
7839 current_buffer->next = NULL;
7840 current_buffer->capa =
capa;
7841 buffer_length_or_invalid = enc->case_map(flags,
7842 &source_current, source_end,
7843 current_buffer->space,
7844 current_buffer->space+current_buffer->capa,
7846 if (buffer_length_or_invalid < 0) {
7847 current_buffer =
DATA_PTR(buffer_anchor);
7849 mapping_buffer_free(current_buffer);
7850 rb_raise(rb_eArgError,
"input string invalid");
7852 target_length += current_buffer->used = buffer_length_or_invalid;
7854 if (CASEMAP_DEBUG) {
7855 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7858 if (buffer_count==1) {
7859 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7862 char *target_current;
7865 target_current = RSTRING_PTR(target);
7866 current_buffer =
DATA_PTR(buffer_anchor);
7867 while (current_buffer) {
7868 memcpy(target_current, current_buffer->space, current_buffer->used);
7869 target_current += current_buffer->used;
7870 current_buffer = current_buffer->next;
7873 current_buffer =
DATA_PTR(buffer_anchor);
7875 mapping_buffer_free(current_buffer);
7880 str_enc_copy_direct(target, source);
7889 const OnigUChar *source_current, *source_end;
7890 OnigUChar *target_current, *target_end;
7891 long old_length = RSTRING_LEN(source);
7892 int length_or_invalid;
7894 if (old_length == 0)
return Qnil;
7896 source_current = (OnigUChar*)RSTRING_PTR(source);
7898 if (source == target) {
7899 target_current = (OnigUChar*)source_current;
7900 target_end = (OnigUChar*)source_end;
7903 target_current = (OnigUChar*)RSTRING_PTR(target);
7907 length_or_invalid = onigenc_ascii_only_case_map(flags,
7908 &source_current, source_end,
7909 target_current, target_end, enc);
7910 if (length_or_invalid < 0)
7911 rb_raise(rb_eArgError,
"input string invalid");
7912 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7913 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7914 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7915 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7916 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7919 str_enc_copy(target, source);
7925upcase_single(
VALUE str)
7927 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7928 bool modified =
false;
7931 unsigned int c = *(
unsigned char*)s;
7933 if (
'a' <= c && c <=
'z') {
7934 *s =
'A' + (c -
'a');
7955rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7958 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7960 flags = check_case_options(argc, argv, flags);
7961 str_modify_keep_cr(str);
7962 enc = str_true_enc(str);
7963 if (case_option_single_p(flags, enc, str)) {
7964 if (upcase_single(str))
7965 flags |= ONIGENC_CASE_MODIFIED;
7967 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7968 rb_str_ascii_casemap(str, str, &flags, enc);
7970 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7972 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7985rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7988 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7991 flags = check_case_options(argc, argv, flags);
7992 enc = str_true_enc(str);
7993 if (case_option_single_p(flags, enc, str)) {
7994 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7995 str_enc_copy_direct(ret, str);
7998 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8000 rb_str_ascii_casemap(str, ret, &flags, enc);
8003 ret = rb_str_casemap(str, &flags, enc);
8010downcase_single(
VALUE str)
8012 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8013 bool modified =
false;
8016 unsigned int c = *(
unsigned char*)s;
8018 if (
'A' <= c && c <=
'Z') {
8019 *s =
'a' + (c -
'A');
8041rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8044 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8046 flags = check_case_options(argc, argv, flags);
8047 str_modify_keep_cr(str);
8048 enc = str_true_enc(str);
8049 if (case_option_single_p(flags, enc, str)) {
8050 if (downcase_single(str))
8051 flags |= ONIGENC_CASE_MODIFIED;
8053 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8054 rb_str_ascii_casemap(str, str, &flags, enc);
8056 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8058 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8072rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8075 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8078 flags = check_case_options(argc, argv, flags);
8079 enc = str_true_enc(str);
8080 if (case_option_single_p(flags, enc, str)) {
8081 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8082 str_enc_copy_direct(ret, str);
8083 downcase_single(ret);
8085 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8087 rb_str_ascii_casemap(str, ret, &flags, enc);
8090 ret = rb_str_casemap(str, &flags, enc);
8110rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8113 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8115 flags = check_case_options(argc, argv, flags);
8116 str_modify_keep_cr(str);
8117 enc = str_true_enc(str);
8118 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8119 if (flags&ONIGENC_CASE_ASCII_ONLY)
8120 rb_str_ascii_casemap(str, str, &flags, enc);
8122 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8124 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8138rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8141 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8144 flags = check_case_options(argc, argv, flags);
8145 enc = str_true_enc(str);
8146 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8147 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8149 rb_str_ascii_casemap(str, ret, &flags, enc);
8152 ret = rb_str_casemap(str, &flags, enc);
8171rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8174 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8176 flags = check_case_options(argc, argv, flags);
8177 str_modify_keep_cr(str);
8178 enc = str_true_enc(str);
8179 if (flags&ONIGENC_CASE_ASCII_ONLY)
8180 rb_str_ascii_casemap(str, str, &flags, enc);
8182 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8184 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8198rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8201 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8204 flags = check_case_options(argc, argv, flags);
8205 enc = str_true_enc(str);
8206 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8207 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8209 rb_str_ascii_casemap(str, ret, &flags, enc);
8212 ret = rb_str_casemap(str, &flags, enc);
8217typedef unsigned char *USTR;
8221 unsigned int now, max;
8233 if (t->p == t->pend)
return -1;
8234 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8237 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8239 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8241 if (t->p < t->pend) {
8242 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8245 if (t->now < 0x80 && c < 0x80) {
8246 rb_raise(rb_eArgError,
8247 "invalid range \"%c-%c\" in string transliteration",
8251 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8255 else if (t->now < c) {
8264 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8265 if (t->now == t->max) {
8270 if (t->now < t->max) {
8286 const unsigned int errc = -1;
8287 unsigned int trans[256];
8289 struct tr trsrc, trrepl;
8291 unsigned int c, c0, last = 0;
8292 int modify = 0, i, l;
8293 unsigned char *s, *send;
8295 int singlebyte = single_byte_optimizable(str);
8299#define CHECK_IF_ASCII(c) \
8300 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8301 (cr = ENC_CODERANGE_VALID) : 0)
8305 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8306 if (RSTRING_LEN(repl) == 0) {
8307 return rb_str_delete_bang(1, &src, str);
8311 e1 = rb_enc_check(str, src);
8312 e2 = rb_enc_check(str, repl);
8317 enc = rb_enc_check(src, repl);
8319 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8320 if (RSTRING_LEN(src) > 1 &&
8321 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8322 trsrc.p + l < trsrc.pend) {
8326 trrepl.p = RSTRING_PTR(repl);
8327 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8328 trsrc.gen = trrepl.gen = 0;
8329 trsrc.now = trrepl.now = 0;
8330 trsrc.max = trrepl.max = 0;
8333 for (i=0; i<256; i++) {
8336 while ((c = trnext(&trsrc, enc)) != errc) {
8341 if (!hash) hash = rb_hash_new();
8345 while ((c = trnext(&trrepl, enc)) != errc)
8348 for (i=0; i<256; i++) {
8349 if (trans[i] != errc) {
8357 for (i=0; i<256; i++) {
8360 while ((c = trnext(&trsrc, enc)) != errc) {
8361 r = trnext(&trrepl, enc);
8362 if (r == errc) r = trrepl.now;
8365 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8368 if (!hash) hash = rb_hash_new();
8376 str_modify_keep_cr(str);
8377 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8378 termlen = rb_enc_mbminlen(enc);
8381 long offset, max = RSTRING_LEN(str);
8382 unsigned int save = -1;
8383 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8388 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8391 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8394 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8396 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8405 if (cflag) c = last;
8408 else if (cflag) c = errc;
8414 if (c != (
unsigned int)-1) {
8420 tlen = rb_enc_codelen(c, enc);
8426 if (enc != e1) may_modify = 1;
8428 if ((offset = t - buf) + tlen > max) {
8429 size_t MAYBE_UNUSED(old) = max + termlen;
8430 max = offset + tlen + (send - s);
8431 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8434 rb_enc_mbcput(c, t, enc);
8435 if (may_modify && memcmp(s, t, tlen) != 0) {
8441 if (!STR_EMBED_P(str)) {
8442 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8444 TERM_FILL((
char *)t, termlen);
8445 RSTRING(str)->as.heap.ptr = (
char *)buf;
8446 STR_SET_LEN(str, t - buf);
8447 STR_SET_NOEMBED(str);
8448 RSTRING(str)->as.heap.aux.capa = max;
8452 c = (
unsigned char)*s;
8453 if (trans[c] != errc) {
8470 long offset, max = (long)((send - s) * 1.2);
8471 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8476 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8479 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8482 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8484 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8492 if (cflag) c = last;
8495 else if (cflag) c = errc;
8499 c = cflag ? last : errc;
8502 tlen = rb_enc_codelen(c, enc);
8507 if (enc != e1) may_modify = 1;
8509 if ((offset = t - buf) + tlen > max) {
8510 size_t MAYBE_UNUSED(old) = max + termlen;
8511 max = offset + tlen + (long)((send - s) * 1.2);
8512 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8516 rb_enc_mbcput(c, t, enc);
8517 if (may_modify && memcmp(s, t, tlen) != 0) {
8525 if (!STR_EMBED_P(str)) {
8526 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8528 TERM_FILL((
char *)t, termlen);
8529 RSTRING(str)->as.heap.ptr = (
char *)buf;
8530 STR_SET_LEN(str, t - buf);
8531 STR_SET_NOEMBED(str);
8532 RSTRING(str)->as.heap.aux.capa = max;
8538 rb_enc_associate(str, enc);
8560 return tr_trans(str, src, repl, 0);
8605 tr_trans(str, src, repl, 0);
8609#define TR_TABLE_MAX (UCHAR_MAX+1)
8610#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8612tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8615 const unsigned int errc = -1;
8616 char buf[TR_TABLE_MAX];
8619 VALUE table = 0, ptable = 0;
8620 int i, l, cflag = 0;
8622 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8623 tr.gen =
tr.now =
tr.max = 0;
8625 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8630 for (i=0; i<TR_TABLE_MAX; i++) {
8633 stable[TR_TABLE_MAX] = cflag;
8635 else if (stable[TR_TABLE_MAX] && !cflag) {
8636 stable[TR_TABLE_MAX] = 0;
8638 for (i=0; i<TR_TABLE_MAX; i++) {
8642 while ((c = trnext(&
tr, enc)) != errc) {
8643 if (c < TR_TABLE_MAX) {
8644 buf[(
unsigned char)c] = !cflag;
8649 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8652 table = ptable ? ptable : rb_hash_new();
8656 table = rb_hash_new();
8661 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8662 rb_hash_aset(table, key,
Qtrue);
8666 for (i=0; i<TR_TABLE_MAX; i++) {
8667 stable[i] = stable[i] && buf[i];
8669 if (!table && !cflag) {
8676tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8678 if (c < TR_TABLE_MAX) {
8679 return table[c] != 0;
8685 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8686 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8690 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8693 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8708rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8710 char squeez[TR_TABLE_SIZE];
8713 VALUE del = 0, nodel = 0;
8715 int i, ascompat, cr;
8717 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8719 for (i=0; i<argc; i++) {
8723 enc = rb_enc_check(str, s);
8724 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8727 str_modify_keep_cr(str);
8728 ascompat = rb_enc_asciicompat(enc);
8729 s = t = RSTRING_PTR(str);
8736 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8747 c = rb_enc_codepoint_len(s, send, &clen, enc);
8749 if (tr_find(c, squeez, del, nodel)) {
8753 if (t != s) rb_enc_mbcput(c, t, enc);
8760 TERM_FILL(t, TERM_LEN(str));
8761 STR_SET_LEN(str, t - RSTRING_PTR(str));
8764 if (modify)
return str;
8778rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8781 rb_str_delete_bang(argc, argv, str);
8799rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8801 char squeez[TR_TABLE_SIZE];
8803 VALUE del = 0, nodel = 0;
8804 unsigned char *s, *send, *t;
8806 int ascompat, singlebyte = single_byte_optimizable(str);
8810 enc = STR_ENC_GET(str);
8813 for (i=0; i<argc; i++) {
8817 enc = rb_enc_check(str, s);
8818 if (singlebyte && !single_byte_optimizable(s))
8820 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8824 str_modify_keep_cr(str);
8825 s = t = (
unsigned char *)RSTRING_PTR(str);
8826 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8829 ascompat = rb_enc_asciicompat(enc);
8833 unsigned int c = *s++;
8834 if (c != save || (argc > 0 && !squeez[c])) {
8844 if (ascompat && (c = *s) < 0x80) {
8845 if (c != save || (argc > 0 && !squeez[c])) {
8851 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8853 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8854 if (t != s) rb_enc_mbcput(c, t, enc);
8863 TERM_FILL((
char *)t, TERM_LEN(str));
8864 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8865 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8869 if (modify)
return str;
8883rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8886 rb_str_squeeze_bang(argc, argv, str);
8906 return tr_trans(str, src, repl, 1);
8934 tr_trans(str, src, repl, 1);
8947rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8949 char table[TR_TABLE_SIZE];
8951 VALUE del = 0, nodel = 0, tstr;
8961 enc = rb_enc_check(str, tstr);
8964 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8965 (ptstr = RSTRING_PTR(tstr),
8966 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8967 !is_broken_string(str)) {
8969 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8971 s = RSTRING_PTR(str);
8972 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8975 if (*(
unsigned char*)s++ == c) n++;
8981 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8982 for (i=1; i<argc; i++) {
8985 enc = rb_enc_check(str, tstr);
8986 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8989 s = RSTRING_PTR(str);
8990 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8992 ascompat = rb_enc_asciicompat(enc);
8996 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9004 c = rb_enc_codepoint_len(s, send, &clen, enc);
9005 if (tr_find(c, table, del, nodel)) {
9016rb_fs_check(
VALUE val)
9020 if (
NIL_P(val))
return 0;
9025static const char isspacetable[256] = {
9026 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9027 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9028 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9029 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9030 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9031 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9032 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9033 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9034 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9035 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9036 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9037 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9038 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9039 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9040 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9041 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9044#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9047split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9049 if (empty_count >= 0 &&
len == 0) {
9050 return empty_count + 1;
9052 if (empty_count > 0) {
9057 }
while (--empty_count > 0);
9061 rb_yield(str_new_empty_String(str));
9062 }
while (--empty_count > 0);
9076 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9080literal_split_pattern(
VALUE spat, split_type_t default_type)
9088 return SPLIT_TYPE_CHARS;
9090 else if (rb_enc_asciicompat(enc)) {
9091 if (
len == 1 && ptr[0] ==
' ') {
9092 return SPLIT_TYPE_AWK;
9097 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9098 return SPLIT_TYPE_AWK;
9101 return default_type;
9114rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9119 split_type_t split_type;
9120 long beg, end, i = 0, empty_count = -1;
9125 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9127 if (lim <= 0) limit =
Qnil;
9128 else if (lim == 1) {
9129 if (RSTRING_LEN(str) == 0)
9140 if (
NIL_P(limit) && !lim) empty_count = 0;
9142 enc = STR_ENC_GET(str);
9143 split_type = SPLIT_TYPE_REGEXP;
9145 spat = get_pat_quoted(spat, 0);
9147 else if (
NIL_P(spat = rb_fs)) {
9148 split_type = SPLIT_TYPE_AWK;
9150 else if (!(spat = rb_fs_check(spat))) {
9151 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9156 if (split_type != SPLIT_TYPE_AWK) {
9161 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9162 if (split_type == SPLIT_TYPE_AWK) {
9164 split_type = SPLIT_TYPE_STRING;
9169 mustnot_broken(spat);
9170 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9178#define SPLIT_STR(beg, len) ( \
9179 empty_count = split_string(result, str, beg, len, empty_count), \
9180 str_mod_check(str, str_start, str_len))
9183 char *ptr = RSTRING_PTR(str);
9184 char *
const str_start = ptr;
9185 const long str_len = RSTRING_LEN(str);
9186 char *
const eptr = str_start + str_len;
9187 if (split_type == SPLIT_TYPE_AWK) {
9194 if (is_ascii_string(str)) {
9195 while (ptr < eptr) {
9196 c = (
unsigned char)*ptr++;
9198 if (ascii_isspace(c)) {
9204 if (!
NIL_P(limit) && lim <= i)
break;
9207 else if (ascii_isspace(c)) {
9208 SPLIT_STR(beg, end-beg);
9211 if (!
NIL_P(limit)) ++i;
9219 while (ptr < eptr) {
9222 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9231 if (!
NIL_P(limit) && lim <= i)
break;
9235 SPLIT_STR(beg, end-beg);
9238 if (!
NIL_P(limit)) ++i;
9246 else if (split_type == SPLIT_TYPE_STRING) {
9247 char *substr_start = ptr;
9248 char *sptr = RSTRING_PTR(spat);
9249 long slen = RSTRING_LEN(spat);
9252 mustnot_broken(str);
9253 enc = rb_enc_check(str, spat);
9254 while (ptr < eptr &&
9255 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9258 if (t != ptr + end) {
9262 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9263 str_mod_check(spat, sptr, slen);
9266 if (!
NIL_P(limit) && lim <= ++i)
break;
9268 beg = ptr - str_start;
9270 else if (split_type == SPLIT_TYPE_CHARS) {
9274 mustnot_broken(str);
9275 enc = rb_enc_get(str);
9276 while (ptr < eptr &&
9277 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9278 SPLIT_STR(ptr - str_start, n);
9280 if (!
NIL_P(limit) && lim <= ++i)
break;
9282 beg = ptr - str_start;
9286 long len = RSTRING_LEN(str);
9294 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9299 if (start == end && BEG(0) == END(0)) {
9304 else if (last_null == 1) {
9305 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9312 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9318 SPLIT_STR(beg, end-beg);
9319 beg = start = END(0);
9323 for (idx=1; idx < regs->num_regs; idx++) {
9324 if (BEG(idx) == -1)
continue;
9325 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9327 if (!
NIL_P(limit) && lim <= ++i)
break;
9329 if (match) rb_match_unbusy(match);
9331 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9332 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9335 return result ? result : str;
9345 return rb_str_split_m(1, &sep, str);
9348#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9363#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9366chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9368 const char *prev = rb_enc_prev_char(p, e, e, enc);
9371 prev = rb_enc_prev_char(p, e, e, enc);
9372 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9384 RSTRING_LEN(rs) != 1 ||
9385 RSTRING_PTR(rs)[0] !=
'\n')) {
9391#define rb_rs get_rs()
9398 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9399 long pos,
len, rslen;
9405 static ID keywords[1];
9410 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9414 if (!ENUM_ELEM(ary, str)) {
9422 if (!RSTRING_LEN(str))
goto end;
9424 ptr = subptr = RSTRING_PTR(str);
9426 len = RSTRING_LEN(str);
9428 rslen = RSTRING_LEN(rs);
9431 enc = rb_enc_get(str);
9433 enc = rb_enc_check(str, rs);
9438 const char *eol = NULL;
9440 while (subend < pend) {
9441 long chomp_rslen = 0;
9443 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9445 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9447 if (eol == subend)
break;
9451 chomp_rslen = -rslen;
9455 if (!subptr) subptr = subend;
9459 }
while (subend < pend);
9461 if (rslen == 0) chomp_rslen = 0;
9463 subend - subptr + (chomp ? chomp_rslen : rslen));
9464 if (ENUM_ELEM(ary, line)) {
9465 str_mod_check(str, ptr,
len);
9467 subptr = eol = NULL;
9472 rsptr = RSTRING_PTR(rs);
9473 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9482 rsptr = RSTRING_PTR(rs);
9483 rslen = RSTRING_LEN(rs);
9486 while (subptr < pend) {
9487 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9491 if (hit != adjusted) {
9495 subend = hit += rslen;
9498 subend = chomp_newline(subptr, subend, enc);
9505 if (ENUM_ELEM(ary, line)) {
9506 str_mod_check(str, ptr,
len);
9511 if (subptr != pend) {
9514 pend = chomp_newline(subptr, pend, enc);
9516 else if (pend - subptr >= rslen &&
9517 memcmp(pend - rslen, rsptr, rslen) == 0) {
9522 ENUM_ELEM(ary, line);
9543rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9546 return rb_str_enumerate_lines(argc, argv, str, 0);
9601rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9603 VALUE ary = WANTARRAY(
"lines", 0);
9604 return rb_str_enumerate_lines(argc, argv, str, ary);
9618 for (i=0; i<RSTRING_LEN(str); i++) {
9619 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9637rb_str_each_byte(
VALUE str)
9640 return rb_str_enumerate_bytes(str, 0);
9652rb_str_bytes(
VALUE str)
9654 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9655 return rb_str_enumerate_bytes(str, ary);
9673 ptr = RSTRING_PTR(str);
9674 len = RSTRING_LEN(str);
9675 enc = rb_enc_get(str);
9678 for (i = 0; i <
len; i += n) {
9679 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9684 for (i = 0; i <
len; i += n) {
9685 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9706rb_str_each_char(
VALUE str)
9709 return rb_str_enumerate_chars(str, 0);
9721rb_str_chars(
VALUE str)
9724 return rb_str_enumerate_chars(str, ary);
9728rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9733 const char *ptr, *end;
9736 if (single_byte_optimizable(str))
9737 return rb_str_enumerate_bytes(str, ary);
9740 ptr = RSTRING_PTR(str);
9742 enc = STR_ENC_GET(str);
9745 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9766rb_str_each_codepoint(
VALUE str)
9769 return rb_str_enumerate_codepoints(str, 0);
9781rb_str_codepoints(
VALUE str)
9784 return rb_str_enumerate_codepoints(str, ary);
9790 int encidx = rb_enc_to_index(enc);
9792 const OnigUChar source_ascii[] =
"\\X";
9793 const OnigUChar *source = source_ascii;
9794 size_t source_len =
sizeof(source_ascii) - 1;
9797#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9798#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9799#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9800#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9801#define CASE_UTF(e) \
9802 case ENCINDEX_UTF_##e: { \
9803 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9804 source = source_UTF_##e; \
9805 source_len = sizeof(source_UTF_##e); \
9808 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9816 regex_t *reg_grapheme_cluster;
9818 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9819 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9821 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9822 onig_error_code_to_str(message, r, &einfo);
9823 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9826 return reg_grapheme_cluster;
9832 int encidx = rb_enc_to_index(enc);
9833 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9835 if (encidx == rb_utf8_encindex()) {
9836 if (!reg_grapheme_cluster_utf8) {
9837 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9840 return reg_grapheme_cluster_utf8;
9849 size_t grapheme_cluster_count = 0;
9851 const char *ptr, *end;
9853 if (!rb_enc_unicode_p(enc)) {
9857 bool cached_reg_grapheme_cluster =
true;
9858 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9859 if (!reg_grapheme_cluster) {
9860 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9861 cached_reg_grapheme_cluster =
false;
9864 ptr = RSTRING_PTR(str);
9868 OnigPosition
len = onig_match(reg_grapheme_cluster,
9869 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9870 (
const OnigUChar *)ptr, NULL, 0);
9871 if (
len <= 0)
break;
9872 grapheme_cluster_count++;
9876 if (!cached_reg_grapheme_cluster) {
9877 onig_free(reg_grapheme_cluster);
9880 return SIZET2NUM(grapheme_cluster_count);
9884rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9888 const char *ptr0, *ptr, *end;
9890 if (!rb_enc_unicode_p(enc)) {
9891 return rb_str_enumerate_chars(str, ary);
9896 bool cached_reg_grapheme_cluster =
true;
9897 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9898 if (!reg_grapheme_cluster) {
9899 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9900 cached_reg_grapheme_cluster =
false;
9903 ptr0 = ptr = RSTRING_PTR(str);
9907 OnigPosition
len = onig_match(reg_grapheme_cluster,
9908 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9909 (
const OnigUChar *)ptr, NULL, 0);
9910 if (
len <= 0)
break;
9915 if (!cached_reg_grapheme_cluster) {
9916 onig_free(reg_grapheme_cluster);
9936rb_str_each_grapheme_cluster(
VALUE str)
9939 return rb_str_enumerate_grapheme_clusters(str, 0);
9951rb_str_grapheme_clusters(
VALUE str)
9954 return rb_str_enumerate_grapheme_clusters(str, ary);
9958chopped_length(
VALUE str)
9961 const char *p, *p2, *beg, *end;
9963 beg = RSTRING_PTR(str);
9964 end = beg + RSTRING_LEN(str);
9965 if (beg >= end)
return 0;
9966 p = rb_enc_prev_char(beg, end, end, enc);
9968 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9969 p2 = rb_enc_prev_char(beg, p, end, enc);
9970 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9988rb_str_chop_bang(
VALUE str)
9990 str_modify_keep_cr(str);
9991 if (RSTRING_LEN(str) > 0) {
9993 len = chopped_length(str);
9994 STR_SET_LEN(str,
len);
9995 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10014rb_str_chop(
VALUE str)
10020smart_chomp(
VALUE str,
const char *e,
const char *p)
10023 if (rb_enc_mbminlen(enc) > 1) {
10028 pp = e - rb_enc_mbminlen(enc);
10031 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10039 if (--e > p && *(e-1) ==
'\r') {
10056 char *pp, *e, *rsptr;
10058 char *
const p = RSTRING_PTR(str);
10059 long len = RSTRING_LEN(str);
10061 if (
len == 0)
return 0;
10064 return smart_chomp(str, e, p);
10067 enc = rb_enc_get(str);
10070 if (rb_enc_mbminlen(enc) > 1) {
10075 pp -= rb_enc_mbminlen(enc);
10078 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10085 while (e > p && *(e-1) ==
'\n') {
10087 if (e > p && *(e-1) ==
'\r')
10093 if (rslen >
len)
return len;
10095 enc = rb_enc_get(rs);
10096 newline = rsptr[rslen-1];
10097 if (rslen == rb_enc_mbminlen(enc)) {
10099 if (newline ==
'\n')
10100 return smart_chomp(str, e, p);
10104 return smart_chomp(str, e, p);
10108 enc = rb_enc_check(str, rs);
10109 if (is_broken_string(rs)) {
10113 if (p[
len-1] == newline &&
10115 memcmp(rsptr, pp, rslen) == 0)) {
10116 if (at_char_boundary(p, pp, e, enc))
10117 return len - rslen;
10129chomp_rs(
int argc,
const VALUE *argv)
10133 VALUE rs = argv[0];
10145 long olen = RSTRING_LEN(str);
10146 long len = chompped_length(str, rs);
10147 if (
len >= olen)
return Qnil;
10148 str_modify_keep_cr(str);
10149 STR_SET_LEN(str,
len);
10150 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10170rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10173 str_modifiable(str);
10174 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10175 rs = chomp_rs(argc, argv);
10177 return rb_str_chomp_string(str, rs);
10190rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10192 VALUE rs = chomp_rs(argc, argv);
10198tr_setup_table_multi(
char table[TR_TABLE_SIZE],
VALUE *tablep,
VALUE *ctablep,
10199 VALUE str,
int num_selectors,
VALUE *selectors)
10203 for (i=0; i<num_selectors; i++) {
10204 VALUE selector = selectors[i];
10208 enc = rb_enc_check(str, selector);
10209 tr_setup_table(selector, table, i==0, tablep, ctablep, enc);
10216 const char *
const start = s;
10218 if (!s || s >= e)
return 0;
10221 if (single_byte_optimizable(str)) {
10222 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10227 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10237lstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10238 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10240 const char *
const start = s;
10242 if (!s || s >= e)
return 0;
10247 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10249 if (!tr_find(cc, table, del, nodel))
break;
10268rb_str_lstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10272 long olen, loffset;
10274 str_modify_keep_cr(str);
10275 enc = STR_ENC_GET(str);
10278 char table[TR_TABLE_SIZE];
10279 VALUE del = 0, nodel = 0;
10281 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10282 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10285 loffset = lstrip_offset(str, start, start+olen, enc);
10289 long len = olen-loffset;
10290 s = start + loffset;
10291 memmove(start, s,
len);
10292 STR_SET_LEN(str,
len);
10293 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10328rb_str_lstrip(
int argc,
VALUE *argv,
VALUE str)
10335 char table[TR_TABLE_SIZE];
10336 VALUE del = 0, nodel = 0;
10338 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10339 loffset = lstrip_offset_table(str, start, start+
len, STR_ENC_GET(str), table, del, nodel);
10342 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10344 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10353 rb_str_check_dummy_enc(enc);
10357 if (!s || s >= e)
return 0;
10361 if (single_byte_optimizable(str)) {
10363 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10368 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10378rstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10379 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10384 rb_str_check_dummy_enc(enc);
10388 if (!s || s >= e)
return 0;
10392 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10394 if (!tr_find(c, table, del, nodel))
break;
10414rb_str_rstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10418 long olen, roffset;
10420 str_modify_keep_cr(str);
10421 enc = STR_ENC_GET(str);
10424 char table[TR_TABLE_SIZE];
10425 VALUE del = 0, nodel = 0;
10427 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10428 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10431 roffset = rstrip_offset(str, start, start+olen, enc);
10434 long len = olen - roffset;
10436 STR_SET_LEN(str,
len);
10437 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10471rb_str_rstrip(
int argc,
VALUE *argv,
VALUE str)
10475 long olen, roffset;
10477 enc = STR_ENC_GET(str);
10480 char table[TR_TABLE_SIZE];
10481 VALUE del = 0, nodel = 0;
10483 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10484 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10487 roffset = rstrip_offset(str, start, start+olen, enc);
10489 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10507rb_str_strip_bang(
int argc,
VALUE *argv,
VALUE str)
10510 long olen, loffset, roffset;
10513 str_modify_keep_cr(str);
10514 enc = STR_ENC_GET(str);
10518 char table[TR_TABLE_SIZE];
10519 VALUE del = 0, nodel = 0;
10521 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10522 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10523 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10526 loffset = lstrip_offset(str, start, start+olen, enc);
10527 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10530 if (loffset > 0 || roffset > 0) {
10531 long len = olen-roffset;
10534 memmove(start, start + loffset,
len);
10536 STR_SET_LEN(str,
len);
10537 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10572rb_str_strip(
int argc,
VALUE *argv,
VALUE str)
10575 long olen, loffset, roffset;
10581 char table[TR_TABLE_SIZE];
10582 VALUE del = 0, nodel = 0;
10584 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10585 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10586 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10589 loffset = lstrip_offset(str, start, start+olen, enc);
10590 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10593 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10598scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10601 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10607 end = pos + RSTRING_LEN(pat);
10621 if (RSTRING_LEN(str) > end)
10622 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10631 if (!regs || regs->num_regs == 1) {
10637 for (
int i = 1; i < regs->num_regs; i++) {
10668 long last = -1, prev = 0;
10669 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10671 pat = get_pat_quoted(pat, 1);
10672 mustnot_broken(str);
10676 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10681 if (last >= 0) rb_pat_search(pat, str, last, 1);
10686 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10690 str_mod_check(str, p,
len);
10692 if (last >= 0) rb_pat_search(pat, str, last, 1);
10744rb_str_hex(
VALUE str)
10746 return rb_str_to_inum(str, 16, FALSE);
10830rb_str_oct(
VALUE str)
10832 return rb_str_to_inum(str, -8, FALSE);
10835#ifndef HAVE_CRYPT_R
10840 rb_nativethread_lock_t lock;
10841} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10910# define CRYPT_END() ALLOCV_END(databuf)
10913 extern char *crypt(
const char *,
const char *);
10914# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10917 const char *s, *saltp;
10920 char salt_8bit_clean[3];
10924 mustnot_wchar(str);
10925 mustnot_wchar(salt);
10927 saltp = RSTRING_PTR(salt);
10928 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10929 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10933 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10934 salt_8bit_clean[0] = saltp[0] & 0x7f;
10935 salt_8bit_clean[1] = saltp[1] & 0x7f;
10936 salt_8bit_clean[2] =
'\0';
10937 saltp = salt_8bit_clean;
10942# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10943 data->initialized = 0;
10945 res = crypt_r(s, saltp, data);
10948 res = crypt(s, saltp);
10963 size_t res_size = strlen(res)+1;
10964 tmp_buf =
ALLOCA_N(
char, res_size);
10965 memcpy(tmp_buf, res, res_size);
11002 char *ptr, *p, *pend;
11005 unsigned long sum0 = 0;
11010 ptr = p = RSTRING_PTR(str);
11011 len = RSTRING_LEN(str);
11017 str_mod_check(str, ptr,
len);
11020 sum0 += (
unsigned char)*p;
11031 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11032 sum0 &= (((
unsigned long)1)<<bits)-1;
11052rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11056 long width,
len, flen = 1, fclen = 1;
11059 const char *f =
" ";
11060 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11062 int singlebyte = 1, cr;
11066 enc = STR_ENC_GET(str);
11067 termlen = rb_enc_mbminlen(enc);
11071 enc = rb_enc_check(str, pad);
11072 f = RSTRING_PTR(pad);
11073 flen = RSTRING_LEN(pad);
11074 fclen = str_strlen(pad, enc);
11075 singlebyte = single_byte_optimizable(pad);
11076 if (flen == 0 || fclen == 0) {
11077 rb_raise(rb_eArgError,
"zero width padding");
11080 len = str_strlen(str, enc);
11081 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11083 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11087 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11088 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11090 size = RSTRING_LEN(str);
11091 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11092 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11093 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11094 rb_raise(rb_eArgError,
"argument too big");
11098 p = RSTRING_PTR(res);
11100 memset(p, *f, llen);
11104 while (llen >= fclen) {
11110 memcpy(p, f, llen2);
11114 memcpy(p, RSTRING_PTR(str), size);
11117 memset(p, *f, rlen);
11121 while (rlen >= fclen) {
11127 memcpy(p, f, rlen2);
11131 TERM_FILL(p, termlen);
11132 STR_SET_LEN(res, p-RSTRING_PTR(res));
11153rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11155 return rb_str_justify(argc, argv, str,
'l');
11167rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11169 return rb_str_justify(argc, argv, str,
'r');
11182rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11184 return rb_str_justify(argc, argv, str,
'c');
11200 sep = get_pat_quoted(sep, 0);
11212 pos = rb_str_index(str, sep, 0);
11213 if (pos < 0)
goto failed;
11218 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11221 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11235 long pos = RSTRING_LEN(str);
11237 sep = get_pat_quoted(sep, 0);
11250 pos = rb_str_rindex(str, sep, pos);
11259 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11261 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11273rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11277 for (i=0; i<argc; i++) {
11278 VALUE tmp = argv[i];
11280 if (rb_reg_start_with_p(tmp, str))
11284 const char *p, *s, *e;
11289 enc = rb_enc_check(str, tmp);
11290 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11291 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11292 p = RSTRING_PTR(str);
11295 if (!at_char_right_boundary(p, s, e, enc))
11297 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11313rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11317 for (i=0; i<argc; i++) {
11318 VALUE tmp = argv[i];
11319 const char *p, *s, *e;
11324 enc = rb_enc_check(str, tmp);
11325 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11326 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11327 p = RSTRING_PTR(str);
11330 if (!at_char_boundary(p, s, e, enc))
11332 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11348deleted_prefix_length(
VALUE str,
VALUE prefix)
11350 const char *strptr, *prefixptr;
11351 long olen, prefixlen;
11356 if (!is_broken_string(prefix) ||
11357 !rb_enc_asciicompat(enc) ||
11358 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11359 enc = rb_enc_check(str, prefix);
11363 prefixlen = RSTRING_LEN(prefix);
11364 if (prefixlen <= 0)
return 0;
11365 olen = RSTRING_LEN(str);
11366 if (olen < prefixlen)
return 0;
11367 strptr = RSTRING_PTR(str);
11368 prefixptr = RSTRING_PTR(prefix);
11369 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11370 if (is_broken_string(prefix)) {
11371 if (!is_broken_string(str)) {
11375 const char *strend = strptr + olen;
11376 const char *after_prefix = strptr + prefixlen;
11377 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11398rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11401 str_modify_keep_cr(str);
11403 prefixlen = deleted_prefix_length(str, prefix);
11404 if (prefixlen <= 0)
return Qnil;
11418rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11422 prefixlen = deleted_prefix_length(str, prefix);
11423 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11425 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11438deleted_suffix_length(
VALUE str,
VALUE suffix)
11440 const char *strptr, *suffixptr;
11441 long olen, suffixlen;
11445 if (is_broken_string(suffix))
return 0;
11446 enc = rb_enc_check(str, suffix);
11449 suffixlen = RSTRING_LEN(suffix);
11450 if (suffixlen <= 0)
return 0;
11451 olen = RSTRING_LEN(str);
11452 if (olen < suffixlen)
return 0;
11453 strptr = RSTRING_PTR(str);
11454 suffixptr = RSTRING_PTR(suffix);
11455 const char *strend = strptr + olen;
11456 const char *before_suffix = strend - suffixlen;
11457 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11458 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11474rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11476 long olen, suffixlen,
len;
11477 str_modifiable(str);
11479 suffixlen = deleted_suffix_length(str, suffix);
11480 if (suffixlen <= 0)
return Qnil;
11482 olen = RSTRING_LEN(str);
11483 str_modify_keep_cr(str);
11484 len = olen - suffixlen;
11485 STR_SET_LEN(str,
len);
11486 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11502rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11506 suffixlen = deleted_suffix_length(str, suffix);
11507 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11509 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11516 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11522nil_setter_warning(
ID id)
11524 rb_warn_deprecated(
"non-nil '%"PRIsVALUE
"'", NULL, rb_id2str(
id));
11531 if (!
NIL_P(*var)) {
11532 nil_setter_warning(
id);
11539 val = rb_fs_check(val);
11542 "value of %"PRIsVALUE
" must be String or Regexp",
11546 nil_setter_warning(
id);
11563 str_modifiable(str);
11566 int idx = rb_enc_to_index(encoding);
11573 rb_enc_associate_index(str, idx);
11597 if (STR_EMBED_P(str)) {
11598 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11603 str_replace_shared_without_enc(str2, str);
11605 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11635rb_str_valid_encoding_p(
VALUE str)
11655rb_str_is_ascii_only_p(
VALUE str)
11665 static const char ellipsis[] =
"...";
11666 const long ellipsislen =
sizeof(ellipsis) - 1;
11668 const long blen = RSTRING_LEN(str);
11669 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11670 VALUE estr, ret = 0;
11673 if (
len * rb_enc_mbminlen(enc) >= blen ||
11677 else if (
len <= ellipsislen ||
11679 if (rb_enc_asciicompat(enc)) {
11681 rb_enc_associate(ret, enc);
11688 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11693 rb_enc_from_encoding(enc), 0,
Qnil);
11706 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11712 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11731 if (enc == STR_ENC_GET(str)) {
11736 return enc_str_scrub(enc, str, repl, cr);
11744 const char *rep, *p, *e, *p1, *sp;
11750 rb_raise(rb_eArgError,
"both of block and replacement given");
11757 if (!
NIL_P(repl)) {
11758 repl = str_compat_and_valid(repl, enc);
11761 if (rb_enc_dummy_p(enc)) {
11764 encidx = rb_enc_to_index(enc);
11766#define DEFAULT_REPLACE_CHAR(str) do { \
11767 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11768 rep = replace; replen = (int)sizeof(replace); \
11771 slen = RSTRING_LEN(str);
11772 p = RSTRING_PTR(str);
11777 if (rb_enc_asciicompat(enc)) {
11783 else if (!
NIL_P(repl)) {
11784 rep = RSTRING_PTR(repl);
11785 replen = RSTRING_LEN(repl);
11788 else if (encidx == rb_utf8_encindex()) {
11789 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11793 DEFAULT_REPLACE_CHAR(
"?");
11798 p = search_nonascii(p, e);
11803 int ret = rb_enc_precise_mbclen(p, e, enc);
11822 if (e - p < clen) clen = e - p;
11829 for (; clen > 1; clen--) {
11830 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11841 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11842 str_mod_check(str, sp, slen);
11843 repl = str_compat_and_valid(repl, enc);
11850 p = search_nonascii(p, e);
11876 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11877 str_mod_check(str, sp, slen);
11878 repl = str_compat_and_valid(repl, enc);
11887 long mbminlen = rb_enc_mbminlen(enc);
11891 else if (!
NIL_P(repl)) {
11892 rep = RSTRING_PTR(repl);
11893 replen = RSTRING_LEN(repl);
11895 else if (encidx == ENCINDEX_UTF_16BE) {
11896 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11898 else if (encidx == ENCINDEX_UTF_16LE) {
11899 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11901 else if (encidx == ENCINDEX_UTF_32BE) {
11902 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11904 else if (encidx == ENCINDEX_UTF_32LE) {
11905 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11908 DEFAULT_REPLACE_CHAR(
"?");
11912 int ret = rb_enc_precise_mbclen(p, e, enc);
11925 if (e - p < clen) clen = e - p;
11926 if (clen <= mbminlen * 2) {
11931 for (; clen > mbminlen; clen-=mbminlen) {
11932 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11942 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11943 str_mod_check(str, sp, slen);
11944 repl = str_compat_and_valid(repl, enc);
11969 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11970 str_mod_check(str, sp, slen);
11971 repl = str_compat_and_valid(repl, enc);
12011str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
12019static ID id_normalize;
12020static ID id_normalized_p;
12021static VALUE mUnicodeNormalize;
12024unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12026 static int UnicodeNormalizeRequired = 0;
12029 if (!UnicodeNormalizeRequired) {
12030 rb_require(
"unicode_normalize/normalize.rb");
12031 UnicodeNormalizeRequired = 1;
12035 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12046rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12048 return unicode_normalize_common(argc, argv, str, id_normalize);
12062rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12064 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12091rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12093 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12225#define sym_equal rb_obj_equal
12228sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12232 int c = rb_enc_precise_mbclen(s, send, enc);
12236 c = rb_enc_mbc_to_codepoint(s, send, enc);
12244rb_str_symname_p(
VALUE sym)
12249 rb_encoding *resenc = rb_default_internal_encoding();
12251 if (resenc == NULL) resenc = rb_default_external_encoding();
12252 enc = STR_ENC_GET(sym);
12253 ptr = RSTRING_PTR(sym);
12254 len = RSTRING_LEN(sym);
12255 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12263rb_str_quote_unprintable(
VALUE str)
12271 resenc = rb_default_internal_encoding();
12272 if (resenc == NULL) resenc = rb_default_external_encoding();
12273 enc = STR_ENC_GET(str);
12274 ptr = RSTRING_PTR(str);
12275 len = RSTRING_LEN(str);
12276 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12277 !sym_printable(ptr, ptr +
len, enc)) {
12278 return rb_str_escape(str);
12284rb_id_quote_unprintable(
ID id)
12286 VALUE str = rb_id2str(
id);
12287 if (!rb_str_symname_p(str)) {
12288 return rb_str_escape(str);
12306sym_inspect(
VALUE sym)
12313 if (!rb_str_symname_p(str)) {
12315 len = RSTRING_LEN(str);
12316 rb_str_resize(str,
len + 1);
12317 dest = RSTRING_PTR(str);
12318 memmove(dest + 1, dest,
len);
12322 VALUE orig_str = str;
12324 len = RSTRING_LEN(orig_str);
12325 str = rb_enc_str_new(0,
len + 1, enc);
12328 ptr = RSTRING_PTR(orig_str);
12329 dest = RSTRING_PTR(str);
12330 memcpy(dest + 1, ptr,
len);
12350rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12355 rb_raise(rb_eArgError,
"no receiver given");
12458 return rb_str_match(
rb_sym2str(sym), other);
12473sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12475 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12488sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12490 return rb_str_match_m_p(argc, argv, sym);
12508 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12519sym_length(
VALUE sym)
12533sym_empty(
VALUE sym)
12567sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12583sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12599sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12613sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12615 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12628sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12630 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12642sym_encoding(
VALUE sym)
12648string_for_symbol(
VALUE name)
12653 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12667 name = string_for_symbol(name);
12668 return rb_intern_str(name);
12677 name = string_for_symbol(name);
12701 return rb_fstring(str);
12707 struct RString fake_str = {RBASIC_INIT};
12708 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12720 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12721 rb_enc_autoload(enc);
12724 struct RString fake_str = {RBASIC_INIT};
12725 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12731 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12732 rb_enc_autoload(enc);
12735 struct RString fake_str = {RBASIC_INIT};
12736 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12747#if USE_YJIT || USE_ZJIT
12749rb_jit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12754 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12755 rb_str_buf_cat_byte(str, (
char) code);
12765fstring_set_class_i(
VALUE *str,
void *data)
12769 return ST_CONTINUE;
12777 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12944 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that asserts regardless of RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always creates an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern_str(), except it tries to convert the parameter object to an instance of rb_cS...
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously been classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
union RString::@51::@52::@54 aux
Auxiliary info.
long len
Length of the string, not including terminating NUL character.
struct RString::@51::@53 embed
Embedded contents.
VALUE shared
Parent of the string.
union RString::@51 as
String's specific fields.
char * ptr
Pointer to the contents of the string.
struct RString::@51::@52 heap
Strings that use separated memory region for contents use this pattern.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predicate failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.