14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
/* Element `no` of the `beg` array of `regs`.  The macro names `regs`
 * directly, so a pointer with that name must be in scope at the use site. */
63#define BEG(no) (regs->beg[(no)])
/* Element `no` of the `end` array of the same in-scope `regs`. */
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
/* NOTE(review): presumably the maximum byte length of a single character;
 * it is not used in the lines visible here -- confirm against its users. */
131#define RUBY_MAX_CHAR_LEN 16
/* Set by str_store_precomputed_hash() after it copies an st_index_t hash
 * just past the string's terminator; size calculations add
 * sizeof(st_index_t) to the capacity when this flag is present. */
132#define STR_PRECOMPUTED_HASH FL_USER4
/* Set by STR_SET_SHARED on the string whose buffer is being shared. */
133#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on the shared string when its class is 0. */
134#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(); also part of STR_UNMODIFIABLE_MASK. */
135#define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); the free paths treat strings carrying it like
 * shared strings and do not release the heap pointer. */
136#define STR_NOFREE FL_USER18
/* When set on `str`, STR_SET_SHARED does none of its sharing bookkeeping. */
137#define STR_FAKESTR FL_USER19
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Marks `str` as embedded by clearing STR_NOEMBED together with the
 * STR_SHARED and STR_NOFREE flags.  It only touches flags; the caller is
 * responsible for the contents of the embedded buffer. */
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
/* Terminator length in bytes for `str`: 1 when str_enc_fastpath(str) is
 * true, otherwise the minimum character length (rb_enc_mbminlen) of the
 * string's encoding.  `str` may be evaluated twice, so pass an expression
 * without side effects. */
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 FL_SET((shared_str), STR_SHARED_ROOT); \
209 if (RBASIC_CLASS((shared_str)) == 0) \
210 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Raw heap pointer field of `str`; reads as.heap.ptr without checking
 * whether the string is actually in heap (non-embedded) form. */
214#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Byte size of the heap buffer: the recorded capacity plus the terminator
 * length.  This is the size handed to ruby_sized_xfree()/SIZED_REALLOC_N
 * elsewhere in this file. */
215#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias for get_encoding(str). */
218#define STR_ENC_GET(str) get_encoding(str)
220#if !defined SHARABLE_MIDDLE_SUBSTRING
221# define SHARABLE_MIDDLE_SUBSTRING 0
223#if !SHARABLE_MIDDLE_SUBSTRING
224#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
226#define SHARABLE_SUBSTRING_P(beg, len, end) 1
231str_embed_capa(
VALUE str)
233 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
237rb_str_reembeddable_p(
VALUE str)
239 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
243rb_str_embed_size(
long capa,
long termlen)
251rb_str_size_as_embedded(
VALUE str)
254 if (STR_EMBED_P(str)) {
256 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
258 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
262 else if (rb_str_reembeddable_p(str)) {
264 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
266 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
269 real_size =
sizeof(
struct RString);
276STR_EMBEDDABLE_P(
long len,
long termlen)
278 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
/* Forward declarations of file-local helpers that are used before their
 * definitions appear further down. */

/* With copy_encoding false the definition uses the ASCII-8BIT encoding and
 * a terminator length of 1; with it true the encoding and code range of
 * `orig` are copied onto the result. */
283static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
/* The definition sets STR_NOFREE on the heap string it builds and
 * associates `encindex` with it. */
284static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
/* str_make_independent() calls this with the current length, expand == 0
 * and TERM_LEN(str). */
286static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
/* The definition tests STR_UNMODIFIABLE_MASK and, when any of those flags
 * is set, runs the chilled-string, tmplock and frozen checks. */
287static inline void str_modifiable(
VALUE str);
292str_make_independent(
VALUE str)
294 long len = RSTRING_LEN(str);
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
/* Forward declaration.  The definition decides from STR_EMBED_P(str) and
 * the STR_SHARED / STR_NOFREE flags; rb_str_make_independent() calls
 * str_make_independent() only when this returns non-zero. */
299static inline int str_dependent_p(
VALUE str);
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_LEN(str,
len);
322 memcpy(RSTRING_PTR(str), buf,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
330rb_debug_rstring_null_ptr(
const char *func)
332 fprintf(stderr,
"%s is returning NULL!! "
333 "SIGSEGV is highly expected to follow immediately.\n"
334 "If you could reproduce, attach your debugger here, "
335 "and look at the passed string.\n",
340static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
343get_encoding(
VALUE str)
349mustnot_broken(
VALUE str)
351 if (is_broken_string(str)) {
352 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
357mustnot_wchar(
VALUE str)
360 if (rb_enc_mbminlen(enc) > 1) {
361 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
/* Forward declaration.  The definition passes `str` to
 * rb_concurrent_set_find_or_insert() on fstring_table_obj and forwards
 * `force_precompute_hash` in the argument struct given to that call. */
365static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
367#if SIZEOF_LONG == SIZEOF_VOIDP
368#define PRECOMPUTED_FAKESTR_HASH 1
373BARE_STRING_P(
VALUE str)
378static inline st_index_t
379str_do_hash(
VALUE str)
381 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
383 if (e && !is_ascii_string(str)) {
390str_store_precomputed_hash(
VALUE str, st_index_t hash)
396 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
397 size_t free_bytes = str_embed_capa(str) - used_bytes;
401 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
403 FL_SET(str, STR_PRECOMPUTED_HASH);
416 if (
FL_TEST(str, RSTRING_FSTR))
419 bare = BARE_STRING_P(str);
421 if (STR_EMBED_P(str)) {
426 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
433 rb_str_resize(str, RSTRING_LEN(str));
435 fstr = register_fstring(str,
false,
false);
438 str_replace_shared_without_enc(str, fstr);
/* The fstring table.  Init_fstring_table() assigns it from
 * rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192) and registers
 * its address with rb_gc_register_address(). */
445static VALUE fstring_table_obj;
448fstring_concurrent_set_hash(
VALUE str)
450#ifdef PRECOMPUTED_FAKESTR_HASH
454 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
471 const char *aptr, *bptr;
478 return (alen == blen &&
480 memcmp(aptr, bptr, alen) == 0);
485 bool force_precompute_hash;
489fstring_concurrent_set_create(
VALUE str,
void *data)
499 long len = RSTRING_LEN(str);
500 long capa =
len +
sizeof(st_index_t);
501 int term_len = TERM_LEN(str);
503 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
505 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
506 STR_SET_LEN(new_str, RSTRING_LEN(str));
508 rb_enc_copy(new_str, str);
509 str_store_precomputed_hash(new_str, str_do_hash(str));
513 rb_enc_copy(new_str, str);
514#ifdef PRECOMPUTED_FAKESTR_HASH
515 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
516 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
530 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
533 if (STR_SHARED_P(str)) {
535 str_make_independent(str);
538 if (!BARE_STRING_P(str)) {
544 RBASIC(str)->flags |= RSTRING_FSTR;
546 RB_OBJ_SET_SHAREABLE(str);
560 .hash = fstring_concurrent_set_hash,
561 .cmp = fstring_concurrent_set_cmp,
562 .create = fstring_concurrent_set_create,
567Init_fstring_table(
void)
569 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
570 rb_gc_register_address(&fstring_table_obj);
574register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
578 .force_precompute_hash = force_precompute_hash
581#if SIZEOF_VOIDP == SIZEOF_LONG
585 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
589 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
591 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
603rb_obj_is_fstring_table(
VALUE obj)
607 return obj == fstring_table_obj;
611rb_gc_free_fstring(
VALUE obj)
613 ASSERT_vm_locking_with_barrier();
619 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
621 RB_DEBUG_COUNTER_INC(obj_str_fstr);
627rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
629 if (fstring_table_obj) {
630 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
635setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
638 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
651 return (
VALUE)fake_str;
660 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
669rb_fstring_new(
const char *ptr,
long len)
671 struct RString fake_str = {RBASIC_INIT};
672 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
678 struct RString fake_str = {RBASIC_INIT};
679 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
683rb_fstring_cstr(
const char *
ptr)
685 return rb_fstring_new(
ptr, strlen(
ptr));
689single_byte_optimizable(
VALUE str)
693 case ENCINDEX_ASCII_8BIT:
694 case ENCINDEX_US_ASCII:
716static inline const char *
717search_nonascii(
const char *p,
const char *e)
721#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
722# if SIZEOF_UINTPTR_T == 8
723# define NONASCII_MASK UINT64_C(0x8080808080808080)
724# elif SIZEOF_UINTPTR_T == 4
725# define NONASCII_MASK UINT32_C(0x80808080)
727# error "don't know what to do."
730# if SIZEOF_UINTPTR_T == 8
731# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
732# elif SIZEOF_UINTPTR_T == 4
733# define NONASCII_MASK 0x80808080UL
735# error "don't know what to do."
739 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
740#if !UNALIGNED_WORD_ACCESS
741 if ((uintptr_t)p % SIZEOF_VOIDP) {
742 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
747 case 7:
if (p[-7]&0x80)
return p-7;
748 case 6:
if (p[-6]&0x80)
return p-6;
749 case 5:
if (p[-5]&0x80)
return p-5;
750 case 4:
if (p[-4]&0x80)
return p-4;
752 case 3:
if (p[-3]&0x80)
return p-3;
753 case 2:
if (p[-2]&0x80)
return p-2;
754 case 1:
if (p[-1]&0x80)
return p-1;
759#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
760#define aligned_ptr(value) \
761 __builtin_assume_aligned((value), sizeof(uintptr_t))
763#define aligned_ptr(value) (value)
766 t = (e - (SIZEOF_VOIDP-1));
768 for (;s < t; s +=
sizeof(uintptr_t)) {
770 memcpy(&word, s,
sizeof(word));
771 if (word & NONASCII_MASK) {
772#ifdef WORDS_BIGENDIAN
773 return (
const char *)s + (nlz_intptr(word&NONASCII_MASK)>>3);
775 return (
const char *)s + (ntz_intptr(word&NONASCII_MASK)>>3);
785 case 7:
if (e[-7]&0x80)
return e-7;
786 case 6:
if (e[-6]&0x80)
return e-6;
787 case 5:
if (e[-5]&0x80)
return e-5;
788 case 4:
if (e[-4]&0x80)
return e-4;
790 case 3:
if (e[-3]&0x80)
return e-3;
791 case 2:
if (e[-2]&0x80)
return e-2;
792 case 1:
if (e[-1]&0x80)
return e-1;
800 const char *e = p +
len;
802 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
804 p = search_nonascii(p, e);
808 if (rb_enc_asciicompat(enc)) {
809 p = search_nonascii(p, e);
812 int ret = rb_enc_precise_mbclen(p, e, enc);
816 p = search_nonascii(p, e);
822 int ret = rb_enc_precise_mbclen(p, e, enc);
838 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
841 p = search_nonascii(p, e);
845 else if (rb_enc_asciicompat(enc)) {
846 p = search_nonascii(p, e);
852 int ret = rb_enc_precise_mbclen(p, e, enc);
859 p = search_nonascii(p, e);
865 int ret = rb_enc_precise_mbclen(p, e, enc);
890 rb_enc_set_index(str1, rb_enc_get_index(str2));
898rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
903 str_enc_copy(dest, src);
904 if (RSTRING_LEN(dest) == 0) {
905 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
916 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
917 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
928rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
930 str_enc_copy(dest, src);
937 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
943 return enc_coderange_scan(str, enc);
952 cr = enc_coderange_scan(str, get_encoding(str));
959rb_enc_str_asciicompat(
VALUE str)
962 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
970 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
979str_mod_check(
VALUE s,
const char *p,
long len)
981 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
987str_capacity(
VALUE str,
const int termlen)
989 if (STR_EMBED_P(str)) {
990 return str_embed_capa(str) - termlen;
992 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
996 return RSTRING(str)->as.heap.aux.capa;
1003 return str_capacity(str, TERM_LEN(str));
1007must_not_null(
const char *
ptr)
1010 rb_raise(rb_eArgError,
"NULL pointer given");
1015str_alloc_embed(
VALUE klass,
size_t capa)
1017 size_t size = rb_str_embed_size(
capa, 0);
1021 NEWOBJ_OF(str,
struct RString, klass,
1025 str->as.embed.ary[0] = 0;
1031str_alloc_heap(
VALUE klass)
1033 NEWOBJ_OF(str,
struct RString, klass,
1037 str->as.heap.aux.capa = 0;
1038 str->as.heap.ptr = NULL;
1044empty_str_alloc(
VALUE klass)
1046 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1047 VALUE str = str_alloc_embed(klass, 0);
1048 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1059 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1063 enc = rb_ascii8bit_encoding();
1066 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1068 int termlen = rb_enc_mbminlen(enc);
1070 if (STR_EMBEDDABLE_P(
len, termlen)) {
1071 str = str_alloc_embed(klass,
len + termlen);
1077 str = str_alloc_heap(klass);
1083 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1086 rb_enc_raw_set(str, enc);
1089 memcpy(RSTRING_PTR(str),
ptr,
len);
1092 memset(RSTRING_PTR(str), 0,
len);
1095 STR_SET_LEN(str,
len);
1096 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1103 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1138 __msan_unpoison_string(
ptr);
1158 if (rb_enc_mbminlen(enc) != 1) {
1159 rb_raise(rb_eArgError,
"wchar encoding given");
1161 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1165str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1170 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1174 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1177 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1178 str = str_alloc_heap(klass);
1182 RBASIC(str)->flags |= STR_NOFREE;
1183 rb_enc_associate_index(str, encindex);
1212static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1214 int ecflags,
VALUE ecopts);
1219 int encidx = rb_enc_to_index(enc);
1220 if (rb_enc_get_index(str) == encidx)
1221 return is_ascii_string(str);
1232 if (!to)
return str;
1233 if (!from) from = rb_enc_get(str);
1234 if (from == to)
return str;
1235 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1236 rb_is_ascii8bit_enc(to)) {
1237 if (STR_ENC_GET(str) != to) {
1239 rb_enc_associate(str, to);
1246 from, to, ecflags, ecopts);
1247 if (
NIL_P(newstr)) {
1255rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1260 olen = RSTRING_LEN(newstr);
1261 if (ofs < -olen || olen < ofs)
1263 if (ofs < 0) ofs += olen;
1265 STR_SET_LEN(newstr, ofs);
1269 rb_str_modify(newstr);
1270 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1278 STR_SET_LEN(str, 0);
1279 rb_enc_associate(str, enc);
1285str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1287 int ecflags,
VALUE ecopts)
1292 VALUE econv_wrapper;
1293 const unsigned char *start, *sp;
1294 unsigned char *dest, *dp;
1295 size_t converted_output = (size_t)ofs;
1300 RBASIC_CLEAR_CLASS(econv_wrapper);
1302 if (!ec)
return Qnil;
1305 sp = (
unsigned char*)
ptr;
1307 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1308 (dp = dest + converted_output),
1312 size_t converted_input = sp - start;
1313 size_t rest =
len - converted_input;
1314 converted_output = dp - dest;
1316 if (converted_input && converted_output &&
1317 rest < (LONG_MAX / converted_output)) {
1318 rest = (rest * converted_output) / converted_input;
1323 olen += rest < 2 ? 2 : rest;
1324 rb_str_resize(newstr, olen);
1331 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1333 rb_enc_associate(newstr, to);
1352 const int eidx = rb_enc_to_index(eenc);
1355 return rb_enc_str_new(
ptr,
len, eenc);
1359 if ((eidx == rb_ascii8bit_encindex()) ||
1360 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1364 ienc = rb_default_internal_encoding();
1365 if (!ienc || eenc == ienc) {
1366 return rb_enc_str_new(
ptr,
len, eenc);
1370 if ((eidx == rb_ascii8bit_encindex()) ||
1371 (eidx == rb_usascii_encindex()) ||
1372 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1373 return rb_enc_str_new(
ptr,
len, ienc);
1376 str = rb_enc_str_new(NULL, 0, ienc);
1379 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1380 rb_str_initialize(str,
ptr,
len, eenc);
1388 int eidx = rb_enc_to_index(eenc);
1389 if (eidx == rb_usascii_encindex() &&
1390 !is_ascii_string(str)) {
1391 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1394 rb_enc_associate_index(str, eidx);
1453str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1455 const int termlen = TERM_LEN(str);
1460 if (str_embed_capa(str2) >=
len + termlen) {
1461 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1462 STR_SET_EMBED(str2);
1463 memcpy(ptr2, RSTRING_PTR(str),
len);
1464 TERM_FILL(ptr2+
len, termlen);
1468 if (STR_SHARED_P(str)) {
1469 root =
RSTRING(str)->as.heap.aux.shared;
1478 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1480 rb_fatal(
"about to free a possible shared root");
1482 char *ptr2 = STR_HEAP_PTR(str2);
1484 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1487 FL_SET(str2, STR_NOEMBED);
1489 STR_SET_SHARED(str2, root);
1492 STR_SET_LEN(str2,
len);
1500 str_replace_shared_without_enc(str2, str);
1501 rb_enc_cr_str_exact_copy(str2, str);
1508 return str_replace_shared(str_alloc_heap(klass), str);
1525rb_str_new_frozen_String(
VALUE orig)
1533rb_str_frozen_bare_string(
VALUE orig)
1535 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1540rb_str_tmp_frozen_acquire(
VALUE orig)
1543 return str_new_frozen_buffer(0, orig, FALSE);
1547rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1549 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1550 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1552 VALUE str = str_alloc_heap(0);
1555 FL_SET(str, STR_SHARED_ROOT);
1557 size_t capa = str_capacity(orig, TERM_LEN(orig));
1563 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1564 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1571 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1572 RBASIC(orig)->flags &= ~STR_NOFREE;
1573 STR_SET_SHARED(orig, str);
1575 RB_OBJ_SET_SHAREABLE(str);
1587rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1592 if (STR_EMBED_P(tmp)) {
1595 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1601 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1605 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1606 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1611 STR_SET_LEN(tmp, 0);
1619 return str_new_frozen_buffer(klass, orig, TRUE);
1629 VALUE str = str_alloc_heap(klass);
1630 STR_SET_LEN(str, RSTRING_LEN(orig));
1631 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1632 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1633 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1634 RBASIC(orig)->flags &= ~STR_NOFREE;
1635 STR_SET_SHARED(orig, str);
1642str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1646 long len = RSTRING_LEN(orig);
1647 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1648 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1650 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1651 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1657 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1658 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1664 if ((ofs > 0) || (rest > 0) ||
1667 str = str_new_shared(klass,
shared);
1669 RSTRING(str)->as.heap.ptr += ofs;
1670 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1678 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1679 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1681 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1682 STR_SET_LEN(str, RSTRING_LEN(orig));
1688 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1691 str = heap_str_make_shared(klass, orig);
1696 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1708str_new_empty_String(
VALUE str)
1711 rb_enc_copy(v, str);
/* Minimum buffer capacity: a requested `capa` smaller than this is raised
 * to it where the capacity keyword is handled further down. */
1715#define STR_BUF_MIN_SIZE 63
1720 if (STR_EMBEDDABLE_P(
capa, 1)) {
1728 RSTRING(str)->as.heap.ptr[0] =
'\0';
1748 return str_new(0, 0,
len);
1754 if (STR_EMBED_P(str)) {
1755 RB_DEBUG_COUNTER_INC(obj_str_embed);
1757 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1758 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1759 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1762 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1763 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1768rb_str_memsize(
VALUE str)
1770 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1771 return STR_HEAP_SIZE(str);
1781 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/* Forward declaration.  The definition calls str_modifiable(), and for a
 * non-embedded string without STR_SHARED/STR_NOFREE frees the heap buffer,
 * then sets the heap pointer to 0 and the length to 0. */
1784static inline void str_discard(
VALUE str);
/* Forward declaration.  Invoked just below only when str != str2. */
1785static void str_shared_replace(
VALUE str,
VALUE str2);
1790 if (str != str2) str_shared_replace(str, str2);
1801 enc = STR_ENC_GET(str2);
1804 termlen = rb_enc_mbminlen(enc);
1806 STR_SET_LEN(str, RSTRING_LEN(str2));
1808 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1810 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1811 rb_enc_associate(str, enc);
1815 if (STR_EMBED_P(str2)) {
1817 long len = RSTRING_LEN(str2);
1820 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1821 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1822 RSTRING(str2)->as.heap.ptr = new_ptr;
1823 STR_SET_LEN(str2,
len);
1825 STR_SET_NOEMBED(str2);
1828 STR_SET_NOEMBED(str);
1830 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1832 if (
FL_TEST(str2, STR_SHARED)) {
1834 STR_SET_SHARED(str,
shared);
1837 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1841 STR_SET_EMBED(str2);
1842 RSTRING_PTR(str2)[0] = 0;
1843 STR_SET_LEN(str2, 0);
1844 rb_enc_associate(str, enc);
1858 return rb_obj_as_string_result(str, obj);
1874 len = RSTRING_LEN(str2);
1875 if (STR_SHARED_P(str2)) {
1878 STR_SET_NOEMBED(str);
1879 STR_SET_LEN(str,
len);
1880 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1881 STR_SET_SHARED(str,
shared);
1882 rb_enc_cr_str_exact_copy(str, str2);
1885 str_replace_shared(str, str2);
1894 size_t size = rb_str_embed_size(
capa, 0);
1898 NEWOBJ_OF(str,
struct RString, klass,
1909 NEWOBJ_OF(str,
struct RString, klass,
1912 str->as.heap.aux.capa = 0;
1913 str->as.heap.ptr = NULL;
1923 encidx = rb_enc_get_index(str);
1924 flags &= ~ENCODING_MASK;
1927 if (encidx) rb_enc_associate_index(dup, encidx);
1937 long len = RSTRING_LEN(str);
1942 STR_SET_LEN(dup, RSTRING_LEN(str));
1943 return str_duplicate_setup_encoding(str, dup, flags);
1952 root =
RSTRING(str)->as.heap.aux.shared;
1955 root = str = str_new_frozen(klass, str);
1961 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1962 FL_SET(root, STR_SHARED_ROOT);
1964 flags |= RSTRING_NOEMBED | STR_SHARED;
1966 STR_SET_LEN(dup, RSTRING_LEN(str));
1967 return str_duplicate_setup_encoding(str, dup, flags);
1973 if (STR_EMBED_P(str)) {
1974 return str_duplicate_setup_embed(klass, str, dup);
1977 return str_duplicate_setup_heap(klass, str, dup);
1985 if (STR_EMBED_P(str)) {
1986 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1989 dup = str_alloc_heap(klass);
1992 return str_duplicate_setup(klass, str, dup);
2003rb_str_dup_m(
VALUE str)
2005 if (LIKELY(BARE_STRING_P(str))) {
2016 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2023 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2027 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2028 str_duplicate_setup_embed(klass, str, new_str);
2031 new_str = ec_str_alloc_heap(ec, klass);
2032 str_duplicate_setup_heap(klass, str, new_str);
2041rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2043 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2067 static ID keyword_ids[2];
2068 VALUE orig, opt, venc, vcapa;
2073 if (!keyword_ids[0]) {
2074 keyword_ids[0] = rb_id_encoding();
2075 CONST_ID(keyword_ids[1],
"capacity");
2083 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2084 enc = rb_to_encoding(venc);
2086 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2089 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2091 if (
capa < STR_BUF_MIN_SIZE) {
2092 capa = STR_BUF_MIN_SIZE;
2096 len = RSTRING_LEN(orig);
2100 if (orig == str) n = 0;
2102 str_modifiable(str);
2103 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2105 const size_t size = (size_t)
capa + termlen;
2106 const char *
const old_ptr = RSTRING_PTR(str);
2107 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2108 char *new_ptr =
ALLOC_N(
char, size);
2109 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2110 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2112 RSTRING(str)->as.heap.ptr = new_ptr;
2114 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2115 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2116 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2118 STR_SET_LEN(str,
len);
2121 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2122 rb_enc_cr_str_exact_copy(str, orig);
2124 FL_SET(str, STR_NOEMBED);
2131 rb_enc_associate(str, enc);
2143rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2149 static ID keyword_ids[2];
2159 keyword_ids[0] = rb_id_encoding();
2160 CONST_ID(keyword_ids[1],
"capacity");
2162 encoding = kwargs[0];
2163 capacity = kwargs[1];
2172 if (UNDEF_P(encoding)) {
2174 encoding = rb_obj_encoding(orig);
2178 if (!UNDEF_P(encoding)) {
2179 enc = rb_to_encoding(encoding);
2183 if (UNDEF_P(capacity)) {
2185 VALUE empty_str = str_new(klass,
"", 0);
2187 rb_enc_associate(empty_str, enc);
2191 VALUE copy = str_duplicate(klass, orig);
2192 rb_enc_associate(copy, enc);
2205 if (orig_capa >
capa) {
2210 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2211 STR_SET_LEN(str, 0);
/* Non-zero unless the two high bits of `c` are binary 10, i.e. unless `c`
 * has the form of a UTF-8 continuation byte (10xxxxxx).  ASCII bytes also
 * satisfy it; enc_strlen() counts one character per byte that does. */
2222#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2237static inline uintptr_t
2238count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2243 d = (d>>6) | (~d>>7);
2244 d &= NONASCII_MASK >> 7;
2247#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2249 return rb_popcount_intptr(d);
2253# if SIZEOF_VOIDP == 8
2262enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2268 long diff = (long)(e - p);
2269 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2274 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2275 const uintptr_t *s, *t;
2276 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2277 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2278 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2279 while (p < (
const char *)s) {
2280 if (is_utf8_lead_byte(*p))
len++;
2284 len += count_utf8_lead_bytes_with_word(s);
2287 p = (
const char *)s;
2290 if (is_utf8_lead_byte(*p))
len++;
2296 else if (rb_enc_asciicompat(enc)) {
2301 q = search_nonascii(p, e);
2307 p += rb_enc_fast_mbclen(p, e, enc);
2314 q = search_nonascii(p, e);
2320 p += rb_enc_mbclen(p, e, enc);
2327 for (c=0; p<e; c++) {
2328 p += rb_enc_mbclen(p, e, enc);
2343rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2351 long diff = (long)(e - p);
2352 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2354 else if (rb_enc_asciicompat(enc)) {
2358 q = search_nonascii(p, e);
2366 ret = rb_enc_precise_mbclen(p, e, enc);
2381 for (c=0; p<e; c++) {
2382 ret = rb_enc_precise_mbclen(p, e, enc);
2389 if (p + rb_enc_mbminlen(enc) <= e)
2390 p += rb_enc_mbminlen(enc);
2406 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2407 if (!enc) enc = STR_ENC_GET(str);
2408 p = RSTRING_PTR(str);
2413 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2418 return enc_strlen(p, e, enc, cr);
2425 return str_strlen(str, NULL);
2439 return LONG2NUM(str_strlen(str, NULL));
2451rb_str_bytesize(
VALUE str)
2470rb_str_empty(
VALUE str)
2472 return RBOOL(RSTRING_LEN(str) == 0);
2491 char *ptr1, *ptr2, *ptr3;
2496 enc = rb_enc_check_str(str1, str2);
2499 termlen = rb_enc_mbminlen(enc);
2500 if (len1 > LONG_MAX - len2) {
2501 rb_raise(rb_eArgError,
"string size too big");
2503 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2504 ptr3 = RSTRING_PTR(str3);
2505 memcpy(ptr3, ptr1, len1);
2506 memcpy(ptr3+len1, ptr2, len2);
2507 TERM_FILL(&ptr3[len1+len2], termlen);
2523 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2526 int enc1 = rb_enc_get_index(str1);
2527 int enc2 = rb_enc_get_index(str2);
2532 else if (enc2 < 0) {
2535 else if (enc1 != enc2) {
2538 else if (len1 > LONG_MAX - len2) {
2572 rb_enc_copy(str2, str);
2577 rb_raise(rb_eArgError,
"negative argument");
2579 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2580 if (STR_EMBEDDABLE_P(
len, 1)) {
2582 memset(RSTRING_PTR(str2), 0,
len + 1);
2589 STR_SET_LEN(str2,
len);
2590 rb_enc_copy(str2, str);
2593 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2594 rb_raise(rb_eArgError,
"argument too big");
2597 len *= RSTRING_LEN(str);
2598 termlen = TERM_LEN(str);
2600 ptr2 = RSTRING_PTR(str2);
2602 n = RSTRING_LEN(str);
2603 memcpy(ptr2, RSTRING_PTR(str), n);
2604 while (n <=
len/2) {
2605 memcpy(ptr2 + n, ptr2, n);
2608 memcpy(ptr2 + n, ptr2,
len-n);
2610 STR_SET_LEN(str2,
len);
2611 TERM_FILL(&ptr2[
len], termlen);
2612 rb_enc_cr_str_copy_for_substr(str2, str);
2649rb_check_lockedtmp(
VALUE str)
2651 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Flags that str_modifiable() tests in a single FL_ANY_RAW() check before
 * it runs the individual chilled / tmplock / frozen checks. */
2658#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2660str_modifiable(
VALUE str)
2664 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2665 if (CHILLED_STRING_P(str)) {
2666 CHILLED_STRING_MUTATED(str);
2668 rb_check_lockedtmp(str);
2669 rb_check_frozen(str);
2674str_dependent_p(
VALUE str)
2676 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK plus the STR_SHARED and STR_NOFREE flags.
 * str_independent() tests it with FL_ANY_RAW(); when any bit is set it
 * calls str_modifiable() and returns !str_dependent_p(str). */
2686#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2688str_independent(
VALUE str)
2692 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2693 str_modifiable(str);
2694 return !str_dependent_p(str);
2700str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2710 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2715 STR_SET_LEN(str,
len);
2720 oldptr = RSTRING_PTR(str);
2722 memcpy(
ptr, oldptr,
len);
2724 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2727 STR_SET_NOEMBED(str);
2728 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2729 TERM_FILL(
ptr +
len, termlen);
2731 STR_SET_LEN(str,
len);
2738 if (!str_independent(str))
2739 str_make_independent(str);
2748 int termlen = TERM_LEN(str);
2749 long len = RSTRING_LEN(str);
2752 rb_raise(rb_eArgError,
"negative expanding string size");
2754 if (expand >= LONG_MAX -
len) {
2755 rb_raise(rb_eArgError,
"string size too big");
2758 if (!str_independent(str)) {
2759 str_make_independent_expand(str,
len, expand, termlen);
2761 else if (expand > 0) {
2762 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2769str_modify_keep_cr(
VALUE str)
2771 if (!str_independent(str))
2772 str_make_independent(str);
2779str_discard(
VALUE str)
2781 str_modifiable(str);
2782 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2783 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2784 RSTRING(str)->as.heap.ptr = 0;
2785 STR_SET_LEN(str, 0);
2792 int encindex = rb_enc_get_index(str);
2794 if (RB_UNLIKELY(encindex == -1)) {
2798 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2803 if (!rb_enc_asciicompat(enc)) {
2825 return RSTRING_PTR(str);
2829zero_filled(
const char *s,
int n)
2831 for (; n > 0; --n) {
2838str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2840 const char *e = s +
len;
2842 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2843 if (zero_filled(s, minlen))
return s;
2849str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2854 if (str_dependent_p(str)) {
2855 if (!zero_filled(s +
len, termlen))
2856 str_make_independent_expand(str,
len, 0L, termlen);
2859 TERM_FILL(s +
len, termlen);
2862 return RSTRING_PTR(str);
2866rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2868 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2869 long len = RSTRING_LEN(str);
2873 rb_check_lockedtmp(str);
2874 str_make_independent_expand(str,
len, 0L, termlen);
2876 else if (str_dependent_p(str)) {
2877 if (termlen > oldtermlen)
2878 str_make_independent_expand(str,
len, 0L, termlen);
2881 if (!STR_EMBED_P(str)) {
2886 if (termlen > oldtermlen) {
2887 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2895str_null_check(
VALUE str,
int *w)
2897 char *s = RSTRING_PTR(str);
2898 long len = RSTRING_LEN(str);
2900 const int minlen = rb_enc_mbminlen(enc);
2904 if (str_null_char(s,
len, minlen, enc)) {
2907 return str_fill_term(str, s,
len, minlen);
2910 if (!s || memchr(s, 0,
len)) {
2914 s = str_fill_term(str, s,
len, minlen);
2920rb_str_to_cstr(
VALUE str)
2923 return str_null_check(str, &w);
2931 char *s = str_null_check(str, &w);
2934 rb_raise(rb_eArgError,
"string contains null char");
2936 rb_raise(rb_eArgError,
"string contains null byte");
2942rb_str_fill_terminator(
VALUE str,
const int newminlen)
2944 char *s = RSTRING_PTR(str);
2945 long len = RSTRING_LEN(str);
2946 return str_fill_term(str, s,
len, newminlen);
2952 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2978str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2987 else if (rb_enc_asciicompat(enc)) {
2988 const char *p2, *e2;
2991 while (p < e && 0 < nth) {
2998 p2 = search_nonascii(p, e2);
3007 n = rb_enc_mbclen(p, e, enc);
3018 while (p < e && nth--) {
3019 p += rb_enc_mbclen(p, e, enc);
3030 return str_nth_len(p, e, &nth, enc);
3034str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3039 p = str_nth_len(p, e, &nth, enc);
3048str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3050 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3051 if (!pp)
return e - p;
3058 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3059 STR_ENC_GET(str), single_byte_optimizable(str));
3064str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3067 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3068 const uintptr_t *s, *t;
3069 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3070 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3071 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3072 while (p < (
const char *)s) {
3073 if (is_utf8_lead_byte(*p)) nth--;
3077 nth -= count_utf8_lead_bytes_with_word(s);
3079 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3083 if (is_utf8_lead_byte(*p)) {
3084 if (nth == 0)
break;
3094str_utf8_offset(
const char *p,
const char *e,
long nth)
3096 const char *pp = str_utf8_nth(p, e, &nth);
3105 if (single_byte_optimizable(str) || pos < 0)
3108 char *p = RSTRING_PTR(str);
3109 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3114str_subseq(
VALUE str,
long beg,
long len)
3122 const int termlen = TERM_LEN(str);
3123 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3130 if (str_embed_capa(str2) >=
len + termlen) {
3131 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3132 STR_SET_EMBED(str2);
3133 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3134 TERM_FILL(ptr2+
len, termlen);
3136 STR_SET_LEN(str2,
len);
3140 str_replace_shared(str2, str);
3143 RSTRING(str2)->as.heap.ptr += beg;
3144 if (RSTRING_LEN(str2) >
len) {
3145 STR_SET_LEN(str2,
len);
3155 VALUE str2 = str_subseq(str, beg,
len);
3156 rb_enc_cr_str_copy_for_substr(str2, str);
3165 const long blen = RSTRING_LEN(str);
3167 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3169 if (
len < 0)
return 0;
3170 if (beg < 0 && -beg < 0)
return 0;
3174 if (single_byte_optimizable(str)) {
3175 if (beg > blen)
return 0;
3178 if (beg < 0)
return 0;
3180 if (
len > blen - beg)
3182 if (
len < 0)
return 0;
3187 if (
len > -beg)
len = -beg;
3191 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3194 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3200 slen = str_strlen(str, enc);
3202 if (beg < 0)
return 0;
3204 if (
len == 0)
goto end;
3207 else if (beg > 0 && beg > blen) {
3211 if (beg > str_strlen(str, enc))
return 0;
3216 enc == rb_utf8_encoding()) {
3217 p = str_utf8_nth(s, e, &beg);
3218 if (beg > 0)
return 0;
3219 len = str_utf8_offset(p, e,
len);
3225 p = s + beg * char_sz;
3229 else if (
len * char_sz > e - p)
3234 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3235 if (beg > 0)
return 0;
3239 len = str_offset(p, e,
len, enc, 0);
/*
 * Forward declaration: str_substr() is called before its definition appears
 * further down the file.  `empty` controls the zero-length case: when the
 * length is 0 and `empty` is false, str_substr() returns Qnil.
 */
3247static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3252 return str_substr(str, beg,
len, TRUE);
3262str_substr(
VALUE str,
long beg,
long len,
int empty)
3266 if (!p)
return Qnil;
3267 if (!
len && !empty)
return Qnil;
3269 beg = p - RSTRING_PTR(str);
3271 VALUE str2 = str_subseq(str, beg,
len);
3272 rb_enc_cr_str_copy_for_substr(str2, str);
3280 if (CHILLED_STRING_P(str)) {
3285 rb_str_resize(str, RSTRING_LEN(str));
3303 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3346str_uminus(
VALUE str)
3351 return rb_fstring(str);
/*
 * From this point on, rb_str_dup_frozen expands to rb_str_new_frozen.  The
 * name was #undef'd near the top of this file, so this is the only macro
 * definition of it in effect here.
 */
3355#define rb_str_dup_frozen rb_str_new_frozen
3360 rb_check_frozen(str);
3361 if (
FL_TEST(str, STR_TMPLOCK)) {
3364 FL_SET(str, STR_TMPLOCK);
3371 rb_check_frozen(str);
3372 if (!
FL_TEST(str, STR_TMPLOCK)) {
3392 const int termlen = TERM_LEN(str);
3394 str_modifiable(str);
3395 if (STR_SHARED_P(str)) {
3398 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3399 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3410 else if (
len > RSTRING_LEN(str)) {
3414 const char *
const new_end = RSTRING_PTR(str) +
len;
3424 else if (
len < RSTRING_LEN(str)) {
3432 STR_SET_LEN(str,
len);
3433 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3440 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3443 int independent = str_independent(str);
3444 long slen = RSTRING_LEN(str);
3445 const int termlen = TERM_LEN(str);
3447 if (slen >
len || (termlen != 1 && slen <
len)) {
3453 if (STR_EMBED_P(str)) {
3454 if (
len == slen)
return str;
3455 if (str_embed_capa(str) >=
len + termlen) {
3456 STR_SET_LEN(str,
len);
3460 str_make_independent_expand(str, slen,
len - slen, termlen);
3462 else if (str_embed_capa(str) >=
len + termlen) {
3463 char *
ptr = STR_HEAP_PTR(str);
3465 if (slen >
len) slen =
len;
3468 STR_SET_LEN(str,
len);
3469 if (independent) ruby_xfree(
ptr);
3472 else if (!independent) {
3473 if (
len == slen)
return str;
3474 str_make_independent_expand(str, slen,
len - slen, termlen);
3478 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3479 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3482 else if (
len == slen)
return str;
3483 STR_SET_LEN(str,
len);
3490str_ensure_available_capa(
VALUE str,
long len)
3492 str_modify_keep_cr(str);
3494 const int termlen = TERM_LEN(str);
3495 long olen = RSTRING_LEN(str);
3497 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3498 rb_raise(rb_eArgError,
"string sizes too big");
3501 long total = olen +
len;
3502 long capa = str_capacity(str, termlen);
3505 if (total >= LONG_MAX / 2) {
3508 while (total >
capa) {
3511 RESIZE_CAPA_TERM(str,
capa, termlen);
3516str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3519 str_modify_keep_cr(str);
3524 if (
len == 0)
return 0;
3526 long total, olen,
off = -1;
3528 const int termlen = TERM_LEN(str);
3531 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3535 long capa = str_capacity(str, termlen);
3537 if (olen > LONG_MAX -
len) {
3538 rb_raise(rb_eArgError,
"string sizes too big");
3542 if (total >= LONG_MAX / 2) {
3545 while (total >
capa) {
3548 RESIZE_CAPA_TERM(str,
capa, termlen);
3549 sptr = RSTRING_PTR(str);
3554 memcpy(sptr + olen,
ptr,
len);
3555 STR_SET_LEN(str, total);
3556 TERM_FILL(sptr + total, termlen);
/*
 * Convenience wrappers around str_buf_cat4() that always pass
 * keep_cr = false.
 *
 * str_buf_cat2() takes the length from rb_strlen_lit(ptr), so `ptr` is
 * presumably expected to be a string literal -- confirm against the
 * definition of rb_strlen_lit.
 *
 * NOTE(review): str_buf_cat() pastes `len` into the call without
 * parentheses, unlike `str` and `ptr`.
 */
3561#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3562#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3567 if (
len == 0)
return str;
3569 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3571 return str_buf_cat(str,
ptr,
len);
3582rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3587 if (UNLIKELY(!str_independent(str))) {
3588 str_make_independent(str);
3591 long string_length = -1;
3592 const int null_terminator_length = 1;
3597 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3598 rb_raise(rb_eArgError,
"string sizes too big");
3601 long string_capacity = str_capacity(str, null_terminator_length);
3607 if (LIKELY(string_capacity >= string_length + 1)) {
3609 sptr[string_length] = byte;
3610 STR_SET_LEN(str, string_length + 1);
3611 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3615 str_buf_cat(str, (
char *)&
byte, 1);
3631 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3642rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3643 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3652 if (str_encindex == ptr_encindex) {
3654 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3658 str_enc = rb_enc_from_index(str_encindex);
3659 ptr_enc = rb_enc_from_index(ptr_encindex);
3660 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3663 if (RSTRING_LEN(str) == 0) {
3666 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3672 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3681 *ptr_cr_ret = ptr_cr;
3683 if (str_encindex != ptr_encindex &&
3686 str_enc = rb_enc_from_index(str_encindex);
3687 ptr_enc = rb_enc_from_index(ptr_encindex);
3692 res_encindex = str_encindex;
3697 res_encindex = str_encindex;
3701 res_encindex = ptr_encindex;
3706 res_encindex = str_encindex;
3713 res_encindex = str_encindex;
3719 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3721 str_buf_cat(str,
ptr,
len);
3727 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3734 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3744 if (rb_enc_asciicompat(enc)) {
3745 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3751 unsigned int c = (
unsigned char)*
ptr;
3752 int len = rb_enc_codelen(c, enc);
3753 rb_enc_mbcput(c, buf, enc);
3754 rb_enc_cr_str_buf_cat(str, buf,
len,
3767 if (str_enc_fastpath(str)) {
3771 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3777 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3788 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3804rb_str_concat_literals(
size_t num,
const VALUE *strary)
3808 unsigned long len = 1;
3813 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3815 str_enc_copy_direct(str, strary[0]);
3817 for (i = s; i < num; ++i) {
3818 const VALUE v = strary[i];
3822 if (encidx != ENCINDEX_US_ASCII) {
3824 rb_enc_set_index(str, encidx);
3837rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3839 str_modifiable(str);
3844 else if (argc > 1) {
3847 rb_enc_copy(arg_str, str);
3848 for (i = 0; i < argc; i++) {
3883rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3885 long needed_capacity = 0;
3889 for (
int index = 0; index < argc; index++) {
3890 VALUE obj = argv[index];
3898 needed_capacity += RSTRING_LEN(obj);
3903 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3910 str_ensure_available_capa(str, needed_capacity);
3913 for (
int index = 0; index < argc; index++) {
3914 VALUE obj = argv[index];
3919 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3920 char byte = (char)(
NUM2INT(obj) & 0xFF);
3934 rb_bug(
"append_as_bytes arguments should have been validated");
3938 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3939 TERM_FILL(sptr, TERM_LEN(str));
3944 for (
int index = 0; index < argc; index++) {
3945 VALUE obj = argv[index];
3962 rb_bug(
"append_as_bytes arguments should have been validated");
4041 if (rb_num_to_uint(str2, &code) == 0) {
4054 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4057 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4060 long pos = RSTRING_LEN(str1);
4065 switch (
len = rb_enc_codelen(code, enc)) {
4066 case ONIGERR_INVALID_CODE_POINT_VALUE:
4067 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4069 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4075 rb_enc_mbcput(code, buf, enc);
4076 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4077 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4079 rb_str_resize(str1, pos+
len);
4080 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4093rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4095 int encidx = rb_enc_to_index(enc);
4097 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4102 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4103 return ENCINDEX_ASCII_8BIT;
4125rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4127 str_modifiable(str);
4132 else if (argc > 1) {
4135 rb_enc_copy(arg_str, str);
4136 for (i = 0; i < argc; i++) {
4149 st_index_t precomputed_hash;
4150 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4152 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4153 return precomputed_hash;
4156 return str_do_hash(str);
4163 const char *ptr1, *ptr2;
4166 return (len1 != len2 ||
4168 memcmp(ptr1, ptr2, len1) != 0);
4180rb_str_hash_m(
VALUE str)
/*
 * Evaluates to the smaller of a and b (b when a > b, otherwise a).
 * Each argument can be evaluated twice, so only pass expressions that have
 * no side effects.
 */
4186#define lesser(a,b) (((a)>(b))?(b):(a))
4194 if (RSTRING_LEN(str1) == 0)
return TRUE;
4195 if (RSTRING_LEN(str2) == 0)
return TRUE;
4198 if (idx1 == idx2)
return TRUE;
4203 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4207 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4217 const char *ptr1, *ptr2;
4220 if (str1 == str2)
return 0;
4223 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4232 if (len1 > len2)
return 1;
4235 if (retval > 0)
return 1;
4269 if (str1 == str2)
return Qtrue;
4276 return rb_str_eql_internal(str1, str2);
4290 if (str1 == str2)
return Qtrue;
4292 return rb_str_eql_internal(str1, str2);
4327 return rb_invcmp(str1, str2);
4369 return str_casecmp(str1, s);
4377 const char *p1, *p1end, *p2, *p2end;
4379 enc = rb_enc_compatible(str1, str2);
4384 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4385 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4386 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4387 while (p1 < p1end && p2 < p2end) {
4389 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4390 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4392 return INT2FIX(c1 < c2 ? -1 : 1);
4399 while (p1 < p1end && p2 < p2end) {
4400 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4401 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4403 if (0 <= c1 && 0 <= c2) {
4407 return INT2FIX(c1 < c2 ? -1 : 1);
4411 l1 = rb_enc_mbclen(p1, p1end, enc);
4412 l2 = rb_enc_mbclen(p2, p2end, enc);
4413 len = l1 < l2 ? l1 : l2;
4414 r = memcmp(p1, p2,
len);
4416 return INT2FIX(r < 0 ? -1 : 1);
4418 return INT2FIX(l1 < l2 ? -1 : 1);
4424 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4425 if (p1 == p1end)
return INT2FIX(-1);
4458 return str_casecmp_p(str1, s);
4465 VALUE folded_str1, folded_str2;
4466 VALUE fold_opt = sym_fold;
4468 enc = rb_enc_compatible(str1, str2);
4473 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4474 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4476 return rb_str_eql(folded_str1, folded_str2);
4480strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4481 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4483 const char *search_start = str_ptr;
4484 long pos, search_len = str_len - offset;
4488 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4489 if (pos < 0)
return pos;
4491 if (t == search_start + pos)
break;
4492 search_len -= t - search_start;
4493 if (search_len <= 0)
return -1;
4494 offset += t - search_start;
4497 return pos + offset;
/*
 * Shorthands for rb_strseq_index() that fix its final `in_byte` argument:
 * rb_str_index passes 0 and rb_str_byteindex passes 1.  With in_byte set,
 * rb_strseq_index() uses the byte lengths of `str` and `sub` directly and
 * does not run `offset` through str_offset().
 */
4501#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4502#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4505rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4507 const char *str_ptr, *str_ptr_end, *sub_ptr;
4508 long str_len, sub_len;
4511 enc = rb_enc_check(str, sub);
4512 if (is_broken_string(sub))
return -1;
4514 str_ptr = RSTRING_PTR(str);
4516 str_len = RSTRING_LEN(str);
4517 sub_ptr = RSTRING_PTR(sub);
4518 sub_len = RSTRING_LEN(sub);
4520 if (str_len < sub_len)
return -1;
4523 long str_len_char, sub_len_char;
4524 int single_byte = single_byte_optimizable(str);
4525 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4526 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4528 offset += str_len_char;
4529 if (offset < 0)
return -1;
4531 if (str_len_char - offset < sub_len_char)
return -1;
4532 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4535 if (sub_len == 0)
return offset;
4538 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4551rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4558 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4559 long slen = str_strlen(str, enc);
4561 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4573 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4574 enc, single_byte_optimizable(str));
4585 pos = rb_str_index(str, sub, pos);
4599str_ensure_byte_pos(
VALUE str,
long pos)
4601 if (!single_byte_optimizable(str)) {
4602 const char *s = RSTRING_PTR(str);
4604 const char *p = s + pos;
4605 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4607 "offset %ld does not land on character boundary", pos);
4680rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4686 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4687 long slen = RSTRING_LEN(str);
4689 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4700 str_ensure_byte_pos(str, pos);
4712 pos = rb_str_byteindex(str, sub, pos);
4713 if (pos >= 0)
return LONG2NUM(pos);
4720memrchr(
const char *search_str,
int chr,
long search_len)
4722 const char *ptr = search_str + search_len;
4723 while (ptr > search_str) {
4724 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4734 char *hit, *adjusted;
4736 long slen, searchlen;
4739 sbeg = RSTRING_PTR(str);
4740 slen = RSTRING_LEN(sub);
4741 if (slen == 0)
return s - sbeg;
4743 t = RSTRING_PTR(sub);
4745 searchlen = s - sbeg + 1;
4747 if (memcmp(s, t, slen) == 0) {
4752 hit = memrchr(sbeg, c, searchlen);
4755 if (hit != adjusted) {
4756 searchlen = adjusted - sbeg;
4759 if (memcmp(hit, t, slen) == 0)
4761 searchlen = adjusted - sbeg;
4762 }
while (searchlen > 0);
4776 enc = rb_enc_check(str, sub);
4777 if (is_broken_string(sub))
return -1;
4778 singlebyte = single_byte_optimizable(str);
4779 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4780 slen = str_strlen(sub, enc);
4783 if (
len < slen)
return -1;
4784 if (
len - pos < slen) pos =
len - slen;
4785 if (
len == 0)
return pos;
4787 sbeg = RSTRING_PTR(str);
4790 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4796 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4797 return str_rindex(str, sub, s, enc);
4809rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4814 long pos,
len = str_strlen(str, enc);
4816 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4818 if (pos < 0 && (pos +=
len) < 0) {
4824 if (pos >
len) pos =
len;
4832 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4833 enc, single_byte_optimizable(str));
4844 pos = rb_str_rindex(str, sub, pos);
4854rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4860 enc = rb_enc_check(str, sub);
4861 if (is_broken_string(sub))
return -1;
4862 len = RSTRING_LEN(str);
4863 slen = RSTRING_LEN(sub);
4866 if (
len < slen)
return -1;
4867 if (
len - pos < slen) pos =
len - slen;
4868 if (
len == 0)
return pos;
4870 sbeg = RSTRING_PTR(str);
4873 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4880 return str_rindex(str, sub, s, enc);
4970rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4974 long pos,
len = RSTRING_LEN(str);
4976 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4978 if (pos < 0 && (pos +=
len) < 0) {
4984 if (pos >
len) pos =
len;
4990 str_ensure_byte_pos(str, pos);
5002 pos = rb_str_byterindex(str, sub, pos);
5003 if (pos >= 0)
return LONG2NUM(pos);
5042 switch (OBJ_BUILTIN_TYPE(y)) {
5096rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5103 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5134rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5138 re = get_pat(argv[0]);
5139 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5148static enum neighbor_char
5154 if (rb_enc_mbminlen(enc) > 1) {
5156 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5158 return NEIGHBOR_NOT_CHAR;
5160 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5162 if (!l)
return NEIGHBOR_NOT_CHAR;
5163 if (l !=
len)
return NEIGHBOR_WRAPPED;
5164 rb_enc_mbcput(c, p, enc);
5165 r = rb_enc_precise_mbclen(p, p +
len, enc);
5167 return NEIGHBOR_NOT_CHAR;
5169 return NEIGHBOR_FOUND;
5172 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5175 return NEIGHBOR_WRAPPED;
5176 ++((
unsigned char*)p)[i];
5177 l = rb_enc_precise_mbclen(p, p+
len, enc);
5181 return NEIGHBOR_FOUND;
5184 memset(p+l, 0xff,
len-l);
5190 for (len2 =
len-1; 0 < len2; len2--) {
5191 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5195 memset(p+len2+1, 0xff,
len-(len2+1));
5200static enum neighbor_char
5205 if (rb_enc_mbminlen(enc) > 1) {
5207 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5209 return NEIGHBOR_NOT_CHAR;
5211 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5212 if (!c)
return NEIGHBOR_NOT_CHAR;
5215 if (!l)
return NEIGHBOR_NOT_CHAR;
5216 if (l !=
len)
return NEIGHBOR_WRAPPED;
5217 rb_enc_mbcput(c, p, enc);
5218 r = rb_enc_precise_mbclen(p, p +
len, enc);
5220 return NEIGHBOR_NOT_CHAR;
5222 return NEIGHBOR_FOUND;
5225 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5228 return NEIGHBOR_WRAPPED;
5229 --((
unsigned char*)p)[i];
5230 l = rb_enc_precise_mbclen(p, p+
len, enc);
5234 return NEIGHBOR_FOUND;
5237 memset(p+l, 0,
len-l);
5243 for (len2 =
len-1; 0 < len2; len2--) {
5244 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5248 memset(p+len2+1, 0,
len-(len2+1));
5262static enum neighbor_char
5263enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5265 enum neighbor_char ret;
5269 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5273 const int max_gaps = 1;
5275 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5277 ctype = ONIGENC_CTYPE_DIGIT;
5279 ctype = ONIGENC_CTYPE_ALPHA;
5281 return NEIGHBOR_NOT_CHAR;
5284 for (
try = 0;
try <= max_gaps; ++
try) {
5285 ret = enc_succ_char(p,
len, enc);
5286 if (ret == NEIGHBOR_FOUND) {
5287 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5289 return NEIGHBOR_FOUND;
5296 ret = enc_pred_char(p,
len, enc);
5297 if (ret == NEIGHBOR_FOUND) {
5298 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5311 return NEIGHBOR_NOT_CHAR;
5314 if (ctype != ONIGENC_CTYPE_DIGIT) {
5316 return NEIGHBOR_WRAPPED;
5320 enc_succ_char(carry,
len, enc);
5321 return NEIGHBOR_WRAPPED;
5339 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5340 rb_enc_cr_str_copy_for_substr(str, orig);
5341 return str_succ(str);
5348 char *sbeg, *s, *e, *last_alnum = 0;
5349 int found_alnum = 0;
5351 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5352 long carry_pos = 0, carry_len = 1;
5353 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5355 slen = RSTRING_LEN(str);
5356 if (slen == 0)
return str;
5358 enc = STR_ENC_GET(str);
5359 sbeg = RSTRING_PTR(str);
5360 s = e = sbeg + slen;
5362 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5363 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5369 l = rb_enc_precise_mbclen(s, e, enc);
5370 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5371 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5372 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5374 case NEIGHBOR_NOT_CHAR:
5376 case NEIGHBOR_FOUND:
5378 case NEIGHBOR_WRAPPED:
5383 carry_pos = s - sbeg;
5388 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5389 enum neighbor_char neighbor;
5390 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5391 l = rb_enc_precise_mbclen(s, e, enc);
5392 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5393 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5395 neighbor = enc_succ_char(tmp, l, enc);
5397 case NEIGHBOR_FOUND:
5401 case NEIGHBOR_WRAPPED:
5404 case NEIGHBOR_NOT_CHAR:
5407 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5409 enc_succ_char(s, l, enc);
5411 if (!rb_enc_asciicompat(enc)) {
5412 MEMCPY(carry, s,
char, l);
5415 carry_pos = s - sbeg;
5419 RESIZE_CAPA(str, slen + carry_len);
5420 sbeg = RSTRING_PTR(str);
5421 s = sbeg + carry_pos;
5422 memmove(s + carry_len, s, slen - carry_pos);
5423 memmove(s, carry, carry_len);
5425 STR_SET_LEN(str, slen);
5426 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5442rb_str_succ_bang(
VALUE str)
5450all_digits_p(
const char *s,
long len)
5478 VALUE end, exclusive;
5482 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5488 VALUE current, after_end;
5495 enc = rb_enc_check(beg, end);
5496 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5498 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5499 char c = RSTRING_PTR(beg)[0];
5500 char e = RSTRING_PTR(end)[0];
5502 if (c > e || (excl && c == e))
return beg;
5504 VALUE str = rb_enc_str_new(&c, 1, enc);
5506 if ((*each)(str, arg))
break;
5507 if (!excl && c == e)
break;
5509 if (excl && c == e)
break;
5514 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5515 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5516 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5521 b = rb_str_to_inum(beg, 10, FALSE);
5522 e = rb_str_to_inum(end, 10, FALSE);
5529 if (excl && bi == ei)
break;
5530 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5535 ID op = excl ?
'<' : idLE;
5536 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5541 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5542 b = rb_funcallv(b, succ, 0, 0);
5549 if (n > 0 || (excl && n == 0))
return beg;
5551 after_end = rb_funcallv(end, succ, 0, 0);
5556 next = rb_funcallv(current, succ, 0, 0);
5557 if ((*each)(current, arg))
break;
5558 if (
NIL_P(next))
break;
5562 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5577 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5578 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5579 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5581 b = rb_str_to_inum(beg, 10, FALSE);
5587 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5595 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5596 b = rb_funcallv(b, succ, 0, 0);
5602 VALUE next = rb_funcallv(current, succ, 0, 0);
5603 if ((*each)(current, arg))
break;
5606 if (RSTRING_LEN(current) == 0)
5617 if (!
rb_equal(str, *argp))
return 0;
5631 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5632 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5633 rb_enc_asciicompat(STR_ENC_GET(val))) {
5634 const char *bp = RSTRING_PTR(beg);
5635 const char *ep = RSTRING_PTR(end);
5636 const char *vp = RSTRING_PTR(val);
5637 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5638 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5646 if (b <= v && v < e)
return Qtrue;
5647 return RBOOL(!
RTEST(exclusive) && v == e);
5654 all_digits_p(bp, RSTRING_LEN(beg)) &&
5655 all_digits_p(ep, RSTRING_LEN(end))) {
5660 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5662 return RBOOL(
NIL_P(val));
5685 return rb_str_subpat(str, indx,
INT2FIX(0));
5688 if (rb_str_index(str, indx, 0) != -1)
5694 long beg,
len = str_strlen(str, NULL);
5706 return str_substr(str, idx, 1, FALSE);
5723rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5727 return rb_str_subpat(str, argv[0], argv[1]);
5730 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5734 return rb_str_aref(str, argv[0]);
5740 char *ptr = RSTRING_PTR(str);
5741 long olen = RSTRING_LEN(str), nlen;
5743 str_modifiable(str);
5744 if (
len > olen)
len = olen;
5746 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5748 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5750 ptr =
RSTRING(str)->as.embed.ary;
5751 memmove(ptr, oldptr +
len, nlen);
5752 if (fl == STR_NOEMBED)
xfree(oldptr);
5755 if (!STR_SHARED_P(str)) {
5757 rb_enc_cr_str_exact_copy(shared, str);
5762 STR_SET_LEN(str, nlen);
5764 if (!SHARABLE_MIDDLE_SUBSTRING) {
5765 TERM_FILL(ptr + nlen, TERM_LEN(str));
5772rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5778 if (beg == 0 && vlen == 0) {
5783 str_modify_keep_cr(str);
5787 RESIZE_CAPA(str, slen + vlen -
len);
5788 sptr = RSTRING_PTR(str);
5797 memmove(sptr + beg + vlen,
5799 slen - (beg +
len));
5801 if (vlen < beg &&
len < 0) {
5805 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5808 STR_SET_LEN(str, slen);
5809 TERM_FILL(&sptr[slen], TERM_LEN(str));
5816 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5825 int singlebyte = single_byte_optimizable(str);
5831 enc = rb_enc_check(str, val);
5832 slen = str_strlen(str, enc);
5834 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5843 if (
len > slen - beg) {
5846 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5851 beg = p - RSTRING_PTR(str);
5853 rb_str_update_0(str, beg,
len, val);
5854 rb_enc_associate(str, enc);
5865 long start, end,
len;
5875 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5879 nth += regs->num_regs;
5889 enc = rb_enc_check_str(str, val);
5890 rb_str_update_0(str, start,
len, val);
5891 rb_enc_associate(str, enc);
5899 switch (
TYPE(indx)) {
5901 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5905 beg = rb_str_index(str, indx, 0);
5944rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5948 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5956 return rb_str_aset(str, argv[0], argv[1]);
6008rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6016 str_modify_keep_cr(str);
6024 if ((nth += regs->num_regs) <= 0)
return Qnil;
6026 else if (nth >= regs->num_regs)
return Qnil;
6028 len = END(nth) - beg;
6031 else if (argc == 2) {
6040 beg = p - RSTRING_PTR(str);
6044 beg = rb_str_index(str, indx, 0);
6045 if (beg == -1)
return Qnil;
6046 len = RSTRING_LEN(indx);
6058 beg = p - RSTRING_PTR(str);
6067 beg = p - RSTRING_PTR(str);
6071 rb_enc_cr_str_copy_for_substr(result, str);
6079 char *sptr = RSTRING_PTR(str);
6080 long slen = RSTRING_LEN(str);
6081 if (beg +
len > slen)
6085 slen - (beg +
len));
6087 STR_SET_LEN(str, slen);
6088 TERM_FILL(&sptr[slen], TERM_LEN(str));
6099 switch (OBJ_BUILTIN_TYPE(pat)) {
6118get_pat_quoted(
VALUE pat,
int check)
6122 switch (OBJ_BUILTIN_TYPE(pat)) {
6136 if (check && is_broken_string(pat)) {
6143rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6146 pos = rb_str_byteindex(str, pat, pos);
6147 if (set_backref_str) {
6149 str = rb_str_new_frozen_String(str);
6150 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6152 *match = match_data;
6162 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6167rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6169 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6187rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6201 hash = rb_check_hash_type(argv[1]);
6207 pat = get_pat_quoted(argv[0], 1);
6209 str_modifiable(str);
6210 beg = rb_pat_search(pat, str, 0, 1);
6224 end0 = beg0 + RSTRING_LEN(pat);
6233 if (iter || !
NIL_P(hash)) {
6234 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6240 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6243 str_mod_check(str, p,
len);
6244 rb_check_frozen(str);
6250 enc = rb_enc_compatible(str, repl);
6253 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6257 rb_enc_inspect_name(str_enc),
6258 rb_enc_inspect_name(STR_ENC_GET(repl)));
6260 enc = STR_ENC_GET(repl);
6263 rb_enc_associate(str, enc);
6273 rlen = RSTRING_LEN(repl);
6274 len = RSTRING_LEN(str);
6276 RESIZE_CAPA(str,
len + rlen - plen);
6278 p = RSTRING_PTR(str);
6280 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6282 rp = RSTRING_PTR(repl);
6283 memmove(p + beg0, rp, rlen);
6285 STR_SET_LEN(str,
len);
6286 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6309 rb_str_sub_bang(argc, argv, str);
6314str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6317 long beg, beg0, end0;
6318 long offset, blen, slen,
len, last;
6319 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6321 int need_backref_str = -1;
6331 hash = rb_check_hash_type(argv[1]);
6335 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6343 rb_error_arity(argc, 1, 2);
6346 pat = get_pat_quoted(argv[0], 1);
6347 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6350 if (bang)
return Qnil;
6355 blen = RSTRING_LEN(str) + 30;
6357 sp = RSTRING_PTR(str);
6358 slen = RSTRING_LEN(str);
6360 str_enc = STR_ENC_GET(str);
6361 rb_enc_associate(dest, str_enc);
6368 end0 = beg0 + RSTRING_LEN(pat);
6382 struct RString fake_str = {RBASIC_INIT};
6384 if (mode == FAST_MAP) {
6393 val = rb_hash_aref(hash, key);
6396 str_mod_check(str, sp, slen);
6401 else if (need_backref_str) {
6403 if (need_backref_str < 0) {
6404 need_backref_str = val != repl;
6411 len = beg0 - offset;
6425 if (RSTRING_LEN(str) <= end0)
break;
6426 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6428 offset = end0 +
len;
6430 cp = RSTRING_PTR(str) + offset;
6431 if (offset > RSTRING_LEN(str))
break;
6434 if (mode != FAST_MAP && mode != STR) {
6437 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6442 if (RSTRING_LEN(str) > offset) {
6445 rb_pat_search0(pat, str, last, 1, &match);
6447 str_shared_replace(str, dest);
6472rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6474 str_modify_keep_cr(str);
6475 return str_gsub(argc, argv, str, 1);
6525 return str_gsub(argc, argv, str, 0);
6545 str_modifiable(str);
6546 if (str == str2)
return str;
6550 return str_replace(str, str2);
6567rb_str_clear(
VALUE str)
6571 STR_SET_LEN(str, 0);
6572 RSTRING_PTR(str)[0] = 0;
6573 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6589rb_str_chr(
VALUE str)
6607 pos += RSTRING_LEN(str);
6608 if (pos < 0 || RSTRING_LEN(str) <= pos)
6611 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6631 long len = RSTRING_LEN(str);
6632 char *
ptr, *head, *left = 0;
6636 if (pos < -
len ||
len <= pos)
6643 char byte = (char)(
NUM2INT(w) & 0xFF);
6645 if (!str_independent(str))
6646 str_make_independent(str);
6647 enc = STR_ENC_GET(str);
6648 head = RSTRING_PTR(str);
6650 if (!STR_EMBED_P(str)) {
6657 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6665 width = rb_enc_precise_mbclen(left, head+
len, enc);
6667 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6683str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6685 long n = RSTRING_LEN(str);
6687 if (beg > n ||
len < 0)
return Qnil;
6690 if (beg < 0)
return Qnil;
6695 if (!empty)
return Qnil;
6699 VALUE str2 = str_subseq(str, beg,
len);
6701 str_enc_copy_direct(str2, str);
6703 if (RSTRING_LEN(str2) == 0) {
6704 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6738 long beg,
len = RSTRING_LEN(str);
6746 return str_byte_substr(str, beg,
len, TRUE);
6751 return str_byte_substr(str, idx, 1, FALSE);
6763rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6768 return str_byte_substr(str, beg,
len, TRUE);
6771 return str_byte_aref(str, argv[0]);
6775str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6777 long end, slen = RSTRING_LEN(str);
6780 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6789 if (*
len > slen - *beg) {
6793 str_ensure_byte_pos(str, *beg);
6794 str_ensure_byte_pos(str, end);
6808rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6810 long beg,
len, vbeg, vlen;
6815 if (!(argc == 2 || argc == 3 || argc == 5)) {
6816 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6820 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6821 rb_builtin_class_name(argv[0]));
6828 vlen = RSTRING_LEN(val);
6833 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6834 rb_builtin_class_name(argv[2]));
6846 vlen = RSTRING_LEN(val);
6854 str_check_beg_len(str, &beg, &
len);
6855 str_check_beg_len(val, &vbeg, &vlen);
6856 str_modify_keep_cr(str);
6859 rb_enc_associate(str, rb_enc_check(str, val));
6862 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6884rb_str_reverse(
VALUE str)
6891 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6892 enc = STR_ENC_GET(str);
6898 if (RSTRING_LEN(str) > 1) {
6899 if (single_byte_optimizable(str)) {
6906 int clen = rb_enc_fast_mbclen(s, e, enc);
6914 cr = rb_enc_asciicompat(enc) ?
6917 int clen = rb_enc_mbclen(s, e, enc);
6926 STR_SET_LEN(rev, RSTRING_LEN(str));
6927 str_enc_copy_direct(rev, str);
6949rb_str_reverse_bang(
VALUE str)
6951 if (RSTRING_LEN(str) > 1) {
6952 if (single_byte_optimizable(str)) {
6955 str_modify_keep_cr(str);
6956 s = RSTRING_PTR(str);
6965 str_shared_replace(str, rb_str_reverse(str));
6969 str_modify_keep_cr(str);
6998 i = rb_str_index(str, arg, 0);
7000 return RBOOL(i != -1);
7044 rb_raise(rb_eArgError,
"invalid radix %d", base);
7046 return rb_str_to_inum(str, base, FALSE);
7071rb_str_to_f(
VALUE str)
7088rb_str_to_s(
VALUE str)
7100 char s[RUBY_MAX_CHAR_LEN];
7101 int n = rb_enc_codelen(c, enc);
7103 rb_enc_mbcput(c, s, enc);
/*
 * Size argument handed to snprintf() whenever a single escaped character
 * (e.g. "\\xNN", "\\x{...}", "\\uNNNN", "\\u{...}") is formatted in this file.
 * The scratch buffers that receive the text are declared with
 * CHAR_ESC_LEN + 1 bytes.
 */
7108#define CHAR_ESC_LEN 13
7111rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7113 char buf[CHAR_ESC_LEN + 1];
7121 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7123 else if (c < 0x10000) {
7124 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7127 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7132 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7135 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7138 l = (int)strlen(buf);
7144ruby_escaped_char(
int c)
7147 case '\0':
return "\\0";
7148 case '\n':
return "\\n";
7149 case '\r':
return "\\r";
7150 case '\t':
return "\\t";
7151 case '\f':
return "\\f";
7152 case '\013':
return "\\v";
7153 case '\010':
return "\\b";
7154 case '\007':
return "\\a";
7155 case '\033':
return "\\e";
7156 case '\x7f':
return "\\c?";
7162rb_str_escape(
VALUE str)
7166 const char *p = RSTRING_PTR(str);
7168 const char *prev = p;
7169 char buf[CHAR_ESC_LEN + 1];
7171 int unicode_p = rb_enc_unicode_p(enc);
7172 int asciicompat = rb_enc_asciicompat(enc);
7177 int n = rb_enc_precise_mbclen(p, pend, enc);
7179 if (p > prev) str_buf_cat(result, prev, p - prev);
7180 n = rb_enc_mbminlen(enc);
7182 n = (int)(pend - p);
7184 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7185 str_buf_cat(result, buf, strlen(buf));
7191 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7193 cc = ruby_escaped_char(c);
7195 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7196 str_buf_cat(result, cc, strlen(cc));
7199 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7202 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7203 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7207 if (p > prev) str_buf_cat(result, prev, p - prev);
7226 const char *p, *pend, *prev;
7227 char buf[CHAR_ESC_LEN + 1];
7229 rb_encoding *resenc = rb_default_internal_encoding();
7230 int unicode_p = rb_enc_unicode_p(enc);
7231 int asciicompat = rb_enc_asciicompat(enc);
7233 if (resenc == NULL) resenc = rb_default_external_encoding();
7234 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7235 rb_enc_associate(result, resenc);
7236 str_buf_cat2(result,
"\"");
7244 n = rb_enc_precise_mbclen(p, pend, enc);
7246 if (p > prev) str_buf_cat(result, prev, p - prev);
7247 n = rb_enc_mbminlen(enc);
7249 n = (int)(pend - p);
7251 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7252 str_buf_cat(result, buf, strlen(buf));
7258 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7260 if ((asciicompat || unicode_p) &&
7261 (c ==
'"'|| c ==
'\\' ||
7266 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7267 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7268 str_buf_cat2(result,
"\\");
7269 if (asciicompat || enc == resenc) {
7275 case '\n': cc =
'n';
break;
7276 case '\r': cc =
'r';
break;
7277 case '\t': cc =
't';
break;
7278 case '\f': cc =
'f';
break;
7279 case '\013': cc =
'v';
break;
7280 case '\010': cc =
'b';
break;
7281 case '\007': cc =
'a';
break;
7282 case 033: cc =
'e';
break;
7283 default: cc = 0;
break;
7286 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7289 str_buf_cat(result, buf, 2);
7302 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7306 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7307 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7312 if (p > prev) str_buf_cat(result, prev, p - prev);
7313 str_buf_cat2(result,
"\"");
/*
 * True when `p` has not reached `e` and the byte at `p` is '$', '@' or '{'.
 * The dump code evaluates it right after consuming a '#', i.e. on the byte
 * that follows the '#'; when it holds, a backslash is written before the '#'
 * and the length estimate counts two bytes instead of one.
 * Note: `p` is expanded several times, so pass an expression without side
 * effects.
 */
7318#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7331 int encidx = rb_enc_get_index(str);
7334 const char *p, *pend;
7337 int u8 = (encidx == rb_utf8_encindex());
7338 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7341 if (!rb_enc_asciicompat(enc)) {
7343 len += strlen(enc->name);
7346 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7349 unsigned char c = *p++;
7352 case '"':
case '\\':
7353 case '\n':
case '\r':
7354 case '\t':
case '\f':
7355 case '\013':
case '\010':
case '\007':
case '\033':
7360 clen = IS_EVSTR(p, pend) ? 2 : 1;
7368 if (u8 && c > 0x7F) {
7369 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7371 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7374 else if (cc <= 0xFFFFF)
7387 if (clen > LONG_MAX -
len) {
7394 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7395 q = RSTRING_PTR(result); qend = q +
len + 1;
7399 unsigned char c = *p++;
7401 if (c ==
'"' || c ==
'\\') {
7405 else if (c ==
'#') {
7406 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7409 else if (c ==
'\n') {
7413 else if (c ==
'\r') {
7417 else if (c ==
'\t') {
7421 else if (c ==
'\f') {
7425 else if (c ==
'\013') {
7429 else if (c ==
'\010') {
7433 else if (c ==
'\007') {
7437 else if (c ==
'\033') {
7447 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7449 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7452 snprintf(q, qend-q,
"u%04X", cc);
7454 snprintf(q, qend-q,
"u{%X}", cc);
7459 snprintf(q, qend-q,
"x%02X", c);
7465 if (!rb_enc_asciicompat(enc)) {
7466 snprintf(q, qend-q, nonascii_suffix, enc->name);
7467 encidx = rb_ascii8bit_encindex();
7470 rb_enc_associate_index(result, encidx);
7476unescape_ascii(
unsigned int c)
7500undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7502 const char *s = *ss;
7506 unsigned char buf[6];
7524 *buf = unescape_ascii(*s);
7536 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7537 if (*penc != enc_utf8) {
7539 rb_enc_associate(undumped, enc_utf8);
7556 if (hexlen == 0 || hexlen > 6) {
7562 if (0xd800 <= c && c <= 0xdfff) {
7565 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7575 if (0xd800 <= c && c <= 0xdfff) {
7578 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
/*
 * Forward declaration: str_undump() below calls this predicate and compares
 * the result against Qfalse before the definition has been seen.
 */
7608static VALUE rb_str_is_ascii_only_p(
VALUE str);
7620str_undump(
VALUE str)
7622 const char *s = RSTRING_PTR(str);
7625 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7627 bool binary =
false;
7631 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7634 if (!str_null_check(str, &w)) {
7637 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7638 if (*s !=
'"')
goto invalid_format;
7656 static const char force_encoding_suffix[] =
".force_encoding(\"";
7657 static const char dup_suffix[] =
".dup";
7658 const char *encname;
7663 size =
sizeof(dup_suffix) - 1;
7664 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7666 size =
sizeof(force_encoding_suffix) - 1;
7667 if (s_end - s <= size)
goto invalid_format;
7668 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7672 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7676 s = memchr(s,
'"', s_end-s);
7678 if (!s)
goto invalid_format;
7679 if (s_end - s != 2)
goto invalid_format;
7680 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7682 encidx = rb_enc_find_index2(encname, (
long)size);
7686 rb_enc_associate_index(undumped, encidx);
7696 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7707 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7713 if (rb_enc_dummy_p(enc)) {
7720str_true_enc(
VALUE str)
7723 rb_str_check_dummy_enc(enc);
7727static OnigCaseFoldType
7728check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7733 rb_raise(rb_eArgError,
"too many options");
7734 if (argv[0]==sym_turkic) {
7735 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7737 if (argv[1]==sym_lithuanian)
7738 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7740 rb_raise(rb_eArgError,
"invalid second option");
7743 else if (argv[0]==sym_lithuanian) {
7744 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7746 if (argv[1]==sym_turkic)
7747 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7749 rb_raise(rb_eArgError,
"invalid second option");
7753 rb_raise(rb_eArgError,
"too many options");
7754 else if (argv[0]==sym_ascii)
7755 flags |= ONIGENC_CASE_ASCII_ONLY;
7756 else if (argv[0]==sym_fold) {
7757 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7758 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7760 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7763 rb_raise(rb_eArgError,
"invalid option");
7770 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/*
 * Constant number of bytes added on top of the computed capacity of every
 * case-mapping buffer (remaining source length times the buffer count).
 */
7776#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/*
 * Diagnostic switch for the case-mapping code. It defaults to 0 unless it was
 * already defined; when non-zero, buffer allocations, the buffer count and
 * ASCII case-map length mismatches are reported on stderr.
 */
7777#ifndef CASEMAP_DEBUG
7778# define CASEMAP_DEBUG 0
7786 OnigUChar space[FLEX_ARY_LEN];
7790mapping_buffer_free(
void *p)
7794 while (current_buffer) {
7795 previous_buffer = current_buffer;
7796 current_buffer = current_buffer->next;
7797 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7803 {0, mapping_buffer_free,},
7804 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7812 const OnigUChar *source_current, *source_end;
7813 int target_length = 0;
7814 VALUE buffer_anchor;
7817 size_t buffer_count = 0;
7818 int buffer_length_or_invalid;
7820 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7822 source_current = (OnigUChar*)RSTRING_PTR(source);
7827 while (source_current < source_end) {
7829 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7830 if (CASEMAP_DEBUG) {
7831 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7834 *pre_buffer = current_buffer;
7835 pre_buffer = ¤t_buffer->next;
7836 current_buffer->next = NULL;
7837 current_buffer->capa =
capa;
7838 buffer_length_or_invalid = enc->case_map(flags,
7839 &source_current, source_end,
7840 current_buffer->space,
7841 current_buffer->space+current_buffer->capa,
7843 if (buffer_length_or_invalid < 0) {
7844 current_buffer =
DATA_PTR(buffer_anchor);
7846 mapping_buffer_free(current_buffer);
7847 rb_raise(rb_eArgError,
"input string invalid");
7849 target_length += current_buffer->used = buffer_length_or_invalid;
7851 if (CASEMAP_DEBUG) {
7852 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7855 if (buffer_count==1) {
7856 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7859 char *target_current;
7862 target_current = RSTRING_PTR(target);
7863 current_buffer =
DATA_PTR(buffer_anchor);
7864 while (current_buffer) {
7865 memcpy(target_current, current_buffer->space, current_buffer->used);
7866 target_current += current_buffer->used;
7867 current_buffer = current_buffer->next;
7870 current_buffer =
DATA_PTR(buffer_anchor);
7872 mapping_buffer_free(current_buffer);
7877 str_enc_copy_direct(target, source);
7886 const OnigUChar *source_current, *source_end;
7887 OnigUChar *target_current, *target_end;
7888 long old_length = RSTRING_LEN(source);
7889 int length_or_invalid;
7891 if (old_length == 0)
return Qnil;
7893 source_current = (OnigUChar*)RSTRING_PTR(source);
7895 if (source == target) {
7896 target_current = (OnigUChar*)source_current;
7897 target_end = (OnigUChar*)source_end;
7900 target_current = (OnigUChar*)RSTRING_PTR(target);
7904 length_or_invalid = onigenc_ascii_only_case_map(flags,
7905 &source_current, source_end,
7906 target_current, target_end, enc);
7907 if (length_or_invalid < 0)
7908 rb_raise(rb_eArgError,
"input string invalid");
7909 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7910 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7911 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7912 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7913 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7916 str_enc_copy(target, source);
7922upcase_single(
VALUE str)
7924 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7925 bool modified =
false;
7928 unsigned int c = *(
unsigned char*)s;
7930 if (
'a' <= c && c <=
'z') {
7931 *s =
'A' + (c -
'a');
7952rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7955 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7957 flags = check_case_options(argc, argv, flags);
7958 str_modify_keep_cr(str);
7959 enc = str_true_enc(str);
7960 if (case_option_single_p(flags, enc, str)) {
7961 if (upcase_single(str))
7962 flags |= ONIGENC_CASE_MODIFIED;
7964 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7965 rb_str_ascii_casemap(str, str, &flags, enc);
7967 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7969 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7982rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7985 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7988 flags = check_case_options(argc, argv, flags);
7989 enc = str_true_enc(str);
7990 if (case_option_single_p(flags, enc, str)) {
7991 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7992 str_enc_copy_direct(ret, str);
7995 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7997 rb_str_ascii_casemap(str, ret, &flags, enc);
8000 ret = rb_str_casemap(str, &flags, enc);
8007downcase_single(
VALUE str)
8009 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8010 bool modified =
false;
8013 unsigned int c = *(
unsigned char*)s;
8015 if (
'A' <= c && c <=
'Z') {
8016 *s =
'a' + (c -
'A');
8038rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8041 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8043 flags = check_case_options(argc, argv, flags);
8044 str_modify_keep_cr(str);
8045 enc = str_true_enc(str);
8046 if (case_option_single_p(flags, enc, str)) {
8047 if (downcase_single(str))
8048 flags |= ONIGENC_CASE_MODIFIED;
8050 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8051 rb_str_ascii_casemap(str, str, &flags, enc);
8053 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8055 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8069rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8072 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8075 flags = check_case_options(argc, argv, flags);
8076 enc = str_true_enc(str);
8077 if (case_option_single_p(flags, enc, str)) {
8078 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8079 str_enc_copy_direct(ret, str);
8080 downcase_single(ret);
8082 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8084 rb_str_ascii_casemap(str, ret, &flags, enc);
8087 ret = rb_str_casemap(str, &flags, enc);
8107rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8110 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8112 flags = check_case_options(argc, argv, flags);
8113 str_modify_keep_cr(str);
8114 enc = str_true_enc(str);
8115 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8116 if (flags&ONIGENC_CASE_ASCII_ONLY)
8117 rb_str_ascii_casemap(str, str, &flags, enc);
8119 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8121 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8135rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8138 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8141 flags = check_case_options(argc, argv, flags);
8142 enc = str_true_enc(str);
8143 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8144 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8146 rb_str_ascii_casemap(str, ret, &flags, enc);
8149 ret = rb_str_casemap(str, &flags, enc);
8168rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8171 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8173 flags = check_case_options(argc, argv, flags);
8174 str_modify_keep_cr(str);
8175 enc = str_true_enc(str);
8176 if (flags&ONIGENC_CASE_ASCII_ONLY)
8177 rb_str_ascii_casemap(str, str, &flags, enc);
8179 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8181 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8195rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8198 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8201 flags = check_case_options(argc, argv, flags);
8202 enc = str_true_enc(str);
8203 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8204 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8206 rb_str_ascii_casemap(str, ret, &flags, enc);
8209 ret = rb_str_casemap(str, &flags, enc);
8214typedef unsigned char *USTR;
8218 unsigned int now, max;
8230 if (t->p == t->pend)
return -1;
8231 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8234 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8236 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8238 if (t->p < t->pend) {
8239 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8242 if (t->now < 0x80 && c < 0x80) {
8243 rb_raise(rb_eArgError,
8244 "invalid range \"%c-%c\" in string transliteration",
8248 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8252 else if (t->now < c) {
8261 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8262 if (t->now == t->max) {
8267 if (t->now < t->max) {
8283 const unsigned int errc = -1;
8284 unsigned int trans[256];
8286 struct tr trsrc, trrepl;
8288 unsigned int c, c0, last = 0;
8289 int modify = 0, i, l;
8290 unsigned char *s, *send;
8292 int singlebyte = single_byte_optimizable(str);
8296#define CHECK_IF_ASCII(c) \
8297 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8298 (cr = ENC_CODERANGE_VALID) : 0)
8302 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8303 if (RSTRING_LEN(repl) == 0) {
8304 return rb_str_delete_bang(1, &src, str);
8308 e1 = rb_enc_check(str, src);
8309 e2 = rb_enc_check(str, repl);
8314 enc = rb_enc_check(src, repl);
8316 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8317 if (RSTRING_LEN(src) > 1 &&
8318 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8319 trsrc.p + l < trsrc.pend) {
8323 trrepl.p = RSTRING_PTR(repl);
8324 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8325 trsrc.gen = trrepl.gen = 0;
8326 trsrc.now = trrepl.now = 0;
8327 trsrc.max = trrepl.max = 0;
8330 for (i=0; i<256; i++) {
8333 while ((c = trnext(&trsrc, enc)) != errc) {
8338 if (!hash) hash = rb_hash_new();
8342 while ((c = trnext(&trrepl, enc)) != errc)
8345 for (i=0; i<256; i++) {
8346 if (trans[i] != errc) {
8354 for (i=0; i<256; i++) {
8357 while ((c = trnext(&trsrc, enc)) != errc) {
8358 r = trnext(&trrepl, enc);
8359 if (r == errc) r = trrepl.now;
8362 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8365 if (!hash) hash = rb_hash_new();
8373 str_modify_keep_cr(str);
8374 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8375 termlen = rb_enc_mbminlen(enc);
8378 long offset, max = RSTRING_LEN(str);
8379 unsigned int save = -1;
8380 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8385 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8388 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8391 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8393 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8402 if (cflag) c = last;
8405 else if (cflag) c = errc;
8411 if (c != (
unsigned int)-1) {
8417 tlen = rb_enc_codelen(c, enc);
8423 if (enc != e1) may_modify = 1;
8425 if ((offset = t - buf) + tlen > max) {
8426 size_t MAYBE_UNUSED(old) = max + termlen;
8427 max = offset + tlen + (send - s);
8428 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8431 rb_enc_mbcput(c, t, enc);
8432 if (may_modify && memcmp(s, t, tlen) != 0) {
8438 if (!STR_EMBED_P(str)) {
8439 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8441 TERM_FILL((
char *)t, termlen);
8442 RSTRING(str)->as.heap.ptr = (
char *)buf;
8443 STR_SET_LEN(str, t - buf);
8444 STR_SET_NOEMBED(str);
8445 RSTRING(str)->as.heap.aux.capa = max;
8449 c = (
unsigned char)*s;
8450 if (trans[c] != errc) {
8467 long offset, max = (long)((send - s) * 1.2);
8468 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8473 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8476 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8479 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8481 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8489 if (cflag) c = last;
8492 else if (cflag) c = errc;
8496 c = cflag ? last : errc;
8499 tlen = rb_enc_codelen(c, enc);
8504 if (enc != e1) may_modify = 1;
8506 if ((offset = t - buf) + tlen > max) {
8507 size_t MAYBE_UNUSED(old) = max + termlen;
8508 max = offset + tlen + (long)((send - s) * 1.2);
8509 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8513 rb_enc_mbcput(c, t, enc);
8514 if (may_modify && memcmp(s, t, tlen) != 0) {
8522 if (!STR_EMBED_P(str)) {
8523 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8525 TERM_FILL((
char *)t, termlen);
8526 RSTRING(str)->as.heap.ptr = (
char *)buf;
8527 STR_SET_LEN(str, t - buf);
8528 STR_SET_NOEMBED(str);
8529 RSTRING(str)->as.heap.aux.capa = max;
8535 rb_enc_associate(str, enc);
8557 return tr_trans(str, src, repl, 0);
8602 tr_trans(str, src, repl, 0);
/*
 * TR_TABLE_MAX is the number of distinct `unsigned char` values; code points
 * below it are looked up directly in the byte table (see tr_find()).
 */
8606#define TR_TABLE_MAX (UCHAR_MAX+1)
/*
 * Tables are one element longer than TR_TABLE_MAX: the extra slot at index
 * TR_TABLE_MAX is where tr_setup_table() stores the negation flag (`cflag`),
 * and tr_find() returns it as the final fallback for code points that are
 * not below TR_TABLE_MAX.
 */
8607#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8609tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8612 const unsigned int errc = -1;
8613 char buf[TR_TABLE_MAX];
8616 VALUE table = 0, ptable = 0;
8617 int i, l, cflag = 0;
8619 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8620 tr.gen =
tr.now =
tr.max = 0;
8622 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8627 for (i=0; i<TR_TABLE_MAX; i++) {
8630 stable[TR_TABLE_MAX] = cflag;
8632 else if (stable[TR_TABLE_MAX] && !cflag) {
8633 stable[TR_TABLE_MAX] = 0;
8635 for (i=0; i<TR_TABLE_MAX; i++) {
8639 while ((c = trnext(&
tr, enc)) != errc) {
8640 if (c < TR_TABLE_MAX) {
8641 buf[(
unsigned char)c] = !cflag;
8646 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8649 table = ptable ? ptable : rb_hash_new();
8653 table = rb_hash_new();
8658 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8659 rb_hash_aset(table, key,
Qtrue);
8663 for (i=0; i<TR_TABLE_MAX; i++) {
8664 stable[i] = stable[i] && buf[i];
8666 if (!table && !cflag) {
8673tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8675 if (c < TR_TABLE_MAX) {
8676 return table[c] != 0;
8682 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8683 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8687 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8690 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8705rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8707 char squeez[TR_TABLE_SIZE];
8710 VALUE del = 0, nodel = 0;
8712 int i, ascompat, cr;
8714 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8716 for (i=0; i<argc; i++) {
8720 enc = rb_enc_check(str, s);
8721 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8724 str_modify_keep_cr(str);
8725 ascompat = rb_enc_asciicompat(enc);
8726 s = t = RSTRING_PTR(str);
8733 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8744 c = rb_enc_codepoint_len(s, send, &clen, enc);
8746 if (tr_find(c, squeez, del, nodel)) {
8750 if (t != s) rb_enc_mbcput(c, t, enc);
8757 TERM_FILL(t, TERM_LEN(str));
8758 STR_SET_LEN(str, t - RSTRING_PTR(str));
8761 if (modify)
return str;
8775rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8778 rb_str_delete_bang(argc, argv, str);
8796rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8798 char squeez[TR_TABLE_SIZE];
8800 VALUE del = 0, nodel = 0;
8801 unsigned char *s, *send, *t;
8803 int ascompat, singlebyte = single_byte_optimizable(str);
8807 enc = STR_ENC_GET(str);
8810 for (i=0; i<argc; i++) {
8814 enc = rb_enc_check(str, s);
8815 if (singlebyte && !single_byte_optimizable(s))
8817 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8821 str_modify_keep_cr(str);
8822 s = t = (
unsigned char *)RSTRING_PTR(str);
8823 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8826 ascompat = rb_enc_asciicompat(enc);
8830 unsigned int c = *s++;
8831 if (c != save || (argc > 0 && !squeez[c])) {
8841 if (ascompat && (c = *s) < 0x80) {
8842 if (c != save || (argc > 0 && !squeez[c])) {
8848 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8850 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8851 if (t != s) rb_enc_mbcput(c, t, enc);
8860 TERM_FILL((
char *)t, TERM_LEN(str));
8861 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8862 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8866 if (modify)
return str;
8880rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8883 rb_str_squeeze_bang(argc, argv, str);
8903 return tr_trans(str, src, repl, 1);
8931 tr_trans(str, src, repl, 1);
8944rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8946 char table[TR_TABLE_SIZE];
8948 VALUE del = 0, nodel = 0, tstr;
8958 enc = rb_enc_check(str, tstr);
8961 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8962 (ptstr = RSTRING_PTR(tstr),
8963 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8964 !is_broken_string(str)) {
8966 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8968 s = RSTRING_PTR(str);
8969 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8972 if (*(
unsigned char*)s++ == c) n++;
8978 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8979 for (i=1; i<argc; i++) {
8982 enc = rb_enc_check(str, tstr);
8983 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8986 s = RSTRING_PTR(str);
8987 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8989 ascompat = rb_enc_asciicompat(enc);
8993 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9001 c = rb_enc_codepoint_len(s, send, &clen, enc);
9002 if (tr_find(c, table, del, nodel)) {
9013rb_fs_check(
VALUE val)
9017 if (
NIL_P(val))
return 0;
/*
 * Byte-indexed lookup table (256 entries) flagging ASCII whitespace.
 * The only nonzero entries are bytes 9..13 (\t, \n, \v, \f, \r) and
 * byte 32 (space); every other byte value, including everything at or
 * above 0x80, maps to 0.
 */
9022static const char isspacetable[256] = {
9023 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9024 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9025 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9026 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9027 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9028 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9029 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9030 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9031 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9032 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9033 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9034 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9035 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9036 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9037 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9038 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Nonzero when byte `c` is flagged in isspacetable. The cast to
 * unsigned char keeps the index non-negative even when `c` is a plain
 * (possibly signed) char holding a value >= 0x80.
 */
9041#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9044split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9046 if (empty_count >= 0 &&
len == 0) {
9047 return empty_count + 1;
9049 if (empty_count > 0) {
9054 }
while (--empty_count > 0);
9058 rb_yield(str_new_empty_String(str));
9059 }
while (--empty_count > 0);
9073 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9077literal_split_pattern(
VALUE spat, split_type_t default_type)
9085 return SPLIT_TYPE_CHARS;
9087 else if (rb_enc_asciicompat(enc)) {
9088 if (
len == 1 && ptr[0] ==
' ') {
9089 return SPLIT_TYPE_AWK;
9094 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9095 return SPLIT_TYPE_AWK;
9098 return default_type;
9111rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9116 split_type_t split_type;
9117 long beg, end, i = 0, empty_count = -1;
9122 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9124 if (lim <= 0) limit =
Qnil;
9125 else if (lim == 1) {
9126 if (RSTRING_LEN(str) == 0)
9137 if (
NIL_P(limit) && !lim) empty_count = 0;
9139 enc = STR_ENC_GET(str);
9140 split_type = SPLIT_TYPE_REGEXP;
9142 spat = get_pat_quoted(spat, 0);
9144 else if (
NIL_P(spat = rb_fs)) {
9145 split_type = SPLIT_TYPE_AWK;
9147 else if (!(spat = rb_fs_check(spat))) {
9148 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9153 if (split_type != SPLIT_TYPE_AWK) {
9158 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9159 if (split_type == SPLIT_TYPE_AWK) {
9161 split_type = SPLIT_TYPE_STRING;
9166 mustnot_broken(spat);
9167 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Emit one split field: pass the (beg, len) range of `str` to
 * split_string(), store its return value back into `empty_count`, and
 * then evaluate to str_mod_check(str, str_start, str_len).
 * The macro expands in place, so `result`, `str`, `empty_count`,
 * `str_start` and `str_len` must all be in scope at the use site.
 * NOTE(review): str_mod_check presumably raises when the receiver's
 * buffer or length no longer matches the captured values (e.g. after a
 * yielded block mutated it) -- confirm against its definition.
 */
9175#define SPLIT_STR(beg, len) ( \
9176 empty_count = split_string(result, str, beg, len, empty_count), \
9177 str_mod_check(str, str_start, str_len))
9180 char *ptr = RSTRING_PTR(str);
9181 char *
const str_start = ptr;
9182 const long str_len = RSTRING_LEN(str);
9183 char *
const eptr = str_start + str_len;
9184 if (split_type == SPLIT_TYPE_AWK) {
9191 if (is_ascii_string(str)) {
9192 while (ptr < eptr) {
9193 c = (
unsigned char)*ptr++;
9195 if (ascii_isspace(c)) {
9201 if (!
NIL_P(limit) && lim <= i)
break;
9204 else if (ascii_isspace(c)) {
9205 SPLIT_STR(beg, end-beg);
9208 if (!
NIL_P(limit)) ++i;
9216 while (ptr < eptr) {
9219 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9228 if (!
NIL_P(limit) && lim <= i)
break;
9232 SPLIT_STR(beg, end-beg);
9235 if (!
NIL_P(limit)) ++i;
9243 else if (split_type == SPLIT_TYPE_STRING) {
9244 char *substr_start = ptr;
9245 char *sptr = RSTRING_PTR(spat);
9246 long slen = RSTRING_LEN(spat);
9249 mustnot_broken(str);
9250 enc = rb_enc_check(str, spat);
9251 while (ptr < eptr &&
9252 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9255 if (t != ptr + end) {
9259 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9260 str_mod_check(spat, sptr, slen);
9263 if (!
NIL_P(limit) && lim <= ++i)
break;
9265 beg = ptr - str_start;
9267 else if (split_type == SPLIT_TYPE_CHARS) {
9271 mustnot_broken(str);
9272 enc = rb_enc_get(str);
9273 while (ptr < eptr &&
9274 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9275 SPLIT_STR(ptr - str_start, n);
9277 if (!
NIL_P(limit) && lim <= ++i)
break;
9279 beg = ptr - str_start;
9283 long len = RSTRING_LEN(str);
9291 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9296 if (start == end && BEG(0) == END(0)) {
9301 else if (last_null == 1) {
9302 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9309 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9315 SPLIT_STR(beg, end-beg);
9316 beg = start = END(0);
9320 for (idx=1; idx < regs->num_regs; idx++) {
9321 if (BEG(idx) == -1)
continue;
9322 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9324 if (!
NIL_P(limit) && lim <= ++i)
break;
9326 if (match) rb_match_unbusy(match);
9328 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9329 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9332 return result ? result : str;
9342 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to a fresh array with capacity `size` when no block was
 * given to the current method, and to 0 when a block is present.
 * The first parameter `m` is not used by the expansion.
 */
9345#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
/* Shorthand that forwards (ary, e) unchanged to enumerator_element(). */
9360#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9363chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9365 const char *prev = rb_enc_prev_char(p, e, e, enc);
9368 prev = rb_enc_prev_char(p, e, e, enc);
9369 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9381 RSTRING_LEN(rs) != 1 ||
9382 RSTRING_PTR(rs)[0] !=
'\n')) {
/*
 * From this point on, every use of the identifier `rb_rs` expands to a
 * call to get_rs() instead of naming a variable directly.
 */
9388#define rb_rs get_rs()
9395 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9396 long pos,
len, rslen;
9402 static ID keywords[1];
9407 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9411 if (!ENUM_ELEM(ary, str)) {
9419 if (!RSTRING_LEN(str))
goto end;
9421 ptr = subptr = RSTRING_PTR(str);
9423 len = RSTRING_LEN(str);
9425 rslen = RSTRING_LEN(rs);
9428 enc = rb_enc_get(str);
9430 enc = rb_enc_check(str, rs);
9435 const char *eol = NULL;
9437 while (subend < pend) {
9438 long chomp_rslen = 0;
9440 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9442 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9444 if (eol == subend)
break;
9448 chomp_rslen = -rslen;
9452 if (!subptr) subptr = subend;
9456 }
while (subend < pend);
9458 if (rslen == 0) chomp_rslen = 0;
9460 subend - subptr + (chomp ? chomp_rslen : rslen));
9461 if (ENUM_ELEM(ary, line)) {
9462 str_mod_check(str, ptr,
len);
9464 subptr = eol = NULL;
9469 rsptr = RSTRING_PTR(rs);
9470 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9479 rsptr = RSTRING_PTR(rs);
9480 rslen = RSTRING_LEN(rs);
9483 while (subptr < pend) {
9484 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9488 if (hit != adjusted) {
9492 subend = hit += rslen;
9495 subend = chomp_newline(subptr, subend, enc);
9502 if (ENUM_ELEM(ary, line)) {
9503 str_mod_check(str, ptr,
len);
9508 if (subptr != pend) {
9511 pend = chomp_newline(subptr, pend, enc);
9513 else if (pend - subptr >= rslen &&
9514 memcmp(pend - rslen, rsptr, rslen) == 0) {
9519 ENUM_ELEM(ary, line);
9540rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9543 return rb_str_enumerate_lines(argc, argv, str, 0);
9598rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9600 VALUE ary = WANTARRAY(
"lines", 0);
9601 return rb_str_enumerate_lines(argc, argv, str, ary);
9615 for (i=0; i<RSTRING_LEN(str); i++) {
9616 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9634rb_str_each_byte(
VALUE str)
9637 return rb_str_enumerate_bytes(str, 0);
9649rb_str_bytes(
VALUE str)
9651 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9652 return rb_str_enumerate_bytes(str, ary);
9670 ptr = RSTRING_PTR(str);
9671 len = RSTRING_LEN(str);
9672 enc = rb_enc_get(str);
9675 for (i = 0; i <
len; i += n) {
9676 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9681 for (i = 0; i <
len; i += n) {
9682 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9703rb_str_each_char(
VALUE str)
9706 return rb_str_enumerate_chars(str, 0);
9718rb_str_chars(
VALUE str)
9721 return rb_str_enumerate_chars(str, ary);
9725rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9730 const char *ptr, *end;
9733 if (single_byte_optimizable(str))
9734 return rb_str_enumerate_bytes(str, ary);
9737 ptr = RSTRING_PTR(str);
9739 enc = STR_ENC_GET(str);
9742 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9763rb_str_each_codepoint(
VALUE str)
9766 return rb_str_enumerate_codepoints(str, 0);
9778rb_str_codepoints(
VALUE str)
9781 return rb_str_enumerate_codepoints(str, ary);
/*
 * Compile the grapheme-cluster pattern `\X` for encoding `enc` and return
 * the resulting regex_t.
 * The pattern source defaults to the single-byte literal "\\X"; the
 * CASE_UTF entries below switch it to a pre-encoded UTF-16/UTF-32 byte
 * sequence for the matching encoding index.
 * NOTE(review): the function header, the switch on `encidx`, and the
 * guard around the error path are not visible in this excerpt.
 */
9787 int encidx = rb_enc_to_index(enc);
9789 const OnigUChar source_ascii[] =
"\\X";
9790 const OnigUChar *source = source_ascii;
/* sizeof - 1 drops the trailing NUL of the string literal. */
9791 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Byte-order helpers: each expands to a comma-separated list of OnigUChar
 * values for code unit `x`. 16BE = high byte then low byte, 16LE = low then
 * high; the 32-bit forms compose the 16-bit ones over the upper and lower
 * halves in the corresponding order.
 * CASE_UTF(e) builds a static byte array holding '\\' and 'X' in encoding
 * UTF_<e> and points `source`/`source_len` at it.
 */
9794#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9795#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9796#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9797#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9798#define CASE_UTF(e) \
9799 case ENCINDEX_UTF_##e: { \
9800 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9801 source = source_UTF_##e; \
9802 source_len = sizeof(source_UTF_##e); \
9805 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9813 regex_t *reg_grapheme_cluster;
/* onig_new writes the compiled regex through its first argument, so it
 * must receive the address of the regex_t pointer declared above. */
9815 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9816 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
/* Error path: render the Onigmo error code as text and abort via rb_fatal. */
9818 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9819 onig_error_code_to_str(message, r, &einfo);
9820 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9823 return reg_grapheme_cluster;
9829 int encidx = rb_enc_to_index(enc);
9830 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9832 if (encidx == rb_utf8_encindex()) {
9833 if (!reg_grapheme_cluster_utf8) {
9834 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9837 return reg_grapheme_cluster_utf8;
9846 size_t grapheme_cluster_count = 0;
9848 const char *ptr, *end;
9850 if (!rb_enc_unicode_p(enc)) {
9854 bool cached_reg_grapheme_cluster =
true;
9855 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9856 if (!reg_grapheme_cluster) {
9857 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9858 cached_reg_grapheme_cluster =
false;
9861 ptr = RSTRING_PTR(str);
9865 OnigPosition
len = onig_match(reg_grapheme_cluster,
9866 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9867 (
const OnigUChar *)ptr, NULL, 0);
9868 if (
len <= 0)
break;
9869 grapheme_cluster_count++;
9873 if (!cached_reg_grapheme_cluster) {
9874 onig_free(reg_grapheme_cluster);
9877 return SIZET2NUM(grapheme_cluster_count);
9881rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9885 const char *ptr0, *ptr, *end;
9887 if (!rb_enc_unicode_p(enc)) {
9888 return rb_str_enumerate_chars(str, ary);
9893 bool cached_reg_grapheme_cluster =
true;
9894 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9895 if (!reg_grapheme_cluster) {
9896 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9897 cached_reg_grapheme_cluster =
false;
9900 ptr0 = ptr = RSTRING_PTR(str);
9904 OnigPosition
len = onig_match(reg_grapheme_cluster,
9905 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9906 (
const OnigUChar *)ptr, NULL, 0);
9907 if (
len <= 0)
break;
9912 if (!cached_reg_grapheme_cluster) {
9913 onig_free(reg_grapheme_cluster);
9933rb_str_each_grapheme_cluster(
VALUE str)
9936 return rb_str_enumerate_grapheme_clusters(str, 0);
9948rb_str_grapheme_clusters(
VALUE str)
9951 return rb_str_enumerate_grapheme_clusters(str, ary);
9955chopped_length(
VALUE str)
9958 const char *p, *p2, *beg, *end;
9960 beg = RSTRING_PTR(str);
9961 end = beg + RSTRING_LEN(str);
9962 if (beg >= end)
return 0;
9963 p = rb_enc_prev_char(beg, end, end, enc);
9965 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9966 p2 = rb_enc_prev_char(beg, p, end, enc);
9967 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9985rb_str_chop_bang(
VALUE str)
9987 str_modify_keep_cr(str);
9988 if (RSTRING_LEN(str) > 0) {
9990 len = chopped_length(str);
9991 STR_SET_LEN(str,
len);
9992 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10011rb_str_chop(
VALUE str)
10017smart_chomp(
VALUE str,
const char *e,
const char *p)
10020 if (rb_enc_mbminlen(enc) > 1) {
10025 pp = e - rb_enc_mbminlen(enc);
10028 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10036 if (--e > p && *(e-1) ==
'\r') {
10053 char *pp, *e, *rsptr;
10055 char *
const p = RSTRING_PTR(str);
10056 long len = RSTRING_LEN(str);
10058 if (
len == 0)
return 0;
10061 return smart_chomp(str, e, p);
10064 enc = rb_enc_get(str);
10067 if (rb_enc_mbminlen(enc) > 1) {
10072 pp -= rb_enc_mbminlen(enc);
10075 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10082 while (e > p && *(e-1) ==
'\n') {
10084 if (e > p && *(e-1) ==
'\r')
10090 if (rslen >
len)
return len;
10092 enc = rb_enc_get(rs);
10093 newline = rsptr[rslen-1];
10094 if (rslen == rb_enc_mbminlen(enc)) {
10096 if (newline ==
'\n')
10097 return smart_chomp(str, e, p);
10101 return smart_chomp(str, e, p);
10105 enc = rb_enc_check(str, rs);
10106 if (is_broken_string(rs)) {
10110 if (p[
len-1] == newline &&
10112 memcmp(rsptr, pp, rslen) == 0)) {
10113 if (at_char_boundary(p, pp, e, enc))
10114 return len - rslen;
10126chomp_rs(
int argc,
const VALUE *argv)
10130 VALUE rs = argv[0];
10142 long olen = RSTRING_LEN(str);
10143 long len = chompped_length(str, rs);
10144 if (
len >= olen)
return Qnil;
10145 str_modify_keep_cr(str);
10146 STR_SET_LEN(str,
len);
10147 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10167rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10170 str_modifiable(str);
10171 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10172 rs = chomp_rs(argc, argv);
10174 return rb_str_chomp_string(str, rs);
10187rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10189 VALUE rs = chomp_rs(argc, argv);
10195tr_setup_table_multi(
char table[TR_TABLE_SIZE],
VALUE *tablep,
VALUE *ctablep,
10196 VALUE str,
int num_selectors,
VALUE *selectors)
10200 for (i=0; i<num_selectors; i++) {
10201 VALUE selector = selectors[i];
10205 enc = rb_enc_check(str, selector);
10206 tr_setup_table(selector, table, i==0, tablep, ctablep, enc);
10213 const char *
const start = s;
10215 if (!s || s >= e)
return 0;
10218 if (single_byte_optimizable(str)) {
10219 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10224 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10234lstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10235 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10237 const char *
const start = s;
10239 if (!s || s >= e)
return 0;
10244 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10246 if (!tr_find(cc, table, del, nodel))
break;
10265rb_str_lstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10269 long olen, loffset;
10271 str_modify_keep_cr(str);
10272 enc = STR_ENC_GET(str);
10275 char table[TR_TABLE_SIZE];
10276 VALUE del = 0, nodel = 0;
10278 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10279 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10282 loffset = lstrip_offset(str, start, start+olen, enc);
10286 long len = olen-loffset;
10287 s = start + loffset;
10288 memmove(start, s,
len);
10289 STR_SET_LEN(str,
len);
10290 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10325rb_str_lstrip(
int argc,
VALUE *argv,
VALUE str)
10332 char table[TR_TABLE_SIZE];
10333 VALUE del = 0, nodel = 0;
10335 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10336 loffset = lstrip_offset_table(str, start, start+
len, STR_ENC_GET(str), table, del, nodel);
10339 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10341 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10350 rb_str_check_dummy_enc(enc);
10354 if (!s || s >= e)
return 0;
10358 if (single_byte_optimizable(str)) {
10360 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10365 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10375rstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10376 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10381 rb_str_check_dummy_enc(enc);
10385 if (!s || s >= e)
return 0;
10389 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10391 if (!tr_find(c, table, del, nodel))
break;
10411rb_str_rstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10415 long olen, roffset;
10417 str_modify_keep_cr(str);
10418 enc = STR_ENC_GET(str);
10421 char table[TR_TABLE_SIZE];
10422 VALUE del = 0, nodel = 0;
10424 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10425 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10428 roffset = rstrip_offset(str, start, start+olen, enc);
10431 long len = olen - roffset;
10433 STR_SET_LEN(str,
len);
10434 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10468rb_str_rstrip(
int argc,
VALUE *argv,
VALUE str)
10472 long olen, roffset;
10474 enc = STR_ENC_GET(str);
10477 char table[TR_TABLE_SIZE];
10478 VALUE del = 0, nodel = 0;
10480 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10481 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10484 roffset = rstrip_offset(str, start, start+olen, enc);
10486 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10504rb_str_strip_bang(
int argc,
VALUE *argv,
VALUE str)
10507 long olen, loffset, roffset;
10510 str_modify_keep_cr(str);
10511 enc = STR_ENC_GET(str);
10515 char table[TR_TABLE_SIZE];
10516 VALUE del = 0, nodel = 0;
10518 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10519 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10520 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10523 loffset = lstrip_offset(str, start, start+olen, enc);
10524 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10527 if (loffset > 0 || roffset > 0) {
10528 long len = olen-roffset;
10531 memmove(start, start + loffset,
len);
10533 STR_SET_LEN(str,
len);
10534 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10569rb_str_strip(
int argc,
VALUE *argv,
VALUE str)
10572 long olen, loffset, roffset;
10578 char table[TR_TABLE_SIZE];
10579 VALUE del = 0, nodel = 0;
10581 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10582 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10583 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10586 loffset = lstrip_offset(str, start, start+olen, enc);
10587 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10590 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10595scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10598 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10604 end = pos + RSTRING_LEN(pat);
10618 if (RSTRING_LEN(str) > end)
10619 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10628 if (!regs || regs->num_regs == 1) {
10634 for (
int i = 1; i < regs->num_regs; i++) {
10665 long last = -1, prev = 0;
10666 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10668 pat = get_pat_quoted(pat, 1);
10669 mustnot_broken(str);
10673 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10678 if (last >= 0) rb_pat_search(pat, str, last, 1);
10683 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10687 str_mod_check(str, p,
len);
10689 if (last >= 0) rb_pat_search(pat, str, last, 1);
/*
 * Convert `str` to an integer using base 16 by delegating to
 * rb_str_to_inum(str, 16, FALSE).
 * NOTE(review): the FALSE third argument presumably selects lenient
 * (non-raising) parsing of invalid input -- confirm against rb_str_to_inum.
 */
10741rb_str_hex(
VALUE str)
10743 return rb_str_to_inum(str, 16, FALSE);
/*
 * Convert `str` to an integer by delegating to
 * rb_str_to_inum(str, -8, FALSE).
 * NOTE(review): the negative base presumably means "default to octal but
 * honor an explicit radix prefix in the text" -- confirm against
 * rb_str_to_inum; the FALSE argument presumably selects lenient parsing.
 */
10827rb_str_oct(
VALUE str)
10829 return rb_str_to_inum(str, -8, FALSE);
10832#ifndef HAVE_CRYPT_R
10837 rb_nativethread_lock_t lock;
10838} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10907# define CRYPT_END() ALLOCV_END(databuf)
10910 extern char *crypt(
const char *,
const char *);
10911# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10914 const char *s, *saltp;
10917 char salt_8bit_clean[3];
10921 mustnot_wchar(str);
10922 mustnot_wchar(salt);
10924 saltp = RSTRING_PTR(salt);
10925 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10926 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10930 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10931 salt_8bit_clean[0] = saltp[0] & 0x7f;
10932 salt_8bit_clean[1] = saltp[1] & 0x7f;
10933 salt_8bit_clean[2] =
'\0';
10934 saltp = salt_8bit_clean;
10939# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10940 data->initialized = 0;
10942 res = crypt_r(s, saltp, data);
10945 res = crypt(s, saltp);
10960 size_t res_size = strlen(res)+1;
10961 tmp_buf =
ALLOCA_N(
char, res_size);
10962 memcpy(tmp_buf, res, res_size);
10999 char *ptr, *p, *pend;
11002 unsigned long sum0 = 0;
11007 ptr = p = RSTRING_PTR(str);
11008 len = RSTRING_LEN(str);
11014 str_mod_check(str, ptr,
len);
11017 sum0 += (
unsigned char)*p;
11028 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11029 sum0 &= (((
unsigned long)1)<<bits)-1;
11049rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11053 long width,
len, flen = 1, fclen = 1;
11056 const char *f =
" ";
11057 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11059 int singlebyte = 1, cr;
11063 enc = STR_ENC_GET(str);
11064 termlen = rb_enc_mbminlen(enc);
11068 enc = rb_enc_check(str, pad);
11069 f = RSTRING_PTR(pad);
11070 flen = RSTRING_LEN(pad);
11071 fclen = str_strlen(pad, enc);
11072 singlebyte = single_byte_optimizable(pad);
11073 if (flen == 0 || fclen == 0) {
11074 rb_raise(rb_eArgError,
"zero width padding");
11077 len = str_strlen(str, enc);
11078 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11080 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11084 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11085 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11087 size = RSTRING_LEN(str);
11088 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11089 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11090 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11091 rb_raise(rb_eArgError,
"argument too big");
11095 p = RSTRING_PTR(res);
11097 memset(p, *f, llen);
11101 while (llen >= fclen) {
11107 memcpy(p, f, llen2);
11111 memcpy(p, RSTRING_PTR(str), size);
11114 memset(p, *f, rlen);
11118 while (rlen >= fclen) {
11124 memcpy(p, f, rlen2);
11128 TERM_FILL(p, termlen);
11129 STR_SET_LEN(res, p-RSTRING_PTR(res));
/*
 * Left-justify entry point: forwards its arguments unchanged to
 * rb_str_justify() with jflag 'l', the mode for which rb_str_justify
 * uses a left padding length of 0.
 */
11150rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11152 return rb_str_justify(argc, argv, str,
'l');
/*
 * Right-justify entry point: forwards its arguments unchanged to
 * rb_str_justify() with jflag 'r', the mode for which rb_str_justify
 * assigns the whole padding amount to the left side.
 */
11164rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11166 return rb_str_justify(argc, argv, str,
'r');
/*
 * Centering entry point: forwards its arguments unchanged to
 * rb_str_justify() with jflag 'c'. Any jflag other than 'l' or 'r' makes
 * rb_str_justify put half of the padding amount (integer division) on
 * the left side.
 */
11179rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11181 return rb_str_justify(argc, argv, str,
'c');
11197 sep = get_pat_quoted(sep, 0);
11209 pos = rb_str_index(str, sep, 0);
11210 if (pos < 0)
goto failed;
11215 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11218 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11232 long pos = RSTRING_LEN(str);
11234 sep = get_pat_quoted(sep, 0);
11247 pos = rb_str_rindex(str, sep, pos);
11256 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11258 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11270rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11274 for (i=0; i<argc; i++) {
11275 VALUE tmp = argv[i];
11277 if (rb_reg_start_with_p(tmp, str))
11281 const char *p, *s, *e;
11286 enc = rb_enc_check(str, tmp);
11287 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11288 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11289 p = RSTRING_PTR(str);
11292 if (!at_char_right_boundary(p, s, e, enc))
11294 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11310rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11314 for (i=0; i<argc; i++) {
11315 VALUE tmp = argv[i];
11316 const char *p, *s, *e;
11321 enc = rb_enc_check(str, tmp);
11322 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11323 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11324 p = RSTRING_PTR(str);
11327 if (!at_char_boundary(p, s, e, enc))
11329 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11345deleted_prefix_length(
VALUE str,
VALUE prefix)
11347 const char *strptr, *prefixptr;
11348 long olen, prefixlen;
11353 if (!is_broken_string(prefix) ||
11354 !rb_enc_asciicompat(enc) ||
11355 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11356 enc = rb_enc_check(str, prefix);
11360 prefixlen = RSTRING_LEN(prefix);
11361 if (prefixlen <= 0)
return 0;
11362 olen = RSTRING_LEN(str);
11363 if (olen < prefixlen)
return 0;
11364 strptr = RSTRING_PTR(str);
11365 prefixptr = RSTRING_PTR(prefix);
11366 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11367 if (is_broken_string(prefix)) {
11368 if (!is_broken_string(str)) {
11372 const char *strend = strptr + olen;
11373 const char *after_prefix = strptr + prefixlen;
11374 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11395rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11398 str_modify_keep_cr(str);
11400 prefixlen = deleted_prefix_length(str, prefix);
11401 if (prefixlen <= 0)
return Qnil;
11415rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11419 prefixlen = deleted_prefix_length(str, prefix);
11420 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11422 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11435deleted_suffix_length(
VALUE str,
VALUE suffix)
11437 const char *strptr, *suffixptr;
11438 long olen, suffixlen;
11442 if (is_broken_string(suffix))
return 0;
11443 enc = rb_enc_check(str, suffix);
11446 suffixlen = RSTRING_LEN(suffix);
11447 if (suffixlen <= 0)
return 0;
11448 olen = RSTRING_LEN(str);
11449 if (olen < suffixlen)
return 0;
11450 strptr = RSTRING_PTR(str);
11451 suffixptr = RSTRING_PTR(suffix);
11452 const char *strend = strptr + olen;
11453 const char *before_suffix = strend - suffixlen;
11454 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11455 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11471rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11473 long olen, suffixlen,
len;
11474 str_modifiable(str);
11476 suffixlen = deleted_suffix_length(str, suffix);
11477 if (suffixlen <= 0)
return Qnil;
11479 olen = RSTRING_LEN(str);
11480 str_modify_keep_cr(str);
11481 len = olen - suffixlen;
11482 STR_SET_LEN(str,
len);
11483 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11499rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11503 suffixlen = deleted_suffix_length(str, suffix);
11504 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11506 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11513 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11519nil_setter_warning(
ID id)
11521 rb_warn_deprecated(
"non-nil '%"PRIsVALUE
"'", NULL, rb_id2str(
id));
11528 if (!
NIL_P(*var)) {
11529 nil_setter_warning(
id);
11536 val = rb_fs_check(val);
11539 "value of %"PRIsVALUE
" must be String or Regexp",
11543 nil_setter_warning(
id);
11560 str_modifiable(str);
11563 int idx = rb_enc_to_index(encoding);
11570 rb_enc_associate_index(str, idx);
11594 if (STR_EMBED_P(str)) {
11595 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11600 str_replace_shared_without_enc(str2, str);
11602 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11632rb_str_valid_encoding_p(
VALUE str)
11652rb_str_is_ascii_only_p(
VALUE str)
11662 static const char ellipsis[] =
"...";
11663 const long ellipsislen =
sizeof(ellipsis) - 1;
11665 const long blen = RSTRING_LEN(str);
11666 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11667 VALUE estr, ret = 0;
11670 if (
len * rb_enc_mbminlen(enc) >= blen ||
11674 else if (
len <= ellipsislen ||
11676 if (rb_enc_asciicompat(enc)) {
11678 rb_enc_associate(ret, enc);
11685 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11690 rb_enc_from_encoding(enc), 0,
Qnil);
11703 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11709 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11728 if (enc == STR_ENC_GET(str)) {
11733 return enc_str_scrub(enc, str, repl, cr);
11741 const char *rep, *p, *e, *p1, *sp;
11747 rb_raise(rb_eArgError,
"both of block and replacement given");
11754 if (!
NIL_P(repl)) {
11755 repl = str_compat_and_valid(repl, enc);
11758 if (rb_enc_dummy_p(enc)) {
11761 encidx = rb_enc_to_index(enc);
11763#define DEFAULT_REPLACE_CHAR(str) do { \
11764 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11765 rep = replace; replen = (int)sizeof(replace); \
11768 slen = RSTRING_LEN(str);
11769 p = RSTRING_PTR(str);
11774 if (rb_enc_asciicompat(enc)) {
11780 else if (!
NIL_P(repl)) {
11781 rep = RSTRING_PTR(repl);
11782 replen = RSTRING_LEN(repl);
11785 else if (encidx == rb_utf8_encindex()) {
11786 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11790 DEFAULT_REPLACE_CHAR(
"?");
11795 p = search_nonascii(p, e);
11800 int ret = rb_enc_precise_mbclen(p, e, enc);
11819 if (e - p < clen) clen = e - p;
11826 for (; clen > 1; clen--) {
11827 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11838 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11839 str_mod_check(str, sp, slen);
11840 repl = str_compat_and_valid(repl, enc);
11847 p = search_nonascii(p, e);
11873 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11874 str_mod_check(str, sp, slen);
11875 repl = str_compat_and_valid(repl, enc);
11884 long mbminlen = rb_enc_mbminlen(enc);
11888 else if (!
NIL_P(repl)) {
11889 rep = RSTRING_PTR(repl);
11890 replen = RSTRING_LEN(repl);
11892 else if (encidx == ENCINDEX_UTF_16BE) {
11893 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11895 else if (encidx == ENCINDEX_UTF_16LE) {
11896 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11898 else if (encidx == ENCINDEX_UTF_32BE) {
11899 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11901 else if (encidx == ENCINDEX_UTF_32LE) {
11902 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11905 DEFAULT_REPLACE_CHAR(
"?");
11909 int ret = rb_enc_precise_mbclen(p, e, enc);
11922 if (e - p < clen) clen = e - p;
11923 if (clen <= mbminlen * 2) {
11928 for (; clen > mbminlen; clen-=mbminlen) {
11929 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11939 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11940 str_mod_check(str, sp, slen);
11941 repl = str_compat_and_valid(repl, enc);
11966 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11967 str_mod_check(str, sp, slen);
11968 repl = str_compat_and_valid(repl, enc);
12008str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
/*
 * File-local state for the Unicode normalization methods:
 *  - id_normalize / id_normalized_p are the method IDs the public wrappers
 *    hand to unicode_normalize_common();
 *  - mUnicodeNormalize is the receiver on which unicode_normalize_common()
 *    invokes that ID via rb_funcallv().
 * Their initialization is not visible in this section of the file.
 */
12016static ID id_normalize;
12017static ID id_normalized_p;
12018static VALUE mUnicodeNormalize;
12021unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12023 static int UnicodeNormalizeRequired = 0;
12026 if (!UnicodeNormalizeRequired) {
12027 rb_require(
"unicode_normalize/normalize.rb");
12028 UnicodeNormalizeRequired = 1;
12032 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
/*
 * Return the result of unicode_normalize_common() called with the
 * id_normalize method ID; the receiver `str` itself is not modified here.
 */
12043rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12045 return unicode_normalize_common(argc, argv, str, id_normalize);
/*
 * In-place variant: computes the same value as rb_str_unicode_normalize
 * (unicode_normalize_common with id_normalize) and then overwrites the
 * contents of `str` with it through rb_str_replace(), whose return value
 * is returned.
 */
12059rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12061 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
/*
 * Predicate variant: returns whatever unicode_normalize_common() yields
 * when invoked with the id_normalized_p method ID.
 */
12088rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12090 return unicode_normalize_common(argc, argv, str, id_normalized_p);
/* sym_equal is a plain alias: every use expands to rb_obj_equal. */
12222#define sym_equal rb_obj_equal
12225sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12229 int c = rb_enc_precise_mbclen(s, send, enc);
12233 c = rb_enc_mbc_to_codepoint(s, send, enc);
12241rb_str_symname_p(
VALUE sym)
12246 rb_encoding *resenc = rb_default_internal_encoding();
12248 if (resenc == NULL) resenc = rb_default_external_encoding();
12249 enc = STR_ENC_GET(sym);
12250 ptr = RSTRING_PTR(sym);
12251 len = RSTRING_LEN(sym);
12252 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12260rb_str_quote_unprintable(
VALUE str)
12268 resenc = rb_default_internal_encoding();
12269 if (resenc == NULL) resenc = rb_default_external_encoding();
12270 enc = STR_ENC_GET(str);
12271 ptr = RSTRING_PTR(str);
12272 len = RSTRING_LEN(str);
12273 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12274 !sym_printable(ptr, ptr +
len, enc)) {
12275 return rb_str_escape(str);
12281rb_id_quote_unprintable(
ID id)
12283 VALUE str = rb_id2str(
id);
12284 if (!rb_str_symname_p(str)) {
12285 return rb_str_escape(str);
12303sym_inspect(
VALUE sym)
12310 if (!rb_str_symname_p(str)) {
12312 len = RSTRING_LEN(str);
12313 rb_str_resize(str,
len + 1);
12314 dest = RSTRING_PTR(str);
12315 memmove(dest + 1, dest,
len);
12319 VALUE orig_str = str;
12321 len = RSTRING_LEN(orig_str);
12322 str = rb_enc_str_new(0,
len + 1, enc);
12325 ptr = RSTRING_PTR(orig_str);
12326 dest = RSTRING_PTR(str);
12327 memcpy(dest + 1, ptr,
len);
12347rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12352 rb_raise(rb_eArgError,
"no receiver given");
12455 return rb_str_match(
rb_sym2str(sym), other);
12470sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12472 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12485sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12487 return rb_str_match_m_p(argc, argv, sym);
12505 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12516sym_length(
VALUE sym)
12530sym_empty(
VALUE sym)
12564sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12580sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12596sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12610sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12612 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12625sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12627 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12639sym_encoding(
VALUE sym)
12645string_for_symbol(
VALUE name)
12650 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12664 name = string_for_symbol(name);
12665 return rb_intern_str(name);
12674 name = string_for_symbol(name);
12698 return rb_fstring(str);
12704 struct RString fake_str = {RBASIC_INIT};
12705 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12717 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12718 rb_enc_autoload(enc);
12721 struct RString fake_str = {RBASIC_INIT};
12722 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12728 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12729 rb_enc_autoload(enc);
12732 struct RString fake_str = {RBASIC_INIT};
12733 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12744#if USE_YJIT || USE_ZJIT
12746rb_jit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12751 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12752 rb_str_buf_cat_byte(str, (
char) code);
12762fstring_set_class_i(
VALUE *str,
void *data)
12766 return ST_CONTINUE;
12774 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12941 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that asserts regardless of RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single bits of symbols that have ever interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern_str(), except it tries to convert the parameter object to an instance of rb_cS...
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
union RString::@51::@52::@54 aux
Auxiliary info.
long len
Length of the string, not including terminating NUL character.
struct RString::@51::@53 embed
Embedded contents.
VALUE shared
Parent of the string.
union RString::@51 as
String's specific fields.
char * ptr
Pointer to the contents of the string.
struct RString::@51::@52 heap
Strings that use separated memory region for contents use this pattern.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises an exception when the predicate fails.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.