14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
131#define RUBY_MAX_CHAR_LEN 16
132#define STR_PRECOMPUTED_HASH FL_USER4
133#define STR_SHARED_ROOT FL_USER5
134#define STR_BORROWED FL_USER6
135#define STR_TMPLOCK FL_USER7
136#define STR_NOFREE FL_USER18
137#define STR_FAKESTR FL_USER19
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 FL_SET((shared_str), STR_SHARED_ROOT); \
209 if (RBASIC_CLASS((shared_str)) == 0) \
210 FL_SET_RAW((shared_str), STR_BORROWED); \
214#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
215#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
218#define STR_ENC_GET(str) get_encoding(str)
220#if !defined SHARABLE_MIDDLE_SUBSTRING
221# define SHARABLE_MIDDLE_SUBSTRING 0
223#if !SHARABLE_MIDDLE_SUBSTRING
224#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
226#define SHARABLE_SUBSTRING_P(beg, len, end) 1
231str_embed_capa(
VALUE str)
233 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
237rb_str_reembeddable_p(
VALUE str)
239 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
243rb_str_embed_size(
long capa,
long termlen)
251rb_str_size_as_embedded(
VALUE str)
254 if (STR_EMBED_P(str)) {
256 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
258 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
262 else if (rb_str_reembeddable_p(str)) {
264 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
266 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
269 real_size =
sizeof(
struct RString);
276STR_EMBEDDABLE_P(
long len,
long termlen)
278 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
283static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
284static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
286static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
287static inline void str_modifiable(
VALUE str);
292str_make_independent(
VALUE str)
294 long len = RSTRING_LEN(str);
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
299static inline int str_dependent_p(
VALUE str);
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_LEN(str,
len);
322 memcpy(RSTRING_PTR(str), buf,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
330rb_debug_rstring_null_ptr(
const char *func)
332 fprintf(stderr,
"%s is returning NULL!! "
333 "SIGSEGV is highly expected to follow immediately.\n"
334 "If you could reproduce, attach your debugger here, "
335 "and look at the passed string.\n",
340static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
343get_encoding(
VALUE str)
349mustnot_broken(
VALUE str)
351 if (is_broken_string(str)) {
352 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
357mustnot_wchar(
VALUE str)
360 if (rb_enc_mbminlen(enc) > 1) {
361 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
365static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
367#if SIZEOF_LONG == SIZEOF_VOIDP
368#define PRECOMPUTED_FAKESTR_HASH 1
373BARE_STRING_P(
VALUE str)
378static inline st_index_t
379str_do_hash(
VALUE str)
381 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
383 if (e && !is_ascii_string(str)) {
390str_store_precomputed_hash(
VALUE str, st_index_t hash)
396 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
397 size_t free_bytes = str_embed_capa(str) - used_bytes;
401 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
403 FL_SET(str, STR_PRECOMPUTED_HASH);
416 if (
FL_TEST(str, RSTRING_FSTR))
419 bare = BARE_STRING_P(str);
421 if (STR_EMBED_P(str)) {
426 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
433 rb_str_resize(str, RSTRING_LEN(str));
435 fstr = register_fstring(str,
false,
false);
438 str_replace_shared_without_enc(str, fstr);
445static VALUE fstring_table_obj;
448fstring_concurrent_set_hash(
VALUE str)
450#ifdef PRECOMPUTED_FAKESTR_HASH
454 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
471 const char *aptr, *bptr;
478 return (alen == blen &&
480 memcmp(aptr, bptr, alen) == 0);
485 bool force_precompute_hash;
489fstring_concurrent_set_create(
VALUE str,
void *data)
499 long len = RSTRING_LEN(str);
500 long capa =
len +
sizeof(st_index_t);
501 int term_len = TERM_LEN(str);
503 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
505 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
506 STR_SET_LEN(new_str, RSTRING_LEN(str));
508 rb_enc_copy(new_str, str);
509 str_store_precomputed_hash(new_str, str_do_hash(str));
513 rb_enc_copy(new_str, str);
514#ifdef PRECOMPUTED_FAKESTR_HASH
515 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
516 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
530 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
533 if (STR_SHARED_P(str)) {
535 str_make_independent(str);
538 if (!BARE_STRING_P(str)) {
544 RBASIC(str)->flags |= RSTRING_FSTR;
546 RB_OBJ_SET_SHAREABLE(str);
560 .hash = fstring_concurrent_set_hash,
561 .cmp = fstring_concurrent_set_cmp,
562 .create = fstring_concurrent_set_create,
567Init_fstring_table(
void)
569 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
570 rb_gc_register_address(&fstring_table_obj);
574register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
578 .force_precompute_hash = force_precompute_hash
581#if SIZEOF_VOIDP == SIZEOF_LONG
585 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
589 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
591 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
603rb_obj_is_fstring_table(
VALUE obj)
607 return obj == fstring_table_obj;
611rb_gc_free_fstring(
VALUE obj)
613 ASSERT_vm_locking_with_barrier();
619 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
621 RB_DEBUG_COUNTER_INC(obj_str_fstr);
627rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
629 if (fstring_table_obj) {
630 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
635setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
638 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
651 return (
VALUE)fake_str;
660 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
669rb_fstring_new(
const char *ptr,
long len)
671 struct RString fake_str = {RBASIC_INIT};
672 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
678 struct RString fake_str = {RBASIC_INIT};
679 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
683rb_fstring_cstr(
const char *
ptr)
685 return rb_fstring_new(
ptr, strlen(
ptr));
689single_byte_optimizable(
VALUE str)
693 case ENCINDEX_ASCII_8BIT:
694 case ENCINDEX_US_ASCII:
716static inline const char *
717search_nonascii(
const char *p,
const char *e)
719 const uintptr_t *s, *t;
721#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
722# if SIZEOF_UINTPTR_T == 8
723# define NONASCII_MASK UINT64_C(0x8080808080808080)
724# elif SIZEOF_UINTPTR_T == 4
725# define NONASCII_MASK UINT32_C(0x80808080)
727# error "don't know what to do."
730# if SIZEOF_UINTPTR_T == 8
731# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
732# elif SIZEOF_UINTPTR_T == 4
733# define NONASCII_MASK 0x80808080UL
735# error "don't know what to do."
739 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
740#if !UNALIGNED_WORD_ACCESS
741 if ((uintptr_t)p % SIZEOF_VOIDP) {
742 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
747 case 7:
if (p[-7]&0x80)
return p-7;
748 case 6:
if (p[-6]&0x80)
return p-6;
749 case 5:
if (p[-5]&0x80)
return p-5;
750 case 4:
if (p[-4]&0x80)
return p-4;
752 case 3:
if (p[-3]&0x80)
return p-3;
753 case 2:
if (p[-2]&0x80)
return p-2;
754 case 1:
if (p[-1]&0x80)
return p-1;
759#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
760#define aligned_ptr(value) \
761 __builtin_assume_aligned((value), sizeof(uintptr_t))
763#define aligned_ptr(value) (uintptr_t *)(value)
766 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
769 if (*s & NONASCII_MASK) {
770#ifdef WORDS_BIGENDIAN
771 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
773 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
783 case 7:
if (e[-7]&0x80)
return e-7;
784 case 6:
if (e[-6]&0x80)
return e-6;
785 case 5:
if (e[-5]&0x80)
return e-5;
786 case 4:
if (e[-4]&0x80)
return e-4;
788 case 3:
if (e[-3]&0x80)
return e-3;
789 case 2:
if (e[-2]&0x80)
return e-2;
790 case 1:
if (e[-1]&0x80)
return e-1;
798 const char *e = p +
len;
800 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
802 p = search_nonascii(p, e);
806 if (rb_enc_asciicompat(enc)) {
807 p = search_nonascii(p, e);
810 int ret = rb_enc_precise_mbclen(p, e, enc);
814 p = search_nonascii(p, e);
820 int ret = rb_enc_precise_mbclen(p, e, enc);
836 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
839 p = search_nonascii(p, e);
843 else if (rb_enc_asciicompat(enc)) {
844 p = search_nonascii(p, e);
850 int ret = rb_enc_precise_mbclen(p, e, enc);
857 p = search_nonascii(p, e);
863 int ret = rb_enc_precise_mbclen(p, e, enc);
888 rb_enc_set_index(str1, rb_enc_get_index(str2));
896rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
901 str_enc_copy(dest, src);
902 if (RSTRING_LEN(dest) == 0) {
903 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
914 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
915 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
926rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
928 str_enc_copy(dest, src);
935 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
941 return enc_coderange_scan(str, enc);
950 cr = enc_coderange_scan(str, get_encoding(str));
957rb_enc_str_asciicompat(
VALUE str)
960 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
968 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
977str_mod_check(
VALUE s,
const char *p,
long len)
979 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
985str_capacity(
VALUE str,
const int termlen)
987 if (STR_EMBED_P(str)) {
988 return str_embed_capa(str) - termlen;
990 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
994 return RSTRING(str)->as.heap.aux.capa;
1001 return str_capacity(str, TERM_LEN(str));
1005must_not_null(
const char *
ptr)
1008 rb_raise(rb_eArgError,
"NULL pointer given");
1013str_alloc_embed(
VALUE klass,
size_t capa)
1015 size_t size = rb_str_embed_size(
capa, 0);
1019 NEWOBJ_OF(str,
struct RString, klass,
1023 str->as.embed.ary[0] = 0;
1029str_alloc_heap(
VALUE klass)
1031 NEWOBJ_OF(str,
struct RString, klass,
1035 str->as.heap.aux.capa = 0;
1036 str->as.heap.ptr = NULL;
1042empty_str_alloc(
VALUE klass)
1044 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1045 VALUE str = str_alloc_embed(klass, 0);
1046 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1057 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1061 enc = rb_ascii8bit_encoding();
1064 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1066 int termlen = rb_enc_mbminlen(enc);
1068 if (STR_EMBEDDABLE_P(
len, termlen)) {
1069 str = str_alloc_embed(klass,
len + termlen);
1075 str = str_alloc_heap(klass);
1081 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1084 rb_enc_raw_set(str, enc);
1087 memcpy(RSTRING_PTR(str),
ptr,
len);
1090 memset(RSTRING_PTR(str), 0,
len);
1093 STR_SET_LEN(str,
len);
1094 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1101 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1136 __msan_unpoison_string(
ptr);
1156 if (rb_enc_mbminlen(enc) != 1) {
1157 rb_raise(rb_eArgError,
"wchar encoding given");
1159 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1163str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1168 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1172 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1175 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1176 str = str_alloc_heap(klass);
1180 RBASIC(str)->flags |= STR_NOFREE;
1181 rb_enc_associate_index(str, encindex);
1210static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1212 int ecflags,
VALUE ecopts);
1217 int encidx = rb_enc_to_index(enc);
1218 if (rb_enc_get_index(str) == encidx)
1219 return is_ascii_string(str);
1230 if (!to)
return str;
1231 if (!from) from = rb_enc_get(str);
1232 if (from == to)
return str;
1233 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1234 rb_is_ascii8bit_enc(to)) {
1235 if (STR_ENC_GET(str) != to) {
1237 rb_enc_associate(str, to);
1244 from, to, ecflags, ecopts);
1245 if (
NIL_P(newstr)) {
1253rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1258 olen = RSTRING_LEN(newstr);
1259 if (ofs < -olen || olen < ofs)
1261 if (ofs < 0) ofs += olen;
1263 STR_SET_LEN(newstr, ofs);
1267 rb_str_modify(newstr);
1268 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1276 STR_SET_LEN(str, 0);
1277 rb_enc_associate(str, enc);
1283str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1285 int ecflags,
VALUE ecopts)
1290 VALUE econv_wrapper;
1291 const unsigned char *start, *sp;
1292 unsigned char *dest, *dp;
1293 size_t converted_output = (size_t)ofs;
1298 RBASIC_CLEAR_CLASS(econv_wrapper);
1300 if (!ec)
return Qnil;
1303 sp = (
unsigned char*)
ptr;
1305 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1306 (dp = dest + converted_output),
1310 size_t converted_input = sp - start;
1311 size_t rest =
len - converted_input;
1312 converted_output = dp - dest;
1314 if (converted_input && converted_output &&
1315 rest < (LONG_MAX / converted_output)) {
1316 rest = (rest * converted_output) / converted_input;
1321 olen += rest < 2 ? 2 : rest;
1322 rb_str_resize(newstr, olen);
1329 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1331 rb_enc_associate(newstr, to);
1350 const int eidx = rb_enc_to_index(eenc);
1353 return rb_enc_str_new(
ptr,
len, eenc);
1357 if ((eidx == rb_ascii8bit_encindex()) ||
1358 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1362 ienc = rb_default_internal_encoding();
1363 if (!ienc || eenc == ienc) {
1364 return rb_enc_str_new(
ptr,
len, eenc);
1368 if ((eidx == rb_ascii8bit_encindex()) ||
1369 (eidx == rb_usascii_encindex()) ||
1370 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1371 return rb_enc_str_new(
ptr,
len, ienc);
1374 str = rb_enc_str_new(NULL, 0, ienc);
1377 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1378 rb_str_initialize(str,
ptr,
len, eenc);
1386 int eidx = rb_enc_to_index(eenc);
1387 if (eidx == rb_usascii_encindex() &&
1388 !is_ascii_string(str)) {
1389 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1392 rb_enc_associate_index(str, eidx);
1451str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1453 const int termlen = TERM_LEN(str);
1458 if (str_embed_capa(str2) >=
len + termlen) {
1459 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1460 STR_SET_EMBED(str2);
1461 memcpy(ptr2, RSTRING_PTR(str),
len);
1462 TERM_FILL(ptr2+
len, termlen);
1466 if (STR_SHARED_P(str)) {
1467 root =
RSTRING(str)->as.heap.aux.shared;
1476 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1478 rb_fatal(
"about to free a possible shared root");
1480 char *ptr2 = STR_HEAP_PTR(str2);
1482 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1485 FL_SET(str2, STR_NOEMBED);
1487 STR_SET_SHARED(str2, root);
1490 STR_SET_LEN(str2,
len);
1498 str_replace_shared_without_enc(str2, str);
1499 rb_enc_cr_str_exact_copy(str2, str);
1506 return str_replace_shared(str_alloc_heap(klass), str);
1523rb_str_new_frozen_String(
VALUE orig)
1531rb_str_frozen_bare_string(
VALUE orig)
1533 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1538rb_str_tmp_frozen_acquire(
VALUE orig)
1541 return str_new_frozen_buffer(0, orig, FALSE);
1545rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1547 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1548 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1550 VALUE str = str_alloc_heap(0);
1553 FL_SET(str, STR_SHARED_ROOT);
1555 size_t capa = str_capacity(orig, TERM_LEN(orig));
1561 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1562 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1569 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1570 RBASIC(orig)->flags &= ~STR_NOFREE;
1571 STR_SET_SHARED(orig, str);
1573 RB_OBJ_SET_SHAREABLE(str);
1585rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1590 if (STR_EMBED_P(tmp)) {
1593 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1599 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1603 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1604 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1609 STR_SET_LEN(tmp, 0);
1617 return str_new_frozen_buffer(klass, orig, TRUE);
1627 VALUE str = str_alloc_heap(klass);
1628 STR_SET_LEN(str, RSTRING_LEN(orig));
1629 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1630 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1631 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1632 RBASIC(orig)->flags &= ~STR_NOFREE;
1633 STR_SET_SHARED(orig, str);
1640str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1644 long len = RSTRING_LEN(orig);
1645 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1646 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1648 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1649 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1655 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1656 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1662 if ((ofs > 0) || (rest > 0) ||
1665 str = str_new_shared(klass,
shared);
1667 RSTRING(str)->as.heap.ptr += ofs;
1668 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1676 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1677 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1679 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1680 STR_SET_LEN(str, RSTRING_LEN(orig));
1686 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1689 str = heap_str_make_shared(klass, orig);
1694 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1706str_new_empty_String(
VALUE str)
1709 rb_enc_copy(v, str);
1713#define STR_BUF_MIN_SIZE 63
1718 if (STR_EMBEDDABLE_P(
capa, 1)) {
1726 RSTRING(str)->as.heap.ptr[0] =
'\0';
1746 return str_new(0, 0,
len);
1752 if (STR_EMBED_P(str)) {
1753 RB_DEBUG_COUNTER_INC(obj_str_embed);
1755 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1756 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1757 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1760 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1761 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1766rb_str_memsize(
VALUE str)
1768 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1769 return STR_HEAP_SIZE(str);
1779 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1782static inline void str_discard(
VALUE str);
1783static void str_shared_replace(
VALUE str,
VALUE str2);
1788 if (str != str2) str_shared_replace(str, str2);
1799 enc = STR_ENC_GET(str2);
1802 termlen = rb_enc_mbminlen(enc);
1804 STR_SET_LEN(str, RSTRING_LEN(str2));
1806 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1808 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1809 rb_enc_associate(str, enc);
1813 if (STR_EMBED_P(str2)) {
1815 long len = RSTRING_LEN(str2);
1818 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1819 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1820 RSTRING(str2)->as.heap.ptr = new_ptr;
1821 STR_SET_LEN(str2,
len);
1823 STR_SET_NOEMBED(str2);
1826 STR_SET_NOEMBED(str);
1828 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1830 if (
FL_TEST(str2, STR_SHARED)) {
1832 STR_SET_SHARED(str,
shared);
1835 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1839 STR_SET_EMBED(str2);
1840 RSTRING_PTR(str2)[0] = 0;
1841 STR_SET_LEN(str2, 0);
1842 rb_enc_associate(str, enc);
1856 return rb_obj_as_string_result(str, obj);
1872 len = RSTRING_LEN(str2);
1873 if (STR_SHARED_P(str2)) {
1876 STR_SET_NOEMBED(str);
1877 STR_SET_LEN(str,
len);
1878 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1879 STR_SET_SHARED(str,
shared);
1880 rb_enc_cr_str_exact_copy(str, str2);
1883 str_replace_shared(str, str2);
1892 size_t size = rb_str_embed_size(
capa, 0);
1896 NEWOBJ_OF(str,
struct RString, klass,
1907 NEWOBJ_OF(str,
struct RString, klass,
1910 str->as.heap.aux.capa = 0;
1911 str->as.heap.ptr = NULL;
1921 encidx = rb_enc_get_index(str);
1922 flags &= ~ENCODING_MASK;
1925 if (encidx) rb_enc_associate_index(dup, encidx);
1935 long len = RSTRING_LEN(str);
1940 STR_SET_LEN(dup, RSTRING_LEN(str));
1941 return str_duplicate_setup_encoding(str, dup, flags);
1950 root =
RSTRING(str)->as.heap.aux.shared;
1953 root = str = str_new_frozen(klass, str);
1959 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1960 FL_SET(root, STR_SHARED_ROOT);
1962 flags |= RSTRING_NOEMBED | STR_SHARED;
1964 STR_SET_LEN(dup, RSTRING_LEN(str));
1965 return str_duplicate_setup_encoding(str, dup, flags);
1971 if (STR_EMBED_P(str)) {
1972 return str_duplicate_setup_embed(klass, str, dup);
1975 return str_duplicate_setup_heap(klass, str, dup);
1983 if (STR_EMBED_P(str)) {
1984 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1987 dup = str_alloc_heap(klass);
1990 return str_duplicate_setup(klass, str, dup);
2001rb_str_dup_m(
VALUE str)
2003 if (LIKELY(BARE_STRING_P(str))) {
2014 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2021 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2025 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2026 str_duplicate_setup_embed(klass, str, new_str);
2029 new_str = ec_str_alloc_heap(ec, klass);
2030 str_duplicate_setup_heap(klass, str, new_str);
2039rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2041 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2065 static ID keyword_ids[2];
2066 VALUE orig, opt, venc, vcapa;
2071 if (!keyword_ids[0]) {
2072 keyword_ids[0] = rb_id_encoding();
2073 CONST_ID(keyword_ids[1],
"capacity");
2081 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2082 enc = rb_to_encoding(venc);
2084 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2087 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2089 if (
capa < STR_BUF_MIN_SIZE) {
2090 capa = STR_BUF_MIN_SIZE;
2094 len = RSTRING_LEN(orig);
2098 if (orig == str) n = 0;
2100 str_modifiable(str);
2101 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2103 const size_t size = (size_t)
capa + termlen;
2104 const char *
const old_ptr = RSTRING_PTR(str);
2105 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2106 char *new_ptr =
ALLOC_N(
char, size);
2107 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2108 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2110 RSTRING(str)->as.heap.ptr = new_ptr;
2112 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2113 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2114 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2116 STR_SET_LEN(str,
len);
2119 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2120 rb_enc_cr_str_exact_copy(str, orig);
2122 FL_SET(str, STR_NOEMBED);
2129 rb_enc_associate(str, enc);
2141rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2147 static ID keyword_ids[2];
2157 keyword_ids[0] = rb_id_encoding();
2158 CONST_ID(keyword_ids[1],
"capacity");
2160 encoding = kwargs[0];
2161 capacity = kwargs[1];
2170 if (UNDEF_P(encoding)) {
2172 encoding = rb_obj_encoding(orig);
2176 if (!UNDEF_P(encoding)) {
2177 enc = rb_to_encoding(encoding);
2181 if (UNDEF_P(capacity)) {
2183 VALUE empty_str = str_new(klass,
"", 0);
2185 rb_enc_associate(empty_str, enc);
2189 VALUE copy = str_duplicate(klass, orig);
2190 rb_enc_associate(copy, enc);
2203 if (orig_capa >
capa) {
2208 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2209 STR_SET_LEN(str, 0);
2220#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2235static inline uintptr_t
2236count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2241 d = (d>>6) | (~d>>7);
2242 d &= NONASCII_MASK >> 7;
2245#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2247 return rb_popcount_intptr(d);
2251# if SIZEOF_VOIDP == 8
2260enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2266 long diff = (long)(e - p);
2267 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2272 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2273 const uintptr_t *s, *t;
2274 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2275 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2276 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2277 while (p < (
const char *)s) {
2278 if (is_utf8_lead_byte(*p))
len++;
2282 len += count_utf8_lead_bytes_with_word(s);
2285 p = (
const char *)s;
2288 if (is_utf8_lead_byte(*p))
len++;
2294 else if (rb_enc_asciicompat(enc)) {
2299 q = search_nonascii(p, e);
2305 p += rb_enc_fast_mbclen(p, e, enc);
2312 q = search_nonascii(p, e);
2318 p += rb_enc_mbclen(p, e, enc);
2325 for (c=0; p<e; c++) {
2326 p += rb_enc_mbclen(p, e, enc);
2341rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2349 long diff = (long)(e - p);
2350 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2352 else if (rb_enc_asciicompat(enc)) {
2356 q = search_nonascii(p, e);
2364 ret = rb_enc_precise_mbclen(p, e, enc);
2379 for (c=0; p<e; c++) {
2380 ret = rb_enc_precise_mbclen(p, e, enc);
2387 if (p + rb_enc_mbminlen(enc) <= e)
2388 p += rb_enc_mbminlen(enc);
2404 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2405 if (!enc) enc = STR_ENC_GET(str);
2406 p = RSTRING_PTR(str);
2411 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2416 return enc_strlen(p, e, enc, cr);
2423 return str_strlen(str, NULL);
2437 return LONG2NUM(str_strlen(str, NULL));
2449rb_str_bytesize(
VALUE str)
2468rb_str_empty(
VALUE str)
2470 return RBOOL(RSTRING_LEN(str) == 0);
2489 char *ptr1, *ptr2, *ptr3;
2494 enc = rb_enc_check_str(str1, str2);
2497 termlen = rb_enc_mbminlen(enc);
2498 if (len1 > LONG_MAX - len2) {
2499 rb_raise(rb_eArgError,
"string size too big");
2501 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2502 ptr3 = RSTRING_PTR(str3);
2503 memcpy(ptr3, ptr1, len1);
2504 memcpy(ptr3+len1, ptr2, len2);
2505 TERM_FILL(&ptr3[len1+len2], termlen);
2521 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2524 int enc1 = rb_enc_get_index(str1);
2525 int enc2 = rb_enc_get_index(str2);
2530 else if (enc2 < 0) {
2533 else if (enc1 != enc2) {
2536 else if (len1 > LONG_MAX - len2) {
2570 rb_enc_copy(str2, str);
2575 rb_raise(rb_eArgError,
"negative argument");
2577 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2578 if (STR_EMBEDDABLE_P(
len, 1)) {
2580 memset(RSTRING_PTR(str2), 0,
len + 1);
2587 STR_SET_LEN(str2,
len);
2588 rb_enc_copy(str2, str);
2591 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2592 rb_raise(rb_eArgError,
"argument too big");
2595 len *= RSTRING_LEN(str);
2596 termlen = TERM_LEN(str);
2598 ptr2 = RSTRING_PTR(str2);
2600 n = RSTRING_LEN(str);
2601 memcpy(ptr2, RSTRING_PTR(str), n);
2602 while (n <=
len/2) {
2603 memcpy(ptr2 + n, ptr2, n);
2606 memcpy(ptr2 + n, ptr2,
len-n);
2608 STR_SET_LEN(str2,
len);
2609 TERM_FILL(&ptr2[
len], termlen);
2610 rb_enc_cr_str_copy_for_substr(str2, str);
2647rb_check_lockedtmp(
VALUE str)
2649 if (
FL_TEST(str, STR_TMPLOCK)) {
2656#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2658str_modifiable(
VALUE str)
2662 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2663 if (CHILLED_STRING_P(str)) {
2664 CHILLED_STRING_MUTATED(str);
2666 rb_check_lockedtmp(str);
2667 rb_check_frozen(str);
2672str_dependent_p(
VALUE str)
2674 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2684#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2686str_independent(
VALUE str)
2690 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2691 str_modifiable(str);
2692 return !str_dependent_p(str);
2698str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2708 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2713 STR_SET_LEN(str,
len);
2718 oldptr = RSTRING_PTR(str);
2720 memcpy(
ptr, oldptr,
len);
2722 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2725 STR_SET_NOEMBED(str);
2726 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2727 TERM_FILL(
ptr +
len, termlen);
2729 STR_SET_LEN(str,
len);
2736 if (!str_independent(str))
2737 str_make_independent(str);
2746 int termlen = TERM_LEN(str);
2747 long len = RSTRING_LEN(str);
2750 rb_raise(rb_eArgError,
"negative expanding string size");
2752 if (expand >= LONG_MAX -
len) {
2753 rb_raise(rb_eArgError,
"string size too big");
2756 if (!str_independent(str)) {
2757 str_make_independent_expand(str,
len, expand, termlen);
2759 else if (expand > 0) {
2760 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2767str_modify_keep_cr(
VALUE str)
2769 if (!str_independent(str))
2770 str_make_independent(str);
2777str_discard(
VALUE str)
2779 str_modifiable(str);
2780 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2781 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2782 RSTRING(str)->as.heap.ptr = 0;
2783 STR_SET_LEN(str, 0);
2790 int encindex = rb_enc_get_index(str);
2792 if (RB_UNLIKELY(encindex == -1)) {
2796 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2801 if (!rb_enc_asciicompat(enc)) {
2823 return RSTRING_PTR(str);
2827zero_filled(
const char *s,
int n)
2829 for (; n > 0; --n) {
2836str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2838 const char *e = s +
len;
2840 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2841 if (zero_filled(s, minlen))
return s;
2847str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2852 if (str_dependent_p(str)) {
2853 if (!zero_filled(s +
len, termlen))
2854 str_make_independent_expand(str,
len, 0L, termlen);
2857 TERM_FILL(s +
len, termlen);
2860 return RSTRING_PTR(str);
2864rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2866 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2867 long len = RSTRING_LEN(str);
2871 rb_check_lockedtmp(str);
2872 str_make_independent_expand(str,
len, 0L, termlen);
2874 else if (str_dependent_p(str)) {
2875 if (termlen > oldtermlen)
2876 str_make_independent_expand(str,
len, 0L, termlen);
2879 if (!STR_EMBED_P(str)) {
2884 if (termlen > oldtermlen) {
2885 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2893str_null_check(
VALUE str,
int *w)
2895 char *s = RSTRING_PTR(str);
2896 long len = RSTRING_LEN(str);
2898 const int minlen = rb_enc_mbminlen(enc);
2902 if (str_null_char(s,
len, minlen, enc)) {
2905 return str_fill_term(str, s,
len, minlen);
2908 if (!s || memchr(s, 0,
len)) {
2912 s = str_fill_term(str, s,
len, minlen);
2918rb_str_to_cstr(
VALUE str)
2921 return str_null_check(str, &w);
2929 char *s = str_null_check(str, &w);
2932 rb_raise(rb_eArgError,
"string contains null char");
2934 rb_raise(rb_eArgError,
"string contains null byte");
2940rb_str_fill_terminator(
VALUE str,
const int newminlen)
2942 char *s = RSTRING_PTR(str);
2943 long len = RSTRING_LEN(str);
2944 return str_fill_term(str, s,
len, newminlen);
2950 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2976str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2985 else if (rb_enc_asciicompat(enc)) {
2986 const char *p2, *e2;
2989 while (p < e && 0 < nth) {
2996 p2 = search_nonascii(p, e2);
3005 n = rb_enc_mbclen(p, e, enc);
3016 while (p < e && nth--) {
3017 p += rb_enc_mbclen(p, e, enc);
3028 return str_nth_len(p, e, &nth, enc);
3032str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3037 p = str_nth_len(p, e, &nth, enc);
3046str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3048 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3049 if (!pp)
return e - p;
3056 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3057 STR_ENC_GET(str), single_byte_optimizable(str));
3062str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3065 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3066 const uintptr_t *s, *t;
3067 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3068 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3069 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3070 while (p < (
const char *)s) {
3071 if (is_utf8_lead_byte(*p)) nth--;
3075 nth -= count_utf8_lead_bytes_with_word(s);
3077 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3081 if (is_utf8_lead_byte(*p)) {
3082 if (nth == 0)
break;
3092str_utf8_offset(
const char *p,
const char *e,
long nth)
3094 const char *pp = str_utf8_nth(p, e, &nth);
3103 if (single_byte_optimizable(str) || pos < 0)
3106 char *p = RSTRING_PTR(str);
3107 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3112str_subseq(
VALUE str,
long beg,
long len)
3120 const int termlen = TERM_LEN(str);
3121 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3128 if (str_embed_capa(str2) >=
len + termlen) {
3129 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3130 STR_SET_EMBED(str2);
3131 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3132 TERM_FILL(ptr2+
len, termlen);
3134 STR_SET_LEN(str2,
len);
3138 str_replace_shared(str2, str);
3141 RSTRING(str2)->as.heap.ptr += beg;
3142 if (RSTRING_LEN(str2) >
len) {
3143 STR_SET_LEN(str2,
len);
3153 VALUE str2 = str_subseq(str, beg,
len);
3154 rb_enc_cr_str_copy_for_substr(str2, str);
3163 const long blen = RSTRING_LEN(str);
3165 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3167 if (
len < 0)
return 0;
3168 if (beg < 0 && -beg < 0)
return 0;
3172 if (single_byte_optimizable(str)) {
3173 if (beg > blen)
return 0;
3176 if (beg < 0)
return 0;
3178 if (
len > blen - beg)
3180 if (
len < 0)
return 0;
3185 if (
len > -beg)
len = -beg;
3189 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3192 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3198 slen = str_strlen(str, enc);
3200 if (beg < 0)
return 0;
3202 if (
len == 0)
goto end;
3205 else if (beg > 0 && beg > blen) {
3209 if (beg > str_strlen(str, enc))
return 0;
3214 enc == rb_utf8_encoding()) {
3215 p = str_utf8_nth(s, e, &beg);
3216 if (beg > 0)
return 0;
3217 len = str_utf8_offset(p, e,
len);
3223 p = s + beg * char_sz;
3227 else if (
len * char_sz > e - p)
3232 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3233 if (beg > 0)
return 0;
3237 len = str_offset(p, e,
len, enc, 0);
3245static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3250 return str_substr(str, beg,
len, TRUE);
3260str_substr(
VALUE str,
long beg,
long len,
int empty)
3264 if (!p)
return Qnil;
3265 if (!
len && !empty)
return Qnil;
3267 beg = p - RSTRING_PTR(str);
3269 VALUE str2 = str_subseq(str, beg,
len);
3270 rb_enc_cr_str_copy_for_substr(str2, str);
3278 if (CHILLED_STRING_P(str)) {
3283 rb_str_resize(str, RSTRING_LEN(str));
3301 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3344str_uminus(
VALUE str)
3349 return rb_fstring(str);
3353#define rb_str_dup_frozen rb_str_new_frozen
3358 rb_check_frozen(str);
3359 if (
FL_TEST(str, STR_TMPLOCK)) {
3362 FL_SET(str, STR_TMPLOCK);
3369 rb_check_frozen(str);
3370 if (!
FL_TEST(str, STR_TMPLOCK)) {
3390 const int termlen = TERM_LEN(str);
3392 str_modifiable(str);
3393 if (STR_SHARED_P(str)) {
3396 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3397 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3408 else if (
len > RSTRING_LEN(str)) {
3412 const char *
const new_end = RSTRING_PTR(str) +
len;
3422 else if (
len < RSTRING_LEN(str)) {
3430 STR_SET_LEN(str,
len);
3431 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3438 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3441 int independent = str_independent(str);
3442 long slen = RSTRING_LEN(str);
3443 const int termlen = TERM_LEN(str);
3445 if (slen >
len || (termlen != 1 && slen <
len)) {
3451 if (STR_EMBED_P(str)) {
3452 if (
len == slen)
return str;
3453 if (str_embed_capa(str) >=
len + termlen) {
3454 STR_SET_LEN(str,
len);
3458 str_make_independent_expand(str, slen,
len - slen, termlen);
3460 else if (str_embed_capa(str) >=
len + termlen) {
3461 char *
ptr = STR_HEAP_PTR(str);
3463 if (slen >
len) slen =
len;
3466 STR_SET_LEN(str,
len);
3467 if (independent) ruby_xfree(
ptr);
3470 else if (!independent) {
3471 if (
len == slen)
return str;
3472 str_make_independent_expand(str, slen,
len - slen, termlen);
3476 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3477 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3480 else if (
len == slen)
return str;
3481 STR_SET_LEN(str,
len);
3488str_ensure_available_capa(
VALUE str,
long len)
3490 str_modify_keep_cr(str);
3492 const int termlen = TERM_LEN(str);
3493 long olen = RSTRING_LEN(str);
3495 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3496 rb_raise(rb_eArgError,
"string sizes too big");
3499 long total = olen +
len;
3500 long capa = str_capacity(str, termlen);
3503 if (total >= LONG_MAX / 2) {
3506 while (total >
capa) {
3509 RESIZE_CAPA_TERM(str,
capa, termlen);
3514str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3517 str_modify_keep_cr(str);
3522 if (
len == 0)
return 0;
3524 long total, olen,
off = -1;
3526 const int termlen = TERM_LEN(str);
3529 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3533 long capa = str_capacity(str, termlen);
3535 if (olen > LONG_MAX -
len) {
3536 rb_raise(rb_eArgError,
"string sizes too big");
3540 if (total >= LONG_MAX / 2) {
3543 while (total >
capa) {
3546 RESIZE_CAPA_TERM(str,
capa, termlen);
3547 sptr = RSTRING_PTR(str);
3552 memcpy(sptr + olen,
ptr,
len);
3553 STR_SET_LEN(str, total);
3554 TERM_FILL(sptr + total, termlen);
3559#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3560#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3565 if (
len == 0)
return str;
3567 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3569 return str_buf_cat(str,
ptr,
len);
3580rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3585 if (UNLIKELY(!str_independent(str))) {
3586 str_make_independent(str);
3589 long string_length = -1;
3590 const int null_terminator_length = 1;
3595 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3596 rb_raise(rb_eArgError,
"string sizes too big");
3599 long string_capacity = str_capacity(str, null_terminator_length);
3605 if (LIKELY(string_capacity >= string_length + 1)) {
3607 sptr[string_length] = byte;
3608 STR_SET_LEN(str, string_length + 1);
3609 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3613 str_buf_cat(str, (
char *)&
byte, 1);
3629 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3640rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3641 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3650 if (str_encindex == ptr_encindex) {
3652 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3656 str_enc = rb_enc_from_index(str_encindex);
3657 ptr_enc = rb_enc_from_index(ptr_encindex);
3658 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3661 if (RSTRING_LEN(str) == 0) {
3664 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3670 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3679 *ptr_cr_ret = ptr_cr;
3681 if (str_encindex != ptr_encindex &&
3684 str_enc = rb_enc_from_index(str_encindex);
3685 ptr_enc = rb_enc_from_index(ptr_encindex);
3690 res_encindex = str_encindex;
3695 res_encindex = str_encindex;
3699 res_encindex = ptr_encindex;
3704 res_encindex = str_encindex;
3711 res_encindex = str_encindex;
3717 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3719 str_buf_cat(str,
ptr,
len);
3725 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3732 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3742 if (rb_enc_asciicompat(enc)) {
3743 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3749 unsigned int c = (
unsigned char)*
ptr;
3750 int len = rb_enc_codelen(c, enc);
3751 rb_enc_mbcput(c, buf, enc);
3752 rb_enc_cr_str_buf_cat(str, buf,
len,
3765 if (str_enc_fastpath(str)) {
3769 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3775 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3786 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3802rb_str_concat_literals(
size_t num,
const VALUE *strary)
3806 unsigned long len = 1;
3811 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3813 str_enc_copy_direct(str, strary[0]);
3815 for (i = s; i < num; ++i) {
3816 const VALUE v = strary[i];
3820 if (encidx != ENCINDEX_US_ASCII) {
3822 rb_enc_set_index(str, encidx);
3835rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3837 str_modifiable(str);
3842 else if (argc > 1) {
3845 rb_enc_copy(arg_str, str);
3846 for (i = 0; i < argc; i++) {
3881rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3883 long needed_capacity = 0;
3887 for (
int index = 0; index < argc; index++) {
3888 VALUE obj = argv[index];
3896 needed_capacity += RSTRING_LEN(obj);
3901 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3908 str_ensure_available_capa(str, needed_capacity);
3911 for (
int index = 0; index < argc; index++) {
3912 VALUE obj = argv[index];
3917 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3918 char byte = (char)(
NUM2INT(obj) & 0xFF);
3932 rb_bug(
"append_as_bytes arguments should have been validated");
3936 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3937 TERM_FILL(sptr, TERM_LEN(str));
3942 for (
int index = 0; index < argc; index++) {
3943 VALUE obj = argv[index];
3960 rb_bug(
"append_as_bytes arguments should have been validated");
4039 if (rb_num_to_uint(str2, &code) == 0) {
4052 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4055 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4058 long pos = RSTRING_LEN(str1);
4063 switch (
len = rb_enc_codelen(code, enc)) {
4064 case ONIGERR_INVALID_CODE_POINT_VALUE:
4065 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4067 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4073 rb_enc_mbcput(code, buf, enc);
4074 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4075 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4077 rb_str_resize(str1, pos+
len);
4078 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4091rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4093 int encidx = rb_enc_to_index(enc);
4095 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4100 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4101 return ENCINDEX_ASCII_8BIT;
4123rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4125 str_modifiable(str);
4130 else if (argc > 1) {
4133 rb_enc_copy(arg_str, str);
4134 for (i = 0; i < argc; i++) {
4147 st_index_t precomputed_hash;
4148 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4150 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4151 return precomputed_hash;
4154 return str_do_hash(str);
4161 const char *ptr1, *ptr2;
4164 return (len1 != len2 ||
4166 memcmp(ptr1, ptr2, len1) != 0);
4178rb_str_hash_m(
VALUE str)
4184#define lesser(a,b) (((a)>(b))?(b):(a))
4192 if (RSTRING_LEN(str1) == 0)
return TRUE;
4193 if (RSTRING_LEN(str2) == 0)
return TRUE;
4196 if (idx1 == idx2)
return TRUE;
4201 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4205 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4215 const char *ptr1, *ptr2;
4218 if (str1 == str2)
return 0;
4221 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4230 if (len1 > len2)
return 1;
4233 if (retval > 0)
return 1;
4267 if (str1 == str2)
return Qtrue;
4274 return rb_str_eql_internal(str1, str2);
4288 if (str1 == str2)
return Qtrue;
4290 return rb_str_eql_internal(str1, str2);
4322 return rb_invcmp(str1, str2);
4364 return str_casecmp(str1, s);
4372 const char *p1, *p1end, *p2, *p2end;
4374 enc = rb_enc_compatible(str1, str2);
4379 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4380 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4381 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4382 while (p1 < p1end && p2 < p2end) {
4384 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4385 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4387 return INT2FIX(c1 < c2 ? -1 : 1);
4394 while (p1 < p1end && p2 < p2end) {
4395 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4396 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4398 if (0 <= c1 && 0 <= c2) {
4402 return INT2FIX(c1 < c2 ? -1 : 1);
4406 l1 = rb_enc_mbclen(p1, p1end, enc);
4407 l2 = rb_enc_mbclen(p2, p2end, enc);
4408 len = l1 < l2 ? l1 : l2;
4409 r = memcmp(p1, p2,
len);
4411 return INT2FIX(r < 0 ? -1 : 1);
4413 return INT2FIX(l1 < l2 ? -1 : 1);
4419 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4420 if (p1 == p1end)
return INT2FIX(-1);
4453 return str_casecmp_p(str1, s);
4460 VALUE folded_str1, folded_str2;
4461 VALUE fold_opt = sym_fold;
4463 enc = rb_enc_compatible(str1, str2);
4468 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4469 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4471 return rb_str_eql(folded_str1, folded_str2);
4475strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4476 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4478 const char *search_start = str_ptr;
4479 long pos, search_len = str_len - offset;
4483 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4484 if (pos < 0)
return pos;
4486 if (t == search_start + pos)
break;
4487 search_len -= t - search_start;
4488 if (search_len <= 0)
return -1;
4489 offset += t - search_start;
4492 return pos + offset;
4496#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4497#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4500rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4502 const char *str_ptr, *str_ptr_end, *sub_ptr;
4503 long str_len, sub_len;
4506 enc = rb_enc_check(str, sub);
4507 if (is_broken_string(sub))
return -1;
4509 str_ptr = RSTRING_PTR(str);
4511 str_len = RSTRING_LEN(str);
4512 sub_ptr = RSTRING_PTR(sub);
4513 sub_len = RSTRING_LEN(sub);
4515 if (str_len < sub_len)
return -1;
4518 long str_len_char, sub_len_char;
4519 int single_byte = single_byte_optimizable(str);
4520 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4521 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4523 offset += str_len_char;
4524 if (offset < 0)
return -1;
4526 if (str_len_char - offset < sub_len_char)
return -1;
4527 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4530 if (sub_len == 0)
return offset;
4533 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4546rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4553 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4554 long slen = str_strlen(str, enc);
4556 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4568 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4569 enc, single_byte_optimizable(str));
4580 pos = rb_str_index(str, sub, pos);
4594str_ensure_byte_pos(
VALUE str,
long pos)
4596 if (!single_byte_optimizable(str)) {
4597 const char *s = RSTRING_PTR(str);
4599 const char *p = s + pos;
4600 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4602 "offset %ld does not land on character boundary", pos);
4675rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4681 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4682 long slen = RSTRING_LEN(str);
4684 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4695 str_ensure_byte_pos(str, pos);
4707 pos = rb_str_byteindex(str, sub, pos);
4708 if (pos >= 0)
return LONG2NUM(pos);
4715memrchr(
const char *search_str,
int chr,
long search_len)
4717 const char *ptr = search_str + search_len;
4718 while (ptr > search_str) {
4719 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4729 char *hit, *adjusted;
4731 long slen, searchlen;
4734 sbeg = RSTRING_PTR(str);
4735 slen = RSTRING_LEN(sub);
4736 if (slen == 0)
return s - sbeg;
4738 t = RSTRING_PTR(sub);
4740 searchlen = s - sbeg + 1;
4742 if (memcmp(s, t, slen) == 0) {
4747 hit = memrchr(sbeg, c, searchlen);
4750 if (hit != adjusted) {
4751 searchlen = adjusted - sbeg;
4754 if (memcmp(hit, t, slen) == 0)
4756 searchlen = adjusted - sbeg;
4757 }
while (searchlen > 0);
4771 enc = rb_enc_check(str, sub);
4772 if (is_broken_string(sub))
return -1;
4773 singlebyte = single_byte_optimizable(str);
4774 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4775 slen = str_strlen(sub, enc);
4778 if (
len < slen)
return -1;
4779 if (
len - pos < slen) pos =
len - slen;
4780 if (
len == 0)
return pos;
4782 sbeg = RSTRING_PTR(str);
4785 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4791 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4792 return str_rindex(str, sub, s, enc);
4804rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4809 long pos,
len = str_strlen(str, enc);
4811 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4813 if (pos < 0 && (pos +=
len) < 0) {
4819 if (pos >
len) pos =
len;
4827 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4828 enc, single_byte_optimizable(str));
4839 pos = rb_str_rindex(str, sub, pos);
4849rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4855 enc = rb_enc_check(str, sub);
4856 if (is_broken_string(sub))
return -1;
4857 len = RSTRING_LEN(str);
4858 slen = RSTRING_LEN(sub);
4861 if (
len < slen)
return -1;
4862 if (
len - pos < slen) pos =
len - slen;
4863 if (
len == 0)
return pos;
4865 sbeg = RSTRING_PTR(str);
4868 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4875 return str_rindex(str, sub, s, enc);
4965rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4969 long pos,
len = RSTRING_LEN(str);
4971 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4973 if (pos < 0 && (pos +=
len) < 0) {
4979 if (pos >
len) pos =
len;
4985 str_ensure_byte_pos(str, pos);
4997 pos = rb_str_byterindex(str, sub, pos);
4998 if (pos >= 0)
return LONG2NUM(pos);
5037 switch (OBJ_BUILTIN_TYPE(y)) {
5091rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5098 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5129rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5133 re = get_pat(argv[0]);
5134 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5143static enum neighbor_char
5149 if (rb_enc_mbminlen(enc) > 1) {
5151 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5153 return NEIGHBOR_NOT_CHAR;
5155 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5157 if (!l)
return NEIGHBOR_NOT_CHAR;
5158 if (l !=
len)
return NEIGHBOR_WRAPPED;
5159 rb_enc_mbcput(c, p, enc);
5160 r = rb_enc_precise_mbclen(p, p +
len, enc);
5162 return NEIGHBOR_NOT_CHAR;
5164 return NEIGHBOR_FOUND;
5167 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5170 return NEIGHBOR_WRAPPED;
5171 ++((
unsigned char*)p)[i];
5172 l = rb_enc_precise_mbclen(p, p+
len, enc);
5176 return NEIGHBOR_FOUND;
5179 memset(p+l, 0xff,
len-l);
5185 for (len2 =
len-1; 0 < len2; len2--) {
5186 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5190 memset(p+len2+1, 0xff,
len-(len2+1));
5195static enum neighbor_char
5200 if (rb_enc_mbminlen(enc) > 1) {
5202 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5204 return NEIGHBOR_NOT_CHAR;
5206 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5207 if (!c)
return NEIGHBOR_NOT_CHAR;
5210 if (!l)
return NEIGHBOR_NOT_CHAR;
5211 if (l !=
len)
return NEIGHBOR_WRAPPED;
5212 rb_enc_mbcput(c, p, enc);
5213 r = rb_enc_precise_mbclen(p, p +
len, enc);
5215 return NEIGHBOR_NOT_CHAR;
5217 return NEIGHBOR_FOUND;
5220 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5223 return NEIGHBOR_WRAPPED;
5224 --((
unsigned char*)p)[i];
5225 l = rb_enc_precise_mbclen(p, p+
len, enc);
5229 return NEIGHBOR_FOUND;
5232 memset(p+l, 0,
len-l);
5238 for (len2 =
len-1; 0 < len2; len2--) {
5239 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5243 memset(p+len2+1, 0,
len-(len2+1));
5257static enum neighbor_char
5258enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5260 enum neighbor_char ret;
5264 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5268 const int max_gaps = 1;
5270 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5272 ctype = ONIGENC_CTYPE_DIGIT;
5274 ctype = ONIGENC_CTYPE_ALPHA;
5276 return NEIGHBOR_NOT_CHAR;
5279 for (
try = 0;
try <= max_gaps; ++
try) {
5280 ret = enc_succ_char(p,
len, enc);
5281 if (ret == NEIGHBOR_FOUND) {
5282 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5284 return NEIGHBOR_FOUND;
5291 ret = enc_pred_char(p,
len, enc);
5292 if (ret == NEIGHBOR_FOUND) {
5293 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5306 return NEIGHBOR_NOT_CHAR;
5309 if (ctype != ONIGENC_CTYPE_DIGIT) {
5311 return NEIGHBOR_WRAPPED;
5315 enc_succ_char(carry,
len, enc);
5316 return NEIGHBOR_WRAPPED;
5334 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5335 rb_enc_cr_str_copy_for_substr(str, orig);
5336 return str_succ(str);
5343 char *sbeg, *s, *e, *last_alnum = 0;
5344 int found_alnum = 0;
5346 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5347 long carry_pos = 0, carry_len = 1;
5348 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5350 slen = RSTRING_LEN(str);
5351 if (slen == 0)
return str;
5353 enc = STR_ENC_GET(str);
5354 sbeg = RSTRING_PTR(str);
5355 s = e = sbeg + slen;
5357 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5358 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5364 l = rb_enc_precise_mbclen(s, e, enc);
5365 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5366 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5367 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5369 case NEIGHBOR_NOT_CHAR:
5371 case NEIGHBOR_FOUND:
5373 case NEIGHBOR_WRAPPED:
5378 carry_pos = s - sbeg;
5383 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5384 enum neighbor_char neighbor;
5385 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5386 l = rb_enc_precise_mbclen(s, e, enc);
5387 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5388 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5390 neighbor = enc_succ_char(tmp, l, enc);
5392 case NEIGHBOR_FOUND:
5396 case NEIGHBOR_WRAPPED:
5399 case NEIGHBOR_NOT_CHAR:
5402 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5404 enc_succ_char(s, l, enc);
5406 if (!rb_enc_asciicompat(enc)) {
5407 MEMCPY(carry, s,
char, l);
5410 carry_pos = s - sbeg;
5414 RESIZE_CAPA(str, slen + carry_len);
5415 sbeg = RSTRING_PTR(str);
5416 s = sbeg + carry_pos;
5417 memmove(s + carry_len, s, slen - carry_pos);
5418 memmove(s, carry, carry_len);
5420 STR_SET_LEN(str, slen);
5421 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5437rb_str_succ_bang(
VALUE str)
5445all_digits_p(
const char *s,
long len)
5473 VALUE end, exclusive;
5477 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5483 VALUE current, after_end;
5490 enc = rb_enc_check(beg, end);
5491 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5493 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5494 char c = RSTRING_PTR(beg)[0];
5495 char e = RSTRING_PTR(end)[0];
5497 if (c > e || (excl && c == e))
return beg;
5499 VALUE str = rb_enc_str_new(&c, 1, enc);
5501 if ((*each)(str, arg))
break;
5502 if (!excl && c == e)
break;
5504 if (excl && c == e)
break;
5509 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5510 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5511 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5516 b = rb_str_to_inum(beg, 10, FALSE);
5517 e = rb_str_to_inum(end, 10, FALSE);
5524 if (excl && bi == ei)
break;
5525 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5530 ID op = excl ?
'<' : idLE;
5531 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5536 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5537 b = rb_funcallv(b, succ, 0, 0);
5544 if (n > 0 || (excl && n == 0))
return beg;
5546 after_end = rb_funcallv(end, succ, 0, 0);
5551 next = rb_funcallv(current, succ, 0, 0);
5552 if ((*each)(current, arg))
break;
5553 if (
NIL_P(next))
break;
5557 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5572 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5573 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5574 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5576 b = rb_str_to_inum(beg, 10, FALSE);
5582 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5590 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5591 b = rb_funcallv(b, succ, 0, 0);
5597 VALUE next = rb_funcallv(current, succ, 0, 0);
5598 if ((*each)(current, arg))
break;
5601 if (RSTRING_LEN(current) == 0)
5612 if (!
rb_equal(str, *argp))
return 0;
5626 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5627 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5628 rb_enc_asciicompat(STR_ENC_GET(val))) {
5629 const char *bp = RSTRING_PTR(beg);
5630 const char *ep = RSTRING_PTR(end);
5631 const char *vp = RSTRING_PTR(val);
5632 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5633 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5641 if (b <= v && v < e)
return Qtrue;
5642 return RBOOL(!
RTEST(exclusive) && v == e);
5649 all_digits_p(bp, RSTRING_LEN(beg)) &&
5650 all_digits_p(ep, RSTRING_LEN(end))) {
5655 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5657 return RBOOL(
NIL_P(val));
5680 return rb_str_subpat(str, indx,
INT2FIX(0));
5683 if (rb_str_index(str, indx, 0) != -1)
5689 long beg,
len = str_strlen(str, NULL);
5701 return str_substr(str, idx, 1, FALSE);
5718rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5722 return rb_str_subpat(str, argv[0], argv[1]);
5725 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5729 return rb_str_aref(str, argv[0]);
5735 char *ptr = RSTRING_PTR(str);
5736 long olen = RSTRING_LEN(str), nlen;
5738 str_modifiable(str);
5739 if (
len > olen)
len = olen;
5741 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5743 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5745 ptr =
RSTRING(str)->as.embed.ary;
5746 memmove(ptr, oldptr +
len, nlen);
5747 if (fl == STR_NOEMBED)
xfree(oldptr);
5750 if (!STR_SHARED_P(str)) {
5752 rb_enc_cr_str_exact_copy(shared, str);
5757 STR_SET_LEN(str, nlen);
5759 if (!SHARABLE_MIDDLE_SUBSTRING) {
5760 TERM_FILL(ptr + nlen, TERM_LEN(str));
5767rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5773 if (beg == 0 && vlen == 0) {
5778 str_modify_keep_cr(str);
5782 RESIZE_CAPA(str, slen + vlen -
len);
5783 sptr = RSTRING_PTR(str);
5792 memmove(sptr + beg + vlen,
5794 slen - (beg +
len));
5796 if (vlen < beg &&
len < 0) {
5800 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5803 STR_SET_LEN(str, slen);
5804 TERM_FILL(&sptr[slen], TERM_LEN(str));
5811 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5820 int singlebyte = single_byte_optimizable(str);
5826 enc = rb_enc_check(str, val);
5827 slen = str_strlen(str, enc);
5829 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5838 if (
len > slen - beg) {
5841 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5846 beg = p - RSTRING_PTR(str);
5848 rb_str_update_0(str, beg,
len, val);
5849 rb_enc_associate(str, enc);
5860 long start, end,
len;
5870 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5874 nth += regs->num_regs;
5884 enc = rb_enc_check_str(str, val);
5885 rb_str_update_0(str, start,
len, val);
5886 rb_enc_associate(str, enc);
5894 switch (
TYPE(indx)) {
5896 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5900 beg = rb_str_index(str, indx, 0);
5939rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5943 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5951 return rb_str_aset(str, argv[0], argv[1]);
/*
 * Remove a portion of `str` selected by the arguments.
 * The visible lines compute a byte offset `beg` and byte length `len` in
 * several alternative ways, return Qnil when nothing is selected, and then
 * shrink `str` in place.
 * NOTE(review): large parts of the body (declarations, branch conditions,
 * construction of `result`) are not visible in this listing.
 */
6003rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6011 str_modify_keep_cr(str);
/* Group index handling: a negative nth is offset by the number of match
 * registers; out-of-range indexes yield Qnil. */
6019 if ((nth += regs->num_regs) <= 0)
return Qnil;
6021 else if (nth >= regs->num_regs)
return Qnil;
/* Length of the selected group is its end offset minus beg. */
6023 len = END(nth) - beg;
6026 else if (argc == 2) {
/* Convert the located pointer into a byte offset within str. */
6035 beg = p - RSTRING_PTR(str);
/* Substring argument: search from offset 0; -1 means not found. */
6039 beg = rb_str_index(str, indx, 0);
6040 if (beg == -1)
return Qnil;
6041 len = RSTRING_LEN(indx);
6053 beg = p - RSTRING_PTR(str);
6062 beg = p - RSTRING_PTR(str);
6066 rb_enc_cr_str_copy_for_substr(result, str);
/* In-place removal: the tail of slen - (beg + len) bytes is moved (call not
 * fully visible), then length and terminator are updated. */
6074 char *sptr = RSTRING_PTR(str);
6075 long slen = RSTRING_LEN(str);
6076 if (beg +
len > slen)
6080 slen - (beg +
len));
6082 STR_SET_LEN(str, slen);
6083 TERM_FILL(&sptr[slen], TERM_LEN(str));
6094 switch (OBJ_BUILTIN_TYPE(pat)) {
/*
 * Normalise a pattern argument, switching on its built-in type.
 * NOTE(review): the case labels and return statements are not visible here.
 */
6113get_pat_quoted(
VALUE pat,
int check)
6117 switch (OBJ_BUILTIN_TYPE(pat)) {
/* Only when `check` is non-zero is the pattern tested with
 * is_broken_string(); the action taken is not visible in this listing. */
6131 if (check && is_broken_string(pat)) {
/*
 * Search `str` for `pat` starting at `pos`.
 * Two paths are visible: a byte-index search (rb_str_byteindex) and a
 * regexp search (rb_reg_search0).
 * NOTE(review): the condition choosing between them is not visible;
 * presumably it tests whether `pat` is a String - confirm.
 */
6138rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6141 pos = rb_str_byteindex(str, pat, pos);
/* When requested, record the match (position and pattern length) through
 * rb_backref_set_string on a frozen string obtained from str. */
6142 if (set_backref_str) {
6144 str = rb_str_new_frozen_String(str);
6145 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
/* Hand the match data back to the caller through `match`. */
6147 *match = match_data;
/* Regexp path: delegate, forwarding set_backref_str and match. */
6157 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
/*
 * Convenience wrapper around rb_pat_search0() for callers that do not need
 * the match object: passes NULL for the `match` out-parameter.
 */
6162rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6164 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
/*
 * In-place single substitution on `str`: find the first match of argv[0]
 * and replace it with a replacement obtained from a string, a hash lookup,
 * or (per the `iter` flag) presumably a block - confirm.
 * NOTE(review): declarations, several branches and the return statements
 * are not visible in this listing.
 */
6182rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
/* argv[1] may be a hash of replacements. */
6196 hash = rb_check_hash_type(argv[1]);
6202 pat = get_pat_quoted(argv[0], 1);
6204 str_modifiable(str);
/* Search from offset 0 and set the back-reference string. */
6205 beg = rb_pat_search(pat, str, 0, 1);
6219 end0 = beg0 + RSTRING_LEN(pat);
6228 if (iter || !
NIL_P(hash)) {
/* Snapshot pointer and length so str_mod_check() can detect modification of
 * str while the replacement is being computed. */
6229 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
/* Hash form: look up the matched substring. */
6235 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6238 str_mod_check(str, p,
len);
6239 rb_check_frozen(str);
6245 enc = rb_enc_compatible(str, repl);
6248 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6252 rb_enc_inspect_name(str_enc),
6253 rb_enc_inspect_name(STR_ENC_GET(repl)));
6255 enc = STR_ENC_GET(repl);
6258 rb_enc_associate(str, enc);
/* Splice: grow/shrink to len + rlen - plen, shift the tail after the match,
 * then copy the replacement bytes over the match position. */
6268 rlen = RSTRING_LEN(repl);
6269 len = RSTRING_LEN(str);
6271 RESIZE_CAPA(str,
len + rlen - plen);
6273 p = RSTRING_PTR(str);
6275 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6277 rp = RSTRING_PTR(repl);
6278 memmove(p + beg0, rp, rlen);
/* Store the new length and terminate the buffer. */
6280 STR_SET_LEN(str,
len);
6281 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6304 rb_str_sub_bang(argc, argv, str);
/*
 * Shared implementation of global substitution; `bang` selects the in-place
 * variant (the visible lines return Qnil for it when nothing matches and
 * replace str's contents with `dest` at the end).
 * NOTE(review): much of the body (loop structure, buffer appends, returns)
 * is not visible in this listing; comments cover visible statements only.
 */
6309str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6312 long beg, beg0, end0;
6313 long offset, blen, slen,
len, last;
/* Replacement source: starts as STR; the other modes are selected in code
 * that is only partly visible. */
6314 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
/* -1 means "not decided yet"; it is resolved to 0/1 further down. */
6316 int need_backref_str = -1;
6326 hash = rb_check_hash_type(argv[1]);
6330 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
/* Anything other than 1 or 2 arguments is an arity error. */
6338 rb_error_arity(argc, 1, 2);
6341 pat = get_pat_quoted(argv[0], 1);
6342 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6345 if (bang)
return Qnil;
/* Initial size estimate for the result: source length plus 30. */
6350 blen = RSTRING_LEN(str) + 30;
/* Snapshot pointer and length for str_mod_check() below. */
6352 sp = RSTRING_PTR(str);
6353 slen = RSTRING_LEN(str);
6355 str_enc = STR_ENC_GET(str);
6356 rb_enc_associate(dest, str_enc);
6363 end0 = beg0 + RSTRING_LEN(pat);
6377 struct RString fake_str = {RBASIC_INIT};
6379 if (mode == FAST_MAP) {
6388 val = rb_hash_aref(hash, key);
/* Detect modification of str while the replacement was computed. */
6391 str_mod_check(str, sp, slen);
6396 else if (need_backref_str) {
/* First time through: the back-reference string is needed only if the
 * computed value differs from repl. */
6398 if (need_backref_str < 0) {
6399 need_backref_str = val != repl;
/* Length of the unmatched text between the previous offset and this match. */
6406 len = beg0 - offset;
/* Stop when the match ends at or beyond the end of the string; otherwise
 * step past one character measured by rb_enc_fast_mbclen. */
6420 if (RSTRING_LEN(str) <= end0)
break;
6421 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6423 offset = end0 +
len;
6425 cp = RSTRING_PTR(str) + offset;
6426 if (offset > RSTRING_LEN(str))
break;
6429 if (mode != FAST_MAP && mode != STR) {
/* Look for the next match starting at the new offset. */
6432 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6437 if (RSTRING_LEN(str) > offset) {
/* Re-run the search at `last` with set_backref_str = 1. */
6440 rb_pat_search0(pat, str, last, 1, &match);
/* Make str take over the contents of dest. */
6442 str_shared_replace(str, dest);
/*
 * In-place global substitution: calls str_modify_keep_cr() on the receiver
 * and then delegates to str_gsub() with bang = 1, returning its result.
 */
6467rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6469 str_modify_keep_cr(str);
6470 return str_gsub(argc, argv, str, 1);
6520 return str_gsub(argc, argv, str, 0);
6540 str_modifiable(str);
6541 if (str == str2)
return str;
6545 return str_replace(str, str2);
/*
 * Empty `str`: the visible lines set its length to 0 and store a zero byte
 * at the start of its buffer.
 * NOTE(review): the statements before the length reset and the bodies of
 * the encoding test are not visible in this listing.
 */
6562rb_str_clear(
VALUE str)
6566 STR_SET_LEN(str, 0);
6567 RSTRING_PTR(str)[0] = 0;
/* Follow-up depends on whether the encoding is ASCII-compatible. */
6568 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6584rb_str_chr(
VALUE str)
6602 pos += RSTRING_LEN(str);
6603 if (pos < 0 || RSTRING_LEN(str) <= pos)
6606 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6626 long len = RSTRING_LEN(str);
6627 char *
ptr, *head, *left = 0;
6631 if (pos < -
len ||
len <= pos)
6638 char byte = (char)(
NUM2INT(w) & 0xFF);
6640 if (!str_independent(str))
6641 str_make_independent(str);
6642 enc = STR_ENC_GET(str);
6643 head = RSTRING_PTR(str);
6645 if (!STR_EMBED_P(str)) {
6652 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6660 width = rb_enc_precise_mbclen(left, head+
len, enc);
6662 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
/*
 * Byte-indexed substring of `str`: `beg` is a byte offset, `len` a byte
 * count. Returns Qnil for out-of-range requests; `empty` controls whether
 * one of the boundary cases yields Qnil (the guarding condition is not
 * visible in this listing).
 */
6678str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6680 long n = RSTRING_LEN(str);
/* Start past the end, or a negative length, selects nothing. */
6682 if (beg > n ||
len < 0)
return Qnil;
/* NOTE(review): presumably reached after a negative beg was offset by the
 * length (adjustment not visible) - confirm. */
6685 if (beg < 0)
return Qnil;
6690 if (!empty)
return Qnil;
/* Build the substring and give it the same encoding as the source. */
6694 VALUE str2 = str_subseq(str, beg,
len);
6696 str_enc_copy_direct(str2, str);
/* Empty results get extra handling that depends on ASCII compatibility
 * (details not visible here). */
6698 if (RSTRING_LEN(str2) == 0) {
6699 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6733 long beg,
len = RSTRING_LEN(str);
6741 return str_byte_substr(str, beg,
len, TRUE);
6746 return str_byte_substr(str, idx, 1, FALSE);
/*
 * Byte-slice entry point. One path returns str_byte_substr(str, beg, len,
 * TRUE); the other returns str_byte_aref(str, argv[0]).
 * NOTE(review): the argument-count test and the extraction of beg/len are
 * not visible in this listing.
 */
6758rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6763 return str_byte_substr(str, beg,
len, TRUE);
6766 return str_byte_aref(str, argv[0]);
/*
 * Validate and normalise a (*beg, *len) byte range against `str`.
 * NOTE(review): the bodies of the two range checks and the computation of
 * `end` are not visible in this listing.
 */
6770str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6772 long end, slen = RSTRING_LEN(str);
/* Out of range: start beyond the end, or a negative start that still lies
 * before the beginning after adding the length. */
6775 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
/* Requested length runs past the end of the string. */
6784 if (*
len > slen - *beg) {
/* Both ends of the range are passed to str_ensure_byte_pos(). */
6788 str_ensure_byte_pos(str, *beg);
6789 str_ensure_byte_pos(str, end);
/*
 * Replace a byte range of `str` with (a byte range of) another string.
 * Accepts 2, 3 or 5 arguments; raises ArgumentError otherwise and
 * TypeError when an argument that must be a Range is not one.
 * NOTE(review): the parsing of the individual argument forms is only
 * partly visible in this listing.
 */
6803rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6805 long beg,
len, vbeg, vlen;
6810 if (!(argc == 2 || argc == 3 || argc == 5)) {
6811 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
/* argv[0] failed the Range check. */
6815 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6816 rb_builtin_class_name(argv[0]));
6823 vlen = RSTRING_LEN(val);
/* argv[2] failed the Range check. */
6828 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6829 rb_builtin_class_name(argv[2]));
6841 vlen = RSTRING_LEN(val);
/* Validate both ranges before touching the receiver. */
6849 str_check_beg_len(str, &beg, &
len);
6850 str_check_beg_len(val, &vbeg, &vlen);
6851 str_modify_keep_cr(str);
/* rb_enc_check() yields the encoding to associate with str. */
6854 rb_enc_associate(str, rb_enc_check(str, val));
/* Perform the actual byte splice. */
6857 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
/*
 * Return a reversed copy of `str`.
 * NOTE(review): the copy loops and the allocation of `rev` are not visible
 * in this listing; only the branch heads are.
 */
6879rb_str_reverse(
VALUE str)
/* Strings of at most one byte reverse to themselves: return a duplicate. */
6886 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6887 enc = STR_ENC_GET(str);
6893 if (RSTRING_LEN(str) > 1) {
/* Single-byte-optimizable strings take a separate branch. */
6894 if (single_byte_optimizable(str)) {
/* Character length measured with rb_enc_fast_mbclen in this branch. */
6901 int clen = rb_enc_fast_mbclen(s, e, enc);
6909 cr = rb_enc_asciicompat(enc) ?
/* Character length measured with rb_enc_mbclen in this branch. */
6912 int clen = rb_enc_mbclen(s, e, enc);
/* The result has the same byte length and encoding as the source. */
6921 STR_SET_LEN(rev, RSTRING_LEN(str));
6922 str_enc_copy_direct(rev, str);
/*
 * Reverse `str` in place.
 * NOTE(review): the in-place swap loop and the return are not visible in
 * this listing.
 */
6944rb_str_reverse_bang(
VALUE str)
6946 if (RSTRING_LEN(str) > 1) {
/* Single-byte-optimizable strings are handled directly on the buffer. */
6947 if (single_byte_optimizable(str)) {
6950 str_modify_keep_cr(str);
6951 s = RSTRING_PTR(str);
/* Otherwise build a reversed copy and let str take over its contents. */
6960 str_shared_replace(str, rb_str_reverse(str));
6964 str_modify_keep_cr(str);
6993 i = rb_str_index(str, arg, 0);
6995 return RBOOL(i != -1);
7039 rb_raise(rb_eArgError,
"invalid radix %d", base);
7041 return rb_str_to_inum(str, base, FALSE);
7066rb_str_to_f(
VALUE str)
7083rb_str_to_s(
VALUE str)
7095 char s[RUBY_MAX_CHAR_LEN];
7096 int n = rb_enc_codelen(c, enc);
7098 rb_enc_mbcput(c, s, enc);
/* Size bound for one escaped character. The longest format used with it in
 * this file is "\\x{%X}" / "\\u{%X}" on an unsigned int: 12 characters plus
 * NUL for a 32-bit value (assumes the value fits in 32 bits - confirm). */
7103#define CHAR_ESC_LEN 13
/*
 * Format code point `c` as an escape sequence into a local buffer.
 * `unicode_p` appears to select the \u forms over the \x forms (the branch
 * conditions on it are not visible in this listing - confirm).
 * NOTE(review): buf holds CHAR_ESC_LEN + 1 bytes while every snprintf is
 * bounded by CHAR_ESC_LEN; this only wastes one byte for the formats shown
 * with a 32-bit `c`, but the two sizes look like they were meant to match.
 */
7106rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7108 char buf[CHAR_ESC_LEN + 1];
/* Character emitted as itself. */
7116 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
/* Below 0x10000: four-digit \uXXXX form. */
7118 else if (c < 0x10000) {
7119 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
/* Larger code points: braced \u{...} form. */
7122 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
/* Two-digit \xXX form. */
7127 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
/* Braced \x{...} form. */
7130 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
/* Length of the formatted escape. */
7133 l = (int)strlen(buf);
/*
 * Map a control character to its backslash-escape spelling, returned as a
 * string literal.
 * NOTE(review): the switch header and the fall-back for other values are
 * not visible in this listing.
 */
7139ruby_escaped_char(
int c)
7142 case '\0':
return "\\0";
7143 case '\n':
return "\\n";
7144 case '\r':
return "\\r";
7145 case '\t':
return "\\t";
7146 case '\f':
return "\\f";
/* 013 octal: vertical tab. */
7147 case '\013':
return "\\v";
/* 010 octal: backspace. */
7148 case '\010':
return "\\b";
/* 007 octal: bell. */
7149 case '\007':
return "\\a";
/* 033 octal: escape. */
7150 case '\033':
return "\\e";
/* 0x7f: delete, spelled as control-?. */
7151 case '\x7f':
return "\\c?";
/*
 * Build an escaped copy of `str` in `result`. `prev` marks the start of the
 * pending run of bytes that can be copied verbatim; it is flushed with
 * str_buf_cat() before each escape is appended.
 * NOTE(review): the loop header, the advancing of p/prev and the return are
 * not visible in this listing.
 */
7157rb_str_escape(
VALUE str)
7161 const char *p = RSTRING_PTR(str);
7163 const char *prev = p;
7164 char buf[CHAR_ESC_LEN + 1];
7166 int unicode_p = rb_enc_unicode_p(enc);
7167 int asciicompat = rb_enc_asciicompat(enc);
/* Byte length of the character at p. */
7172 int n = rb_enc_precise_mbclen(p, pend, enc);
/* Invalid-sequence path: flush the verbatim run, then emit bytes as \xXX. */
7174 if (p > prev) str_buf_cat(result, prev, p - prev);
7175 n = rb_enc_mbminlen(enc);
7177 n = (int)(pend - p);
7179 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7180 str_buf_cat(result, buf, strlen(buf));
7186 c = rb_enc_mbc_to_codepoint(p, pend, enc);
/* Named escape from ruby_escaped_char(): flush the run up to the character
 * (p - n is its start, implying p was already advanced) and append it. */
7188 cc = ruby_escaped_char(c);
7190 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7191 str_buf_cat(result, cc, strlen(cc));
/* Printable ASCII in an ASCII-compatible encoding takes a separate branch
 * (its body is not visible here). */
7194 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
/* Everything else: flush, then append a numeric escape. */
7197 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7198 rb_str_buf_cat_escaped_char(result, c, unicode_p);
/* Flush whatever verbatim run remains. */
7202 if (p > prev) str_buf_cat(result, prev, p - prev);
7221 const char *p, *pend, *prev;
7222 char buf[CHAR_ESC_LEN + 1];
7224 rb_encoding *resenc = rb_default_internal_encoding();
7225 int unicode_p = rb_enc_unicode_p(enc);
7226 int asciicompat = rb_enc_asciicompat(enc);
7228 if (resenc == NULL) resenc = rb_default_external_encoding();
7229 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7230 rb_enc_associate(result, resenc);
7231 str_buf_cat2(result,
"\"");
7239 n = rb_enc_precise_mbclen(p, pend, enc);
7241 if (p > prev) str_buf_cat(result, prev, p - prev);
7242 n = rb_enc_mbminlen(enc);
7244 n = (int)(pend - p);
7246 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7247 str_buf_cat(result, buf, strlen(buf));
7253 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7255 if ((asciicompat || unicode_p) &&
7256 (c ==
'"'|| c ==
'\\' ||
7261 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7262 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7263 str_buf_cat2(result,
"\\");
7264 if (asciicompat || enc == resenc) {
7270 case '\n': cc =
'n';
break;
7271 case '\r': cc =
'r';
break;
7272 case '\t': cc =
't';
break;
7273 case '\f': cc =
'f';
break;
7274 case '\013': cc =
'v';
break;
7275 case '\010': cc =
'b';
break;
7276 case '\007': cc =
'a';
break;
7277 case 033: cc =
'e';
break;
7278 default: cc = 0;
break;
7281 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7284 str_buf_cat(result, buf, 2);
7297 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7301 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7302 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7307 if (p > prev) str_buf_cat(result, prev, p - prev);
7308 str_buf_cat2(result,
"\"");
/* True when p is still before e and points at '$', '@' or '{'. Evaluates p
 * up to four times, so pass an expression without side effects. */
7313#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7326 int encidx = rb_enc_get_index(str);
7329 const char *p, *pend;
7332 int u8 = (encidx == rb_utf8_encindex());
7333 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7336 if (!rb_enc_asciicompat(enc)) {
7338 len += strlen(enc->name);
7341 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7344 unsigned char c = *p++;
7347 case '"':
case '\\':
7348 case '\n':
case '\r':
7349 case '\t':
case '\f':
7350 case '\013':
case '\010':
case '\007':
case '\033':
7355 clen = IS_EVSTR(p, pend) ? 2 : 1;
7363 if (u8 && c > 0x7F) {
7364 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7366 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7369 else if (cc <= 0xFFFFF)
7382 if (clen > LONG_MAX -
len) {
7389 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7390 q = RSTRING_PTR(result); qend = q +
len + 1;
7394 unsigned char c = *p++;
7396 if (c ==
'"' || c ==
'\\') {
7400 else if (c ==
'#') {
7401 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7404 else if (c ==
'\n') {
7408 else if (c ==
'\r') {
7412 else if (c ==
'\t') {
7416 else if (c ==
'\f') {
7420 else if (c ==
'\013') {
7424 else if (c ==
'\010') {
7428 else if (c ==
'\007') {
7432 else if (c ==
'\033') {
7442 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7444 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7447 snprintf(q, qend-q,
"u%04X", cc);
7449 snprintf(q, qend-q,
"u{%X}", cc);
7454 snprintf(q, qend-q,
"x%02X", c);
7460 if (!rb_enc_asciicompat(enc)) {
7461 snprintf(q, qend-q, nonascii_suffix, enc->name);
7462 encidx = rb_ascii8bit_encindex();
7465 rb_enc_associate_index(result, encidx);
7471unescape_ascii(
unsigned int c)
/*
 * Decode one backslash escape of a dumped string, reading from *ss (bounded
 * by s_end). *penc, *utf8 and *binary are in/out state shared with the
 * caller.
 * NOTE(review): the switch over escape kinds, the appends to `undumped` and
 * the update of *ss are not visible in this listing.
 */
7495undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7497 const char *s = *ss;
/* Scratch space for one encoded character. */
7501 unsigned char buf[6];
/* Simple one-letter escape translated by unescape_ascii(). */
7519 *buf = unescape_ascii(*s);
/* Unicode escape: UTF-8 is looked up on first use, and the result is
 * associated with it when *penc differs. */
7531 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7532 if (*penc != enc_utf8) {
7534 rb_enc_associate(undumped, enc_utf8);
/* Braced form: between 1 and 6 hex digits are accepted. */
7551 if (hexlen == 0 || hexlen > 6) {
/* Values in the surrogate range 0xD800..0xDFFF are singled out. */
7557 if (0xd800 <= c && c <= 0xdfff) {
7560 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
/* Second Unicode form, same surrogate check. */
7570 if (0xd800 <= c && c <= 0xdfff) {
7573 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7601static VALUE rb_str_is_ascii_only_p(
VALUE str);
/*
 * Reverse of the dump format: parse a double-quoted, escaped string,
 * optionally followed by `.dup` and `.force_encoding("NAME")`, and rebuild
 * the original string in `undumped`.
 * NOTE(review): the main scanning loop and several error branches are not
 * visible in this listing.
 */
7613str_undump(
VALUE str)
7615 const char *s = RSTRING_PTR(str);
/* Result starts as an empty string in the source encoding. */
7618 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7620 bool binary =
false;
/* Input that is not ASCII-only is treated specially (body not visible). */
7624 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7627 if (!str_null_check(str, &w)) {
/* Shorter than two bytes, or not starting with a double quote. */
7630 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7631 if (*s !=
'"')
goto invalid_format;
7649 static const char force_encoding_suffix[] =
".force_encoding(\"";
7650 static const char dup_suffix[] =
".dup";
7651 const char *encname;
/* Skip an optional ".dup". */
7656 size =
sizeof(dup_suffix) - 1;
7657 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
/* What follows must be the force_encoding prefix with more text after it. */
7659 size =
sizeof(force_encoding_suffix) - 1;
7660 if (s_end - s <= size)
goto invalid_format;
7661 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7665 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
/* The encoding name must end with exactly `")` at the end of input. */
7669 s = memchr(s,
'"', s_end-s);
7671 if (!s)
goto invalid_format;
7672 if (s_end - s != 2)
goto invalid_format;
7673 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
/* Resolve the named encoding and associate it with the result. */
7675 encidx = rb_enc_find_index2(encname, (
long)size);
7679 rb_enc_associate_index(undumped, encidx);
/* Escape sequences in the body are decoded by the helper. */
7689 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7700 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7706 if (rb_enc_dummy_p(enc)) {
7713str_true_enc(
VALUE str)
7716 rb_str_check_dummy_enc(enc);
/*
 * Translate the option symbols passed to a case-mapping method into bits
 * OR-ed (or, for :fold, XOR-ed) into `flags`.
 * Accepted first options visible here: :turkic, :lithuanian, :ascii, :fold;
 * :turkic and :lithuanian may be combined with each other as a second
 * option. Anything else raises ArgumentError.
 * NOTE(review): the argc tests guarding each raise and the final return are
 * not visible in this listing.
 */
7720static OnigCaseFoldType
7721check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7726 rb_raise(rb_eArgError,
"too many options");
7727 if (argv[0]==sym_turkic) {
7728 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
/* Only :lithuanian is accepted as a second option after :turkic. */
7730 if (argv[1]==sym_lithuanian)
7731 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7733 rb_raise(rb_eArgError,
"invalid second option");
7736 else if (argv[0]==sym_lithuanian) {
7737 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
/* Only :turkic is accepted as a second option after :lithuanian. */
7739 if (argv[1]==sym_turkic)
7740 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7742 rb_raise(rb_eArgError,
"invalid second option");
7746 rb_raise(rb_eArgError,
"too many options");
7747 else if (argv[0]==sym_ascii)
7748 flags |= ONIGENC_CASE_ASCII_ONLY;
7749 else if (argv[0]==sym_fold) {
/* :fold is valid only when DOWNCASE is set and UPCASE is not; the XOR then
 * flips both the FOLD and DOWNCASE bits. */
7750 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7751 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7753 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7756 rb_raise(rb_eArgError,
"invalid option");
7763 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
7769#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7770#ifndef CASEMAP_DEBUG
7771# define CASEMAP_DEBUG 0
7779 OnigUChar space[FLEX_ARY_LEN];
/*
 * Free a singly linked chain of mapping buffers: each node is released with
 * ruby_sized_xfree() after the `next` link has been read from it.
 * NOTE(review): the declarations that derive current_buffer from `p` are
 * not visible in this listing.
 */
7783mapping_buffer_free(
void *p)
7787 while (current_buffer) {
/* Remember the node, advance first, then free the remembered node. */
7788 previous_buffer = current_buffer;
7789 current_buffer = current_buffer->next;
7790 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7796 {0, mapping_buffer_free,},
7797 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7805 const OnigUChar *source_current, *source_end;
7806 int target_length = 0;
7807 VALUE buffer_anchor;
7810 size_t buffer_count = 0;
7811 int buffer_length_or_invalid;
7813 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7815 source_current = (OnigUChar*)RSTRING_PTR(source);
7820 while (source_current < source_end) {
7822 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7823 if (CASEMAP_DEBUG) {
7824 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7827 *pre_buffer = current_buffer;
7828 pre_buffer = ¤t_buffer->next;
7829 current_buffer->next = NULL;
7830 current_buffer->capa =
capa;
7831 buffer_length_or_invalid = enc->case_map(flags,
7832 &source_current, source_end,
7833 current_buffer->space,
7834 current_buffer->space+current_buffer->capa,
7836 if (buffer_length_or_invalid < 0) {
7837 current_buffer =
DATA_PTR(buffer_anchor);
7839 mapping_buffer_free(current_buffer);
7840 rb_raise(rb_eArgError,
"input string invalid");
7842 target_length += current_buffer->used = buffer_length_or_invalid;
7844 if (CASEMAP_DEBUG) {
7845 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7848 if (buffer_count==1) {
7849 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7852 char *target_current;
7855 target_current = RSTRING_PTR(target);
7856 current_buffer =
DATA_PTR(buffer_anchor);
7857 while (current_buffer) {
7858 memcpy(target_current, current_buffer->space, current_buffer->used);
7859 target_current += current_buffer->used;
7860 current_buffer = current_buffer->next;
7863 current_buffer =
DATA_PTR(buffer_anchor);
7865 mapping_buffer_free(current_buffer);
7870 str_enc_copy_direct(target, source);
7879 const OnigUChar *source_current, *source_end;
7880 OnigUChar *target_current, *target_end;
7881 long old_length = RSTRING_LEN(source);
7882 int length_or_invalid;
7884 if (old_length == 0)
return Qnil;
7886 source_current = (OnigUChar*)RSTRING_PTR(source);
7888 if (source == target) {
7889 target_current = (OnigUChar*)source_current;
7890 target_end = (OnigUChar*)source_end;
7893 target_current = (OnigUChar*)RSTRING_PTR(target);
7897 length_or_invalid = onigenc_ascii_only_case_map(flags,
7898 &source_current, source_end,
7899 target_current, target_end, enc);
7900 if (length_or_invalid < 0)
7901 rb_raise(rb_eArgError,
"input string invalid");
7902 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7903 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7904 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7905 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7906 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7909 str_enc_copy(target, source);
/*
 * In-place ASCII upcasing of `str`, one byte at a time: bytes in 'a'..'z'
 * are replaced by the corresponding 'A'..'Z'.
 * NOTE(review): the loop header, the update of `modified` and the return
 * are not visible in this listing; presumably `modified` is returned.
 */
7915upcase_single(
VALUE str)
7917 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7918 bool modified =
false;
/* Read the byte as unsigned so values >= 0x80 never compare as negative. */
7921 unsigned int c = *(
unsigned char*)s;
7923 if (
'a' <= c && c <=
'z') {
7924 *s =
'A' + (c -
'a');
/*
 * In-place upcase. Chooses between three strategies: the single-byte fast
 * path (upcase_single), the ASCII-only case map, and the full
 * encoding-aware case map whose result replaces str's contents.
 * Returns str when ONIGENC_CASE_MODIFIED ended up set in flags; the
 * alternative return is not visible in this listing.
 */
7945rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7948 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7950 flags = check_case_options(argc, argv, flags);
7951 str_modify_keep_cr(str);
7952 enc = str_true_enc(str);
7953 if (case_option_single_p(flags, enc, str)) {
/* The fast path reports a change itself; record it in flags. */
7954 if (upcase_single(str))
7955 flags |= ONIGENC_CASE_MODIFIED;
7957 else if (flags&ONIGENC_CASE_ASCII_ONLY)
/* Source and target are the same string: mapped in place. */
7958 rb_str_ascii_casemap(str, str, &flags, enc);
7960 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7962 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive upcase: builds the result in `ret`, using the same three
 * strategies as the in-place variant.
 * NOTE(review): the call that upcases `ret` on the single-byte path, the
 * creation of `ret` on the ASCII-only path and the return are not visible
 * in this listing.
 */
7975rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7978 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7981 flags = check_case_options(argc, argv, flags);
7982 enc = str_true_enc(str);
7983 if (case_option_single_p(flags, enc, str)) {
/* Copy the bytes and the encoding into a fresh string. */
7984 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7985 str_enc_copy_direct(ret, str);
7988 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7990 rb_str_ascii_casemap(str, ret, &flags, enc);
7993 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place ASCII downcasing of `str`, one byte at a time: bytes in 'A'..'Z'
 * are replaced by the corresponding 'a'..'z'.
 * NOTE(review): the loop header, the update of `modified` and the return
 * are not visible in this listing; presumably `modified` is returned.
 */
8000downcase_single(
VALUE str)
8002 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8003 bool modified =
false;
/* Read the byte as unsigned so values >= 0x80 never compare as negative. */
8006 unsigned int c = *(
unsigned char*)s;
8008 if (
'A' <= c && c <=
'Z') {
8009 *s =
'a' + (c -
'A');
/*
 * In-place downcase. Chooses between the single-byte fast path
 * (downcase_single), the ASCII-only case map, and the full encoding-aware
 * case map whose result replaces str's contents.
 * Returns str when ONIGENC_CASE_MODIFIED ended up set in flags; the
 * alternative return is not visible in this listing.
 */
8031rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8034 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8036 flags = check_case_options(argc, argv, flags);
8037 str_modify_keep_cr(str);
8038 enc = str_true_enc(str);
8039 if (case_option_single_p(flags, enc, str)) {
/* The fast path reports a change itself; record it in flags. */
8040 if (downcase_single(str))
8041 flags |= ONIGENC_CASE_MODIFIED;
8043 else if (flags&ONIGENC_CASE_ASCII_ONLY)
/* Source and target are the same string: mapped in place. */
8044 rb_str_ascii_casemap(str, str, &flags, enc);
8046 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8048 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive downcase: builds the result in `ret`.
 * NOTE(review): the creation of `ret` on the ASCII-only path and the return
 * are not visible in this listing.
 */
8062rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8065 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8068 flags = check_case_options(argc, argv, flags);
8069 enc = str_true_enc(str);
8070 if (case_option_single_p(flags, enc, str)) {
/* Single-byte path: copy bytes and encoding, then downcase the copy. */
8071 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8072 str_enc_copy_direct(ret, str);
8073 downcase_single(ret);
8075 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8077 rb_str_ascii_casemap(str, ret, &flags, enc);
8080 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place capitalize: case-maps with UPCASE | TITLECASE. Empty strings
 * (or a null buffer) yield Qnil; str is returned when the mapping set
 * ONIGENC_CASE_MODIFIED. The alternative final return is not visible in
 * this listing.
 */
8100rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8103 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8105 flags = check_case_options(argc, argv, flags);
8106 str_modify_keep_cr(str);
8107 enc = str_true_enc(str);
8108 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8109 if (flags&ONIGENC_CASE_ASCII_ONLY)
/* Source and target are the same string: mapped in place. */
8110 rb_str_ascii_casemap(str, str, &flags, enc);
8112 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8114 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive capitalize (UPCASE | TITLECASE). For an empty string (or
 * null buffer) the receiver itself is returned, not a copy.
 * NOTE(review): the creation of `ret` on the ASCII-only path and the final
 * return are not visible in this listing.
 */
8147rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8150 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8153 flags = check_case_options(argc, argv, flags);
8154 enc = str_true_enc(str);
8155 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8156 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8158 rb_str_ascii_casemap(str, ret, &flags, enc);
8161 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place swapcase: case-maps with both UPCASE and DOWNCASE set. Returns
 * str when the mapping set ONIGENC_CASE_MODIFIED; the alternative return
 * is not visible in this listing.
 */
8180rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8183 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8185 flags = check_case_options(argc, argv, flags);
8186 str_modify_keep_cr(str);
8187 enc = str_true_enc(str);
8188 if (flags&ONIGENC_CASE_ASCII_ONLY)
/* Source and target are the same string: mapped in place. */
8189 rb_str_ascii_casemap(str, str, &flags, enc);
8191 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8193 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive swapcase (UPCASE | DOWNCASE). An empty string (or null
 * buffer) yields a duplicate of the receiver.
 * NOTE(review): the creation of `ret` on the ASCII-only path and the final
 * return are not visible in this listing.
 */
8207rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8210 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8213 flags = check_case_options(argc, argv, flags);
8214 enc = str_true_enc(str);
8215 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8216 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8218 rb_str_ascii_casemap(str, ret, &flags, enc);
8221 ret = rb_str_casemap(str, &flags, enc);
8226typedef unsigned char *USTR;
8230 unsigned int now, max;
8242 if (t->p == t->pend)
return -1;
8243 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8246 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8248 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8250 if (t->p < t->pend) {
8251 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8254 if (t->now < 0x80 && c < 0x80) {
8255 rb_raise(rb_eArgError,
8256 "invalid range \"%c-%c\" in string transliteration",
8260 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8264 else if (t->now < c) {
8273 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8274 if (t->now == t->max) {
8279 if (t->now < t->max) {
8295 const unsigned int errc = -1;
8296 unsigned int trans[256];
8298 struct tr trsrc, trrepl;
8300 unsigned int c, c0, last = 0;
8301 int modify = 0, i, l;
8302 unsigned char *s, *send;
8304 int singlebyte = single_byte_optimizable(str);
8308#define CHECK_IF_ASCII(c) \
8309 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8310 (cr = ENC_CODERANGE_VALID) : 0)
8314 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8315 if (RSTRING_LEN(repl) == 0) {
8316 return rb_str_delete_bang(1, &src, str);
8320 e1 = rb_enc_check(str, src);
8321 e2 = rb_enc_check(str, repl);
8326 enc = rb_enc_check(src, repl);
8328 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8329 if (RSTRING_LEN(src) > 1 &&
8330 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8331 trsrc.p + l < trsrc.pend) {
8335 trrepl.p = RSTRING_PTR(repl);
8336 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8337 trsrc.gen = trrepl.gen = 0;
8338 trsrc.now = trrepl.now = 0;
8339 trsrc.max = trrepl.max = 0;
8342 for (i=0; i<256; i++) {
8345 while ((c = trnext(&trsrc, enc)) != errc) {
8350 if (!hash) hash = rb_hash_new();
8354 while ((c = trnext(&trrepl, enc)) != errc)
8357 for (i=0; i<256; i++) {
8358 if (trans[i] != errc) {
8366 for (i=0; i<256; i++) {
8369 while ((c = trnext(&trsrc, enc)) != errc) {
8370 r = trnext(&trrepl, enc);
8371 if (r == errc) r = trrepl.now;
8374 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8377 if (!hash) hash = rb_hash_new();
8385 str_modify_keep_cr(str);
8386 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8387 termlen = rb_enc_mbminlen(enc);
8390 long offset, max = RSTRING_LEN(str);
8391 unsigned int save = -1;
8392 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8397 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8400 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8403 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8405 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8414 if (cflag) c = last;
8417 else if (cflag) c = errc;
8423 if (c != (
unsigned int)-1) {
8429 tlen = rb_enc_codelen(c, enc);
8435 if (enc != e1) may_modify = 1;
8437 if ((offset = t - buf) + tlen > max) {
8438 size_t MAYBE_UNUSED(old) = max + termlen;
8439 max = offset + tlen + (send - s);
8440 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8443 rb_enc_mbcput(c, t, enc);
8444 if (may_modify && memcmp(s, t, tlen) != 0) {
8450 if (!STR_EMBED_P(str)) {
8451 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8453 TERM_FILL((
char *)t, termlen);
8454 RSTRING(str)->as.heap.ptr = (
char *)buf;
8455 STR_SET_LEN(str, t - buf);
8456 STR_SET_NOEMBED(str);
8457 RSTRING(str)->as.heap.aux.capa = max;
8461 c = (
unsigned char)*s;
8462 if (trans[c] != errc) {
8479 long offset, max = (long)((send - s) * 1.2);
8480 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8485 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8488 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8491 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8493 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8501 if (cflag) c = last;
8504 else if (cflag) c = errc;
8508 c = cflag ? last : errc;
8511 tlen = rb_enc_codelen(c, enc);
8516 if (enc != e1) may_modify = 1;
8518 if ((offset = t - buf) + tlen > max) {
8519 size_t MAYBE_UNUSED(old) = max + termlen;
8520 max = offset + tlen + (long)((send - s) * 1.2);
8521 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8525 rb_enc_mbcput(c, t, enc);
8526 if (may_modify && memcmp(s, t, tlen) != 0) {
8534 if (!STR_EMBED_P(str)) {
8535 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8537 TERM_FILL((
char *)t, termlen);
8538 RSTRING(str)->as.heap.ptr = (
char *)buf;
8539 STR_SET_LEN(str, t - buf);
8540 STR_SET_NOEMBED(str);
8541 RSTRING(str)->as.heap.aux.capa = max;
8547 rb_enc_associate(str, enc);
8569 return tr_trans(str, src, repl, 0);
8614 tr_trans(str, src, repl, 0);
/* Number of byte-indexed entries in a tr lookup table: one per unsigned
 * char value. */
8618#define TR_TABLE_MAX (UCHAR_MAX+1)
/* Full table size: the byte entries plus one extra slot at index
 * TR_TABLE_MAX, which the table code uses as a flag. */
8619#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
/*
 * Fold the character specification in `str` into the lookup table
 * `stable`. Code points below TR_TABLE_MAX go through the byte table;
 * larger ones are recorded in hash tables. Slot stable[TR_TABLE_MAX] holds
 * a flag derived from the leading-'^' setting (cflag).
 * NOTE(review): the remaining parameters of the signature and many branch
 * bodies are not visible in this listing.
 */
8621tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
/* Sentinel compared against trnext()'s result to detect the end. */
8624 const unsigned int errc = -1;
8625 char buf[TR_TABLE_MAX];
8628 VALUE table = 0, ptable = 0;
8629 int i, l, cflag = 0;
8631 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8632 tr.gen =
tr.now =
tr.max = 0;
/* A specification longer than one byte that begins with '^' is tested here
 * (presumably setting cflag - body not visible). */
8634 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8639 for (i=0; i<TR_TABLE_MAX; i++) {
8642 stable[TR_TABLE_MAX] = cflag;
8644 else if (stable[TR_TABLE_MAX] && !cflag) {
8645 stable[TR_TABLE_MAX] = 0;
8647 for (i=0; i<TR_TABLE_MAX; i++) {
/* Walk every code point produced by the specification. */
8651 while ((c = trnext(&
tr, enc)) != errc) {
8652 if (c < TR_TABLE_MAX) {
/* Byte-range code point: mark it as !cflag in the scratch table. */
8653 buf[(
unsigned char)c] = !cflag;
8658 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8661 table = ptable ? ptable : rb_hash_new();
8665 table = rb_hash_new();
8670 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8671 rb_hash_aset(table, key,
Qtrue);
/* Combine with earlier specifications: an entry stays set only if it is set
 * in both the accumulated table and this specification's table. */
8675 for (i=0; i<TR_TABLE_MAX; i++) {
8676 stable[i] = stable[i] && buf[i];
8678 if (!table && !cflag) {
/*
 * Test whether code point `c` is selected by a table built with
 * tr_setup_table(). Byte-range values are answered from `table` directly;
 * larger values consult the `del` / `nodel` hashes and fall back to the
 * flag in table[TR_TABLE_MAX].
 * NOTE(review): the creation of `v` and the return values inside the hash
 * branches are not visible in this listing.
 */
8685tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8687 if (c < TR_TABLE_MAX) {
8688 return table[c] != 0;
/* Present in `del` and not present in `nodel` (when there is one). */
8694 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8695 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
/* Present in `nodel`. */
8699 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
/* Otherwise the flag slot decides. */
8702 return table[TR_TABLE_MAX] ? TRUE : FALSE;
/*
 * In-place delete: compacts `str` with a read pointer `s` and a write
 * pointer `t`, both starting at the buffer start. Returns Qnil for an
 * empty string and str when `modify` is set.
 * NOTE(review): the main loop, the handling of selected characters and the
 * final alternative return are not visible in this listing.
 */
8717rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8719 char squeez[TR_TABLE_SIZE];
8722 VALUE del = 0, nodel = 0;
8724 int i, ascompat, cr;
8726 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
/* Fold every argument into the selection table; only the first call passes
 * first = true (i==0). */
8728 for (i=0; i<argc; i++) {
8732 enc = rb_enc_check(str, s);
8733 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8736 str_modify_keep_cr(str);
8737 ascompat = rb_enc_asciicompat(enc);
8738 s = t = RSTRING_PTR(str);
/* ASCII bytes in an ASCII-compatible encoding take a byte-wise branch. */
8745 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
/* Otherwise decode a full code point and test it with tr_find(). */
8756 c = rb_enc_codepoint_len(s, send, &clen, enc);
8758 if (tr_find(c, squeez, del, nodel)) {
/* The character is re-encoded at the write position only when the read and
 * write positions differ. */
8762 if (t != s) rb_enc_mbcput(c, t, enc);
/* Terminate at the write position and store the new length. */
8769 TERM_FILL(t, TERM_LEN(str));
8770 STR_SET_LEN(str, t - RSTRING_PTR(str));
8773 if (modify)
return str;
8787rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8790 rb_str_delete_bang(argc, argv, str);
/*
 * In-place squeeze: a character is kept when it differs from the previously
 * seen one (`save`) or, if arguments were given, when it is not selected by
 * them. Returns Qnil for an empty string and str when `modify` is set.
 * NOTE(review): loop headers, updates of `save`/`modify` and the final
 * alternative return are not visible in this listing.
 */
8808rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8810 char squeez[TR_TABLE_SIZE];
8812 VALUE del = 0, nodel = 0;
8813 unsigned char *s, *send, *t;
8815 int ascompat, singlebyte = single_byte_optimizable(str);
8819 enc = STR_ENC_GET(str);
/* Fold every argument into the selection table. */
8822 for (i=0; i<argc; i++) {
8826 enc = rb_enc_check(str, s);
/* An argument that is not single-byte-optimizable affects the fast path
 * (the statement it guards is not visible here). */
8827 if (singlebyte && !single_byte_optimizable(s))
8829 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8833 str_modify_keep_cr(str);
8834 s = t = (
unsigned char *)RSTRING_PTR(str);
8835 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8838 ascompat = rb_enc_asciicompat(enc);
/* Byte-wise variant. */
8842 unsigned int c = *s++;
8843 if (c != save || (argc > 0 && !squeez[c])) {
/* Multibyte variant, ASCII byte case. */
8853 if (ascompat && (c = *s) < 0x80) {
8854 if (c != save || (argc > 0 && !squeez[c])) {
/* Multibyte variant, full code point case. */
8860 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8862 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8863 if (t != s) rb_enc_mbcput(c, t, enc);
/* Terminate at the write position; the length is stored only if it
 * actually changed. */
8872 TERM_FILL((
char *)t, TERM_LEN(str));
8873 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8874 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8878 if (modify)
return str;
8892rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8895 rb_str_squeeze_bang(argc, argv, str);
8915 return tr_trans(str, src, repl, 1);
8943 tr_trans(str, src, repl, 1);
/*
 * Count the characters of `str` selected by the argument specifications.
 * A fast path handles a single one-byte specification by comparing bytes
 * directly; the general path builds a table from all arguments and tests
 * each character with it. Empty strings give INT2FIX(0).
 * NOTE(review): loop headers, the counter updates on the general path and
 * the final return are not visible in this listing.
 */
8956rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8958 char table[TR_TABLE_SIZE];
8960 VALUE del = 0, nodel = 0, tstr;
8970 enc = rb_enc_check(str, tstr);
/* Fast path: one-byte specification, ASCII-compatible encoding, the byte is
 * allowed for reverse matching, and str is not a broken string. */
8973 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8974 (ptstr = RSTRING_PTR(tstr),
8975 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8976 !is_broken_string(str)) {
8978 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8980 s = RSTRING_PTR(str);
8981 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
/* Plain byte comparison against the single target byte. */
8984 if (*(
unsigned char*)s++ == c) n++;
/* General path: first specification with first = TRUE, the rest FALSE. */
8990 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8991 for (i=1; i<argc; i++) {
8994 enc = rb_enc_check(str, tstr);
8995 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8998 s = RSTRING_PTR(str);
8999 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9001 ascompat = rb_enc_asciicompat(enc);
/* ASCII bytes take a byte-wise branch; others are decoded and tested with
 * tr_find(). */
9005 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9013 c = rb_enc_codepoint_len(s, send, &clen, enc);
9014 if (tr_find(c, table, del, nodel)) {
9025rb_fs_check(
VALUE val)
9029 if (
NIL_P(val))
return 0;
9034static const char isspacetable[256] = {
9035 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9036 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9037 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9038 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9039 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9040 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9041 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9042 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9046 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9047 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9048 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9049 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9050 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9053#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9056split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9058 if (empty_count >= 0 &&
len == 0) {
9059 return empty_count + 1;
9061 if (empty_count > 0) {
9066 }
while (--empty_count > 0);
9070 rb_yield(str_new_empty_String(str));
9071 }
while (--empty_count > 0);
9085 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9089literal_split_pattern(
VALUE spat, split_type_t default_type)
9097 return SPLIT_TYPE_CHARS;
9099 else if (rb_enc_asciicompat(enc)) {
9100 if (
len == 1 && ptr[0] ==
' ') {
9101 return SPLIT_TYPE_AWK;
9106 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9107 return SPLIT_TYPE_AWK;
9110 return default_type;
9123rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9128 split_type_t split_type;
9129 long beg, end, i = 0, empty_count = -1;
9134 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9136 if (lim <= 0) limit =
Qnil;
9137 else if (lim == 1) {
9138 if (RSTRING_LEN(str) == 0)
9149 if (
NIL_P(limit) && !lim) empty_count = 0;
9151 enc = STR_ENC_GET(str);
9152 split_type = SPLIT_TYPE_REGEXP;
9154 spat = get_pat_quoted(spat, 0);
9156 else if (
NIL_P(spat = rb_fs)) {
9157 split_type = SPLIT_TYPE_AWK;
9159 else if (!(spat = rb_fs_check(spat))) {
9160 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9165 if (split_type != SPLIT_TYPE_AWK) {
9170 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9171 if (split_type == SPLIT_TYPE_AWK) {
9173 split_type = SPLIT_TYPE_STRING;
9178 mustnot_broken(spat);
9179 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9187#define SPLIT_STR(beg, len) ( \
9188 empty_count = split_string(result, str, beg, len, empty_count), \
9189 str_mod_check(str, str_start, str_len))
9192 char *ptr = RSTRING_PTR(str);
9193 char *
const str_start = ptr;
9194 const long str_len = RSTRING_LEN(str);
9195 char *
const eptr = str_start + str_len;
9196 if (split_type == SPLIT_TYPE_AWK) {
9203 if (is_ascii_string(str)) {
9204 while (ptr < eptr) {
9205 c = (
unsigned char)*ptr++;
9207 if (ascii_isspace(c)) {
9213 if (!
NIL_P(limit) && lim <= i)
break;
9216 else if (ascii_isspace(c)) {
9217 SPLIT_STR(beg, end-beg);
9220 if (!
NIL_P(limit)) ++i;
9228 while (ptr < eptr) {
9231 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9240 if (!
NIL_P(limit) && lim <= i)
break;
9244 SPLIT_STR(beg, end-beg);
9247 if (!
NIL_P(limit)) ++i;
9255 else if (split_type == SPLIT_TYPE_STRING) {
9256 char *substr_start = ptr;
9257 char *sptr = RSTRING_PTR(spat);
9258 long slen = RSTRING_LEN(spat);
9261 mustnot_broken(str);
9262 enc = rb_enc_check(str, spat);
9263 while (ptr < eptr &&
9264 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9267 if (t != ptr + end) {
9271 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9272 str_mod_check(spat, sptr, slen);
9275 if (!
NIL_P(limit) && lim <= ++i)
break;
9277 beg = ptr - str_start;
9279 else if (split_type == SPLIT_TYPE_CHARS) {
9283 mustnot_broken(str);
9284 enc = rb_enc_get(str);
9285 while (ptr < eptr &&
9286 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9287 SPLIT_STR(ptr - str_start, n);
9289 if (!
NIL_P(limit) && lim <= ++i)
break;
9291 beg = ptr - str_start;
9295 long len = RSTRING_LEN(str);
9303 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9308 if (start == end && BEG(0) == END(0)) {
9313 else if (last_null == 1) {
9314 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9321 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9327 SPLIT_STR(beg, end-beg);
9328 beg = start = END(0);
9332 for (idx=1; idx < regs->num_regs; idx++) {
9333 if (BEG(idx) == -1)
continue;
9334 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9336 if (!
NIL_P(limit) && lim <= ++i)
break;
9338 if (match) rb_match_unbusy(match);
9340 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9341 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9344 return result ? result : str;
9354 return rb_str_split_m(1, &sep, str);
9357#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9372#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9375chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9377 const char *prev = rb_enc_prev_char(p, e, e, enc);
9380 prev = rb_enc_prev_char(p, e, e, enc);
9381 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9393 RSTRING_LEN(rs) != 1 ||
9394 RSTRING_PTR(rs)[0] !=
'\n')) {
9400#define rb_rs get_rs()
9407 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9408 long pos,
len, rslen;
9414 static ID keywords[1];
9419 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9423 if (!ENUM_ELEM(ary, str)) {
9431 if (!RSTRING_LEN(str))
goto end;
9433 ptr = subptr = RSTRING_PTR(str);
9435 len = RSTRING_LEN(str);
9437 rslen = RSTRING_LEN(rs);
9440 enc = rb_enc_get(str);
9442 enc = rb_enc_check(str, rs);
9447 const char *eol = NULL;
9449 while (subend < pend) {
9450 long chomp_rslen = 0;
9452 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9454 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9456 if (eol == subend)
break;
9460 chomp_rslen = -rslen;
9464 if (!subptr) subptr = subend;
9468 }
while (subend < pend);
9470 if (rslen == 0) chomp_rslen = 0;
9472 subend - subptr + (chomp ? chomp_rslen : rslen));
9473 if (ENUM_ELEM(ary, line)) {
9474 str_mod_check(str, ptr,
len);
9476 subptr = eol = NULL;
9481 rsptr = RSTRING_PTR(rs);
9482 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9491 rsptr = RSTRING_PTR(rs);
9492 rslen = RSTRING_LEN(rs);
9495 while (subptr < pend) {
9496 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9500 if (hit != adjusted) {
9504 subend = hit += rslen;
9507 subend = chomp_newline(subptr, subend, enc);
9514 if (ENUM_ELEM(ary, line)) {
9515 str_mod_check(str, ptr,
len);
9520 if (subptr != pend) {
9523 pend = chomp_newline(subptr, pend, enc);
9525 else if (pend - subptr >= rslen &&
9526 memcmp(pend - rslen, rsptr, rslen) == 0) {
9531 ENUM_ELEM(ary, line);
9552rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9555 return rb_str_enumerate_lines(argc, argv, str, 0);
9610rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9612 VALUE ary = WANTARRAY(
"lines", 0);
9613 return rb_str_enumerate_lines(argc, argv, str, ary);
9627 for (i=0; i<RSTRING_LEN(str); i++) {
9628 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9646rb_str_each_byte(
VALUE str)
9649 return rb_str_enumerate_bytes(str, 0);
9661rb_str_bytes(
VALUE str)
9663 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9664 return rb_str_enumerate_bytes(str, ary);
9682 ptr = RSTRING_PTR(str);
9683 len = RSTRING_LEN(str);
9684 enc = rb_enc_get(str);
9687 for (i = 0; i <
len; i += n) {
9688 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9693 for (i = 0; i <
len; i += n) {
9694 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9715rb_str_each_char(
VALUE str)
9718 return rb_str_enumerate_chars(str, 0);
9730rb_str_chars(
VALUE str)
9733 return rb_str_enumerate_chars(str, ary);
9737rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9742 const char *ptr, *end;
9745 if (single_byte_optimizable(str))
9746 return rb_str_enumerate_bytes(str, ary);
9749 ptr = RSTRING_PTR(str);
9751 enc = STR_ENC_GET(str);
9754 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9775rb_str_each_codepoint(
VALUE str)
9778 return rb_str_enumerate_codepoints(str, 0);
9790rb_str_codepoints(
VALUE str)
9793 return rb_str_enumerate_codepoints(str, ary);
9799 int encidx = rb_enc_to_index(enc);
9801 const OnigUChar source_ascii[] =
"\\X";
9802 const OnigUChar *source = source_ascii;
9803 size_t source_len =
sizeof(source_ascii) - 1;
9806#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9807#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9808#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9809#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9810#define CASE_UTF(e) \
9811 case ENCINDEX_UTF_##e: { \
9812 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9813 source = source_UTF_##e; \
9814 source_len = sizeof(source_UTF_##e); \
9817 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9825 regex_t *reg_grapheme_cluster;
9827 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9828 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9830 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9831 onig_error_code_to_str(message, r, &einfo);
9832 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9835 return reg_grapheme_cluster;
9841 int encidx = rb_enc_to_index(enc);
9842 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9844 if (encidx == rb_utf8_encindex()) {
9845 if (!reg_grapheme_cluster_utf8) {
9846 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9849 return reg_grapheme_cluster_utf8;
9858 size_t grapheme_cluster_count = 0;
9860 const char *ptr, *end;
9862 if (!rb_enc_unicode_p(enc)) {
9866 bool cached_reg_grapheme_cluster =
true;
9867 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9868 if (!reg_grapheme_cluster) {
9869 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9870 cached_reg_grapheme_cluster =
false;
9873 ptr = RSTRING_PTR(str);
9877 OnigPosition
len = onig_match(reg_grapheme_cluster,
9878 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9879 (
const OnigUChar *)ptr, NULL, 0);
9880 if (
len <= 0)
break;
9881 grapheme_cluster_count++;
9885 if (!cached_reg_grapheme_cluster) {
9886 onig_free(reg_grapheme_cluster);
9889 return SIZET2NUM(grapheme_cluster_count);
9893rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9897 const char *ptr0, *ptr, *end;
9899 if (!rb_enc_unicode_p(enc)) {
9900 return rb_str_enumerate_chars(str, ary);
9905 bool cached_reg_grapheme_cluster =
true;
9906 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9907 if (!reg_grapheme_cluster) {
9908 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9909 cached_reg_grapheme_cluster =
false;
9912 ptr0 = ptr = RSTRING_PTR(str);
9916 OnigPosition
len = onig_match(reg_grapheme_cluster,
9917 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9918 (
const OnigUChar *)ptr, NULL, 0);
9919 if (
len <= 0)
break;
9924 if (!cached_reg_grapheme_cluster) {
9925 onig_free(reg_grapheme_cluster);
9945rb_str_each_grapheme_cluster(
VALUE str)
9948 return rb_str_enumerate_grapheme_clusters(str, 0);
9960rb_str_grapheme_clusters(
VALUE str)
9963 return rb_str_enumerate_grapheme_clusters(str, ary);
9967chopped_length(
VALUE str)
9970 const char *p, *p2, *beg, *end;
9972 beg = RSTRING_PTR(str);
9973 end = beg + RSTRING_LEN(str);
9974 if (beg >= end)
return 0;
9975 p = rb_enc_prev_char(beg, end, end, enc);
9977 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9978 p2 = rb_enc_prev_char(beg, p, end, enc);
9979 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9997rb_str_chop_bang(
VALUE str)
9999 str_modify_keep_cr(str);
10000 if (RSTRING_LEN(str) > 0) {
10002 len = chopped_length(str);
10003 STR_SET_LEN(str,
len);
10004 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10023rb_str_chop(
VALUE str)
10029smart_chomp(
VALUE str,
const char *e,
const char *p)
10032 if (rb_enc_mbminlen(enc) > 1) {
10037 pp = e - rb_enc_mbminlen(enc);
10040 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10048 if (--e > p && *(e-1) ==
'\r') {
10065 char *pp, *e, *rsptr;
10067 char *
const p = RSTRING_PTR(str);
10068 long len = RSTRING_LEN(str);
10070 if (
len == 0)
return 0;
10073 return smart_chomp(str, e, p);
10076 enc = rb_enc_get(str);
10079 if (rb_enc_mbminlen(enc) > 1) {
10084 pp -= rb_enc_mbminlen(enc);
10087 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10094 while (e > p && *(e-1) ==
'\n') {
10096 if (e > p && *(e-1) ==
'\r')
10102 if (rslen >
len)
return len;
10104 enc = rb_enc_get(rs);
10105 newline = rsptr[rslen-1];
10106 if (rslen == rb_enc_mbminlen(enc)) {
10108 if (newline ==
'\n')
10109 return smart_chomp(str, e, p);
10113 return smart_chomp(str, e, p);
10117 enc = rb_enc_check(str, rs);
10118 if (is_broken_string(rs)) {
10122 if (p[
len-1] == newline &&
10124 memcmp(rsptr, pp, rslen) == 0)) {
10125 if (at_char_boundary(p, pp, e, enc))
10126 return len - rslen;
10138chomp_rs(
int argc,
const VALUE *argv)
10142 VALUE rs = argv[0];
10154 long olen = RSTRING_LEN(str);
10155 long len = chompped_length(str, rs);
10156 if (
len >= olen)
return Qnil;
10157 str_modify_keep_cr(str);
10158 STR_SET_LEN(str,
len);
10159 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10179rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10182 str_modifiable(str);
10183 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10184 rs = chomp_rs(argc, argv);
10186 return rb_str_chomp_string(str, rs);
10199rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10201 VALUE rs = chomp_rs(argc, argv);
10209 const char *
const start = s;
10211 if (!s || s >= e)
return 0;
10214 if (single_byte_optimizable(str)) {
10215 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10220 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10242rb_str_lstrip_bang(
VALUE str)
10246 long olen, loffset;
10248 str_modify_keep_cr(str);
10249 enc = STR_ENC_GET(str);
10251 loffset = lstrip_offset(str, start, start+olen, enc);
10253 long len = olen-loffset;
10254 s = start + loffset;
10255 memmove(start, s,
len);
10256 STR_SET_LEN(str,
len);
10257 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10281rb_str_lstrip(
VALUE str)
10286 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10287 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10296 rb_str_check_dummy_enc(enc);
10300 if (!s || s >= e)
return 0;
10304 if (single_byte_optimizable(str)) {
10306 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10311 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10333rb_str_rstrip_bang(
VALUE str)
10337 long olen, roffset;
10339 str_modify_keep_cr(str);
10340 enc = STR_ENC_GET(str);
10342 roffset = rstrip_offset(str, start, start+olen, enc);
10344 long len = olen - roffset;
10346 STR_SET_LEN(str,
len);
10347 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10370rb_str_rstrip(
VALUE str)
10374 long olen, roffset;
10376 enc = STR_ENC_GET(str);
10378 roffset = rstrip_offset(str, start, start+olen, enc);
10380 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10398rb_str_strip_bang(
VALUE str)
10401 long olen, loffset, roffset;
10404 str_modify_keep_cr(str);
10405 enc = STR_ENC_GET(str);
10407 loffset = lstrip_offset(str, start, start+olen, enc);
10408 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10410 if (loffset > 0 || roffset > 0) {
10411 long len = olen-roffset;
10414 memmove(start, start + loffset,
len);
10416 STR_SET_LEN(str,
len);
10417 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10440rb_str_strip(
VALUE str)
10443 long olen, loffset, roffset;
10447 loffset = lstrip_offset(str, start, start+olen, enc);
10448 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10450 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10455scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10458 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10464 end = pos + RSTRING_LEN(pat);
10478 if (RSTRING_LEN(str) > end)
10479 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10488 if (!regs || regs->num_regs == 1) {
10494 for (
int i = 1; i < regs->num_regs; i++) {
10525 long last = -1, prev = 0;
10526 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10528 pat = get_pat_quoted(pat, 1);
10529 mustnot_broken(str);
10533 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10538 if (last >= 0) rb_pat_search(pat, str, last, 1);
10543 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10547 str_mod_check(str, p,
len);
10549 if (last >= 0) rb_pat_search(pat, str, last, 1);
10601rb_str_hex(
VALUE str)
10603 return rb_str_to_inum(str, 16, FALSE);
10687rb_str_oct(
VALUE str)
10689 return rb_str_to_inum(str, -8, FALSE);
10692#ifndef HAVE_CRYPT_R
10697 rb_nativethread_lock_t lock;
10698} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10767# define CRYPT_END() ALLOCV_END(databuf)
10770 extern char *crypt(
const char *,
const char *);
10771# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10774 const char *s, *saltp;
10777 char salt_8bit_clean[3];
10781 mustnot_wchar(str);
10782 mustnot_wchar(salt);
10784 saltp = RSTRING_PTR(salt);
10785 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10786 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10790 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10791 salt_8bit_clean[0] = saltp[0] & 0x7f;
10792 salt_8bit_clean[1] = saltp[1] & 0x7f;
10793 salt_8bit_clean[2] =
'\0';
10794 saltp = salt_8bit_clean;
10799# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10800 data->initialized = 0;
10802 res = crypt_r(s, saltp, data);
10805 res = crypt(s, saltp);
10820 size_t res_size = strlen(res)+1;
10821 tmp_buf =
ALLOCA_N(
char, res_size);
10822 memcpy(tmp_buf, res, res_size);
10859 char *ptr, *p, *pend;
10862 unsigned long sum0 = 0;
10867 ptr = p = RSTRING_PTR(str);
10868 len = RSTRING_LEN(str);
10874 str_mod_check(str, ptr,
len);
10877 sum0 += (
unsigned char)*p;
10888 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10889 sum0 &= (((
unsigned long)1)<<bits)-1;
10909rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10913 long width,
len, flen = 1, fclen = 1;
10916 const char *f =
" ";
10917 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10919 int singlebyte = 1, cr;
10923 enc = STR_ENC_GET(str);
10924 termlen = rb_enc_mbminlen(enc);
10928 enc = rb_enc_check(str, pad);
10929 f = RSTRING_PTR(pad);
10930 flen = RSTRING_LEN(pad);
10931 fclen = str_strlen(pad, enc);
10932 singlebyte = single_byte_optimizable(pad);
10933 if (flen == 0 || fclen == 0) {
10934 rb_raise(rb_eArgError,
"zero width padding");
10937 len = str_strlen(str, enc);
10938 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10940 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10944 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10945 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10947 size = RSTRING_LEN(str);
10948 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10949 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10950 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10951 rb_raise(rb_eArgError,
"argument too big");
10955 p = RSTRING_PTR(res);
10957 memset(p, *f, llen);
10961 while (llen >= fclen) {
10967 memcpy(p, f, llen2);
10971 memcpy(p, RSTRING_PTR(str), size);
10974 memset(p, *f, rlen);
10978 while (rlen >= fclen) {
10984 memcpy(p, f, rlen2);
10988 TERM_FILL(p, termlen);
10989 STR_SET_LEN(res, p-RSTRING_PTR(res));
11010rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11012 return rb_str_justify(argc, argv, str,
'l');
11024rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11026 return rb_str_justify(argc, argv, str,
'r');
11039rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11041 return rb_str_justify(argc, argv, str,
'c');
11057 sep = get_pat_quoted(sep, 0);
11069 pos = rb_str_index(str, sep, 0);
11070 if (pos < 0)
goto failed;
11075 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11078 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11092 long pos = RSTRING_LEN(str);
11094 sep = get_pat_quoted(sep, 0);
11107 pos = rb_str_rindex(str, sep, pos);
11116 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11118 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11130rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11134 for (i=0; i<argc; i++) {
11135 VALUE tmp = argv[i];
11137 if (rb_reg_start_with_p(tmp, str))
11141 const char *p, *s, *e;
11146 enc = rb_enc_check(str, tmp);
11147 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11148 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11149 p = RSTRING_PTR(str);
11152 if (!at_char_right_boundary(p, s, e, enc))
11154 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11170rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11174 for (i=0; i<argc; i++) {
11175 VALUE tmp = argv[i];
11176 const char *p, *s, *e;
11181 enc = rb_enc_check(str, tmp);
11182 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11183 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11184 p = RSTRING_PTR(str);
11187 if (!at_char_boundary(p, s, e, enc))
11189 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11205deleted_prefix_length(
VALUE str,
VALUE prefix)
11207 const char *strptr, *prefixptr;
11208 long olen, prefixlen;
11213 if (!is_broken_string(prefix) ||
11214 !rb_enc_asciicompat(enc) ||
11215 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11216 enc = rb_enc_check(str, prefix);
11220 prefixlen = RSTRING_LEN(prefix);
11221 if (prefixlen <= 0)
return 0;
11222 olen = RSTRING_LEN(str);
11223 if (olen < prefixlen)
return 0;
11224 strptr = RSTRING_PTR(str);
11225 prefixptr = RSTRING_PTR(prefix);
11226 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11227 if (is_broken_string(prefix)) {
11228 if (!is_broken_string(str)) {
11232 const char *strend = strptr + olen;
11233 const char *after_prefix = strptr + prefixlen;
11234 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11255rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11258 str_modify_keep_cr(str);
11260 prefixlen = deleted_prefix_length(str, prefix);
11261 if (prefixlen <= 0)
return Qnil;
11275rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11279 prefixlen = deleted_prefix_length(str, prefix);
11280 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11282 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11295deleted_suffix_length(
VALUE str,
VALUE suffix)
11297 const char *strptr, *suffixptr;
11298 long olen, suffixlen;
11302 if (is_broken_string(suffix))
return 0;
11303 enc = rb_enc_check(str, suffix);
11306 suffixlen = RSTRING_LEN(suffix);
11307 if (suffixlen <= 0)
return 0;
11308 olen = RSTRING_LEN(str);
11309 if (olen < suffixlen)
return 0;
11310 strptr = RSTRING_PTR(str);
11311 suffixptr = RSTRING_PTR(suffix);
11312 const char *strend = strptr + olen;
11313 const char *before_suffix = strend - suffixlen;
11314 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11315 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11331rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11333 long olen, suffixlen,
len;
11334 str_modifiable(str);
11336 suffixlen = deleted_suffix_length(str, suffix);
11337 if (suffixlen <= 0)
return Qnil;
11339 olen = RSTRING_LEN(str);
11340 str_modify_keep_cr(str);
11341 len = olen - suffixlen;
11342 STR_SET_LEN(str,
len);
11343 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11359rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11363 suffixlen = deleted_suffix_length(str, suffix);
11364 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11366 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11373 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11381 val = rb_fs_check(val);
11384 "value of %"PRIsVALUE
" must be String or Regexp",
11388 rb_warn_deprecated(
"'$;'", NULL);
11405 str_modifiable(str);
11408 int idx = rb_enc_to_index(encoding);
11415 rb_enc_associate_index(str, idx);
11439 if (STR_EMBED_P(str)) {
11440 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11445 str_replace_shared_without_enc(str2, str);
11447 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11477rb_str_valid_encoding_p(
VALUE str)
11497rb_str_is_ascii_only_p(
VALUE str)
11507 static const char ellipsis[] =
"...";
11508 const long ellipsislen =
sizeof(ellipsis) - 1;
11510 const long blen = RSTRING_LEN(str);
11511 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11512 VALUE estr, ret = 0;
11515 if (
len * rb_enc_mbminlen(enc) >= blen ||
11519 else if (
len <= ellipsislen ||
11521 if (rb_enc_asciicompat(enc)) {
11523 rb_enc_associate(ret, enc);
11530 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11535 rb_enc_from_encoding(enc), 0,
Qnil);
11548 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11554 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11573 if (enc == STR_ENC_GET(str)) {
11578 return enc_str_scrub(enc, str, repl, cr);
11586 const char *rep, *p, *e, *p1, *sp;
11592 rb_raise(rb_eArgError,
"both of block and replacement given");
11599 if (!
NIL_P(repl)) {
11600 repl = str_compat_and_valid(repl, enc);
11603 if (rb_enc_dummy_p(enc)) {
11606 encidx = rb_enc_to_index(enc);
11608#define DEFAULT_REPLACE_CHAR(str) do { \
11609 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11610 rep = replace; replen = (int)sizeof(replace); \
11613 slen = RSTRING_LEN(str);
11614 p = RSTRING_PTR(str);
11619 if (rb_enc_asciicompat(enc)) {
11625 else if (!
NIL_P(repl)) {
11626 rep = RSTRING_PTR(repl);
11627 replen = RSTRING_LEN(repl);
11630 else if (encidx == rb_utf8_encindex()) {
11631 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11635 DEFAULT_REPLACE_CHAR(
"?");
11640 p = search_nonascii(p, e);
11645 int ret = rb_enc_precise_mbclen(p, e, enc);
11664 if (e - p < clen) clen = e - p;
11671 for (; clen > 1; clen--) {
11672 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11683 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11684 str_mod_check(str, sp, slen);
11685 repl = str_compat_and_valid(repl, enc);
11692 p = search_nonascii(p, e);
11718 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11719 str_mod_check(str, sp, slen);
11720 repl = str_compat_and_valid(repl, enc);
11729 long mbminlen = rb_enc_mbminlen(enc);
11733 else if (!
NIL_P(repl)) {
11734 rep = RSTRING_PTR(repl);
11735 replen = RSTRING_LEN(repl);
11737 else if (encidx == ENCINDEX_UTF_16BE) {
11738 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11740 else if (encidx == ENCINDEX_UTF_16LE) {
11741 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11743 else if (encidx == ENCINDEX_UTF_32BE) {
11744 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11746 else if (encidx == ENCINDEX_UTF_32LE) {
11747 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11750 DEFAULT_REPLACE_CHAR(
"?");
11754 int ret = rb_enc_precise_mbclen(p, e, enc);
11767 if (e - p < clen) clen = e - p;
11768 if (clen <= mbminlen * 2) {
11773 for (; clen > mbminlen; clen-=mbminlen) {
11774 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11784 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11785 str_mod_check(str, sp, slen);
11786 repl = str_compat_and_valid(repl, enc);
11811 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11812 str_mod_check(str, sp, slen);
11813 repl = str_compat_and_valid(repl, enc);
11853str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11861static ID id_normalize;
11862static ID id_normalized_p;
11863static VALUE mUnicodeNormalize;
11866unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11868 static int UnicodeNormalizeRequired = 0;
11871 if (!UnicodeNormalizeRequired) {
11872 rb_require(
"unicode_normalize/normalize.rb");
11873 UnicodeNormalizeRequired = 1;
11877 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11888rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11890 return unicode_normalize_common(argc, argv, str, id_normalize);
11904rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11906 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11933rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11935 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12067#define sym_equal rb_obj_equal
12070sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12074 int c = rb_enc_precise_mbclen(s, send, enc);
12078 c = rb_enc_mbc_to_codepoint(s, send, enc);
12086rb_str_symname_p(
VALUE sym)
12091 rb_encoding *resenc = rb_default_internal_encoding();
12093 if (resenc == NULL) resenc = rb_default_external_encoding();
12094 enc = STR_ENC_GET(sym);
12095 ptr = RSTRING_PTR(sym);
12096 len = RSTRING_LEN(sym);
12097 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12105rb_str_quote_unprintable(
VALUE str)
12113 resenc = rb_default_internal_encoding();
12114 if (resenc == NULL) resenc = rb_default_external_encoding();
12115 enc = STR_ENC_GET(str);
12116 ptr = RSTRING_PTR(str);
12117 len = RSTRING_LEN(str);
12118 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12119 !sym_printable(ptr, ptr +
len, enc)) {
12120 return rb_str_escape(str);
12126rb_id_quote_unprintable(
ID id)
12128 VALUE str = rb_id2str(
id);
12129 if (!rb_str_symname_p(str)) {
12130 return rb_str_escape(str);
12148sym_inspect(
VALUE sym)
12155 if (!rb_str_symname_p(str)) {
12157 len = RSTRING_LEN(str);
12158 rb_str_resize(str,
len + 1);
12159 dest = RSTRING_PTR(str);
12160 memmove(dest + 1, dest,
len);
12164 VALUE orig_str = str;
12166 len = RSTRING_LEN(orig_str);
12167 str = rb_enc_str_new(0,
len + 1, enc);
12170 ptr = RSTRING_PTR(orig_str);
12171 dest = RSTRING_PTR(str);
12172 memcpy(dest + 1, ptr,
len);
12192rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12197 rb_raise(rb_eArgError,
"no receiver given");
12294 return rb_str_match(
rb_sym2str(sym), other);
12309sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12311 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12324sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12326 return rb_str_match_m_p(argc, argv, sym);
12344 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12355sym_length(
VALUE sym)
12369sym_empty(
VALUE sym)
12403sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12419sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12435sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12449sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12451 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12464sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12466 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12478sym_encoding(
VALUE sym)
12484string_for_symbol(
VALUE name)
12489 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12503 name = string_for_symbol(name);
12504 return rb_intern_str(name);
12513 name = string_for_symbol(name);
12537 return rb_fstring(str);
12543 struct RString fake_str = {RBASIC_INIT};
12544 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12556 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12557 rb_enc_autoload(enc);
12560 struct RString fake_str = {RBASIC_INIT};
12561 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12567 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12568 rb_enc_autoload(enc);
12571 struct RString fake_str = {RBASIC_INIT};
12572 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12585rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12590 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12591 rb_str_buf_cat_byte(str, (
char) code);
12601fstring_set_class_i(
VALUE *str,
void *data)
12605 return ST_CONTINUE;
12613 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12780 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns an "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns an "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously been classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@50 as
String's specific fields.
struct RString::@50::@51 heap
Strings that use separated memory region for contents use this pattern.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@50::@52 embed
Embedded contents.
long len
Length of the string, not including terminating NUL character.
union RString::@50::@51::@53 aux
Auxiliary info.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.