14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
131#define RUBY_MAX_CHAR_LEN 16
132#define STR_PRECOMPUTED_HASH FL_USER4
133#define STR_SHARED_ROOT FL_USER5
134#define STR_BORROWED FL_USER6
135#define STR_TMPLOCK FL_USER7
136#define STR_NOFREE FL_USER18
137#define STR_FAKESTR FL_USER19
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 rb_gc_register_pinning_obj(str); \
209 FL_SET((shared_str), STR_SHARED_ROOT); \
210 if (RBASIC_CLASS((shared_str)) == 0) \
211 FL_SET_RAW((shared_str), STR_BORROWED); \
215#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
216#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
219#define STR_ENC_GET(str) get_encoding(str)
221#if !defined SHARABLE_MIDDLE_SUBSTRING
222# define SHARABLE_MIDDLE_SUBSTRING 0
224#if !SHARABLE_MIDDLE_SUBSTRING
225#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
227#define SHARABLE_SUBSTRING_P(beg, len, end) 1
232str_embed_capa(
VALUE str)
234 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
238rb_str_reembeddable_p(
VALUE str)
240 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
244rb_str_embed_size(
long capa,
long termlen)
252rb_str_size_as_embedded(
VALUE str)
255 if (STR_EMBED_P(str)) {
257 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
259 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
263 else if (rb_str_reembeddable_p(str)) {
265 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
267 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
270 real_size =
sizeof(
struct RString);
277STR_EMBEDDABLE_P(
long len,
long termlen)
279 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
284static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
285static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
287static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
288static inline void str_modifiable(
VALUE str);
293str_make_independent(
VALUE str)
295 long len = RSTRING_LEN(str);
296 int termlen = TERM_LEN(str);
297 str_make_independent_expand((str),
len, 0L, termlen);
300static inline int str_dependent_p(
VALUE str);
303rb_str_make_independent(
VALUE str)
305 if (str_dependent_p(str)) {
306 str_make_independent(str);
311rb_str_make_embedded(
VALUE str)
316 char *buf =
RSTRING(str)->as.heap.ptr;
320 STR_SET_LEN(str,
len);
323 memcpy(RSTRING_PTR(str), buf,
len);
327 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
331rb_debug_rstring_null_ptr(
const char *func)
333 fprintf(stderr,
"%s is returning NULL!! "
334 "SIGSEGV is highly expected to follow immediately.\n"
335 "If you could reproduce, attach your debugger here, "
336 "and look at the passed string.\n",
341static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
344get_encoding(
VALUE str)
350mustnot_broken(
VALUE str)
352 if (is_broken_string(str)) {
353 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
358mustnot_wchar(
VALUE str)
361 if (rb_enc_mbminlen(enc) > 1) {
362 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
366static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
368#if SIZEOF_LONG == SIZEOF_VOIDP
369#define PRECOMPUTED_FAKESTR_HASH 1
374BARE_STRING_P(
VALUE str)
379static inline st_index_t
380str_do_hash(
VALUE str)
382 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
384 if (e && !is_ascii_string(str)) {
391str_store_precomputed_hash(
VALUE str, st_index_t hash)
397 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
398 size_t free_bytes = str_embed_capa(str) - used_bytes;
402 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
404 FL_SET(str, STR_PRECOMPUTED_HASH);
417 if (
FL_TEST(str, RSTRING_FSTR))
420 bare = BARE_STRING_P(str);
422 if (STR_EMBED_P(str)) {
427 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
434 rb_str_resize(str, RSTRING_LEN(str));
436 fstr = register_fstring(str,
false,
false);
439 str_replace_shared_without_enc(str, fstr);
446static VALUE fstring_table_obj;
449fstring_concurrent_set_hash(
VALUE str)
451#ifdef PRECOMPUTED_FAKESTR_HASH
455 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
472 const char *aptr, *bptr;
479 return (alen == blen &&
481 memcmp(aptr, bptr, alen) == 0);
486 bool force_precompute_hash;
490fstring_concurrent_set_create(
VALUE str,
void *data)
500 long len = RSTRING_LEN(str);
501 long capa =
len +
sizeof(st_index_t);
502 int term_len = TERM_LEN(str);
504 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
506 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
507 STR_SET_LEN(new_str, RSTRING_LEN(str));
509 rb_enc_copy(new_str, str);
510 str_store_precomputed_hash(new_str, str_do_hash(str));
514 rb_enc_copy(new_str, str);
515#ifdef PRECOMPUTED_FAKESTR_HASH
516 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
517 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
531 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
534 if (STR_SHARED_P(str)) {
536 str_make_independent(str);
539 if (!BARE_STRING_P(str)) {
545 RBASIC(str)->flags |= RSTRING_FSTR;
547 RB_OBJ_SET_SHAREABLE(str);
561 .hash = fstring_concurrent_set_hash,
562 .cmp = fstring_concurrent_set_cmp,
563 .create = fstring_concurrent_set_create,
568Init_fstring_table(
void)
570 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
571 rb_gc_register_address(&fstring_table_obj);
575register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
579 .force_precompute_hash = force_precompute_hash
582#if SIZEOF_VOIDP == SIZEOF_LONG
586 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
590 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
592 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
604rb_obj_is_fstring_table(
VALUE obj)
608 return obj == fstring_table_obj;
612rb_gc_free_fstring(
VALUE obj)
614 ASSERT_vm_locking_with_barrier();
620 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
622 RB_DEBUG_COUNTER_INC(obj_str_fstr);
628rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
630 if (fstring_table_obj) {
631 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
636setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
639 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
652 return (
VALUE)fake_str;
661 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
670rb_fstring_new(
const char *ptr,
long len)
672 struct RString fake_str = {RBASIC_INIT};
673 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
679 struct RString fake_str = {RBASIC_INIT};
680 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
684rb_fstring_cstr(
const char *
ptr)
686 return rb_fstring_new(
ptr, strlen(
ptr));
690single_byte_optimizable(
VALUE str)
694 case ENCINDEX_ASCII_8BIT:
695 case ENCINDEX_US_ASCII:
717static inline const char *
718search_nonascii(
const char *p,
const char *e)
722#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
723# if SIZEOF_UINTPTR_T == 8
724# define NONASCII_MASK UINT64_C(0x8080808080808080)
725# elif SIZEOF_UINTPTR_T == 4
726# define NONASCII_MASK UINT32_C(0x80808080)
728# error "don't know what to do."
731# if SIZEOF_UINTPTR_T == 8
732# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
733# elif SIZEOF_UINTPTR_T == 4
734# define NONASCII_MASK 0x80808080UL
736# error "don't know what to do."
740 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
741#if !UNALIGNED_WORD_ACCESS
742 if ((uintptr_t)p % SIZEOF_VOIDP) {
743 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
748 case 7:
if (p[-7]&0x80)
return p-7;
749 case 6:
if (p[-6]&0x80)
return p-6;
750 case 5:
if (p[-5]&0x80)
return p-5;
751 case 4:
if (p[-4]&0x80)
return p-4;
753 case 3:
if (p[-3]&0x80)
return p-3;
754 case 2:
if (p[-2]&0x80)
return p-2;
755 case 1:
if (p[-1]&0x80)
return p-1;
760#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
761#define aligned_ptr(value) \
762 __builtin_assume_aligned((value), sizeof(uintptr_t))
764#define aligned_ptr(value) (value)
767 t = (e - (SIZEOF_VOIDP-1));
769 for (;s < t; s +=
sizeof(uintptr_t)) {
771 memcpy(&word, s,
sizeof(word));
772 if (word & NONASCII_MASK) {
773#ifdef WORDS_BIGENDIAN
774 return (
const char *)s + (nlz_intptr(word&NONASCII_MASK)>>3);
776 return (
const char *)s + (ntz_intptr(word&NONASCII_MASK)>>3);
786 case 7:
if (e[-7]&0x80)
return e-7;
787 case 6:
if (e[-6]&0x80)
return e-6;
788 case 5:
if (e[-5]&0x80)
return e-5;
789 case 4:
if (e[-4]&0x80)
return e-4;
791 case 3:
if (e[-3]&0x80)
return e-3;
792 case 2:
if (e[-2]&0x80)
return e-2;
793 case 1:
if (e[-1]&0x80)
return e-1;
801 const char *e = p +
len;
803 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
805 p = search_nonascii(p, e);
809 if (rb_enc_asciicompat(enc)) {
810 p = search_nonascii(p, e);
813 int ret = rb_enc_precise_mbclen(p, e, enc);
817 p = search_nonascii(p, e);
823 int ret = rb_enc_precise_mbclen(p, e, enc);
839 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
842 p = search_nonascii(p, e);
846 else if (rb_enc_asciicompat(enc)) {
847 p = search_nonascii(p, e);
853 int ret = rb_enc_precise_mbclen(p, e, enc);
860 p = search_nonascii(p, e);
866 int ret = rb_enc_precise_mbclen(p, e, enc);
891 rb_enc_set_index(str1, rb_enc_get_index(str2));
899rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
904 str_enc_copy(dest, src);
905 if (RSTRING_LEN(dest) == 0) {
906 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
917 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
918 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
929rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
931 str_enc_copy(dest, src);
938 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
944 return enc_coderange_scan(str, enc);
953 cr = enc_coderange_scan(str, get_encoding(str));
960rb_enc_str_asciicompat(
VALUE str)
963 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
971 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
980str_mod_check(
VALUE s,
const char *p,
long len)
982 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
988str_capacity(
VALUE str,
const int termlen)
990 if (STR_EMBED_P(str)) {
991 return str_embed_capa(str) - termlen;
993 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
997 return RSTRING(str)->as.heap.aux.capa;
1004 return str_capacity(str, TERM_LEN(str));
1008must_not_null(
const char *
ptr)
1011 rb_raise(rb_eArgError,
"NULL pointer given");
1016str_alloc_embed(
VALUE klass,
size_t capa)
1018 size_t size = rb_str_embed_size(
capa, 0);
1022 NEWOBJ_OF(str,
struct RString, klass,
1026 str->as.embed.ary[0] = 0;
1032str_alloc_heap(
VALUE klass)
1034 NEWOBJ_OF(str,
struct RString, klass,
1038 str->as.heap.aux.capa = 0;
1039 str->as.heap.ptr = NULL;
1045empty_str_alloc(
VALUE klass)
1047 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1048 VALUE str = str_alloc_embed(klass, 0);
1049 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1060 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1064 enc = rb_ascii8bit_encoding();
1067 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1069 int termlen = rb_enc_mbminlen(enc);
1071 if (STR_EMBEDDABLE_P(
len, termlen)) {
1072 str = str_alloc_embed(klass,
len + termlen);
1078 str = str_alloc_heap(klass);
1084 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1087 rb_enc_raw_set(str, enc);
1090 memcpy(RSTRING_PTR(str),
ptr,
len);
1093 memset(RSTRING_PTR(str), 0,
len);
1096 STR_SET_LEN(str,
len);
1097 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1104 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1139 __msan_unpoison_string(
ptr);
1159 if (rb_enc_mbminlen(enc) != 1) {
1160 rb_raise(rb_eArgError,
"wchar encoding given");
1162 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1166str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1171 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1175 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1178 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1179 str = str_alloc_heap(klass);
1183 RBASIC(str)->flags |= STR_NOFREE;
1184 rb_enc_associate_index(str, encindex);
1213static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1215 int ecflags,
VALUE ecopts);
1220 int encidx = rb_enc_to_index(enc);
1221 if (rb_enc_get_index(str) == encidx)
1222 return is_ascii_string(str);
1233 if (!to)
return str;
1234 if (!from) from = rb_enc_get(str);
1235 if (from == to)
return str;
1236 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1237 rb_is_ascii8bit_enc(to)) {
1238 if (STR_ENC_GET(str) != to) {
1240 rb_enc_associate(str, to);
1247 from, to, ecflags, ecopts);
1248 if (
NIL_P(newstr)) {
1256rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1261 olen = RSTRING_LEN(newstr);
1262 if (ofs < -olen || olen < ofs)
1264 if (ofs < 0) ofs += olen;
1266 STR_SET_LEN(newstr, ofs);
1270 rb_str_modify(newstr);
1271 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1279 STR_SET_LEN(str, 0);
1280 rb_enc_associate(str, enc);
1286str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1288 int ecflags,
VALUE ecopts)
1293 VALUE econv_wrapper;
1294 const unsigned char *start, *sp;
1295 unsigned char *dest, *dp;
1296 size_t converted_output = (size_t)ofs;
1301 RBASIC_CLEAR_CLASS(econv_wrapper);
1303 if (!ec)
return Qnil;
1306 sp = (
unsigned char*)
ptr;
1308 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1309 (dp = dest + converted_output),
1313 size_t converted_input = sp - start;
1314 size_t rest =
len - converted_input;
1315 converted_output = dp - dest;
1317 if (converted_input && converted_output &&
1318 rest < (LONG_MAX / converted_output)) {
1319 rest = (rest * converted_output) / converted_input;
1324 olen += rest < 2 ? 2 : rest;
1325 rb_str_resize(newstr, olen);
1332 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1334 rb_enc_associate(newstr, to);
1353 const int eidx = rb_enc_to_index(eenc);
1356 return rb_enc_str_new(
ptr,
len, eenc);
1360 if ((eidx == rb_ascii8bit_encindex()) ||
1361 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1365 ienc = rb_default_internal_encoding();
1366 if (!ienc || eenc == ienc) {
1367 return rb_enc_str_new(
ptr,
len, eenc);
1371 if ((eidx == rb_ascii8bit_encindex()) ||
1372 (eidx == rb_usascii_encindex()) ||
1373 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1374 return rb_enc_str_new(
ptr,
len, ienc);
1377 str = rb_enc_str_new(NULL, 0, ienc);
1380 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1381 rb_str_initialize(str,
ptr,
len, eenc);
1389 int eidx = rb_enc_to_index(eenc);
1390 if (eidx == rb_usascii_encindex() &&
1391 !is_ascii_string(str)) {
1392 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1395 rb_enc_associate_index(str, eidx);
1454str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1456 const int termlen = TERM_LEN(str);
1461 if (str_embed_capa(str2) >=
len + termlen) {
1462 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1463 STR_SET_EMBED(str2);
1464 memcpy(ptr2, RSTRING_PTR(str),
len);
1465 TERM_FILL(ptr2+
len, termlen);
1469 if (STR_SHARED_P(str)) {
1470 root =
RSTRING(str)->as.heap.aux.shared;
1479 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1481 rb_fatal(
"about to free a possible shared root");
1483 char *ptr2 = STR_HEAP_PTR(str2);
1485 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1488 FL_SET(str2, STR_NOEMBED);
1490 STR_SET_SHARED(str2, root);
1493 STR_SET_LEN(str2,
len);
1501 str_replace_shared_without_enc(str2, str);
1502 rb_enc_cr_str_exact_copy(str2, str);
1509 return str_replace_shared(str_alloc_heap(klass), str);
1526rb_str_new_frozen_String(
VALUE orig)
1534rb_str_frozen_bare_string(
VALUE orig)
1536 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1541rb_str_tmp_frozen_acquire(
VALUE orig)
1544 return str_new_frozen_buffer(0, orig, FALSE);
1548rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1550 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1551 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1553 VALUE str = str_alloc_heap(0);
1556 FL_SET(str, STR_SHARED_ROOT);
1558 size_t capa = str_capacity(orig, TERM_LEN(orig));
1564 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1565 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1572 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1573 RBASIC(orig)->flags &= ~STR_NOFREE;
1574 STR_SET_SHARED(orig, str);
1576 RB_OBJ_SET_SHAREABLE(str);
1588rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1593 if (STR_EMBED_P(tmp)) {
1596 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1602 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1606 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1607 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1612 STR_SET_LEN(tmp, 0);
1620 return str_new_frozen_buffer(klass, orig, TRUE);
1630 VALUE str = str_alloc_heap(klass);
1631 STR_SET_LEN(str, RSTRING_LEN(orig));
1632 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1633 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1634 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1635 RBASIC(orig)->flags &= ~STR_NOFREE;
1636 STR_SET_SHARED(orig, str);
1643str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1647 long len = RSTRING_LEN(orig);
1648 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1649 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1651 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1652 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1658 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1659 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1665 if ((ofs > 0) || (rest > 0) ||
1668 str = str_new_shared(klass,
shared);
1670 RSTRING(str)->as.heap.ptr += ofs;
1671 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1679 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1680 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1682 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1683 STR_SET_LEN(str, RSTRING_LEN(orig));
1689 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1692 str = heap_str_make_shared(klass, orig);
1697 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1709str_new_empty_String(
VALUE str)
1712 rb_enc_copy(v, str);
1716#define STR_BUF_MIN_SIZE 63
1721 if (STR_EMBEDDABLE_P(
capa, 1)) {
1729 RSTRING(str)->as.heap.ptr[0] =
'\0';
1749 return str_new(0, 0,
len);
1755 if (STR_EMBED_P(str)) {
1756 RB_DEBUG_COUNTER_INC(obj_str_embed);
1758 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1759 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1760 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1763 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1764 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1769rb_str_memsize(
VALUE str)
1771 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1772 return STR_HEAP_SIZE(str);
1782 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1785static inline void str_discard(
VALUE str);
1786static void str_shared_replace(
VALUE str,
VALUE str2);
1791 if (str != str2) str_shared_replace(str, str2);
1802 enc = STR_ENC_GET(str2);
1805 termlen = rb_enc_mbminlen(enc);
1807 STR_SET_LEN(str, RSTRING_LEN(str2));
1809 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1811 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1812 rb_enc_associate(str, enc);
1816 if (STR_EMBED_P(str2)) {
1818 long len = RSTRING_LEN(str2);
1821 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1822 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1823 RSTRING(str2)->as.heap.ptr = new_ptr;
1824 STR_SET_LEN(str2,
len);
1826 STR_SET_NOEMBED(str2);
1829 STR_SET_NOEMBED(str);
1831 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1833 if (
FL_TEST(str2, STR_SHARED)) {
1835 STR_SET_SHARED(str,
shared);
1838 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1842 STR_SET_EMBED(str2);
1843 RSTRING_PTR(str2)[0] = 0;
1844 STR_SET_LEN(str2, 0);
1845 rb_enc_associate(str, enc);
1859 return rb_obj_as_string_result(str, obj);
1875 len = RSTRING_LEN(str2);
1876 if (STR_SHARED_P(str2)) {
1879 STR_SET_NOEMBED(str);
1880 STR_SET_LEN(str,
len);
1881 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1882 STR_SET_SHARED(str,
shared);
1883 rb_enc_cr_str_exact_copy(str, str2);
1886 str_replace_shared(str, str2);
1895 size_t size = rb_str_embed_size(
capa, 0);
1899 NEWOBJ_OF(str,
struct RString, klass,
1910 NEWOBJ_OF(str,
struct RString, klass,
1913 str->as.heap.aux.capa = 0;
1914 str->as.heap.ptr = NULL;
1924 encidx = rb_enc_get_index(str);
1925 flags &= ~ENCODING_MASK;
1928 if (encidx) rb_enc_associate_index(dup, encidx);
1938 long len = RSTRING_LEN(str);
1943 STR_SET_LEN(dup, RSTRING_LEN(str));
1944 return str_duplicate_setup_encoding(str, dup, flags);
1953 root =
RSTRING(str)->as.heap.aux.shared;
1956 root = str = str_new_frozen(klass, str);
1962 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1964 STR_SET_SHARED(dup, root);
1965 flags |= RSTRING_NOEMBED | STR_SHARED;
1967 STR_SET_LEN(dup, RSTRING_LEN(str));
1968 return str_duplicate_setup_encoding(str, dup, flags);
1974 if (STR_EMBED_P(str)) {
1975 return str_duplicate_setup_embed(klass, str, dup);
1978 return str_duplicate_setup_heap(klass, str, dup);
1986 if (STR_EMBED_P(str)) {
1987 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1990 dup = str_alloc_heap(klass);
1993 return str_duplicate_setup(klass, str, dup);
2004rb_str_dup_m(
VALUE str)
2006 if (LIKELY(BARE_STRING_P(str))) {
2017 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2024 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2028 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2029 str_duplicate_setup_embed(klass, str, new_str);
2032 new_str = ec_str_alloc_heap(ec, klass);
2033 str_duplicate_setup_heap(klass, str, new_str);
2042rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2044 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2068 static ID keyword_ids[2];
2069 VALUE orig, opt, venc, vcapa;
2074 if (!keyword_ids[0]) {
2075 keyword_ids[0] = rb_id_encoding();
2076 CONST_ID(keyword_ids[1],
"capacity");
2084 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2085 enc = rb_to_encoding(venc);
2087 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2090 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2092 if (
capa < STR_BUF_MIN_SIZE) {
2093 capa = STR_BUF_MIN_SIZE;
2097 len = RSTRING_LEN(orig);
2101 if (orig == str) n = 0;
2103 str_modifiable(str);
2104 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2106 const size_t size = (size_t)
capa + termlen;
2107 const char *
const old_ptr = RSTRING_PTR(str);
2108 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2109 char *new_ptr =
ALLOC_N(
char, size);
2110 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2111 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2113 RSTRING(str)->as.heap.ptr = new_ptr;
2115 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2116 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2117 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2119 STR_SET_LEN(str,
len);
2122 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2123 rb_enc_cr_str_exact_copy(str, orig);
2125 FL_SET(str, STR_NOEMBED);
2132 rb_enc_associate(str, enc);
2144rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2150 static ID keyword_ids[2];
2160 keyword_ids[0] = rb_id_encoding();
2161 CONST_ID(keyword_ids[1],
"capacity");
2163 encoding = kwargs[0];
2164 capacity = kwargs[1];
2173 if (UNDEF_P(encoding)) {
2175 encoding = rb_obj_encoding(orig);
2179 if (!UNDEF_P(encoding)) {
2180 enc = rb_to_encoding(encoding);
2184 if (UNDEF_P(capacity)) {
2186 VALUE empty_str = str_new(klass,
"", 0);
2188 rb_enc_associate(empty_str, enc);
2192 VALUE copy = str_duplicate(klass, orig);
2193 rb_enc_associate(copy, enc);
2206 if (orig_capa >
capa) {
2211 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2212 STR_SET_LEN(str, 0);
2223#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2238static inline uintptr_t
2239count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2244 d = (d>>6) | (~d>>7);
2245 d &= NONASCII_MASK >> 7;
2248#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2250 return rb_popcount_intptr(d);
2254# if SIZEOF_VOIDP == 8
2263enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2269 long diff = (long)(e - p);
2270 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2275 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2276 const uintptr_t *s, *t;
2277 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2278 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2279 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2280 while (p < (
const char *)s) {
2281 if (is_utf8_lead_byte(*p))
len++;
2285 len += count_utf8_lead_bytes_with_word(s);
2288 p = (
const char *)s;
2291 if (is_utf8_lead_byte(*p))
len++;
2297 else if (rb_enc_asciicompat(enc)) {
2302 q = search_nonascii(p, e);
2308 p += rb_enc_fast_mbclen(p, e, enc);
2315 q = search_nonascii(p, e);
2321 p += rb_enc_mbclen(p, e, enc);
2328 for (c=0; p<e; c++) {
2329 p += rb_enc_mbclen(p, e, enc);
2344rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2352 long diff = (long)(e - p);
2353 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2355 else if (rb_enc_asciicompat(enc)) {
2359 q = search_nonascii(p, e);
2367 ret = rb_enc_precise_mbclen(p, e, enc);
2382 for (c=0; p<e; c++) {
2383 ret = rb_enc_precise_mbclen(p, e, enc);
2390 if (p + rb_enc_mbminlen(enc) <= e)
2391 p += rb_enc_mbminlen(enc);
2407 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2408 if (!enc) enc = STR_ENC_GET(str);
2409 p = RSTRING_PTR(str);
2414 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2419 return enc_strlen(p, e, enc, cr);
2426 return str_strlen(str, NULL);
2440 return LONG2NUM(str_strlen(str, NULL));
2452rb_str_bytesize(
VALUE str)
2471rb_str_empty(
VALUE str)
2473 return RBOOL(RSTRING_LEN(str) == 0);
2492 char *ptr1, *ptr2, *ptr3;
2497 enc = rb_enc_check_str(str1, str2);
2500 termlen = rb_enc_mbminlen(enc);
2501 if (len1 > LONG_MAX - len2) {
2502 rb_raise(rb_eArgError,
"string size too big");
2504 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2505 ptr3 = RSTRING_PTR(str3);
2506 memcpy(ptr3, ptr1, len1);
2507 memcpy(ptr3+len1, ptr2, len2);
2508 TERM_FILL(&ptr3[len1+len2], termlen);
2524 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2527 int enc1 = rb_enc_get_index(str1);
2528 int enc2 = rb_enc_get_index(str2);
2533 else if (enc2 < 0) {
2536 else if (enc1 != enc2) {
2539 else if (len1 > LONG_MAX - len2) {
2573 rb_enc_copy(str2, str);
2578 rb_raise(rb_eArgError,
"negative argument");
2580 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2581 if (STR_EMBEDDABLE_P(
len, 1)) {
2583 memset(RSTRING_PTR(str2), 0,
len + 1);
2590 STR_SET_LEN(str2,
len);
2591 rb_enc_copy(str2, str);
2594 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2595 rb_raise(rb_eArgError,
"argument too big");
2598 len *= RSTRING_LEN(str);
2599 termlen = TERM_LEN(str);
2601 ptr2 = RSTRING_PTR(str2);
2603 n = RSTRING_LEN(str);
2604 memcpy(ptr2, RSTRING_PTR(str), n);
2605 while (n <=
len/2) {
2606 memcpy(ptr2 + n, ptr2, n);
2609 memcpy(ptr2 + n, ptr2,
len-n);
2611 STR_SET_LEN(str2,
len);
2612 TERM_FILL(&ptr2[
len], termlen);
2613 rb_enc_cr_str_copy_for_substr(str2, str);
2650rb_check_lockedtmp(
VALUE str)
2652 if (
FL_TEST(str, STR_TMPLOCK)) {
2659#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2661str_modifiable(
VALUE str)
2665 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2666 if (CHILLED_STRING_P(str)) {
2667 CHILLED_STRING_MUTATED(str);
2669 rb_check_lockedtmp(str);
2670 rb_check_frozen(str);
2675str_dependent_p(
VALUE str)
2677 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2687#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2689str_independent(
VALUE str)
2693 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2694 str_modifiable(str);
2695 return !str_dependent_p(str);
2701str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2711 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2716 STR_SET_LEN(str,
len);
2721 oldptr = RSTRING_PTR(str);
2723 memcpy(
ptr, oldptr,
len);
2725 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2728 STR_SET_NOEMBED(str);
2729 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2730 TERM_FILL(
ptr +
len, termlen);
2732 STR_SET_LEN(str,
len);
2739 if (!str_independent(str))
2740 str_make_independent(str);
2749 int termlen = TERM_LEN(str);
2750 long len = RSTRING_LEN(str);
2753 rb_raise(rb_eArgError,
"negative expanding string size");
2755 if (expand >= LONG_MAX -
len) {
2756 rb_raise(rb_eArgError,
"string size too big");
2759 if (!str_independent(str)) {
2760 str_make_independent_expand(str,
len, expand, termlen);
2762 else if (expand > 0) {
2763 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2770str_modify_keep_cr(
VALUE str)
2772 if (!str_independent(str))
2773 str_make_independent(str);
/*
 * Throws away the contents of +str+.
 *
 * After the str_modifiable() check, a heap buffer is released only when
 * the string is not embedded and carries neither STR_SHARED nor
 * STR_NOFREE.  In that case the heap pointer is cleared and the length
 * is reset to 0.
 */
2780str_discard(
VALUE str)
2782 str_modifiable(str);
2783 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2784 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/* Clear the pointer so the freed buffer is no longer referenced. */
2785 RSTRING(str)->as.heap.ptr = 0;
2786 STR_SET_LEN(str, 0);
2793 int encindex = rb_enc_get_index(str);
2795 if (RB_UNLIKELY(encindex == -1)) {
2799 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2804 if (!rb_enc_asciicompat(enc)) {
2826 return RSTRING_PTR(str);
2830zero_filled(
const char *s,
int n)
2832 for (; n > 0; --n) {
/*
 * Looks for a run of +minlen+ zero bytes at a character position inside
 * the +len+ bytes starting at +s+.
 *
 * The scan advances by rb_enc_mbclen() each step and stops once fewer
 * than +minlen+ bytes remain.  Returns the first position for which
 * zero_filled(s, minlen) is true.
 * NOTE(review): the value returned when nothing is found is not visible
 * in this excerpt.
 */
2839str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2841 const char *e = s +
len;
2843 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2844 if (zero_filled(s, minlen))
return s;
/*
 * Makes sure +termlen+ zero bytes follow the +len+ bytes at +s+.
 *
 * For a string that str_dependent_p() reports as dependent, the bytes at
 * s + len are only inspected.  If they are not already zero, the string
 * is made independent via str_make_independent_expand() with an
 * expansion of 0.  TERM_FILL() writes the terminator directly.
 * NOTE(review): the braces/else separating these paths, and any return
 * other than the final one, are elided from this excerpt.
 */
2850str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2855 if (str_dependent_p(str)) {
2856 if (!zero_filled(s +
len, termlen))
2857 str_make_independent_expand(str,
len, 0L, termlen);
2860 TERM_FILL(s +
len, termlen);
/* Re-read the pointer from str rather than returning s. */
2863 return RSTRING_PTR(str);
2867rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2869 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2870 long len = RSTRING_LEN(str);
2874 rb_check_lockedtmp(str);
2875 str_make_independent_expand(str,
len, 0L, termlen);
2877 else if (str_dependent_p(str)) {
2878 if (termlen > oldtermlen)
2879 str_make_independent_expand(str,
len, 0L, termlen);
2882 if (!STR_EMBED_P(str)) {
2887 if (termlen > oldtermlen) {
2888 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2896str_null_check(
VALUE str,
int *w)
2898 char *s = RSTRING_PTR(str);
2899 long len = RSTRING_LEN(str);
2901 const int minlen = rb_enc_mbminlen(enc);
2905 if (str_null_char(s,
len, minlen, enc)) {
2908 return str_fill_term(str, s,
len, minlen);
2911 if (!s || memchr(s, 0,
len)) {
2915 s = str_fill_term(str, s,
len, minlen);
2921rb_str_to_cstr(
VALUE str)
2924 return str_null_check(str, &w);
2932 char *s = str_null_check(str, &w);
2935 rb_raise(rb_eArgError,
"string contains null char");
2937 rb_raise(rb_eArgError,
"string contains null byte");
/*
 * Thin wrapper around str_fill_term(): passes the current pointer and
 * length of +str+ together with +newminlen+ as the terminator length,
 * and returns whatever str_fill_term() returns.
 */
2943rb_str_fill_terminator(
VALUE str,
const int newminlen)
2945 char *s = RSTRING_PTR(str);
2946 long len = RSTRING_LEN(str);
2947 return str_fill_term(str, s,
len, newminlen);
2953 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2979str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2988 else if (rb_enc_asciicompat(enc)) {
2989 const char *p2, *e2;
2992 while (p < e && 0 < nth) {
2999 p2 = search_nonascii(p, e2);
3008 n = rb_enc_mbclen(p, e, enc);
3019 while (p < e && nth--) {
3020 p += rb_enc_mbclen(p, e, enc);
3031 return str_nth_len(p, e, &nth, enc);
3035str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3040 p = str_nth_len(p, e, &nth, enc);
3049str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3051 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3052 if (!pp)
return e - p;
3059 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3060 STR_ENC_GET(str), single_byte_optimizable(str));
3065str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3068 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3069 const uintptr_t *s, *t;
3070 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3071 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3072 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3073 while (p < (
const char *)s) {
3074 if (is_utf8_lead_byte(*p)) nth--;
3078 nth -= count_utf8_lead_bytes_with_word(s);
3080 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3084 if (is_utf8_lead_byte(*p)) {
3085 if (nth == 0)
break;
3095str_utf8_offset(
const char *p,
const char *e,
long nth)
3097 const char *pp = str_utf8_nth(p, e, &nth);
3106 if (single_byte_optimizable(str) || pos < 0)
3109 char *p = RSTRING_PTR(str);
3110 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3115str_subseq(
VALUE str,
long beg,
long len)
3123 const int termlen = TERM_LEN(str);
3124 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3131 if (str_embed_capa(str2) >=
len + termlen) {
3132 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3133 STR_SET_EMBED(str2);
3134 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3135 TERM_FILL(ptr2+
len, termlen);
3137 STR_SET_LEN(str2,
len);
3141 str_replace_shared(str2, str);
3144 RSTRING(str2)->as.heap.ptr += beg;
3145 if (RSTRING_LEN(str2) >
len) {
3146 STR_SET_LEN(str2,
len);
3156 VALUE str2 = str_subseq(str, beg,
len);
3157 rb_enc_cr_str_copy_for_substr(str2, str);
3166 const long blen = RSTRING_LEN(str);
3168 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3170 if (
len < 0)
return 0;
3171 if (beg < 0 && -beg < 0)
return 0;
3175 if (single_byte_optimizable(str)) {
3176 if (beg > blen)
return 0;
3179 if (beg < 0)
return 0;
3181 if (
len > blen - beg)
3183 if (
len < 0)
return 0;
3188 if (
len > -beg)
len = -beg;
3192 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3195 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3201 slen = str_strlen(str, enc);
3203 if (beg < 0)
return 0;
3205 if (
len == 0)
goto end;
3208 else if (beg > 0 && beg > blen) {
3212 if (beg > str_strlen(str, enc))
return 0;
3217 enc == rb_utf8_encoding()) {
3218 p = str_utf8_nth(s, e, &beg);
3219 if (beg > 0)
return 0;
3220 len = str_utf8_offset(p, e,
len);
3226 p = s + beg * char_sz;
3230 else if (
len * char_sz > e - p)
3235 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3236 if (beg > 0)
return 0;
3240 len = str_offset(p, e,
len, enc, 0);
/* Forward declaration; the definition appears further down.  Callers in
 * this file pass TRUE or FALSE for +empty+; the definition returns Qnil
 * when +len+ is zero and +empty+ is false. */
3248static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3253 return str_substr(str, beg,
len, TRUE);
3263str_substr(
VALUE str,
long beg,
long len,
int empty)
3267 if (!p)
return Qnil;
3268 if (!
len && !empty)
return Qnil;
3270 beg = p - RSTRING_PTR(str);
3272 VALUE str2 = str_subseq(str, beg,
len);
3273 rb_enc_cr_str_copy_for_substr(str2, str);
3281 if (CHILLED_STRING_P(str)) {
3286 rb_str_resize(str, RSTRING_LEN(str));
3304 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3347str_uminus(
VALUE str)
3352 return rb_fstring(str);
/* From this point on, rb_str_dup_frozen expands to rb_str_new_frozen. */
3356#define rb_str_dup_frozen rb_str_new_frozen
3361 rb_check_frozen(str);
3362 if (
FL_TEST(str, STR_TMPLOCK)) {
3365 FL_SET(str, STR_TMPLOCK);
3372 rb_check_frozen(str);
3373 if (!
FL_TEST(str, STR_TMPLOCK)) {
3393 const int termlen = TERM_LEN(str);
3395 str_modifiable(str);
3396 if (STR_SHARED_P(str)) {
3399 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3400 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3411 else if (
len > RSTRING_LEN(str)) {
3415 const char *
const new_end = RSTRING_PTR(str) +
len;
3425 else if (
len < RSTRING_LEN(str)) {
3433 STR_SET_LEN(str,
len);
3434 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3441 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3444 int independent = str_independent(str);
3445 long slen = RSTRING_LEN(str);
3446 const int termlen = TERM_LEN(str);
3448 if (slen >
len || (termlen != 1 && slen <
len)) {
3454 if (STR_EMBED_P(str)) {
3455 if (
len == slen)
return str;
3456 if (str_embed_capa(str) >=
len + termlen) {
3457 STR_SET_LEN(str,
len);
3461 str_make_independent_expand(str, slen,
len - slen, termlen);
3463 else if (str_embed_capa(str) >=
len + termlen) {
3464 char *
ptr = STR_HEAP_PTR(str);
3466 if (slen >
len) slen =
len;
3469 STR_SET_LEN(str,
len);
3470 if (independent) ruby_xfree(
ptr);
3473 else if (!independent) {
3474 if (
len == slen)
return str;
3475 str_make_independent_expand(str, slen,
len - slen, termlen);
3479 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3480 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3483 else if (
len == slen)
return str;
3484 STR_SET_LEN(str,
len);
/*
 * Prepares +str+ for appending +len+ more bytes.
 *
 * The string is first made writable with str_modify_keep_cr().  The new
 * total (current length + len) is guarded against overflowing long, and
 * the buffer is resized with RESIZE_CAPA_TERM() to the computed capacity.
 *
 * Raises ArgumentError ("string sizes too big") if the total would
 * exceed LONG_MAX.
 * NOTE(review): the statements that grow +capa+ inside the two
 * capacity tests are elided from this excerpt.
 */
3491str_ensure_available_capa(
VALUE str,
long len)
3493 str_modify_keep_cr(str);
3495 const int termlen = TERM_LEN(str);
3496 long olen = RSTRING_LEN(str);
/* Rearranged comparison: avoids evaluating olen + len before it is known to fit. */
3498 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3499 rb_raise(rb_eArgError,
"string sizes too big");
3502 long total = olen +
len;
3503 long capa = str_capacity(str, termlen);
3506 if (total >= LONG_MAX / 2) {
3509 while (total >
capa) {
3512 RESIZE_CAPA_TERM(str,
capa, termlen);
3517str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3520 str_modify_keep_cr(str);
3525 if (
len == 0)
return 0;
3527 long total, olen,
off = -1;
3529 const int termlen = TERM_LEN(str);
3532 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3536 long capa = str_capacity(str, termlen);
3538 if (olen > LONG_MAX -
len) {
3539 rb_raise(rb_eArgError,
"string sizes too big");
3543 if (total >= LONG_MAX / 2) {
3546 while (total >
capa) {
3549 RESIZE_CAPA_TERM(str,
capa, termlen);
3550 sptr = RSTRING_PTR(str);
3555 memcpy(sptr + olen,
ptr,
len);
3556 STR_SET_LEN(str, total);
3557 TERM_FILL(sptr + total, termlen);
/* Shorthands for str_buf_cat4() with its keep_cr argument fixed to false.
 * str_buf_cat2 derives the length from rb_strlen_lit(ptr) and evaluates
 * +ptr+ twice.
 * NOTE(review): presumably ptr must be a string literal for
 * rb_strlen_lit - confirm against its definition. */
3562#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3563#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3568 if (
len == 0)
return str;
3570 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3572 return str_buf_cat(str,
ptr,
len);
/*
 * Appends the single byte +byte+ to +str+.
 *
 * The string is made independent first if necessary.  When the existing
 * capacity already has room for one more byte, the byte is stored
 * directly, the length is bumped and a one-byte terminator is written.
 * Otherwise the append is delegated to str_buf_cat().
 *
 * Raises ArgumentError ("string sizes too big") if the length cannot
 * grow by one without exceeding LONG_MAX.
 * NOTE(review): +sptr+ and the real value of +string_length+ are set in
 * lines elided from this excerpt.
 */
3583rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3588 if (UNLIKELY(!str_independent(str))) {
3589 str_make_independent(str);
3592 long string_length = -1;
/* This function always works with a terminator length of 1. */
3593 const int null_terminator_length = 1;
3598 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3599 rb_raise(rb_eArgError,
"string sizes too big");
3602 long string_capacity = str_capacity(str, null_terminator_length);
/* Fast path: enough room, write in place. */
3608 if (LIKELY(string_capacity >= string_length + 1)) {
3610 sptr[string_length] = byte;
3611 STR_SET_LEN(str, string_length + 1);
3612 TERM_FILL(sptr + string_length + 1, null_terminator_length);
/* Slow path: hand the byte to the general append routine. */
3616 str_buf_cat(str, (
char *)&
byte, 1);
3632 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3643rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3644 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3653 if (str_encindex == ptr_encindex) {
3655 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3659 str_enc = rb_enc_from_index(str_encindex);
3660 ptr_enc = rb_enc_from_index(ptr_encindex);
3661 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3664 if (RSTRING_LEN(str) == 0) {
3667 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3673 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3682 *ptr_cr_ret = ptr_cr;
3684 if (str_encindex != ptr_encindex &&
3687 str_enc = rb_enc_from_index(str_encindex);
3688 ptr_enc = rb_enc_from_index(ptr_encindex);
3693 res_encindex = str_encindex;
3698 res_encindex = str_encindex;
3702 res_encindex = ptr_encindex;
3707 res_encindex = str_encindex;
3714 res_encindex = str_encindex;
3720 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3722 str_buf_cat(str,
ptr,
len);
3728 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3735 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3745 if (rb_enc_asciicompat(enc)) {
3746 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3752 unsigned int c = (
unsigned char)*
ptr;
3753 int len = rb_enc_codelen(c, enc);
3754 rb_enc_mbcput(c, buf, enc);
3755 rb_enc_cr_str_buf_cat(str, buf,
len,
3768 if (str_enc_fastpath(str)) {
3772 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3778 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3789 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3805rb_str_concat_literals(
size_t num,
const VALUE *strary)
3809 unsigned long len = 1;
3814 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3816 str_enc_copy_direct(str, strary[0]);
3818 for (i = s; i < num; ++i) {
3819 const VALUE v = strary[i];
3823 if (encidx != ENCINDEX_US_ASCII) {
3825 rb_enc_set_index(str, encidx);
3838rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3840 str_modifiable(str);
3845 else if (argc > 1) {
3848 rb_enc_copy(arg_str, str);
3849 for (i = 0; i < argc; i++) {
3884rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3886 long needed_capacity = 0;
3890 for (
int index = 0; index < argc; index++) {
3891 VALUE obj = argv[index];
3899 needed_capacity += RSTRING_LEN(obj);
3904 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3911 str_ensure_available_capa(str, needed_capacity);
3914 for (
int index = 0; index < argc; index++) {
3915 VALUE obj = argv[index];
3920 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3921 char byte = (char)(
NUM2INT(obj) & 0xFF);
3935 rb_bug(
"append_as_bytes arguments should have been validated");
3939 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3940 TERM_FILL(sptr, TERM_LEN(str));
3945 for (
int index = 0; index < argc; index++) {
3946 VALUE obj = argv[index];
3963 rb_bug(
"append_as_bytes arguments should have been validated");
4042 if (rb_num_to_uint(str2, &code) == 0) {
4055 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4058 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4061 long pos = RSTRING_LEN(str1);
4066 switch (
len = rb_enc_codelen(code, enc)) {
4067 case ONIGERR_INVALID_CODE_POINT_VALUE:
4068 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4070 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4076 rb_enc_mbcput(code, buf, enc);
4077 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4078 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4080 rb_str_resize(str1, pos+
len);
4081 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4094rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4096 int encidx = rb_enc_to_index(enc);
4098 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4103 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4104 return ENCINDEX_ASCII_8BIT;
4126rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4128 str_modifiable(str);
4133 else if (argc > 1) {
4136 rb_enc_copy(arg_str, str);
4137 for (i = 0; i < argc; i++) {
4150 st_index_t precomputed_hash;
4151 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4153 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4154 return precomputed_hash;
4157 return str_do_hash(str);
4164 const char *ptr1, *ptr2;
4167 return (len1 != len2 ||
4169 memcmp(ptr1, ptr2, len1) != 0);
4181rb_str_hash_m(
VALUE str)
/* Yields the smaller of +a+ and +b+ (+a+ when they compare equal).
 * Both arguments appear twice in the expansion, so only pass expressions
 * without side effects. */
4187#define lesser(a,b) (((a)>(b))?(b):(a))
4195 if (RSTRING_LEN(str1) == 0)
return TRUE;
4196 if (RSTRING_LEN(str2) == 0)
return TRUE;
4199 if (idx1 == idx2)
return TRUE;
4204 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4208 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4218 const char *ptr1, *ptr2;
4221 if (str1 == str2)
return 0;
4224 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4233 if (len1 > len2)
return 1;
4236 if (retval > 0)
return 1;
4270 if (str1 == str2)
return Qtrue;
4277 return rb_str_eql_internal(str1, str2);
4291 if (str1 == str2)
return Qtrue;
4293 return rb_str_eql_internal(str1, str2);
4331 return rb_invcmp(str1, str2);
4373 return str_casecmp(str1, s);
4381 const char *p1, *p1end, *p2, *p2end;
4383 enc = rb_enc_compatible(str1, str2);
4388 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4389 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4390 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4391 while (p1 < p1end && p2 < p2end) {
4393 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4394 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4396 return INT2FIX(c1 < c2 ? -1 : 1);
4403 while (p1 < p1end && p2 < p2end) {
4404 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4405 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4407 if (0 <= c1 && 0 <= c2) {
4411 return INT2FIX(c1 < c2 ? -1 : 1);
4415 l1 = rb_enc_mbclen(p1, p1end, enc);
4416 l2 = rb_enc_mbclen(p2, p2end, enc);
4417 len = l1 < l2 ? l1 : l2;
4418 r = memcmp(p1, p2,
len);
4420 return INT2FIX(r < 0 ? -1 : 1);
4422 return INT2FIX(l1 < l2 ? -1 : 1);
4428 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4429 if (p1 == p1end)
return INT2FIX(-1);
4462 return str_casecmp_p(str1, s);
4469 VALUE folded_str1, folded_str2;
4470 VALUE fold_opt = sym_fold;
4472 enc = rb_enc_compatible(str1, str2);
4477 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4478 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4480 return rb_str_eql(folded_str1, folded_str2);
/*
 * Core substring search: looks for the +sub_len+ bytes at +sub_ptr+ using
 * rb_memsearch(), starting with a search window of str_len - offset bytes
 * at +str_ptr+.
 *
 * A raw hit is accepted only when it coincides with +t+; otherwise the
 * window is shrunk, +offset+ is advanced by the skipped distance, and the
 * search is retried.  Returns pos + offset on success, the negative value
 * from rb_memsearch() when it finds nothing, or -1 once the window is
 * exhausted.
 * NOTE(review): the computation of +t+ and the update of search_start
 * are elided here; presumably +t+ is the hit adjusted to a character
 * boundary - confirm against the full source.
 */
4484strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4485 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4487 const char *search_start = str_ptr;
4488 long pos, search_len = str_len - offset;
4492 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4493 if (pos < 0)
return pos;
4495 if (t == search_start + pos)
break;
4496 search_len -= t - search_start;
4497 if (search_len <= 0)
return -1;
4498 offset += t - search_start;
4501 return pos + offset;
/* Front ends to rb_strseq_index(); the last argument is its in_byte flag.
 * With 0 the offset is converted through str_offset() before searching;
 * with 1 it is used as a byte offset as-is. */
4505#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4506#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4509rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4511 const char *str_ptr, *str_ptr_end, *sub_ptr;
4512 long str_len, sub_len;
4515 enc = rb_enc_check(str, sub);
4516 if (is_broken_string(sub))
return -1;
4518 str_ptr = RSTRING_PTR(str);
4520 str_len = RSTRING_LEN(str);
4521 sub_ptr = RSTRING_PTR(sub);
4522 sub_len = RSTRING_LEN(sub);
4524 if (str_len < sub_len)
return -1;
4527 long str_len_char, sub_len_char;
4528 int single_byte = single_byte_optimizable(str);
4529 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4530 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4532 offset += str_len_char;
4533 if (offset < 0)
return -1;
4535 if (str_len_char - offset < sub_len_char)
return -1;
4536 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4539 if (sub_len == 0)
return offset;
4542 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4555rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4562 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4563 long slen = str_strlen(str, enc);
4565 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4577 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4578 enc, single_byte_optimizable(str));
4589 pos = rb_str_index(str, sub, pos);
4603str_ensure_byte_pos(
VALUE str,
long pos)
4605 if (!single_byte_optimizable(str)) {
4606 const char *s = RSTRING_PTR(str);
4608 const char *p = s + pos;
4609 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4611 "offset %ld does not land on character boundary", pos);
4684rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4690 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4691 long slen = RSTRING_LEN(str);
4693 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4704 str_ensure_byte_pos(str, pos);
4716 pos = rb_str_byteindex(str, sub, pos);
4717 if (pos >= 0)
return LONG2NUM(pos);
/*
 * Reverse byte search: scans the +search_len+ bytes starting at
 * +search_str+ from the end towards the beginning and returns a pointer
 * to the last byte equal to +chr+.
 *
 * Each byte is compared as unsigned char, so +chr+ is expected in the
 * range 0..255.
 * NOTE(review): the value returned when no byte matches is not visible
 * in this excerpt.
 */
4724memrchr(
const char *search_str,
int chr,
long search_len)
4726 const char *ptr = search_str + search_len;
4727 while (ptr > search_str) {
/* Pre-decrement: ptr starts one past the last byte. */
4728 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4738 char *hit, *adjusted;
4740 long slen, searchlen;
4743 sbeg = RSTRING_PTR(str);
4744 slen = RSTRING_LEN(sub);
4745 if (slen == 0)
return s - sbeg;
4747 t = RSTRING_PTR(sub);
4749 searchlen = s - sbeg + 1;
4751 if (memcmp(s, t, slen) == 0) {
4756 hit = memrchr(sbeg, c, searchlen);
4759 if (hit != adjusted) {
4760 searchlen = adjusted - sbeg;
4763 if (memcmp(hit, t, slen) == 0)
4765 searchlen = adjusted - sbeg;
4766 }
while (searchlen > 0);
4780 enc = rb_enc_check(str, sub);
4781 if (is_broken_string(sub))
return -1;
4782 singlebyte = single_byte_optimizable(str);
4783 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4784 slen = str_strlen(sub, enc);
4787 if (
len < slen)
return -1;
4788 if (
len - pos < slen) pos =
len - slen;
4789 if (
len == 0)
return pos;
4791 sbeg = RSTRING_PTR(str);
4794 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4800 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4801 return str_rindex(str, sub, s, enc);
4813rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4818 long pos,
len = str_strlen(str, enc);
4820 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4822 if (pos < 0 && (pos +=
len) < 0) {
4828 if (pos >
len) pos =
len;
4836 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4837 enc, single_byte_optimizable(str));
4848 pos = rb_str_rindex(str, sub, pos);
4858rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4864 enc = rb_enc_check(str, sub);
4865 if (is_broken_string(sub))
return -1;
4866 len = RSTRING_LEN(str);
4867 slen = RSTRING_LEN(sub);
4870 if (
len < slen)
return -1;
4871 if (
len - pos < slen) pos =
len - slen;
4872 if (
len == 0)
return pos;
4874 sbeg = RSTRING_PTR(str);
4877 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4884 return str_rindex(str, sub, s, enc);
4974rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4978 long pos,
len = RSTRING_LEN(str);
4980 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4982 if (pos < 0 && (pos +=
len) < 0) {
4988 if (pos >
len) pos =
len;
4994 str_ensure_byte_pos(str, pos);
5006 pos = rb_str_byterindex(str, sub, pos);
5007 if (pos >= 0)
return LONG2NUM(pos);
5049 switch (OBJ_BUILTIN_TYPE(y)) {
5103rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5110 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5141rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5145 re = get_pat(argv[0]);
5146 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5155static enum neighbor_char
5161 if (rb_enc_mbminlen(enc) > 1) {
5163 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5165 return NEIGHBOR_NOT_CHAR;
5167 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5169 if (!l)
return NEIGHBOR_NOT_CHAR;
5170 if (l !=
len)
return NEIGHBOR_WRAPPED;
5171 rb_enc_mbcput(c, p, enc);
5172 r = rb_enc_precise_mbclen(p, p +
len, enc);
5174 return NEIGHBOR_NOT_CHAR;
5176 return NEIGHBOR_FOUND;
5179 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5182 return NEIGHBOR_WRAPPED;
5183 ++((
unsigned char*)p)[i];
5184 l = rb_enc_precise_mbclen(p, p+
len, enc);
5188 return NEIGHBOR_FOUND;
5191 memset(p+l, 0xff,
len-l);
5197 for (len2 =
len-1; 0 < len2; len2--) {
5198 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5202 memset(p+len2+1, 0xff,
len-(len2+1));
5207static enum neighbor_char
5212 if (rb_enc_mbminlen(enc) > 1) {
5214 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5216 return NEIGHBOR_NOT_CHAR;
5218 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5219 if (!c)
return NEIGHBOR_NOT_CHAR;
5222 if (!l)
return NEIGHBOR_NOT_CHAR;
5223 if (l !=
len)
return NEIGHBOR_WRAPPED;
5224 rb_enc_mbcput(c, p, enc);
5225 r = rb_enc_precise_mbclen(p, p +
len, enc);
5227 return NEIGHBOR_NOT_CHAR;
5229 return NEIGHBOR_FOUND;
5232 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5235 return NEIGHBOR_WRAPPED;
5236 --((
unsigned char*)p)[i];
5237 l = rb_enc_precise_mbclen(p, p+
len, enc);
5241 return NEIGHBOR_FOUND;
5244 memset(p+l, 0,
len-l);
5250 for (len2 =
len-1; 0 < len2; len2--) {
5251 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5255 memset(p+len2+1, 0,
len-(len2+1));
5269static enum neighbor_char
5270enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5272 enum neighbor_char ret;
5276 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5280 const int max_gaps = 1;
5282 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5284 ctype = ONIGENC_CTYPE_DIGIT;
5286 ctype = ONIGENC_CTYPE_ALPHA;
5288 return NEIGHBOR_NOT_CHAR;
5291 for (
try = 0;
try <= max_gaps; ++
try) {
5292 ret = enc_succ_char(p,
len, enc);
5293 if (ret == NEIGHBOR_FOUND) {
5294 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5296 return NEIGHBOR_FOUND;
5303 ret = enc_pred_char(p,
len, enc);
5304 if (ret == NEIGHBOR_FOUND) {
5305 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5318 return NEIGHBOR_NOT_CHAR;
5321 if (ctype != ONIGENC_CTYPE_DIGIT) {
5323 return NEIGHBOR_WRAPPED;
5327 enc_succ_char(carry,
len, enc);
5328 return NEIGHBOR_WRAPPED;
5346 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5347 rb_enc_cr_str_copy_for_substr(str, orig);
5348 return str_succ(str);
5355 char *sbeg, *s, *e, *last_alnum = 0;
5356 int found_alnum = 0;
5358 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5359 long carry_pos = 0, carry_len = 1;
5360 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5362 slen = RSTRING_LEN(str);
5363 if (slen == 0)
return str;
5365 enc = STR_ENC_GET(str);
5366 sbeg = RSTRING_PTR(str);
5367 s = e = sbeg + slen;
5369 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5370 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5376 l = rb_enc_precise_mbclen(s, e, enc);
5377 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5378 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5379 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5381 case NEIGHBOR_NOT_CHAR:
5383 case NEIGHBOR_FOUND:
5385 case NEIGHBOR_WRAPPED:
5390 carry_pos = s - sbeg;
5395 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5396 enum neighbor_char neighbor;
5397 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5398 l = rb_enc_precise_mbclen(s, e, enc);
5399 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5400 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5402 neighbor = enc_succ_char(tmp, l, enc);
5404 case NEIGHBOR_FOUND:
5408 case NEIGHBOR_WRAPPED:
5411 case NEIGHBOR_NOT_CHAR:
5414 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5416 enc_succ_char(s, l, enc);
5418 if (!rb_enc_asciicompat(enc)) {
5419 MEMCPY(carry, s,
char, l);
5422 carry_pos = s - sbeg;
5426 RESIZE_CAPA(str, slen + carry_len);
5427 sbeg = RSTRING_PTR(str);
5428 s = sbeg + carry_pos;
5429 memmove(s + carry_len, s, slen - carry_pos);
5430 memmove(s, carry, carry_len);
5432 STR_SET_LEN(str, slen);
5433 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5449rb_str_succ_bang(
VALUE str)
5457all_digits_p(
const char *s,
long len)
5485 VALUE end, exclusive;
5489 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5495 VALUE current, after_end;
5502 enc = rb_enc_check(beg, end);
5503 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5505 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5506 char c = RSTRING_PTR(beg)[0];
5507 char e = RSTRING_PTR(end)[0];
5509 if (c > e || (excl && c == e))
return beg;
5511 VALUE str = rb_enc_str_new(&c, 1, enc);
5513 if ((*each)(str, arg))
break;
5514 if (!excl && c == e)
break;
5516 if (excl && c == e)
break;
5521 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5522 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5523 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5528 b = rb_str_to_inum(beg, 10, FALSE);
5529 e = rb_str_to_inum(end, 10, FALSE);
5536 if (excl && bi == ei)
break;
5537 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5542 ID op = excl ?
'<' : idLE;
5543 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5548 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5549 b = rb_funcallv(b, succ, 0, 0);
5556 if (n > 0 || (excl && n == 0))
return beg;
5558 after_end = rb_funcallv(end, succ, 0, 0);
5563 next = rb_funcallv(current, succ, 0, 0);
5564 if ((*each)(current, arg))
break;
5565 if (
NIL_P(next))
break;
5569 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5584 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5585 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5586 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5588 b = rb_str_to_inum(beg, 10, FALSE);
5594 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5602 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5603 b = rb_funcallv(b, succ, 0, 0);
5609 VALUE next = rb_funcallv(current, succ, 0, 0);
5610 if ((*each)(current, arg))
break;
5613 if (RSTRING_LEN(current) == 0)
5624 if (!
rb_equal(str, *argp))
return 0;
5638 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5639 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5640 rb_enc_asciicompat(STR_ENC_GET(val))) {
5641 const char *bp = RSTRING_PTR(beg);
5642 const char *ep = RSTRING_PTR(end);
5643 const char *vp = RSTRING_PTR(val);
5644 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5645 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5653 if (b <= v && v < e)
return Qtrue;
5654 return RBOOL(!
RTEST(exclusive) && v == e);
5661 all_digits_p(bp, RSTRING_LEN(beg)) &&
5662 all_digits_p(ep, RSTRING_LEN(end))) {
5667 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5669 return RBOOL(
NIL_P(val));
5692 return rb_str_subpat(str, indx,
INT2FIX(0));
5695 if (rb_str_index(str, indx, 0) != -1)
5701 long beg,
len = str_strlen(str, NULL);
5713 return str_substr(str, idx, 1, FALSE);
5730rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5734 return rb_str_subpat(str, argv[0], argv[1]);
5737 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5741 return rb_str_aref(str, argv[0]);
5747 char *ptr = RSTRING_PTR(str);
5748 long olen = RSTRING_LEN(str), nlen;
5750 str_modifiable(str);
5751 if (
len > olen)
len = olen;
5753 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5755 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5757 ptr =
RSTRING(str)->as.embed.ary;
5758 memmove(ptr, oldptr +
len, nlen);
5759 if (fl == STR_NOEMBED)
xfree(oldptr);
5762 if (!STR_SHARED_P(str)) {
5764 rb_enc_cr_str_exact_copy(shared, str);
5769 STR_SET_LEN(str, nlen);
5771 if (!SHARABLE_MIDDLE_SUBSTRING) {
5772 TERM_FILL(ptr + nlen, TERM_LEN(str));
5779rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5785 if (beg == 0 && vlen == 0) {
5790 str_modify_keep_cr(str);
5794 RESIZE_CAPA(str, slen + vlen -
len);
5795 sptr = RSTRING_PTR(str);
5804 memmove(sptr + beg + vlen,
5806 slen - (beg +
len));
5808 if (vlen < beg &&
len < 0) {
5812 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5815 STR_SET_LEN(str, slen);
5816 TERM_FILL(&sptr[slen], TERM_LEN(str));
5823 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5832 int singlebyte = single_byte_optimizable(str);
5838 enc = rb_enc_check(str, val);
5839 slen = str_strlen(str, enc);
5841 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5850 if (
len > slen - beg) {
5853 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5858 beg = p - RSTRING_PTR(str);
5860 rb_str_update_0(str, beg,
len, val);
5861 rb_enc_associate(str, enc);
5872 long start, end,
len;
5882 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5886 nth += regs->num_regs;
5896 enc = rb_enc_check_str(str, val);
5897 rb_str_update_0(str, start,
len, val);
5898 rb_enc_associate(str, enc);
5906 switch (
TYPE(indx)) {
5908 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5912 beg = rb_str_index(str, indx, 0);
5951rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5955 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5963 return rb_str_aset(str, argv[0], argv[1]);
6015rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6023 str_modify_keep_cr(str);
6031 if ((nth += regs->num_regs) <= 0)
return Qnil;
6033 else if (nth >= regs->num_regs)
return Qnil;
6035 len = END(nth) - beg;
6038 else if (argc == 2) {
6047 beg = p - RSTRING_PTR(str);
6051 beg = rb_str_index(str, indx, 0);
6052 if (beg == -1)
return Qnil;
6053 len = RSTRING_LEN(indx);
6065 beg = p - RSTRING_PTR(str);
6074 beg = p - RSTRING_PTR(str);
6078 rb_enc_cr_str_copy_for_substr(result, str);
6086 char *sptr = RSTRING_PTR(str);
6087 long slen = RSTRING_LEN(str);
6088 if (beg +
len > slen)
6092 slen - (beg +
len));
6094 STR_SET_LEN(str, slen);
6095 TERM_FILL(&sptr[slen], TERM_LEN(str));
6106 switch (OBJ_BUILTIN_TYPE(pat)) {
6125get_pat_quoted(
VALUE pat,
int check)
6129 switch (OBJ_BUILTIN_TYPE(pat)) {
6143 if (check && is_broken_string(pat)) {
6150rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6153 pos = rb_str_byteindex(str, pat, pos);
6154 if (set_backref_str) {
6156 str = rb_str_new_frozen_String(str);
6157 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6159 *match = match_data;
6169 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6174rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6176 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6194rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6208 hash = rb_check_hash_type(argv[1]);
6214 pat = get_pat_quoted(argv[0], 1);
6216 str_modifiable(str);
6217 beg = rb_pat_search(pat, str, 0, 1);
6231 end0 = beg0 + RSTRING_LEN(pat);
6240 if (iter || !
NIL_P(hash)) {
6241 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6247 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6250 str_mod_check(str, p,
len);
6251 rb_check_frozen(str);
6257 enc = rb_enc_compatible(str, repl);
6260 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6264 rb_enc_inspect_name(str_enc),
6265 rb_enc_inspect_name(STR_ENC_GET(repl)));
6267 enc = STR_ENC_GET(repl);
6270 rb_enc_associate(str, enc);
6280 rlen = RSTRING_LEN(repl);
6281 len = RSTRING_LEN(str);
6283 RESIZE_CAPA(str,
len + rlen - plen);
6285 p = RSTRING_PTR(str);
6287 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6289 rp = RSTRING_PTR(repl);
6290 memmove(p + beg0, rp, rlen);
6292 STR_SET_LEN(str,
len);
6293 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6316 rb_str_sub_bang(argc, argv, str);
6321str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6324 long beg, beg0, end0;
6325 long offset, blen, slen,
len, last;
6326 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6328 int need_backref_str = -1;
6338 hash = rb_check_hash_type(argv[1]);
6342 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6350 rb_error_arity(argc, 1, 2);
6353 pat = get_pat_quoted(argv[0], 1);
6354 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6357 if (bang)
return Qnil;
6362 blen = RSTRING_LEN(str) + 30;
6364 sp = RSTRING_PTR(str);
6365 slen = RSTRING_LEN(str);
6367 str_enc = STR_ENC_GET(str);
6368 rb_enc_associate(dest, str_enc);
6375 end0 = beg0 + RSTRING_LEN(pat);
6389 struct RString fake_str = {RBASIC_INIT};
6391 if (mode == FAST_MAP) {
6400 val = rb_hash_aref(hash, key);
6403 str_mod_check(str, sp, slen);
6408 else if (need_backref_str) {
6410 if (need_backref_str < 0) {
6411 need_backref_str = val != repl;
6418 len = beg0 - offset;
6432 if (RSTRING_LEN(str) <= end0)
break;
6433 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6435 offset = end0 +
len;
6437 cp = RSTRING_PTR(str) + offset;
6438 if (offset > RSTRING_LEN(str))
break;
6441 if (mode != FAST_MAP && mode != STR) {
6444 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6449 if (RSTRING_LEN(str) > offset) {
6452 rb_pat_search0(pat, str, last, 1, &match);
6454 str_shared_replace(str, dest);
6479rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6481 str_modify_keep_cr(str);
6482 return str_gsub(argc, argv, str, 1);
6532 return str_gsub(argc, argv, str, 0);
6552 str_modifiable(str);
6553 if (str == str2)
return str;
6557 return str_replace(str, str2);
6574rb_str_clear(
VALUE str)
6578 STR_SET_LEN(str, 0);
6579 RSTRING_PTR(str)[0] = 0;
6580 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6596rb_str_chr(
VALUE str)
6614 pos += RSTRING_LEN(str);
6615 if (pos < 0 || RSTRING_LEN(str) <= pos)
6618 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6638 long len = RSTRING_LEN(str);
6639 char *
ptr, *head, *left = 0;
6643 if (pos < -
len ||
len <= pos)
6650 char byte = (char)(
NUM2INT(w) & 0xFF);
6652 if (!str_independent(str))
6653 str_make_independent(str);
6654 enc = STR_ENC_GET(str);
6655 head = RSTRING_PTR(str);
6657 if (!STR_EMBED_P(str)) {
6664 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6672 width = rb_enc_precise_mbclen(left, head+
len, enc);
6674 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6690str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6692 long n = RSTRING_LEN(str);
6694 if (beg > n ||
len < 0)
return Qnil;
6697 if (beg < 0)
return Qnil;
6702 if (!empty)
return Qnil;
6706 VALUE str2 = str_subseq(str, beg,
len);
6708 str_enc_copy_direct(str2, str);
6710 if (RSTRING_LEN(str2) == 0) {
6711 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6745 long beg,
len = RSTRING_LEN(str);
6753 return str_byte_substr(str, beg,
len, TRUE);
6758 return str_byte_substr(str, idx, 1, FALSE);
6770rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6775 return str_byte_substr(str, beg,
len, TRUE);
6778 return str_byte_aref(str, argv[0]);
6782str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6784 long end, slen = RSTRING_LEN(str);
6787 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6796 if (*
len > slen - *beg) {
6800 str_ensure_byte_pos(str, *beg);
6801 str_ensure_byte_pos(str, end);
6815rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6817 long beg,
len, vbeg, vlen;
6822 if (!(argc == 2 || argc == 3 || argc == 5)) {
6823 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6827 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6828 rb_builtin_class_name(argv[0]));
6835 vlen = RSTRING_LEN(val);
6840 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6841 rb_builtin_class_name(argv[2]));
6853 vlen = RSTRING_LEN(val);
6861 str_check_beg_len(str, &beg, &
len);
6862 str_check_beg_len(val, &vbeg, &vlen);
6863 str_modify_keep_cr(str);
6866 rb_enc_associate(str, rb_enc_check(str, val));
6869 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6891rb_str_reverse(
VALUE str)
6898 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6899 enc = STR_ENC_GET(str);
6905 if (RSTRING_LEN(str) > 1) {
6906 if (single_byte_optimizable(str)) {
6913 int clen = rb_enc_fast_mbclen(s, e, enc);
6921 cr = rb_enc_asciicompat(enc) ?
6924 int clen = rb_enc_mbclen(s, e, enc);
6933 STR_SET_LEN(rev, RSTRING_LEN(str));
6934 str_enc_copy_direct(rev, str);
6956rb_str_reverse_bang(
VALUE str)
6958 if (RSTRING_LEN(str) > 1) {
6959 if (single_byte_optimizable(str)) {
6962 str_modify_keep_cr(str);
6963 s = RSTRING_PTR(str);
6972 str_shared_replace(str, rb_str_reverse(str));
6976 str_modify_keep_cr(str);
7005 i = rb_str_index(str, arg, 0);
7007 return RBOOL(i != -1);
7051 rb_raise(rb_eArgError,
"invalid radix %d", base);
7053 return rb_str_to_inum(str, base, FALSE);
7078rb_str_to_f(
VALUE str)
7095rb_str_to_s(
VALUE str)
7107 char s[RUBY_MAX_CHAR_LEN];
7108 int n = rb_enc_codelen(c, enc);
7110 rb_enc_mbcput(c, s, enc);
7115#define CHAR_ESC_LEN 13
7118rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7120 char buf[CHAR_ESC_LEN + 1];
7128 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7130 else if (c < 0x10000) {
7131 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7134 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7139 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7142 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7145 l = (int)strlen(buf);
7151ruby_escaped_char(
int c)
7154 case '\0':
return "\\0";
7155 case '\n':
return "\\n";
7156 case '\r':
return "\\r";
7157 case '\t':
return "\\t";
7158 case '\f':
return "\\f";
7159 case '\013':
return "\\v";
7160 case '\010':
return "\\b";
7161 case '\007':
return "\\a";
7162 case '\033':
return "\\e";
7163 case '\x7f':
return "\\c?";
7169rb_str_escape(
VALUE str)
7173 const char *p = RSTRING_PTR(str);
7175 const char *prev = p;
7176 char buf[CHAR_ESC_LEN + 1];
7178 int unicode_p = rb_enc_unicode_p(enc);
7179 int asciicompat = rb_enc_asciicompat(enc);
7184 int n = rb_enc_precise_mbclen(p, pend, enc);
7186 if (p > prev) str_buf_cat(result, prev, p - prev);
7187 n = rb_enc_mbminlen(enc);
7189 n = (int)(pend - p);
7191 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7192 str_buf_cat(result, buf, strlen(buf));
7198 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7200 cc = ruby_escaped_char(c);
7202 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7203 str_buf_cat(result, cc, strlen(cc));
7206 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7209 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7210 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7214 if (p > prev) str_buf_cat(result, prev, p - prev);
7233 const char *p, *pend, *prev;
7234 char buf[CHAR_ESC_LEN + 1];
7236 rb_encoding *resenc = rb_default_internal_encoding();
7237 int unicode_p = rb_enc_unicode_p(enc);
7238 int asciicompat = rb_enc_asciicompat(enc);
7240 if (resenc == NULL) resenc = rb_default_external_encoding();
7241 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7242 rb_enc_associate(result, resenc);
7243 str_buf_cat2(result,
"\"");
7251 n = rb_enc_precise_mbclen(p, pend, enc);
7253 if (p > prev) str_buf_cat(result, prev, p - prev);
7254 n = rb_enc_mbminlen(enc);
7256 n = (int)(pend - p);
7258 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7259 str_buf_cat(result, buf, strlen(buf));
7265 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7267 if ((asciicompat || unicode_p) &&
7268 (c ==
'"'|| c ==
'\\' ||
7273 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7274 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7275 str_buf_cat2(result,
"\\");
7276 if (asciicompat || enc == resenc) {
7282 case '\n': cc =
'n';
break;
7283 case '\r': cc =
'r';
break;
7284 case '\t': cc =
't';
break;
7285 case '\f': cc =
'f';
break;
7286 case '\013': cc =
'v';
break;
7287 case '\010': cc =
'b';
break;
7288 case '\007': cc =
'a';
break;
7289 case 033: cc =
'e';
break;
7290 default: cc = 0;
break;
7293 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7296 str_buf_cat(result, buf, 2);
7309 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7313 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7314 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7319 if (p > prev) str_buf_cat(result, prev, p - prev);
7320 str_buf_cat2(result,
"\"");
7325#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7338 int encidx = rb_enc_get_index(str);
7341 const char *p, *pend;
7344 int u8 = (encidx == rb_utf8_encindex());
7345 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7348 if (!rb_enc_asciicompat(enc)) {
7350 len += strlen(enc->name);
7353 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7356 unsigned char c = *p++;
7359 case '"':
case '\\':
7360 case '\n':
case '\r':
7361 case '\t':
case '\f':
7362 case '\013':
case '\010':
case '\007':
case '\033':
7367 clen = IS_EVSTR(p, pend) ? 2 : 1;
7375 if (u8 && c > 0x7F) {
7376 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7378 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7381 else if (cc <= 0xFFFFF)
7394 if (clen > LONG_MAX -
len) {
7401 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7402 q = RSTRING_PTR(result); qend = q +
len + 1;
7406 unsigned char c = *p++;
7408 if (c ==
'"' || c ==
'\\') {
7412 else if (c ==
'#') {
7413 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7416 else if (c ==
'\n') {
7420 else if (c ==
'\r') {
7424 else if (c ==
'\t') {
7428 else if (c ==
'\f') {
7432 else if (c ==
'\013') {
7436 else if (c ==
'\010') {
7440 else if (c ==
'\007') {
7444 else if (c ==
'\033') {
7454 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7456 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7459 snprintf(q, qend-q,
"u%04X", cc);
7461 snprintf(q, qend-q,
"u{%X}", cc);
7466 snprintf(q, qend-q,
"x%02X", c);
7472 if (!rb_enc_asciicompat(enc)) {
7473 snprintf(q, qend-q, nonascii_suffix, enc->name);
7474 encidx = rb_ascii8bit_encindex();
7477 rb_enc_associate_index(result, encidx);
7483unescape_ascii(
unsigned int c)
7507undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7509 const char *s = *ss;
7513 unsigned char buf[6];
7531 *buf = unescape_ascii(*s);
7543 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7544 if (*penc != enc_utf8) {
7546 rb_enc_associate(undumped, enc_utf8);
7563 if (hexlen == 0 || hexlen > 6) {
7569 if (0xd800 <= c && c <= 0xdfff) {
7572 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7582 if (0xd800 <= c && c <= 0xdfff) {
7585 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7615static VALUE rb_str_is_ascii_only_p(
VALUE str);
7627str_undump(
VALUE str)
7629 const char *s = RSTRING_PTR(str);
7632 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7634 bool binary =
false;
7638 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7641 if (!str_null_check(str, &w)) {
7644 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7645 if (*s !=
'"')
goto invalid_format;
7663 static const char force_encoding_suffix[] =
".force_encoding(\"";
7664 static const char dup_suffix[] =
".dup";
7665 const char *encname;
7670 size =
sizeof(dup_suffix) - 1;
7671 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7673 size =
sizeof(force_encoding_suffix) - 1;
7674 if (s_end - s <= size)
goto invalid_format;
7675 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7679 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7683 s = memchr(s,
'"', s_end-s);
7685 if (!s)
goto invalid_format;
7686 if (s_end - s != 2)
goto invalid_format;
7687 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7689 encidx = rb_enc_find_index2(encname, (
long)size);
7693 rb_enc_associate_index(undumped, encidx);
7703 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7714 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7720 if (rb_enc_dummy_p(enc)) {
7727str_true_enc(
VALUE str)
7730 rb_str_check_dummy_enc(enc);
7734static OnigCaseFoldType
7735check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7740 rb_raise(rb_eArgError,
"too many options");
7741 if (argv[0]==sym_turkic) {
7742 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7744 if (argv[1]==sym_lithuanian)
7745 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7747 rb_raise(rb_eArgError,
"invalid second option");
7750 else if (argv[0]==sym_lithuanian) {
7751 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7753 if (argv[1]==sym_turkic)
7754 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7756 rb_raise(rb_eArgError,
"invalid second option");
7760 rb_raise(rb_eArgError,
"too many options");
7761 else if (argv[0]==sym_ascii)
7762 flags |= ONIGENC_CASE_ASCII_ONLY;
7763 else if (argv[0]==sym_fold) {
7764 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7765 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7767 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7770 rb_raise(rb_eArgError,
"invalid option");
7777 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
7783#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7784#ifndef CASEMAP_DEBUG
7785# define CASEMAP_DEBUG 0
7793 OnigUChar space[FLEX_ARY_LEN];
7797mapping_buffer_free(
void *p)
7801 while (current_buffer) {
7802 previous_buffer = current_buffer;
7803 current_buffer = current_buffer->next;
7804 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7810 {0, mapping_buffer_free,},
7819 const OnigUChar *source_current, *source_end;
7820 int target_length = 0;
7821 VALUE buffer_anchor;
7824 size_t buffer_count = 0;
7825 int buffer_length_or_invalid;
7827 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7829 source_current = (OnigUChar*)RSTRING_PTR(source);
7834 while (source_current < source_end) {
7836 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7837 if (CASEMAP_DEBUG) {
7838 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7841 *pre_buffer = current_buffer;
7842 pre_buffer = ¤t_buffer->next;
7843 current_buffer->next = NULL;
7844 current_buffer->capa =
capa;
7845 buffer_length_or_invalid = enc->case_map(flags,
7846 &source_current, source_end,
7847 current_buffer->space,
7848 current_buffer->space+current_buffer->capa,
7850 if (buffer_length_or_invalid < 0) {
7851 current_buffer =
DATA_PTR(buffer_anchor);
7853 mapping_buffer_free(current_buffer);
7854 rb_raise(rb_eArgError,
"input string invalid");
7856 target_length += current_buffer->used = buffer_length_or_invalid;
7858 if (CASEMAP_DEBUG) {
7859 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7862 if (buffer_count==1) {
7863 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7866 char *target_current;
7869 target_current = RSTRING_PTR(target);
7870 current_buffer =
DATA_PTR(buffer_anchor);
7871 while (current_buffer) {
7872 memcpy(target_current, current_buffer->space, current_buffer->used);
7873 target_current += current_buffer->used;
7874 current_buffer = current_buffer->next;
7877 current_buffer =
DATA_PTR(buffer_anchor);
7879 mapping_buffer_free(current_buffer);
7884 str_enc_copy_direct(target, source);
7893 const OnigUChar *source_current, *source_end;
7894 OnigUChar *target_current, *target_end;
7895 long old_length = RSTRING_LEN(source);
7896 int length_or_invalid;
7898 if (old_length == 0)
return Qnil;
7900 source_current = (OnigUChar*)RSTRING_PTR(source);
7902 if (source == target) {
7903 target_current = (OnigUChar*)source_current;
7904 target_end = (OnigUChar*)source_end;
7907 target_current = (OnigUChar*)RSTRING_PTR(target);
7911 length_or_invalid = onigenc_ascii_only_case_map(flags,
7912 &source_current, source_end,
7913 target_current, target_end, enc);
7914 if (length_or_invalid < 0)
7915 rb_raise(rb_eArgError,
"input string invalid");
7916 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7917 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7918 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7919 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7920 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7923 str_enc_copy(target, source);
7929upcase_single(
VALUE str)
7931 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7932 bool modified =
false;
7935 unsigned int c = *(
unsigned char*)s;
7937 if (
'a' <= c && c <=
'z') {
7938 *s =
'A' + (c -
'a');
7959rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7962 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7964 flags = check_case_options(argc, argv, flags);
7965 str_modify_keep_cr(str);
7966 enc = str_true_enc(str);
7967 if (case_option_single_p(flags, enc, str)) {
7968 if (upcase_single(str))
7969 flags |= ONIGENC_CASE_MODIFIED;
7971 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7972 rb_str_ascii_casemap(str, str, &flags, enc);
7974 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7976 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7989rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7992 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7995 flags = check_case_options(argc, argv, flags);
7996 enc = str_true_enc(str);
7997 if (case_option_single_p(flags, enc, str)) {
7998 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7999 str_enc_copy_direct(ret, str);
8002 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8004 rb_str_ascii_casemap(str, ret, &flags, enc);
8007 ret = rb_str_casemap(str, &flags, enc);
8014downcase_single(
VALUE str)
8016 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8017 bool modified =
false;
8020 unsigned int c = *(
unsigned char*)s;
8022 if (
'A' <= c && c <=
'Z') {
8023 *s =
'a' + (c -
'A');
8045rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8048 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8050 flags = check_case_options(argc, argv, flags);
8051 str_modify_keep_cr(str);
8052 enc = str_true_enc(str);
8053 if (case_option_single_p(flags, enc, str)) {
8054 if (downcase_single(str))
8055 flags |= ONIGENC_CASE_MODIFIED;
8057 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8058 rb_str_ascii_casemap(str, str, &flags, enc);
8060 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8062 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8076rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8079 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8082 flags = check_case_options(argc, argv, flags);
8083 enc = str_true_enc(str);
8084 if (case_option_single_p(flags, enc, str)) {
8085 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8086 str_enc_copy_direct(ret, str);
8087 downcase_single(ret);
8089 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8091 rb_str_ascii_casemap(str, ret, &flags, enc);
8094 ret = rb_str_casemap(str, &flags, enc);
8114rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8117 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8119 flags = check_case_options(argc, argv, flags);
8120 str_modify_keep_cr(str);
8121 enc = str_true_enc(str);
8122 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8123 if (flags&ONIGENC_CASE_ASCII_ONLY)
8124 rb_str_ascii_casemap(str, str, &flags, enc);
8126 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8128 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8142rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8145 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8148 flags = check_case_options(argc, argv, flags);
8149 enc = str_true_enc(str);
8150 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8151 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8153 rb_str_ascii_casemap(str, ret, &flags, enc);
8156 ret = rb_str_casemap(str, &flags, enc);
8175rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8178 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8180 flags = check_case_options(argc, argv, flags);
8181 str_modify_keep_cr(str);
8182 enc = str_true_enc(str);
8183 if (flags&ONIGENC_CASE_ASCII_ONLY)
8184 rb_str_ascii_casemap(str, str, &flags, enc);
8186 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8188 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8202rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8205 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8208 flags = check_case_options(argc, argv, flags);
8209 enc = str_true_enc(str);
8210 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8211 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8213 rb_str_ascii_casemap(str, ret, &flags, enc);
8216 ret = rb_str_casemap(str, &flags, enc);
8221typedef unsigned char *USTR;
8225 unsigned int now, max;
8237 if (t->p == t->pend)
return -1;
8238 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8241 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8243 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8245 if (t->p < t->pend) {
8246 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8249 if (t->now < 0x80 && c < 0x80) {
8250 rb_raise(rb_eArgError,
8251 "invalid range \"%c-%c\" in string transliteration",
8255 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8259 else if (t->now < c) {
8268 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8269 if (t->now == t->max) {
8274 if (t->now < t->max) {
8290 const unsigned int errc = -1;
8291 unsigned int trans[256];
8293 struct tr trsrc, trrepl;
8295 unsigned int c, c0, last = 0;
8296 int modify = 0, i, l;
8297 unsigned char *s, *send;
8299 int singlebyte = single_byte_optimizable(str);
8303#define CHECK_IF_ASCII(c) \
8304 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8305 (cr = ENC_CODERANGE_VALID) : 0)
8309 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8310 if (RSTRING_LEN(repl) == 0) {
8311 return rb_str_delete_bang(1, &src, str);
8315 e1 = rb_enc_check(str, src);
8316 e2 = rb_enc_check(str, repl);
8321 enc = rb_enc_check(src, repl);
8323 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8324 if (RSTRING_LEN(src) > 1 &&
8325 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8326 trsrc.p + l < trsrc.pend) {
8330 trrepl.p = RSTRING_PTR(repl);
8331 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8332 trsrc.gen = trrepl.gen = 0;
8333 trsrc.now = trrepl.now = 0;
8334 trsrc.max = trrepl.max = 0;
8337 for (i=0; i<256; i++) {
8340 while ((c = trnext(&trsrc, enc)) != errc) {
8345 if (!hash) hash = rb_hash_new();
8349 while ((c = trnext(&trrepl, enc)) != errc)
8352 for (i=0; i<256; i++) {
8353 if (trans[i] != errc) {
8361 for (i=0; i<256; i++) {
8364 while ((c = trnext(&trsrc, enc)) != errc) {
8365 r = trnext(&trrepl, enc);
8366 if (r == errc) r = trrepl.now;
8369 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8372 if (!hash) hash = rb_hash_new();
8380 str_modify_keep_cr(str);
8381 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8382 termlen = rb_enc_mbminlen(enc);
8385 long offset, max = RSTRING_LEN(str);
8386 unsigned int save = -1;
8387 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8392 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8395 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8398 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8400 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8409 if (cflag) c = last;
8412 else if (cflag) c = errc;
8418 if (c != (
unsigned int)-1) {
8424 tlen = rb_enc_codelen(c, enc);
8430 if (enc != e1) may_modify = 1;
8432 if ((offset = t - buf) + tlen > max) {
8433 size_t MAYBE_UNUSED(old) = max + termlen;
8434 max = offset + tlen + (send - s);
8435 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8438 rb_enc_mbcput(c, t, enc);
8439 if (may_modify && memcmp(s, t, tlen) != 0) {
8445 if (!STR_EMBED_P(str)) {
8446 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8448 TERM_FILL((
char *)t, termlen);
8449 RSTRING(str)->as.heap.ptr = (
char *)buf;
8450 STR_SET_LEN(str, t - buf);
8451 STR_SET_NOEMBED(str);
8452 RSTRING(str)->as.heap.aux.capa = max;
8456 c = (
unsigned char)*s;
8457 if (trans[c] != errc) {
8474 long offset, max = (long)((send - s) * 1.2);
8475 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8480 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8483 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8486 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8488 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8496 if (cflag) c = last;
8499 else if (cflag) c = errc;
8503 c = cflag ? last : errc;
8506 tlen = rb_enc_codelen(c, enc);
8511 if (enc != e1) may_modify = 1;
8513 if ((offset = t - buf) + tlen > max) {
8514 size_t MAYBE_UNUSED(old) = max + termlen;
8515 max = offset + tlen + (long)((send - s) * 1.2);
8516 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8520 rb_enc_mbcput(c, t, enc);
8521 if (may_modify && memcmp(s, t, tlen) != 0) {
8529 if (!STR_EMBED_P(str)) {
8530 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8532 TERM_FILL((
char *)t, termlen);
8533 RSTRING(str)->as.heap.ptr = (
char *)buf;
8534 STR_SET_LEN(str, t - buf);
8535 STR_SET_NOEMBED(str);
8536 RSTRING(str)->as.heap.aux.capa = max;
8542 rb_enc_associate(str, enc);
8564 return tr_trans(str, src, repl, 0);
8609 tr_trans(str, src, repl, 0);
8613#define TR_TABLE_MAX (UCHAR_MAX+1)
8614#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8616tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8619 const unsigned int errc = -1;
8620 char buf[TR_TABLE_MAX];
8623 VALUE table = 0, ptable = 0;
8624 int i, l, cflag = 0;
8626 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8627 tr.gen =
tr.now =
tr.max = 0;
8629 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8634 for (i=0; i<TR_TABLE_MAX; i++) {
8637 stable[TR_TABLE_MAX] = cflag;
8639 else if (stable[TR_TABLE_MAX] && !cflag) {
8640 stable[TR_TABLE_MAX] = 0;
8642 for (i=0; i<TR_TABLE_MAX; i++) {
8646 while ((c = trnext(&
tr, enc)) != errc) {
8647 if (c < TR_TABLE_MAX) {
8648 buf[(
unsigned char)c] = !cflag;
8653 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8656 table = ptable ? ptable : rb_hash_new();
8660 table = rb_hash_new();
8665 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8666 rb_hash_aset(table, key,
Qtrue);
8670 for (i=0; i<TR_TABLE_MAX; i++) {
8671 stable[i] = stable[i] && buf[i];
8673 if (!table && !cflag) {
8680tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8682 if (c < TR_TABLE_MAX) {
8683 return table[c] != 0;
8689 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8690 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8694 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8697 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8712rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8714 char squeez[TR_TABLE_SIZE];
8717 VALUE del = 0, nodel = 0;
8719 int i, ascompat, cr;
8721 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8723 for (i=0; i<argc; i++) {
8727 enc = rb_enc_check(str, s);
8728 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8731 str_modify_keep_cr(str);
8732 ascompat = rb_enc_asciicompat(enc);
8733 s = t = RSTRING_PTR(str);
8740 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8751 c = rb_enc_codepoint_len(s, send, &clen, enc);
8753 if (tr_find(c, squeez, del, nodel)) {
8757 if (t != s) rb_enc_mbcput(c, t, enc);
8764 TERM_FILL(t, TERM_LEN(str));
8765 STR_SET_LEN(str, t - RSTRING_PTR(str));
8768 if (modify)
return str;
8782rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8785 rb_str_delete_bang(argc, argv, str);
8803rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8805 char squeez[TR_TABLE_SIZE];
8807 VALUE del = 0, nodel = 0;
8808 unsigned char *s, *send, *t;
8810 int ascompat, singlebyte = single_byte_optimizable(str);
8814 enc = STR_ENC_GET(str);
8817 for (i=0; i<argc; i++) {
8821 enc = rb_enc_check(str, s);
8822 if (singlebyte && !single_byte_optimizable(s))
8824 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8828 str_modify_keep_cr(str);
8829 s = t = (
unsigned char *)RSTRING_PTR(str);
8830 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8833 ascompat = rb_enc_asciicompat(enc);
8837 unsigned int c = *s++;
8838 if (c != save || (argc > 0 && !squeez[c])) {
8848 if (ascompat && (c = *s) < 0x80) {
8849 if (c != save || (argc > 0 && !squeez[c])) {
8855 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8857 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8858 if (t != s) rb_enc_mbcput(c, t, enc);
8867 TERM_FILL((
char *)t, TERM_LEN(str));
8868 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8869 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8873 if (modify)
return str;
8887rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8890 rb_str_squeeze_bang(argc, argv, str);
8910 return tr_trans(str, src, repl, 1);
8938 tr_trans(str, src, repl, 1);
8951rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8953 char table[TR_TABLE_SIZE];
8955 VALUE del = 0, nodel = 0, tstr;
8965 enc = rb_enc_check(str, tstr);
8968 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8969 (ptstr = RSTRING_PTR(tstr),
8970 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8971 !is_broken_string(str)) {
8973 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8975 s = RSTRING_PTR(str);
8976 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8979 if (*(
unsigned char*)s++ == c) n++;
8985 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8986 for (i=1; i<argc; i++) {
8989 enc = rb_enc_check(str, tstr);
8990 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8993 s = RSTRING_PTR(str);
8994 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8996 ascompat = rb_enc_asciicompat(enc);
9000 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9008 c = rb_enc_codepoint_len(s, send, &clen, enc);
9009 if (tr_find(c, table, del, nodel)) {
9020rb_fs_check(
VALUE val)
9024 if (
NIL_P(val))
return 0;
/*
 * Byte-indexed lookup table with one entry per possible unsigned char value.
 * An entry is 1 for bytes 0x09-0x0D (TAB, LF, VT, FF, CR) and for 0x20
 * (space); every other entry, including all bytes >= 0x80, is 0.
 * Each row below covers 16 consecutive byte values.
 */
9029static const char isspacetable[256] = {
9030 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9031 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9032 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9033 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9034 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9035 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9036 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9037 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9038 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9039 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9040 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9041 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9042 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Nonzero when byte `c` is marked as whitespace in isspacetable.
 * The cast to unsigned char keeps the index within 0..255 even when `c`
 * is a plain char holding a negative value.
 */
9048#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9051split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9053 if (empty_count >= 0 &&
len == 0) {
9054 return empty_count + 1;
9056 if (empty_count > 0) {
9061 }
while (--empty_count > 0);
9065 rb_yield(str_new_empty_String(str));
9066 }
while (--empty_count > 0);
9080 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9084literal_split_pattern(
VALUE spat, split_type_t default_type)
9092 return SPLIT_TYPE_CHARS;
9094 else if (rb_enc_asciicompat(enc)) {
9095 if (
len == 1 && ptr[0] ==
' ') {
9096 return SPLIT_TYPE_AWK;
9101 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9102 return SPLIT_TYPE_AWK;
9105 return default_type;
9118rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9123 split_type_t split_type;
9124 long beg, end, i = 0, empty_count = -1;
9129 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9131 if (lim <= 0) limit =
Qnil;
9132 else if (lim == 1) {
9133 if (RSTRING_LEN(str) == 0)
9144 if (
NIL_P(limit) && !lim) empty_count = 0;
9146 enc = STR_ENC_GET(str);
9147 split_type = SPLIT_TYPE_REGEXP;
9149 spat = get_pat_quoted(spat, 0);
9151 else if (
NIL_P(spat = rb_fs)) {
9152 split_type = SPLIT_TYPE_AWK;
9154 else if (!(spat = rb_fs_check(spat))) {
9155 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9160 if (split_type != SPLIT_TYPE_AWK) {
9165 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9166 if (split_type == SPLIT_TYPE_AWK) {
9168 split_type = SPLIT_TYPE_STRING;
9173 mustnot_broken(spat);
9174 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Hands the piece of `str` starting at byte offset `beg` with byte length
 * `len` to split_string(), storing its return value back into the enclosing
 * function's `empty_count`, then calls str_mod_check() with the `str_start`
 * pointer and `str_len` captured by the enclosing function.
 * NOTE(review): str_mod_check presumably detects that `str` was modified
 * during the call (e.g. by a yielded block) -- confirm against its definition.
 * Relies on `result`, `str`, `empty_count`, `str_start` and `str_len` being
 * in scope at the expansion site.
 */
9182#define SPLIT_STR(beg, len) ( \
9183 empty_count = split_string(result, str, beg, len, empty_count), \
9184 str_mod_check(str, str_start, str_len))
9187 char *ptr = RSTRING_PTR(str);
9188 char *
const str_start = ptr;
9189 const long str_len = RSTRING_LEN(str);
9190 char *
const eptr = str_start + str_len;
9191 if (split_type == SPLIT_TYPE_AWK) {
9198 if (is_ascii_string(str)) {
9199 while (ptr < eptr) {
9200 c = (
unsigned char)*ptr++;
9202 if (ascii_isspace(c)) {
9208 if (!
NIL_P(limit) && lim <= i)
break;
9211 else if (ascii_isspace(c)) {
9212 SPLIT_STR(beg, end-beg);
9215 if (!
NIL_P(limit)) ++i;
9223 while (ptr < eptr) {
9226 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9235 if (!
NIL_P(limit) && lim <= i)
break;
9239 SPLIT_STR(beg, end-beg);
9242 if (!
NIL_P(limit)) ++i;
9250 else if (split_type == SPLIT_TYPE_STRING) {
9251 char *substr_start = ptr;
9252 char *sptr = RSTRING_PTR(spat);
9253 long slen = RSTRING_LEN(spat);
9256 mustnot_broken(str);
9257 enc = rb_enc_check(str, spat);
9258 while (ptr < eptr &&
9259 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9262 if (t != ptr + end) {
9266 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9267 str_mod_check(spat, sptr, slen);
9270 if (!
NIL_P(limit) && lim <= ++i)
break;
9272 beg = ptr - str_start;
9274 else if (split_type == SPLIT_TYPE_CHARS) {
9278 mustnot_broken(str);
9279 enc = rb_enc_get(str);
9280 while (ptr < eptr &&
9281 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9282 SPLIT_STR(ptr - str_start, n);
9284 if (!
NIL_P(limit) && lim <= ++i)
break;
9286 beg = ptr - str_start;
9290 long len = RSTRING_LEN(str);
9298 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9303 if (start == end && BEG(0) == END(0)) {
9308 else if (last_null == 1) {
9309 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9316 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9322 SPLIT_STR(beg, end-beg);
9323 beg = start = END(0);
9327 for (idx=1; idx < regs->num_regs; idx++) {
9328 if (BEG(idx) == -1)
continue;
9329 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9331 if (!
NIL_P(limit) && lim <= ++i)
break;
9333 if (match) rb_match_unbusy(match);
9335 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9336 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9339 return result ? result : str;
9349 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to a new array created with rb_ary_new_capa(size) when
 * rb_block_given_p() is false, and to 0 when a block is given.
 * The `m` argument does not appear in the expansion.
 */
9352#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
/*
 * Shorthand that forwards both arguments to enumerator_element().
 * NOTE(review): callers test the result in conditions; its exact meaning is
 * defined by enumerator_element, which is not visible here -- confirm there.
 */
9367#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9370chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9372 const char *prev = rb_enc_prev_char(p, e, e, enc);
9375 prev = rb_enc_prev_char(p, e, e, enc);
9376 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9388 RSTRING_LEN(rs) != 1 ||
9389 RSTRING_PTR(rs)[0] !=
'\n')) {
/*
 * From this point on, every use of `rb_rs` expands to a call to get_rs()
 * instead of naming an object directly.
 */
9395#define rb_rs get_rs()
9402 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9403 long pos,
len, rslen;
9409 static ID keywords[1];
9414 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9418 if (!ENUM_ELEM(ary, str)) {
9426 if (!RSTRING_LEN(str))
goto end;
9428 ptr = subptr = RSTRING_PTR(str);
9430 len = RSTRING_LEN(str);
9432 rslen = RSTRING_LEN(rs);
9435 enc = rb_enc_get(str);
9437 enc = rb_enc_check(str, rs);
9442 const char *eol = NULL;
9444 while (subend < pend) {
9445 long chomp_rslen = 0;
9447 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9449 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9451 if (eol == subend)
break;
9455 chomp_rslen = -rslen;
9459 if (!subptr) subptr = subend;
9463 }
while (subend < pend);
9465 if (rslen == 0) chomp_rslen = 0;
9467 subend - subptr + (chomp ? chomp_rslen : rslen));
9468 if (ENUM_ELEM(ary, line)) {
9469 str_mod_check(str, ptr,
len);
9471 subptr = eol = NULL;
9476 rsptr = RSTRING_PTR(rs);
9477 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9486 rsptr = RSTRING_PTR(rs);
9487 rslen = RSTRING_LEN(rs);
9490 while (subptr < pend) {
9491 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9495 if (hit != adjusted) {
9499 subend = hit += rslen;
9502 subend = chomp_newline(subptr, subend, enc);
9509 if (ENUM_ELEM(ary, line)) {
9510 str_mod_check(str, ptr,
len);
9515 if (subptr != pend) {
9518 pend = chomp_newline(subptr, pend, enc);
9520 else if (pend - subptr >= rslen &&
9521 memcmp(pend - rslen, rsptr, rslen) == 0) {
9526 ENUM_ELEM(ary, line);
9547rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9550 return rb_str_enumerate_lines(argc, argv, str, 0);
9605rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9607 VALUE ary = WANTARRAY(
"lines", 0);
9608 return rb_str_enumerate_lines(argc, argv, str, ary);
9622 for (i=0; i<RSTRING_LEN(str); i++) {
9623 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9641rb_str_each_byte(
VALUE str)
9644 return rb_str_enumerate_bytes(str, 0);
9656rb_str_bytes(
VALUE str)
9658 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9659 return rb_str_enumerate_bytes(str, ary);
9677 ptr = RSTRING_PTR(str);
9678 len = RSTRING_LEN(str);
9679 enc = rb_enc_get(str);
9682 for (i = 0; i <
len; i += n) {
9683 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9688 for (i = 0; i <
len; i += n) {
9689 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9710rb_str_each_char(
VALUE str)
9713 return rb_str_enumerate_chars(str, 0);
9725rb_str_chars(
VALUE str)
9728 return rb_str_enumerate_chars(str, ary);
9732rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9737 const char *ptr, *end;
9740 if (single_byte_optimizable(str))
9741 return rb_str_enumerate_bytes(str, ary);
9744 ptr = RSTRING_PTR(str);
9746 enc = STR_ENC_GET(str);
9749 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9770rb_str_each_codepoint(
VALUE str)
9773 return rb_str_enumerate_codepoints(str, 0);
9785rb_str_codepoints(
VALUE str)
9788 return rb_str_enumerate_codepoints(str, ary);
9794 int encidx = rb_enc_to_index(enc);
9796 const OnigUChar source_ascii[] =
"\\X";
9797 const OnigUChar *source = source_ascii;
9798 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Each macro expands to a comma-separated list of OnigUChar values spelling
 * out `x` in a fixed byte order; CASE_UTF uses them inside array initializers.
 *   CHARS_16BE: (x >> 8) first, then x   -- high byte first.
 *   CHARS_16LE: x first, then (x >> 8)   -- low byte first.
 *   CHARS_32BE: upper 16 bits (x >> 16) then lower 16 bits, each via CHARS_16BE.
 *   CHARS_32LE: lower 16 bits then upper 16 bits (x >> 16), each via CHARS_16LE.
 * Each element is reduced to a single unit by the (OnigUChar) cast
 * (assumes OnigUChar is a one-byte type -- TODO confirm).
 * `x` is evaluated more than once, so pass only side-effect-free expressions.
 */
9801#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9802#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9803#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9804#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9805#define CASE_UTF(e) \
9806 case ENCINDEX_UTF_##e: { \
9807 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9808 source = source_UTF_##e; \
9809 source_len = sizeof(source_UTF_##e); \
9812 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9820 regex_t *reg_grapheme_cluster;
9822 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9823 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9825 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9826 onig_error_code_to_str(message, r, &einfo);
9827 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9830 return reg_grapheme_cluster;
9836 int encidx = rb_enc_to_index(enc);
9837 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9839 if (encidx == rb_utf8_encindex()) {
9840 if (!reg_grapheme_cluster_utf8) {
9841 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9844 return reg_grapheme_cluster_utf8;
9853 size_t grapheme_cluster_count = 0;
9855 const char *ptr, *end;
9857 if (!rb_enc_unicode_p(enc)) {
9861 bool cached_reg_grapheme_cluster =
true;
9862 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9863 if (!reg_grapheme_cluster) {
9864 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9865 cached_reg_grapheme_cluster =
false;
9868 ptr = RSTRING_PTR(str);
9872 OnigPosition
len = onig_match(reg_grapheme_cluster,
9873 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9874 (
const OnigUChar *)ptr, NULL, 0);
9875 if (
len <= 0)
break;
9876 grapheme_cluster_count++;
9880 if (!cached_reg_grapheme_cluster) {
9881 onig_free(reg_grapheme_cluster);
9884 return SIZET2NUM(grapheme_cluster_count);
9888rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9892 const char *ptr0, *ptr, *end;
9894 if (!rb_enc_unicode_p(enc)) {
9895 return rb_str_enumerate_chars(str, ary);
9900 bool cached_reg_grapheme_cluster =
true;
9901 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9902 if (!reg_grapheme_cluster) {
9903 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9904 cached_reg_grapheme_cluster =
false;
9907 ptr0 = ptr = RSTRING_PTR(str);
9911 OnigPosition
len = onig_match(reg_grapheme_cluster,
9912 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9913 (
const OnigUChar *)ptr, NULL, 0);
9914 if (
len <= 0)
break;
9919 if (!cached_reg_grapheme_cluster) {
9920 onig_free(reg_grapheme_cluster);
9940rb_str_each_grapheme_cluster(
VALUE str)
9943 return rb_str_enumerate_grapheme_clusters(str, 0);
9955rb_str_grapheme_clusters(
VALUE str)
9958 return rb_str_enumerate_grapheme_clusters(str, ary);
9962chopped_length(
VALUE str)
9965 const char *p, *p2, *beg, *end;
9967 beg = RSTRING_PTR(str);
9968 end = beg + RSTRING_LEN(str);
9969 if (beg >= end)
return 0;
9970 p = rb_enc_prev_char(beg, end, end, enc);
9972 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9973 p2 = rb_enc_prev_char(beg, p, end, enc);
9974 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9992rb_str_chop_bang(
VALUE str)
9994 str_modify_keep_cr(str);
9995 if (RSTRING_LEN(str) > 0) {
9997 len = chopped_length(str);
9998 STR_SET_LEN(str,
len);
9999 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10018rb_str_chop(
VALUE str)
10024smart_chomp(
VALUE str,
const char *e,
const char *p)
10027 if (rb_enc_mbminlen(enc) > 1) {
10032 pp = e - rb_enc_mbminlen(enc);
10035 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10043 if (--e > p && *(e-1) ==
'\r') {
10060 char *pp, *e, *rsptr;
10062 char *
const p = RSTRING_PTR(str);
10063 long len = RSTRING_LEN(str);
10065 if (
len == 0)
return 0;
10068 return smart_chomp(str, e, p);
10071 enc = rb_enc_get(str);
10074 if (rb_enc_mbminlen(enc) > 1) {
10079 pp -= rb_enc_mbminlen(enc);
10082 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10089 while (e > p && *(e-1) ==
'\n') {
10091 if (e > p && *(e-1) ==
'\r')
10097 if (rslen >
len)
return len;
10099 enc = rb_enc_get(rs);
10100 newline = rsptr[rslen-1];
10101 if (rslen == rb_enc_mbminlen(enc)) {
10103 if (newline ==
'\n')
10104 return smart_chomp(str, e, p);
10108 return smart_chomp(str, e, p);
10112 enc = rb_enc_check(str, rs);
10113 if (is_broken_string(rs)) {
10117 if (p[
len-1] == newline &&
10119 memcmp(rsptr, pp, rslen) == 0)) {
10120 if (at_char_boundary(p, pp, e, enc))
10121 return len - rslen;
10133chomp_rs(
int argc,
const VALUE *argv)
10137 VALUE rs = argv[0];
10149 long olen = RSTRING_LEN(str);
10150 long len = chompped_length(str, rs);
10151 if (
len >= olen)
return Qnil;
10152 str_modify_keep_cr(str);
10153 STR_SET_LEN(str,
len);
10154 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10174rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10177 str_modifiable(str);
10178 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10179 rs = chomp_rs(argc, argv);
10181 return rb_str_chomp_string(str, rs);
10194rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10196 VALUE rs = chomp_rs(argc, argv);
10202tr_setup_table_multi(
char table[TR_TABLE_SIZE],
VALUE *tablep,
VALUE *ctablep,
10203 VALUE str,
int num_selectors,
VALUE *selectors)
10207 for (i=0; i<num_selectors; i++) {
10208 VALUE selector = selectors[i];
10212 enc = rb_enc_check(str, selector);
10213 tr_setup_table(selector, table, i==0, tablep, ctablep, enc);
10220 const char *
const start = s;
10222 if (!s || s >= e)
return 0;
10225 if (single_byte_optimizable(str)) {
10226 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10231 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10241lstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10242 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10244 const char *
const start = s;
10246 if (!s || s >= e)
return 0;
10251 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10253 if (!tr_find(cc, table, del, nodel))
break;
10272rb_str_lstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10276 long olen, loffset;
10278 str_modify_keep_cr(str);
10279 enc = STR_ENC_GET(str);
10282 char table[TR_TABLE_SIZE];
10283 VALUE del = 0, nodel = 0;
10285 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10286 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10289 loffset = lstrip_offset(str, start, start+olen, enc);
10293 long len = olen-loffset;
10294 s = start + loffset;
10295 memmove(start, s,
len);
10296 STR_SET_LEN(str,
len);
10297 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10332rb_str_lstrip(
int argc,
VALUE *argv,
VALUE str)
10339 char table[TR_TABLE_SIZE];
10340 VALUE del = 0, nodel = 0;
10342 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10343 loffset = lstrip_offset_table(str, start, start+
len, STR_ENC_GET(str), table, del, nodel);
10346 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10348 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10357 rb_str_check_dummy_enc(enc);
10361 if (!s || s >= e)
return 0;
10365 if (single_byte_optimizable(str)) {
10367 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10372 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10382rstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10383 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10388 rb_str_check_dummy_enc(enc);
10392 if (!s || s >= e)
return 0;
10396 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10398 if (!tr_find(c, table, del, nodel))
break;
10418rb_str_rstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10422 long olen, roffset;
10424 str_modify_keep_cr(str);
10425 enc = STR_ENC_GET(str);
10428 char table[TR_TABLE_SIZE];
10429 VALUE del = 0, nodel = 0;
10431 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10432 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10435 roffset = rstrip_offset(str, start, start+olen, enc);
10438 long len = olen - roffset;
10440 STR_SET_LEN(str,
len);
10441 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10475rb_str_rstrip(
int argc,
VALUE *argv,
VALUE str)
10479 long olen, roffset;
10481 enc = STR_ENC_GET(str);
10484 char table[TR_TABLE_SIZE];
10485 VALUE del = 0, nodel = 0;
10487 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10488 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10491 roffset = rstrip_offset(str, start, start+olen, enc);
10493 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10511rb_str_strip_bang(
int argc,
VALUE *argv,
VALUE str)
10514 long olen, loffset, roffset;
10517 str_modify_keep_cr(str);
10518 enc = STR_ENC_GET(str);
10522 char table[TR_TABLE_SIZE];
10523 VALUE del = 0, nodel = 0;
10525 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10526 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10527 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10530 loffset = lstrip_offset(str, start, start+olen, enc);
10531 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10534 if (loffset > 0 || roffset > 0) {
10535 long len = olen-roffset;
10538 memmove(start, start + loffset,
len);
10540 STR_SET_LEN(str,
len);
10541 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10576rb_str_strip(
int argc,
VALUE *argv,
VALUE str)
10579 long olen, loffset, roffset;
10585 char table[TR_TABLE_SIZE];
10586 VALUE del = 0, nodel = 0;
10588 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10589 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10590 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10593 loffset = lstrip_offset(str, start, start+olen, enc);
10594 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10597 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10602scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10605 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10611 end = pos + RSTRING_LEN(pat);
10625 if (RSTRING_LEN(str) > end)
10626 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10635 if (!regs || regs->num_regs == 1) {
10641 for (
int i = 1; i < regs->num_regs; i++) {
10672 long last = -1, prev = 0;
10673 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10675 pat = get_pat_quoted(pat, 1);
10676 mustnot_broken(str);
10680 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10685 if (last >= 0) rb_pat_search(pat, str, last, 1);
10690 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10694 str_mod_check(str, p,
len);
10696 if (last >= 0) rb_pat_search(pat, str, last, 1);
10748rb_str_hex(
VALUE str)
10750 return rb_str_to_inum(str, 16, FALSE);
10834rb_str_oct(
VALUE str)
10836 return rb_str_to_inum(str, -8, FALSE);
10839#ifndef HAVE_CRYPT_R
10844 rb_nativethread_lock_t lock;
10845} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10914# define CRYPT_END() ALLOCV_END(databuf)
10917 extern char *crypt(
const char *,
const char *);
10918# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10921 const char *s, *saltp;
10924 char salt_8bit_clean[3];
10928 mustnot_wchar(str);
10929 mustnot_wchar(salt);
10931 saltp = RSTRING_PTR(salt);
10932 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10933 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10937 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10938 salt_8bit_clean[0] = saltp[0] & 0x7f;
10939 salt_8bit_clean[1] = saltp[1] & 0x7f;
10940 salt_8bit_clean[2] =
'\0';
10941 saltp = salt_8bit_clean;
10946# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10947 data->initialized = 0;
10949 res = crypt_r(s, saltp, data);
10952 res = crypt(s, saltp);
10967 size_t res_size = strlen(res)+1;
10968 tmp_buf =
ALLOCA_N(
char, res_size);
10969 memcpy(tmp_buf, res, res_size);
11006 char *ptr, *p, *pend;
11009 unsigned long sum0 = 0;
11014 ptr = p = RSTRING_PTR(str);
11015 len = RSTRING_LEN(str);
11021 str_mod_check(str, ptr,
len);
11024 sum0 += (
unsigned char)*p;
11035 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11036 sum0 &= (((
unsigned long)1)<<bits)-1;
11056rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11060 long width,
len, flen = 1, fclen = 1;
11063 const char *f =
" ";
11064 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11066 int singlebyte = 1, cr;
11070 enc = STR_ENC_GET(str);
11071 termlen = rb_enc_mbminlen(enc);
11075 enc = rb_enc_check(str, pad);
11076 f = RSTRING_PTR(pad);
11077 flen = RSTRING_LEN(pad);
11078 fclen = str_strlen(pad, enc);
11079 singlebyte = single_byte_optimizable(pad);
11080 if (flen == 0 || fclen == 0) {
11081 rb_raise(rb_eArgError,
"zero width padding");
11084 len = str_strlen(str, enc);
11085 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11087 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11091 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11092 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11094 size = RSTRING_LEN(str);
11095 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11096 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11097 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11098 rb_raise(rb_eArgError,
"argument too big");
11102 p = RSTRING_PTR(res);
11104 memset(p, *f, llen);
11108 while (llen >= fclen) {
11114 memcpy(p, f, llen2);
11118 memcpy(p, RSTRING_PTR(str), size);
11121 memset(p, *f, rlen);
11125 while (rlen >= fclen) {
11131 memcpy(p, f, rlen2);
11135 TERM_FILL(p, termlen);
11136 STR_SET_LEN(res, p-RSTRING_PTR(res));
11157rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11159 return rb_str_justify(argc, argv, str,
'l');
11171rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11173 return rb_str_justify(argc, argv, str,
'r');
11186rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11188 return rb_str_justify(argc, argv, str,
'c');
11204 sep = get_pat_quoted(sep, 0);
11216 pos = rb_str_index(str, sep, 0);
11217 if (pos < 0)
goto failed;
11222 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11225 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11239 long pos = RSTRING_LEN(str);
11241 sep = get_pat_quoted(sep, 0);
11254 pos = rb_str_rindex(str, sep, pos);
11263 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11265 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11277rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11281 for (i=0; i<argc; i++) {
11282 VALUE tmp = argv[i];
11284 if (rb_reg_start_with_p(tmp, str))
11288 const char *p, *s, *e;
11293 enc = rb_enc_check(str, tmp);
11294 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11295 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11296 p = RSTRING_PTR(str);
11299 if (!at_char_right_boundary(p, s, e, enc))
11301 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11317rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11321 for (i=0; i<argc; i++) {
11322 VALUE tmp = argv[i];
11323 const char *p, *s, *e;
11328 enc = rb_enc_check(str, tmp);
11329 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11330 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11331 p = RSTRING_PTR(str);
11334 if (!at_char_boundary(p, s, e, enc))
11336 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11352deleted_prefix_length(
VALUE str,
VALUE prefix)
11354 const char *strptr, *prefixptr;
11355 long olen, prefixlen;
11360 if (!is_broken_string(prefix) ||
11361 !rb_enc_asciicompat(enc) ||
11362 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11363 enc = rb_enc_check(str, prefix);
11367 prefixlen = RSTRING_LEN(prefix);
11368 if (prefixlen <= 0)
return 0;
11369 olen = RSTRING_LEN(str);
11370 if (olen < prefixlen)
return 0;
11371 strptr = RSTRING_PTR(str);
11372 prefixptr = RSTRING_PTR(prefix);
11373 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11374 if (is_broken_string(prefix)) {
11375 if (!is_broken_string(str)) {
11379 const char *strend = strptr + olen;
11380 const char *after_prefix = strptr + prefixlen;
11381 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11402rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11405 str_modify_keep_cr(str);
11407 prefixlen = deleted_prefix_length(str, prefix);
11408 if (prefixlen <= 0)
return Qnil;
11422rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11426 prefixlen = deleted_prefix_length(str, prefix);
11427 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11429 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11442deleted_suffix_length(
VALUE str,
VALUE suffix)
11444 const char *strptr, *suffixptr;
11445 long olen, suffixlen;
11449 if (is_broken_string(suffix))
return 0;
11450 enc = rb_enc_check(str, suffix);
11453 suffixlen = RSTRING_LEN(suffix);
11454 if (suffixlen <= 0)
return 0;
11455 olen = RSTRING_LEN(str);
11456 if (olen < suffixlen)
return 0;
11457 strptr = RSTRING_PTR(str);
11458 suffixptr = RSTRING_PTR(suffix);
11459 const char *strend = strptr + olen;
11460 const char *before_suffix = strend - suffixlen;
11461 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11462 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11478rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11480 long olen, suffixlen,
len;
11481 str_modifiable(str);
11483 suffixlen = deleted_suffix_length(str, suffix);
11484 if (suffixlen <= 0)
return Qnil;
11486 olen = RSTRING_LEN(str);
11487 str_modify_keep_cr(str);
11488 len = olen - suffixlen;
11489 STR_SET_LEN(str,
len);
11490 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11506rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11510 suffixlen = deleted_suffix_length(str, suffix);
11511 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11513 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11520 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11526nil_setter_warning(
ID id)
11528 rb_warn_deprecated(
"non-nil '%"PRIsVALUE
"'", NULL, rb_id2str(
id));
11535 if (!
NIL_P(*var)) {
11536 nil_setter_warning(
id);
11543 val = rb_fs_check(val);
11546 "value of %"PRIsVALUE
" must be String or Regexp",
11550 nil_setter_warning(
id);
11567 str_modifiable(str);
11570 int idx = rb_enc_to_index(encoding);
11577 rb_enc_associate_index(str, idx);
11601 if (STR_EMBED_P(str)) {
11602 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11607 str_replace_shared_without_enc(str2, str);
11609 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11639rb_str_valid_encoding_p(
VALUE str)
11659rb_str_is_ascii_only_p(
VALUE str)
11669 static const char ellipsis[] =
"...";
11670 const long ellipsislen =
sizeof(ellipsis) - 1;
11672 const long blen = RSTRING_LEN(str);
11673 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11674 VALUE estr, ret = 0;
11677 if (
len * rb_enc_mbminlen(enc) >= blen ||
11681 else if (
len <= ellipsislen ||
11683 if (rb_enc_asciicompat(enc)) {
11685 rb_enc_associate(ret, enc);
11692 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11697 rb_enc_from_encoding(enc), 0,
Qnil);
11710 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11716 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11735 if (enc == STR_ENC_GET(str)) {
11740 return enc_str_scrub(enc, str, repl, cr);
11748 const char *rep, *p, *e, *p1, *sp;
11754 rb_raise(rb_eArgError,
"both of block and replacement given");
11761 if (!
NIL_P(repl)) {
11762 repl = str_compat_and_valid(repl, enc);
11765 if (rb_enc_dummy_p(enc)) {
11768 encidx = rb_enc_to_index(enc);
11770#define DEFAULT_REPLACE_CHAR(str) do { \
11771 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11772 rep = replace; replen = (int)sizeof(replace); \
11775 slen = RSTRING_LEN(str);
11776 p = RSTRING_PTR(str);
11781 if (rb_enc_asciicompat(enc)) {
11787 else if (!
NIL_P(repl)) {
11788 rep = RSTRING_PTR(repl);
11789 replen = RSTRING_LEN(repl);
11792 else if (encidx == rb_utf8_encindex()) {
11793 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11797 DEFAULT_REPLACE_CHAR(
"?");
11802 p = search_nonascii(p, e);
11807 int ret = rb_enc_precise_mbclen(p, e, enc);
11826 if (e - p < clen) clen = e - p;
11833 for (; clen > 1; clen--) {
11834 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11845 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11846 str_mod_check(str, sp, slen);
11847 repl = str_compat_and_valid(repl, enc);
11854 p = search_nonascii(p, e);
11880 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11881 str_mod_check(str, sp, slen);
11882 repl = str_compat_and_valid(repl, enc);
11891 long mbminlen = rb_enc_mbminlen(enc);
11895 else if (!
NIL_P(repl)) {
11896 rep = RSTRING_PTR(repl);
11897 replen = RSTRING_LEN(repl);
11899 else if (encidx == ENCINDEX_UTF_16BE) {
11900 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11902 else if (encidx == ENCINDEX_UTF_16LE) {
11903 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11905 else if (encidx == ENCINDEX_UTF_32BE) {
11906 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11908 else if (encidx == ENCINDEX_UTF_32LE) {
11909 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11912 DEFAULT_REPLACE_CHAR(
"?");
11916 int ret = rb_enc_precise_mbclen(p, e, enc);
11929 if (e - p < clen) clen = e - p;
11930 if (clen <= mbminlen * 2) {
11935 for (; clen > mbminlen; clen-=mbminlen) {
11936 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11946 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11947 str_mod_check(str, sp, slen);
11948 repl = str_compat_and_valid(repl, enc);
11973 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11974 str_mod_check(str, sp, slen);
11975 repl = str_compat_and_valid(repl, enc);
12015str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
12023static ID id_normalize;
12024static ID id_normalized_p;
12025static VALUE mUnicodeNormalize;
12028unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12030 static int UnicodeNormalizeRequired = 0;
12033 if (!UnicodeNormalizeRequired) {
12034 rb_require(
"unicode_normalize/normalize.rb");
12035 UnicodeNormalizeRequired = 1;
12039 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12050rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12052 return unicode_normalize_common(argc, argv, str, id_normalize);
12066rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12068 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12095rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12097 return unicode_normalize_common(argc, argv, str, id_normalized_p);
/* sym_equal is just another name for rb_obj_equal; no separate function exists. */
12229#define sym_equal rb_obj_equal
12232sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12236 int c = rb_enc_precise_mbclen(s, send, enc);
12240 c = rb_enc_mbc_to_codepoint(s, send, enc);
12248rb_str_symname_p(
VALUE sym)
12253 rb_encoding *resenc = rb_default_internal_encoding();
12255 if (resenc == NULL) resenc = rb_default_external_encoding();
12256 enc = STR_ENC_GET(sym);
12257 ptr = RSTRING_PTR(sym);
12258 len = RSTRING_LEN(sym);
12259 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12267rb_str_quote_unprintable(
VALUE str)
12275 resenc = rb_default_internal_encoding();
12276 if (resenc == NULL) resenc = rb_default_external_encoding();
12277 enc = STR_ENC_GET(str);
12278 ptr = RSTRING_PTR(str);
12279 len = RSTRING_LEN(str);
12280 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12281 !sym_printable(ptr, ptr +
len, enc)) {
12282 return rb_str_escape(str);
12288rb_id_quote_unprintable(
ID id)
12290 VALUE str = rb_id2str(
id);
12291 if (!rb_str_symname_p(str)) {
12292 return rb_str_escape(str);
12310sym_inspect(
VALUE sym)
12317 if (!rb_str_symname_p(str)) {
12319 len = RSTRING_LEN(str);
12320 rb_str_resize(str,
len + 1);
12321 dest = RSTRING_PTR(str);
12322 memmove(dest + 1, dest,
len);
12326 VALUE orig_str = str;
12328 len = RSTRING_LEN(orig_str);
12329 str = rb_enc_str_new(0,
len + 1, enc);
12332 ptr = RSTRING_PTR(orig_str);
12333 dest = RSTRING_PTR(str);
12334 memcpy(dest + 1, ptr,
len);
12354rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12359 rb_raise(rb_eArgError,
"no receiver given");
12462 return rb_str_match(
rb_sym2str(sym), other);
12477sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12479 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12492sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12494 return rb_str_match_m_p(argc, argv, sym);
12512 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12523sym_length(
VALUE sym)
12537sym_empty(
VALUE sym)
12571sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12587sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12603sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12617sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12619 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12632sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12634 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12646sym_encoding(
VALUE sym)
12652string_for_symbol(
VALUE name)
12657 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12671 name = string_for_symbol(name);
12672 return rb_intern_str(name);
12681 name = string_for_symbol(name);
12705 return rb_fstring(str);
12711 struct RString fake_str = {RBASIC_INIT};
12712 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12724 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12725 rb_enc_autoload(enc);
12728 struct RString fake_str = {RBASIC_INIT};
12729 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12735 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12736 rb_enc_autoload(enc);
12739 struct RString fake_str = {RBASIC_INIT};
12740 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12751#if USE_YJIT || USE_ZJIT
12753rb_jit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12758 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12759 rb_str_buf_cat_byte(str, (
char) code);
12769fstring_set_class_i(
VALUE *str,
void *data)
12773 return ST_CONTINUE;
12781 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12948 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_cObject
Object class.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern_str(), except it tries to convert the parameter object to an instance of rb_cS...
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define RUBY_TYPED_FREE_IMMEDIATELY
Macros to see if each corresponding flag is defined.
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@52 as
String's specific fields.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
long len
Length of the string, not including terminating NUL character.
struct RString::@52::@54 embed
Embedded contents.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
struct RString::@52::@53 heap
Strings that use separated memory region for contents use this pattern.
union RString::@52::@53::@55 aux
Auxiliary info.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.