14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
/* NOTE(review): presumably an upper bound on the byte length of one character; no use is visible in this excerpt -- confirm. */
131#define RUBY_MAX_CHAR_LEN 16
/* Set by str_store_precomputed_hash() after a hash value has been copied in behind the terminator. */
132#define STR_PRECOMPUTED_HASH FL_USER4
/* Set by STR_SET_SHARED() on the string whose buffer is being shared. */
133#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED() on a shared root whose RBASIC_CLASS is 0. */
134#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(); one of the bits in STR_UNMODIFIABLE_MASK. */
135#define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); the code paths that free a heap buffer skip strings carrying it. */
136#define STR_NOFREE FL_USER18
/* STR_SET_SHARED() tests this flag on the target string before doing its work. */
137#define STR_FAKESTR FL_USER19
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
150str_encindex_fastpath(
int encindex)
154 case ENCINDEX_ASCII_8BIT:
156 case ENCINDEX_US_ASCII:
164str_enc_fastpath(
VALUE str)
/*
 * Terminator length in bytes for str: 1 when str_enc_fastpath(str) holds,
 * otherwise rb_enc_mbminlen() of the encoding selected by ENCODING_GET(str).
 */
169#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
/*
 * Writes a terminator at ptr.  Both arguments are evaluated once into locals.
 * The first byte is always cleared; only when termlen is greater than 1 are
 * termlen bytes zeroed with memset.
 */
170#define TERM_FILL(ptr, termlen) do {\
171 char *const term_fill_ptr = (ptr);\
172 const int term_fill_len = (termlen);\
173 *term_fill_ptr = '\0';\
174 if (UNLIKELY(term_fill_len > 1))\
175 memset(term_fill_ptr, 0, term_fill_len);\
/* RESIZE_CAPA_TERM() with the terminator length taken from TERM_LEN(str). */
178#define RESIZE_CAPA(str,capacity) do {\
179 const int termlen = TERM_LEN(str);\
180 RESIZE_CAPA_TERM(str,capacity,termlen);\
/*
 * Gives str room for `capacity` bytes plus a `termlen`-byte terminator.
 *
 * Embedded string whose embed area is too small: a buffer of
 * capacity + termlen bytes is obtained with ALLOC_N, the current bytes are
 * copied into it, pointer and length are stored in the heap fields, the
 * string is switched to non-embedded, and the capacity is recorded.
 *
 * Remaining visible lines: assert that the string is not STR_SHARED, resize
 * the heap buffer from STR_HEAP_SIZE(str) to capacity + termlen, and record
 * the capacity.
 *
 * NOTE(review): the embed-capacity comparison uses `capacity + termlen`
 * without parenthesising the macro arguments, unlike the allocation lines.
 * Confirm no caller passes an expression with lower precedence than `+`.
 */
182#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
183 if (STR_EMBED_P(str)) {\
184 if (str_embed_capa(str) < capacity + termlen) {\
185 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
186 const long tlen = RSTRING_LEN(str);\
187 memcpy(tmp, RSTRING_PTR(str), tlen);\
188 RSTRING(str)->as.heap.ptr = tmp;\
189 RSTRING(str)->len = tlen;\
190 STR_SET_NOEMBED(str);\
191 RSTRING(str)->as.heap.aux.capa = (capacity);\
195 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
196 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
197 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
198 RSTRING(str)->as.heap.aux.capa = (capacity);\
/*
 * Records that str uses the buffer owned by shared_str.  The visible steps
 * follow a check that str does not carry STR_FAKESTR:
 *   - assert that the pointer of str lies between the start of shared_str
 *     and its end (start + length), inclusive;
 *   - store shared_str in the aux.shared slot of str through RB_OBJ_WRITE;
 *   - flag str as STR_SHARED and pass it to rb_gc_register_pinning_obj();
 *   - flag shared_str as STR_SHARED_ROOT, and additionally as STR_BORROWED
 *     when its RBASIC_CLASS is 0.
 */
202#define STR_SET_SHARED(str, shared_str) do { \
203 if (!FL_TEST(str, STR_FAKESTR)) { \
204 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
205 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
206 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
207 FL_SET((str), STR_SHARED); \
208 rb_gc_register_pinning_obj(str); \
209 FL_SET((shared_str), STR_SHARED_ROOT); \
210 if (RBASIC_CLASS((shared_str)) == 0) \
211 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Reads the as.heap.ptr field of str. */
215#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Heap buffer size in bytes: the recorded capacity plus TERM_LEN(str). */
216#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Shorthand for get_encoding(str). */
219#define STR_ENC_GET(str) get_encoding(str)
/*
 * SHARABLE_MIDDLE_SUBSTRING defaults to 0 when nothing else defines it.
 * With it off, SHARABLE_SUBSTRING_P() accepts only a range that runs up to
 * `end` (beg + len == end).  The second definition, presumably the
 * alternative arm of the conditional, accepts every range.
 */
221#if !defined SHARABLE_MIDDLE_SUBSTRING
222# define SHARABLE_MIDDLE_SUBSTRING 0
224#if !SHARABLE_MIDDLE_SUBSTRING
225#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
227#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/*
 * Bytes available in the embedded array of str: the GC slot size of the
 * object minus the offset of as.embed.ary inside struct RString.
 */
232str_embed_capa(
VALUE str)
234 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
/* True when str carries none of STR_NOFREE, STR_SHARED_ROOT or STR_SHARED. */
238rb_str_reembeddable_p(
VALUE str)
240 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
244rb_str_embed_size(
long capa,
long termlen)
/*
 * Computes real_size for str.  For an embedded string, and likewise for a
 * string that rb_str_reembeddable_p() accepts, the size comes from
 * rb_str_embed_size(capa, TERM_LEN(str)), with capa enlarged by
 * sizeof(st_index_t) when STR_PRECOMPUTED_HASH is set.  Otherwise the size is
 * sizeof(struct RString).  The initial value of capa in each branch and the
 * return statement are not visible in this excerpt.
 */
252rb_str_size_as_embedded(
VALUE str)
255 if (STR_EMBED_P(str)) {
257 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
259 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
263 else if (rb_str_reembeddable_p(str)) {
265 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
267 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
270 real_size =
sizeof(
struct RString);
/*
 * Asks rb_gc_size_allocatable_p() whether the size that rb_str_embed_size()
 * reports for len bytes plus a termlen-byte terminator can be allocated.
 */
277STR_EMBEDDABLE_P(
long len,
long termlen)
279 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
284static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
285static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
287static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
288static inline void str_modifiable(
VALUE str);
/*
 * Calls str_make_independent_expand() with the current length and terminator
 * length of str and no extra room (expand is 0).
 */
293str_make_independent(
VALUE str)
295 long len = RSTRING_LEN(str);
296 int termlen = TERM_LEN(str);
297 str_make_independent_expand((str),
len, 0L, termlen);
300static inline int str_dependent_p(
VALUE str);
/* Calls str_make_independent() only when str_dependent_p(str) is true. */
303rb_str_make_independent(
VALUE str)
305 if (str_dependent_p(str)) {
306 str_make_independent(str);
311rb_str_make_embedded(
VALUE str)
316 char *buf =
RSTRING(str)->as.heap.ptr;
320 STR_SET_LEN(str,
len);
323 memcpy(RSTRING_PTR(str), buf,
len);
327 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
/*
 * Prints a warning to stderr that a function is returning NULL and that a
 * crash is expected next.  The format has one %s placeholder; the argument
 * line is not visible here, presumably it is `func`.
 */
331rb_debug_rstring_null_ptr(
const char *func)
333 fprintf(stderr,
"%s is returning NULL!! "
334 "SIGSEGV is highly expected to follow immediately.\n"
335 "If you could reproduce, attach your debugger here, "
336 "and look at the passed string.\n",
341static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
344get_encoding(
VALUE str)
/*
 * Raises ArgumentError, naming the encoding of str, when
 * is_broken_string(str) reports the string as broken.
 */
350mustnot_broken(
VALUE str)
352 if (is_broken_string(str)) {
353 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
358mustnot_wchar(
VALUE str)
361 if (rb_enc_mbminlen(enc) > 1) {
362 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
366static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
368#if SIZEOF_LONG == SIZEOF_VOIDP
369#define PRECOMPUTED_FAKESTR_HASH 1
374BARE_STRING_P(
VALUE str)
379static inline st_index_t
380str_do_hash(
VALUE str)
382 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
384 if (e && !is_ascii_string(str)) {
/*
 * Copies `hash` into the bytes that follow the terminator of str
 * (RSTRING_END(str) + TERM_LEN(str)) and then sets STR_PRECOMPUTED_HASH.
 *
 * used_bytes is content length plus terminator length; free_bytes is what is
 * left of the embed capacity.
 * NOTE(review): free_bytes is an unsigned subtraction and would wrap if
 * used_bytes exceeded the embed capacity.  Any check on free_bytes before the
 * memcpy is not visible in this excerpt -- confirm one exists.
 */
391str_store_precomputed_hash(
VALUE str, st_index_t hash)
397 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
398 size_t free_bytes = str_embed_capa(str) - used_bytes;
402 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
404 FL_SET(str, STR_PRECOMPUTED_HASH);
417 if (
FL_TEST(str, RSTRING_FSTR))
420 bare = BARE_STRING_P(str);
422 if (STR_EMBED_P(str)) {
427 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
434 rb_str_resize(str, RSTRING_LEN(str));
436 fstr = register_fstring(str,
false,
false);
439 str_replace_shared_without_enc(str, fstr);
446static VALUE fstring_table_obj;
449fstring_concurrent_set_hash(
VALUE str)
451#ifdef PRECOMPUTED_FAKESTR_HASH
455 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
472 const char *aptr, *bptr;
479 return (alen == blen &&
481 memcmp(aptr, bptr, alen) == 0);
486 bool force_precompute_hash;
490fstring_concurrent_set_create(
VALUE str,
void *data)
500 long len = RSTRING_LEN(str);
501 long capa =
len +
sizeof(st_index_t);
502 int term_len = TERM_LEN(str);
504 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
506 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
507 STR_SET_LEN(new_str, RSTRING_LEN(str));
509 rb_enc_copy(new_str, str);
510 str_store_precomputed_hash(new_str, str_do_hash(str));
514 rb_enc_copy(new_str, str);
515#ifdef PRECOMPUTED_FAKESTR_HASH
516 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
517 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
531 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
534 if (STR_SHARED_P(str)) {
536 str_make_independent(str);
539 if (!BARE_STRING_P(str)) {
545 RBASIC(str)->flags |= RSTRING_FSTR;
547 RB_OBJ_SET_SHAREABLE(str);
561 .hash = fstring_concurrent_set_hash,
562 .cmp = fstring_concurrent_set_cmp,
563 .create = fstring_concurrent_set_create,
/*
 * Creates the fstring table with rb_concurrent_set_new(), passing the
 * fstring callback table and 8192 as the second argument (presumably the
 * initial capacity), then registers the address of the table variable with
 * the GC.
 */
568Init_fstring_table(
void)
570 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
571 rb_gc_register_address(&fstring_table_obj);
575register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
579 .force_precompute_hash = force_precompute_hash
582#if SIZEOF_VOIDP == SIZEOF_LONG
586 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
590 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
592 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
604rb_obj_is_fstring_table(
VALUE obj)
608 return obj == fstring_table_obj;
/*
 * Removes obj from the fstring table by identity and bumps the obj_str_fstr
 * debug counter.  Starts with ASSERT_vm_locking_with_barrier().
 */
612rb_gc_free_fstring(
VALUE obj)
614 ASSERT_vm_locking_with_barrier();
620 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
622 RB_DEBUG_COUNTER_INC(obj_str_fstr);
/*
 * Forwards callback and data to rb_concurrent_set_foreach_with_replace() on
 * the fstring table.  Does nothing while the table variable is still zero.
 */
628rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
630 if (fstring_table_obj) {
631 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
636setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
639 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
652 return (
VALUE)fake_str;
661 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
/*
 * Wraps the len bytes at ptr in a stack-allocated fake string tagged
 * ENCINDEX_US_ASCII and passes it to register_fstring() with copy and
 * force_precompute_hash both false; returns what register_fstring() returns.
 */
670rb_fstring_new(
const char *ptr,
long len)
672 struct RString fake_str = {RBASIC_INIT};
673 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
679 struct RString fake_str = {RBASIC_INIT};
680 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
/* rb_fstring_new() for a NUL-terminated C string; the length comes from strlen(). */
684rb_fstring_cstr(
const char *
ptr)
686 return rb_fstring_new(
ptr, strlen(
ptr));
690single_byte_optimizable(
VALUE str)
694 case ENCINDEX_ASCII_8BIT:
695 case ENCINDEX_US_ASCII:
/*
 * Looks in [p, e) for a byte with the high bit (0x80) set and returns a
 * pointer to it.
 *
 * Visible structure:
 *   - NONASCII_MASK is 0x80 repeated in every byte of a uintptr_t, for 8- and
 *     4-byte pointer sizes;
 *   - when unaligned word access is unavailable and p is not word aligned,
 *     the leading bytes are tested one at a time (the case 7..1 ladder
 *     indexed from p);
 *   - whole words are then read with memcpy and tested against the mask; on
 *     a hit the bit position is turned into a byte offset, using nlz_intptr
 *     when WORDS_BIGENDIAN is defined and ntz_intptr otherwise;
 *   - the trailing bytes are tested one at a time (the ladder indexed from e).
 *
 * The switch headers, the pointer adjustments between the phases and the
 * value returned when no such byte exists are not visible in this excerpt;
 * callers in this file use the result as a truth value.
 */
717static inline const char *
718search_nonascii(
const char *p,
const char *e)
722#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
723# if SIZEOF_UINTPTR_T == 8
724# define NONASCII_MASK UINT64_C(0x8080808080808080)
725# elif SIZEOF_UINTPTR_T == 4
726# define NONASCII_MASK UINT32_C(0x80808080)
728# error "don't know what to do."
731# if SIZEOF_UINTPTR_T == 8
732# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
733# elif SIZEOF_UINTPTR_T == 4
734# define NONASCII_MASK 0x80808080UL
736# error "don't know what to do."
740 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
741#if !UNALIGNED_WORD_ACCESS
742 if ((uintptr_t)p % SIZEOF_VOIDP) {
743 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
748 case 7:
if (p[-7]&0x80)
return p-7;
749 case 6:
if (p[-6]&0x80)
return p-6;
750 case 5:
if (p[-5]&0x80)
return p-5;
751 case 4:
if (p[-4]&0x80)
return p-4;
753 case 3:
if (p[-3]&0x80)
return p-3;
754 case 2:
if (p[-2]&0x80)
return p-2;
755 case 1:
if (p[-1]&0x80)
return p-1;
760#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
761#define aligned_ptr(value) \
762 __builtin_assume_aligned((value), sizeof(uintptr_t))
764#define aligned_ptr(value) (value)
767 t = (e - (SIZEOF_VOIDP-1));
769 for (;s < t; s +=
sizeof(uintptr_t)) {
771 memcpy(&word, s,
sizeof(word));
772 if (word & NONASCII_MASK) {
773#ifdef WORDS_BIGENDIAN
774 return (
const char *)s + (nlz_intptr(word&NONASCII_MASK)>>3);
776 return (
const char *)s + (ntz_intptr(word&NONASCII_MASK)>>3);
786 case 7:
if (e[-7]&0x80)
return e-7;
787 case 6:
if (e[-6]&0x80)
return e-6;
788 case 5:
if (e[-5]&0x80)
return e-5;
789 case 4:
if (e[-4]&0x80)
return e-4;
791 case 3:
if (e[-3]&0x80)
return e-3;
792 case 2:
if (e[-2]&0x80)
return e-2;
793 case 1:
if (e[-1]&0x80)
return e-1;
801 const char *e = p +
len;
803 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
805 p = search_nonascii(p, e);
809 if (rb_enc_asciicompat(enc)) {
810 p = search_nonascii(p, e);
813 int ret = rb_enc_precise_mbclen(p, e, enc);
817 p = search_nonascii(p, e);
823 int ret = rb_enc_precise_mbclen(p, e, enc);
839 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
842 p = search_nonascii(p, e);
846 else if (rb_enc_asciicompat(enc)) {
847 p = search_nonascii(p, e);
853 int ret = rb_enc_precise_mbclen(p, e, enc);
860 p = search_nonascii(p, e);
866 int ret = rb_enc_precise_mbclen(p, e, enc);
891 rb_enc_set_index(str1, rb_enc_get_index(str2));
899rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
904 str_enc_copy(dest, src);
905 if (RSTRING_LEN(dest) == 0) {
906 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
917 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
918 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
929rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
931 str_enc_copy(dest, src);
938 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
944 return enc_coderange_scan(str, enc);
953 cr = enc_coderange_scan(str, get_encoding(str));
960rb_enc_str_asciicompat(
VALUE str)
963 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
971 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
980str_mod_check(
VALUE s,
const char *p,
long len)
982 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
/*
 * Capacity of str in bytes, not counting a termlen-byte terminator.
 * Embedded strings report the embed capacity minus termlen.  A separate
 * branch handles strings flagged STR_SHARED or STR_NOFREE (its result is not
 * visible in this excerpt).  The last visible return yields the recorded
 * heap capacity.
 */
988str_capacity(
VALUE str,
const int termlen)
990 if (STR_EMBED_P(str)) {
991 return str_embed_capa(str) - termlen;
993 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
997 return RSTRING(str)->as.heap.aux.capa;
1004 return str_capacity(str, TERM_LEN(str));
1008must_not_null(
const char *
ptr)
1011 rb_raise(rb_eArgError,
"NULL pointer given");
/*
 * Allocates a struct RString of class klass through NEWOBJ_OF, after
 * computing `size` as rb_str_embed_size(capa, 0), and clears the first byte
 * of the embedded array.  The remaining NEWOBJ_OF arguments and the return
 * are not visible in this excerpt.
 */
1016str_alloc_embed(
VALUE klass,
size_t capa)
1018 size_t size = rb_str_embed_size(
capa, 0);
1022 NEWOBJ_OF(str,
struct RString, klass,
1026 str->as.embed.ary[0] = 0;
/*
 * Allocates a struct RString of class klass through NEWOBJ_OF and
 * initialises the heap fields: capacity 0 and a NULL buffer pointer.
 */
1032str_alloc_heap(
VALUE klass)
1034 NEWOBJ_OF(str,
struct RString, klass,
1038 str->as.heap.aux.capa = 0;
1039 str->as.heap.ptr = NULL;
/*
 * Fires the STRING creation DTrace hook with length 0, allocates an embedded
 * string via str_alloc_embed(klass, 0) and zeroes its whole embedded area
 * (str_embed_capa(str) bytes).
 */
1045empty_str_alloc(
VALUE klass)
1047 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1048 VALUE str = str_alloc_embed(klass, 0);
1049 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1060 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1064 enc = rb_ascii8bit_encoding();
1067 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1069 int termlen = rb_enc_mbminlen(enc);
1071 if (STR_EMBEDDABLE_P(
len, termlen)) {
1072 str = str_alloc_embed(klass,
len + termlen);
1078 str = str_alloc_heap(klass);
1084 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1087 rb_enc_raw_set(str, enc);
1090 memcpy(RSTRING_PTR(str),
ptr,
len);
1093 memset(RSTRING_PTR(str), 0,
len);
1096 STR_SET_LEN(str,
len);
1097 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1104 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1139 __msan_unpoison_string(
ptr);
1159 if (rb_enc_mbminlen(enc) != 1) {
1160 rb_raise(rb_eArgError,
"wchar encoding given");
1162 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
/*
 * Builds a string of class klass from ptr/len with encoding index encindex.
 *
 * Visible behaviour:
 *   - an ArgumentError is raised for a rejected length (the test itself is
 *     not visible in this excerpt);
 *   - one path builds the string with str_enc_new();
 *   - another path fires the STRING creation DTrace hook, allocates with
 *     str_alloc_heap(), sets STR_NOFREE on the result and associates encindex.
 * The conditions that choose between the paths and the assignments of the
 * buffer pointer and length are not visible in this excerpt.
 */
1166str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1171 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1175 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1178 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1179 str = str_alloc_heap(klass);
1183 RBASIC(str)->flags |= STR_NOFREE;
1184 rb_enc_associate_index(str, encindex);
1213static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1215 int ecflags,
VALUE ecopts);
1220 int encidx = rb_enc_to_index(enc);
1221 if (rb_enc_get_index(str) == encidx)
1222 return is_ascii_string(str);
1233 if (!to)
return str;
1234 if (!from) from = rb_enc_get(str);
1235 if (from == to)
return str;
1236 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1237 rb_is_ascii8bit_enc(to)) {
1238 if (STR_ENC_GET(str) != to) {
1240 rb_enc_associate(str, to);
1247 from, to, ecflags, ecopts);
1248 if (
NIL_P(newstr)) {
1256rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1261 olen = RSTRING_LEN(newstr);
1262 if (ofs < -olen || olen < ofs)
1264 if (ofs < 0) ofs += olen;
1266 STR_SET_LEN(newstr, ofs);
1270 rb_str_modify(newstr);
1271 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1279 STR_SET_LEN(str, 0);
1280 rb_enc_associate(str, enc);
1286str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1288 int ecflags,
VALUE ecopts)
1293 VALUE econv_wrapper;
1294 const unsigned char *start, *sp;
1295 unsigned char *dest, *dp;
1296 size_t converted_output = (size_t)ofs;
1301 RBASIC_CLEAR_CLASS(econv_wrapper);
1303 if (!ec)
return Qnil;
1306 sp = (
unsigned char*)
ptr;
1308 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1309 (dp = dest + converted_output),
1313 size_t converted_input = sp - start;
1314 size_t rest =
len - converted_input;
1315 converted_output = dp - dest;
1317 if (converted_input && converted_output &&
1318 rest < (LONG_MAX / converted_output)) {
1319 rest = (rest * converted_output) / converted_input;
1324 olen += rest < 2 ? 2 : rest;
1325 rb_str_resize(newstr, olen);
1332 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1334 rb_enc_associate(newstr, to);
1353 const int eidx = rb_enc_to_index(eenc);
1356 return rb_enc_str_new(
ptr,
len, eenc);
1360 if ((eidx == rb_ascii8bit_encindex()) ||
1361 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1365 ienc = rb_default_internal_encoding();
1366 if (!ienc || eenc == ienc) {
1367 return rb_enc_str_new(
ptr,
len, eenc);
1371 if ((eidx == rb_ascii8bit_encindex()) ||
1372 (eidx == rb_usascii_encindex()) ||
1373 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1374 return rb_enc_str_new(
ptr,
len, ienc);
1377 str = rb_enc_str_new(NULL, 0, ienc);
1380 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1381 rb_str_initialize(str,
ptr,
len, eenc);
1389 int eidx = rb_enc_to_index(eenc);
1390 if (eidx == rb_usascii_encindex() &&
1391 !is_ascii_string(str)) {
1392 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1395 rb_enc_associate_index(str, eidx);
1454str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1456 const int termlen = TERM_LEN(str);
1461 if (str_embed_capa(str2) >=
len + termlen) {
1462 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1463 STR_SET_EMBED(str2);
1464 memcpy(ptr2, RSTRING_PTR(str),
len);
1465 TERM_FILL(ptr2+
len, termlen);
1469 if (STR_SHARED_P(str)) {
1470 root =
RSTRING(str)->as.heap.aux.shared;
1479 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1481 rb_fatal(
"about to free a possible shared root");
1483 char *ptr2 = STR_HEAP_PTR(str2);
1485 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1488 FL_SET(str2, STR_NOEMBED);
1490 STR_SET_SHARED(str2, root);
1493 STR_SET_LEN(str2,
len);
1501 str_replace_shared_without_enc(str2, str);
1502 rb_enc_cr_str_exact_copy(str2, str);
1509 return str_replace_shared(str_alloc_heap(klass), str);
1526rb_str_new_frozen_String(
VALUE orig)
1534rb_str_frozen_bare_string(
VALUE orig)
1536 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1541rb_str_tmp_frozen_acquire(
VALUE orig)
1544 return str_new_frozen_buffer(0, orig, FALSE);
1548rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1550 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1551 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1553 VALUE str = str_alloc_heap(0);
1556 FL_SET(str, STR_SHARED_ROOT);
1558 size_t capa = str_capacity(orig, TERM_LEN(orig));
1564 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1565 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1572 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1573 RBASIC(orig)->flags &= ~STR_NOFREE;
1574 STR_SET_SHARED(orig, str);
1576 RB_OBJ_SET_SHAREABLE(str);
1588rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1593 if (STR_EMBED_P(tmp)) {
1596 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1602 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1606 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1607 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1612 STR_SET_LEN(tmp, 0);
1620 return str_new_frozen_buffer(klass, orig, TRUE);
1630 VALUE str = str_alloc_heap(klass);
1631 STR_SET_LEN(str, RSTRING_LEN(orig));
1632 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1633 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1634 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1635 RBASIC(orig)->flags &= ~STR_NOFREE;
1636 STR_SET_SHARED(orig, str);
1643str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1647 long len = RSTRING_LEN(orig);
1648 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1649 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1651 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1652 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1658 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1659 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1665 if ((ofs > 0) || (rest > 0) ||
1668 str = str_new_shared(klass,
shared);
1670 RSTRING(str)->as.heap.ptr += ofs;
1671 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1679 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1680 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1682 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1683 STR_SET_LEN(str, RSTRING_LEN(orig));
1689 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1692 str = heap_str_make_shared(klass, orig);
1697 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1709str_new_empty_String(
VALUE str)
1712 rb_enc_copy(v, str);
1716#define STR_BUF_MIN_SIZE 63
1721 if (STR_EMBEDDABLE_P(
capa, 1)) {
1729 RSTRING(str)->as.heap.ptr[0] =
'\0';
1749 return str_new(0, 0,
len);
/*
 * Debug-counter bookkeeping and buffer release, by storage kind:
 *   - embedded: bump obj_str_embed;
 *   - STR_SHARED or STR_NOFREE: conditional counter bumps only;
 *   - remaining visible lines: bump obj_str_ptr and release the heap buffer
 *     with its recorded size.
 * NOTE(review): both conditional increments target obj_str_shared, once for
 * STR_SHARED and once for STR_NOFREE.  Confirm whether the STR_NOFREE case
 * was meant to use a separate counter.
 */
1755 if (STR_EMBED_P(str)) {
1756 RB_DEBUG_COUNTER_INC(obj_str_embed);
1758 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1759 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1760 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1763 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1764 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/*
 * Returns STR_HEAP_SIZE(str) when, of the three flags STR_NOEMBED,
 * STR_SHARED and STR_NOFREE, only STR_NOEMBED is set.  The result for every
 * other string is not visible in this excerpt.
 */
1769rb_str_memsize(
VALUE str)
1771 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1772 return STR_HEAP_SIZE(str);
1782 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1785static inline void str_discard(
VALUE str);
1786static void str_shared_replace(
VALUE str,
VALUE str2);
1791 if (str != str2) str_shared_replace(str, str2);
1802 enc = STR_ENC_GET(str2);
1805 termlen = rb_enc_mbminlen(enc);
1807 STR_SET_LEN(str, RSTRING_LEN(str2));
1809 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1811 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1812 rb_enc_associate(str, enc);
1816 if (STR_EMBED_P(str2)) {
1818 long len = RSTRING_LEN(str2);
1821 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1822 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1823 RSTRING(str2)->as.heap.ptr = new_ptr;
1824 STR_SET_LEN(str2,
len);
1826 STR_SET_NOEMBED(str2);
1829 STR_SET_NOEMBED(str);
1831 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1833 if (
FL_TEST(str2, STR_SHARED)) {
1835 STR_SET_SHARED(str,
shared);
1838 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1842 STR_SET_EMBED(str2);
1843 RSTRING_PTR(str2)[0] = 0;
1844 STR_SET_LEN(str2, 0);
1845 rb_enc_associate(str, enc);
1859 return rb_obj_as_string_result(str, obj);
1875 len = RSTRING_LEN(str2);
1876 if (STR_SHARED_P(str2)) {
1879 STR_SET_NOEMBED(str);
1880 STR_SET_LEN(str,
len);
1881 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1882 STR_SET_SHARED(str,
shared);
1883 rb_enc_cr_str_exact_copy(str, str2);
1886 str_replace_shared(str, str2);
1895 size_t size = rb_str_embed_size(
capa, 0);
1899 NEWOBJ_OF(str,
struct RString, klass,
1910 NEWOBJ_OF(str,
struct RString, klass,
1913 str->as.heap.aux.capa = 0;
1914 str->as.heap.ptr = NULL;
1924 encidx = rb_enc_get_index(str);
1925 flags &= ~ENCODING_MASK;
1928 if (encidx) rb_enc_associate_index(dup, encidx);
1938 long len = RSTRING_LEN(str);
1943 STR_SET_LEN(dup, RSTRING_LEN(str));
1944 return str_duplicate_setup_encoding(str, dup, flags);
1953 root =
RSTRING(str)->as.heap.aux.shared;
1956 root = str = str_new_frozen(klass, str);
1962 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1963 FL_SET(root, STR_SHARED_ROOT);
1965 flags |= RSTRING_NOEMBED | STR_SHARED;
1967 STR_SET_LEN(dup, RSTRING_LEN(str));
1968 return str_duplicate_setup_encoding(str, dup, flags);
1974 if (STR_EMBED_P(str)) {
1975 return str_duplicate_setup_embed(klass, str, dup);
1978 return str_duplicate_setup_heap(klass, str, dup);
1986 if (STR_EMBED_P(str)) {
1987 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1990 dup = str_alloc_heap(klass);
1993 return str_duplicate_setup(klass, str, dup);
2004rb_str_dup_m(
VALUE str)
2006 if (LIKELY(BARE_STRING_P(str))) {
2017 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2024 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2028 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2029 str_duplicate_setup_embed(klass, str, new_str);
2032 new_str = ec_str_alloc_heap(ec, klass);
2033 str_duplicate_setup_heap(klass, str, new_str);
2042rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2044 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2068 static ID keyword_ids[2];
2069 VALUE orig, opt, venc, vcapa;
2074 if (!keyword_ids[0]) {
2075 keyword_ids[0] = rb_id_encoding();
2076 CONST_ID(keyword_ids[1],
"capacity");
2084 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2085 enc = rb_to_encoding(venc);
2087 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2090 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2092 if (
capa < STR_BUF_MIN_SIZE) {
2093 capa = STR_BUF_MIN_SIZE;
2097 len = RSTRING_LEN(orig);
2101 if (orig == str) n = 0;
2103 str_modifiable(str);
2104 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2106 const size_t size = (size_t)
capa + termlen;
2107 const char *
const old_ptr = RSTRING_PTR(str);
2108 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2109 char *new_ptr =
ALLOC_N(
char, size);
2110 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2111 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2113 RSTRING(str)->as.heap.ptr = new_ptr;
2115 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2116 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2117 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2119 STR_SET_LEN(str,
len);
2122 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2123 rb_enc_cr_str_exact_copy(str, orig);
2125 FL_SET(str, STR_NOEMBED);
2132 rb_enc_associate(str, enc);
2144rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2150 static ID keyword_ids[2];
2160 keyword_ids[0] = rb_id_encoding();
2161 CONST_ID(keyword_ids[1],
"capacity");
2163 encoding = kwargs[0];
2164 capacity = kwargs[1];
2173 if (UNDEF_P(encoding)) {
2175 encoding = rb_obj_encoding(orig);
2179 if (!UNDEF_P(encoding)) {
2180 enc = rb_to_encoding(encoding);
2184 if (UNDEF_P(capacity)) {
2186 VALUE empty_str = str_new(klass,
"", 0);
2188 rb_enc_associate(empty_str, enc);
2192 VALUE copy = str_duplicate(klass, orig);
2193 rb_enc_associate(copy, enc);
2206 if (orig_capa >
capa) {
2211 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2212 STR_SET_LEN(str, 0);
/* True unless the top two bits of c are 10, i.e. unless c has the form of a UTF-8 continuation byte. */
2223#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2238static inline uintptr_t
2239count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2244 d = (d>>6) | (~d>>7);
2245 d &= NONASCII_MASK >> 7;
2248#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2250 return rb_popcount_intptr(d);
2254# if SIZEOF_VOIDP == 8
2263enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2269 long diff = (long)(e - p);
2270 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2275 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2276 const uintptr_t *s, *t;
2277 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2278 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2279 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2280 while (p < (
const char *)s) {
2281 if (is_utf8_lead_byte(*p))
len++;
2285 len += count_utf8_lead_bytes_with_word(s);
2288 p = (
const char *)s;
2291 if (is_utf8_lead_byte(*p))
len++;
2297 else if (rb_enc_asciicompat(enc)) {
2302 q = search_nonascii(p, e);
2308 p += rb_enc_fast_mbclen(p, e, enc);
2315 q = search_nonascii(p, e);
2321 p += rb_enc_mbclen(p, e, enc);
2328 for (c=0; p<e; c++) {
2329 p += rb_enc_mbclen(p, e, enc);
2344rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2352 long diff = (long)(e - p);
2353 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2355 else if (rb_enc_asciicompat(enc)) {
2359 q = search_nonascii(p, e);
2367 ret = rb_enc_precise_mbclen(p, e, enc);
2382 for (c=0; p<e; c++) {
2383 ret = rb_enc_precise_mbclen(p, e, enc);
2390 if (p + rb_enc_mbminlen(enc) <= e)
2391 p += rb_enc_mbminlen(enc);
2407 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2408 if (!enc) enc = STR_ENC_GET(str);
2409 p = RSTRING_PTR(str);
2414 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2419 return enc_strlen(p, e, enc, cr);
2426 return str_strlen(str, NULL);
2440 return LONG2NUM(str_strlen(str, NULL));
2452rb_str_bytesize(
VALUE str)
2471rb_str_empty(
VALUE str)
2473 return RBOOL(RSTRING_LEN(str) == 0);
2492 char *ptr1, *ptr2, *ptr3;
2497 enc = rb_enc_check_str(str1, str2);
2500 termlen = rb_enc_mbminlen(enc);
2501 if (len1 > LONG_MAX - len2) {
2502 rb_raise(rb_eArgError,
"string size too big");
2504 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2505 ptr3 = RSTRING_PTR(str3);
2506 memcpy(ptr3, ptr1, len1);
2507 memcpy(ptr3+len1, ptr2, len2);
2508 TERM_FILL(&ptr3[len1+len2], termlen);
2524 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2527 int enc1 = rb_enc_get_index(str1);
2528 int enc2 = rb_enc_get_index(str2);
2533 else if (enc2 < 0) {
2536 else if (enc1 != enc2) {
2539 else if (len1 > LONG_MAX - len2) {
2573 rb_enc_copy(str2, str);
2578 rb_raise(rb_eArgError,
"negative argument");
2580 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2581 if (STR_EMBEDDABLE_P(
len, 1)) {
2583 memset(RSTRING_PTR(str2), 0,
len + 1);
2590 STR_SET_LEN(str2,
len);
2591 rb_enc_copy(str2, str);
2594 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2595 rb_raise(rb_eArgError,
"argument too big");
2598 len *= RSTRING_LEN(str);
2599 termlen = TERM_LEN(str);
2601 ptr2 = RSTRING_PTR(str2);
2603 n = RSTRING_LEN(str);
2604 memcpy(ptr2, RSTRING_PTR(str), n);
2605 while (n <=
len/2) {
2606 memcpy(ptr2 + n, ptr2, n);
2609 memcpy(ptr2 + n, ptr2,
len-n);
2611 STR_SET_LEN(str2,
len);
2612 TERM_FILL(&ptr2[
len], termlen);
2613 rb_enc_cr_str_copy_for_substr(str2, str);
2650rb_check_lockedtmp(
VALUE str)
2652 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Flag bits that make str_modifiable() take its checking path. */
2659#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
/*
 * Gate in front of string mutation.  Only when at least one bit of
 * STR_UNMODIFIABLE_MASK is set does it do any work: a chilled string is
 * reported through CHILLED_STRING_MUTATED(), and rb_check_lockedtmp() and
 * rb_check_frozen() are called.  The exact nesting of those calls is not
 * visible in this excerpt.
 */
2661str_modifiable(
VALUE str)
2665 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2666 if (CHILLED_STRING_P(str)) {
2667 CHILLED_STRING_MUTATED(str);
2669 rb_check_lockedtmp(str);
2670 rb_check_frozen(str);
2675str_dependent_p(
VALUE str)
2677 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK plus the two flags that str_dependent_p() tests. */
2687#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
/*
 * When any bit of STR_DEPENDANT_MASK is set: runs str_modifiable(str) and
 * returns the negation of str_dependent_p(str).  The result when no bit is
 * set is not visible in this excerpt.
 */
2689str_independent(
VALUE str)
2693 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2694 str_modifiable(str);
2695 return !str_dependent_p(str);
2701str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2711 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2716 STR_SET_LEN(str,
len);
2721 oldptr = RSTRING_PTR(str);
2723 memcpy(
ptr, oldptr,
len);
2725 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2728 STR_SET_NOEMBED(str);
2729 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2730 TERM_FILL(
ptr +
len, termlen);
2732 STR_SET_LEN(str,
len);
2739 if (!str_independent(str))
2740 str_make_independent(str);
2749 int termlen = TERM_LEN(str);
2750 long len = RSTRING_LEN(str);
2753 rb_raise(rb_eArgError,
"negative expanding string size");
2755 if (expand >= LONG_MAX -
len) {
2756 rb_raise(rb_eArgError,
"string size too big");
2759 if (!str_independent(str)) {
2760 str_make_independent_expand(str,
len, expand, termlen);
2762 else if (expand > 0) {
2763 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2770str_modify_keep_cr(
VALUE str)
2772 if (!str_independent(str))
2773 str_make_independent(str);
2780str_discard(
VALUE str)
2782 str_modifiable(str);
2783 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2784 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2785 RSTRING(str)->as.heap.ptr = 0;
2786 STR_SET_LEN(str, 0);
2793 int encindex = rb_enc_get_index(str);
2795 if (RB_UNLIKELY(encindex == -1)) {
2799 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2804 if (!rb_enc_asciicompat(enc)) {
2826 return RSTRING_PTR(str);
2830zero_filled(
const char *s,
int n)
2832 for (; n > 0; --n) {
2839str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2841 const char *e = s +
len;
2843 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2844 if (zero_filled(s, minlen))
return s;
2850str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2855 if (str_dependent_p(str)) {
2856 if (!zero_filled(s +
len, termlen))
2857 str_make_independent_expand(str,
len, 0L, termlen);
2860 TERM_FILL(s +
len, termlen);
2863 return RSTRING_PTR(str);
2867rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2869 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2870 long len = RSTRING_LEN(str);
2874 rb_check_lockedtmp(str);
2875 str_make_independent_expand(str,
len, 0L, termlen);
2877 else if (str_dependent_p(str)) {
2878 if (termlen > oldtermlen)
2879 str_make_independent_expand(str,
len, 0L, termlen);
2882 if (!STR_EMBED_P(str)) {
2887 if (termlen > oldtermlen) {
2888 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2896str_null_check(
VALUE str,
int *w)
2898 char *s = RSTRING_PTR(str);
2899 long len = RSTRING_LEN(str);
2901 const int minlen = rb_enc_mbminlen(enc);
2905 if (str_null_char(s,
len, minlen, enc)) {
2908 return str_fill_term(str, s,
len, minlen);
2911 if (!s || memchr(s, 0,
len)) {
2915 s = str_fill_term(str, s,
len, minlen);
2921rb_str_to_cstr(
VALUE str)
2924 return str_null_check(str, &w);
2932 char *s = str_null_check(str, &w);
2935 rb_raise(rb_eArgError,
"string contains null char");
2937 rb_raise(rb_eArgError,
"string contains null byte");
2943rb_str_fill_terminator(
VALUE str,
const int newminlen)
2945 char *s = RSTRING_PTR(str);
2946 long len = RSTRING_LEN(str);
2947 return str_fill_term(str, s,
len, newminlen);
2953 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2979str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2988 else if (rb_enc_asciicompat(enc)) {
2989 const char *p2, *e2;
2992 while (p < e && 0 < nth) {
2999 p2 = search_nonascii(p, e2);
3008 n = rb_enc_mbclen(p, e, enc);
3019 while (p < e && nth--) {
3020 p += rb_enc_mbclen(p, e, enc);
3031 return str_nth_len(p, e, &nth, enc);
3035str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3040 p = str_nth_len(p, e, &nth, enc);
3049str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3051 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3052 if (!pp)
return e - p;
3059 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3060 STR_ENC_GET(str), single_byte_optimizable(str));
3065str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3068 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3069 const uintptr_t *s, *t;
3070 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3071 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3072 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3073 while (p < (
const char *)s) {
3074 if (is_utf8_lead_byte(*p)) nth--;
3078 nth -= count_utf8_lead_bytes_with_word(s);
3080 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3084 if (is_utf8_lead_byte(*p)) {
3085 if (nth == 0)
break;
3095str_utf8_offset(
const char *p,
const char *e,
long nth)
3097 const char *pp = str_utf8_nth(p, e, &nth);
3106 if (single_byte_optimizable(str) || pos < 0)
3109 char *p = RSTRING_PTR(str);
3110 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3115str_subseq(
VALUE str,
long beg,
long len)
3123 const int termlen = TERM_LEN(str);
3124 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3131 if (str_embed_capa(str2) >=
len + termlen) {
3132 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3133 STR_SET_EMBED(str2);
3134 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3135 TERM_FILL(ptr2+
len, termlen);
3137 STR_SET_LEN(str2,
len);
3141 str_replace_shared(str2, str);
3144 RSTRING(str2)->as.heap.ptr += beg;
3145 if (RSTRING_LEN(str2) >
len) {
3146 STR_SET_LEN(str2,
len);
3156 VALUE str2 = str_subseq(str, beg,
len);
3157 rb_enc_cr_str_copy_for_substr(str2, str);
3166 const long blen = RSTRING_LEN(str);
3168 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3170 if (
len < 0)
return 0;
3171 if (beg < 0 && -beg < 0)
return 0;
3175 if (single_byte_optimizable(str)) {
3176 if (beg > blen)
return 0;
3179 if (beg < 0)
return 0;
3181 if (
len > blen - beg)
3183 if (
len < 0)
return 0;
3188 if (
len > -beg)
len = -beg;
3192 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3195 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3201 slen = str_strlen(str, enc);
3203 if (beg < 0)
return 0;
3205 if (
len == 0)
goto end;
3208 else if (beg > 0 && beg > blen) {
3212 if (beg > str_strlen(str, enc))
return 0;
3217 enc == rb_utf8_encoding()) {
3218 p = str_utf8_nth(s, e, &beg);
3219 if (beg > 0)
return 0;
3220 len = str_utf8_offset(p, e,
len);
3226 p = s + beg * char_sz;
3230 else if (
len * char_sz > e - p)
3235 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3236 if (beg > 0)
return 0;
3240 len = str_offset(p, e,
len, enc, 0);
3248static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3253 return str_substr(str, beg,
len, TRUE);
3263str_substr(
VALUE str,
long beg,
long len,
int empty)
3267 if (!p)
return Qnil;
3268 if (!
len && !empty)
return Qnil;
3270 beg = p - RSTRING_PTR(str);
3272 VALUE str2 = str_subseq(str, beg,
len);
3273 rb_enc_cr_str_copy_for_substr(str2, str);
3281 if (CHILLED_STRING_P(str)) {
3286 rb_str_resize(str, RSTRING_LEN(str));
3304 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3347str_uminus(
VALUE str)
3352 return rb_fstring(str);
3356#define rb_str_dup_frozen rb_str_new_frozen
3361 rb_check_frozen(str);
3362 if (
FL_TEST(str, STR_TMPLOCK)) {
3365 FL_SET(str, STR_TMPLOCK);
3372 rb_check_frozen(str);
3373 if (!
FL_TEST(str, STR_TMPLOCK)) {
3393 const int termlen = TERM_LEN(str);
3395 str_modifiable(str);
3396 if (STR_SHARED_P(str)) {
3399 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3400 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3411 else if (
len > RSTRING_LEN(str)) {
3415 const char *
const new_end = RSTRING_PTR(str) +
len;
3425 else if (
len < RSTRING_LEN(str)) {
3433 STR_SET_LEN(str,
len);
3434 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3441 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3444 int independent = str_independent(str);
3445 long slen = RSTRING_LEN(str);
3446 const int termlen = TERM_LEN(str);
3448 if (slen >
len || (termlen != 1 && slen <
len)) {
3454 if (STR_EMBED_P(str)) {
3455 if (
len == slen)
return str;
3456 if (str_embed_capa(str) >=
len + termlen) {
3457 STR_SET_LEN(str,
len);
3461 str_make_independent_expand(str, slen,
len - slen, termlen);
3463 else if (str_embed_capa(str) >=
len + termlen) {
3464 char *
ptr = STR_HEAP_PTR(str);
3466 if (slen >
len) slen =
len;
3469 STR_SET_LEN(str,
len);
3470 if (independent) ruby_xfree(
ptr);
3473 else if (!independent) {
3474 if (
len == slen)
return str;
3475 str_make_independent_expand(str, slen,
len - slen, termlen);
3479 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3480 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3483 else if (
len == slen)
return str;
3484 STR_SET_LEN(str,
len);
3491str_ensure_available_capa(
VALUE str,
long len)
3493 str_modify_keep_cr(str);
3495 const int termlen = TERM_LEN(str);
3496 long olen = RSTRING_LEN(str);
3498 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3499 rb_raise(rb_eArgError,
"string sizes too big");
3502 long total = olen +
len;
3503 long capa = str_capacity(str, termlen);
3506 if (total >= LONG_MAX / 2) {
3509 while (total >
capa) {
3512 RESIZE_CAPA_TERM(str,
capa, termlen);
3517str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3520 str_modify_keep_cr(str);
3525 if (
len == 0)
return 0;
3527 long total, olen,
off = -1;
3529 const int termlen = TERM_LEN(str);
3532 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3536 long capa = str_capacity(str, termlen);
3538 if (olen > LONG_MAX -
len) {
3539 rb_raise(rb_eArgError,
"string sizes too big");
3543 if (total >= LONG_MAX / 2) {
3546 while (total >
capa) {
3549 RESIZE_CAPA_TERM(str,
capa, termlen);
3550 sptr = RSTRING_PTR(str);
3555 memcpy(sptr + olen,
ptr,
len);
3556 STR_SET_LEN(str, total);
3557 TERM_FILL(sptr + total, termlen);
3562#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3563#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3568 if (
len == 0)
return str;
3570 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3572 return str_buf_cat(str,
ptr,
len);
3583rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3588 if (UNLIKELY(!str_independent(str))) {
3589 str_make_independent(str);
3592 long string_length = -1;
3593 const int null_terminator_length = 1;
3598 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3599 rb_raise(rb_eArgError,
"string sizes too big");
3602 long string_capacity = str_capacity(str, null_terminator_length);
3608 if (LIKELY(string_capacity >= string_length + 1)) {
3610 sptr[string_length] = byte;
3611 STR_SET_LEN(str, string_length + 1);
3612 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3616 str_buf_cat(str, (
char *)&
byte, 1);
3632 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3643rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3644 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3653 if (str_encindex == ptr_encindex) {
3655 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3659 str_enc = rb_enc_from_index(str_encindex);
3660 ptr_enc = rb_enc_from_index(ptr_encindex);
3661 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3664 if (RSTRING_LEN(str) == 0) {
3667 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3673 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3682 *ptr_cr_ret = ptr_cr;
3684 if (str_encindex != ptr_encindex &&
3687 str_enc = rb_enc_from_index(str_encindex);
3688 ptr_enc = rb_enc_from_index(ptr_encindex);
3693 res_encindex = str_encindex;
3698 res_encindex = str_encindex;
3702 res_encindex = ptr_encindex;
3707 res_encindex = str_encindex;
3714 res_encindex = str_encindex;
3720 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3722 str_buf_cat(str,
ptr,
len);
3728 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3735 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3745 if (rb_enc_asciicompat(enc)) {
3746 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3752 unsigned int c = (
unsigned char)*
ptr;
3753 int len = rb_enc_codelen(c, enc);
3754 rb_enc_mbcput(c, buf, enc);
3755 rb_enc_cr_str_buf_cat(str, buf,
len,
3768 if (str_enc_fastpath(str)) {
3772 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3778 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3789 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3805rb_str_concat_literals(
size_t num,
const VALUE *strary)
3809 unsigned long len = 1;
3814 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3816 str_enc_copy_direct(str, strary[0]);
3818 for (i = s; i < num; ++i) {
3819 const VALUE v = strary[i];
3823 if (encidx != ENCINDEX_US_ASCII) {
3825 rb_enc_set_index(str, encidx);
3838rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3840 str_modifiable(str);
3845 else if (argc > 1) {
3848 rb_enc_copy(arg_str, str);
3849 for (i = 0; i < argc; i++) {
3884rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3886 long needed_capacity = 0;
3890 for (
int index = 0; index < argc; index++) {
3891 VALUE obj = argv[index];
3899 needed_capacity += RSTRING_LEN(obj);
3904 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3911 str_ensure_available_capa(str, needed_capacity);
3914 for (
int index = 0; index < argc; index++) {
3915 VALUE obj = argv[index];
3920 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3921 char byte = (char)(
NUM2INT(obj) & 0xFF);
3935 rb_bug(
"append_as_bytes arguments should have been validated");
3939 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3940 TERM_FILL(sptr, TERM_LEN(str));
3945 for (
int index = 0; index < argc; index++) {
3946 VALUE obj = argv[index];
3963 rb_bug(
"append_as_bytes arguments should have been validated");
4042 if (rb_num_to_uint(str2, &code) == 0) {
4055 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4058 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4061 long pos = RSTRING_LEN(str1);
4066 switch (
len = rb_enc_codelen(code, enc)) {
4067 case ONIGERR_INVALID_CODE_POINT_VALUE:
4068 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4070 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4076 rb_enc_mbcput(code, buf, enc);
4077 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4078 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4080 rb_str_resize(str1, pos+
len);
4081 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4094rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4096 int encidx = rb_enc_to_index(enc);
4098 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4103 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4104 return ENCINDEX_ASCII_8BIT;
4126rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4128 str_modifiable(str);
4133 else if (argc > 1) {
4136 rb_enc_copy(arg_str, str);
4137 for (i = 0; i < argc; i++) {
4150 st_index_t precomputed_hash;
4151 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4153 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4154 return precomputed_hash;
4157 return str_do_hash(str);
4164 const char *ptr1, *ptr2;
4167 return (len1 != len2 ||
4169 memcmp(ptr1, ptr2, len1) != 0);
4181rb_str_hash_m(
VALUE str)
4187#define lesser(a,b) (((a)>(b))?(b):(a))
4195 if (RSTRING_LEN(str1) == 0)
return TRUE;
4196 if (RSTRING_LEN(str2) == 0)
return TRUE;
4199 if (idx1 == idx2)
return TRUE;
4204 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4208 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4218 const char *ptr1, *ptr2;
4221 if (str1 == str2)
return 0;
4224 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4233 if (len1 > len2)
return 1;
4236 if (retval > 0)
return 1;
4270 if (str1 == str2)
return Qtrue;
4277 return rb_str_eql_internal(str1, str2);
4291 if (str1 == str2)
return Qtrue;
4293 return rb_str_eql_internal(str1, str2);
4331 return rb_invcmp(str1, str2);
4373 return str_casecmp(str1, s);
4381 const char *p1, *p1end, *p2, *p2end;
4383 enc = rb_enc_compatible(str1, str2);
4388 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4389 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4390 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4391 while (p1 < p1end && p2 < p2end) {
4393 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4394 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4396 return INT2FIX(c1 < c2 ? -1 : 1);
4403 while (p1 < p1end && p2 < p2end) {
4404 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4405 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4407 if (0 <= c1 && 0 <= c2) {
4411 return INT2FIX(c1 < c2 ? -1 : 1);
4415 l1 = rb_enc_mbclen(p1, p1end, enc);
4416 l2 = rb_enc_mbclen(p2, p2end, enc);
4417 len = l1 < l2 ? l1 : l2;
4418 r = memcmp(p1, p2,
len);
4420 return INT2FIX(r < 0 ? -1 : 1);
4422 return INT2FIX(l1 < l2 ? -1 : 1);
4428 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4429 if (p1 == p1end)
return INT2FIX(-1);
4462 return str_casecmp_p(str1, s);
4469 VALUE folded_str1, folded_str2;
4470 VALUE fold_opt = sym_fold;
4472 enc = rb_enc_compatible(str1, str2);
4477 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4478 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4480 return rb_str_eql(folded_str1, folded_str2);
4484strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4485 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4487 const char *search_start = str_ptr;
4488 long pos, search_len = str_len - offset;
4492 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4493 if (pos < 0)
return pos;
4495 if (t == search_start + pos)
break;
4496 search_len -= t - search_start;
4497 if (search_len <= 0)
return -1;
4498 offset += t - search_start;
4501 return pos + offset;
4505#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4506#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4509rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4511 const char *str_ptr, *str_ptr_end, *sub_ptr;
4512 long str_len, sub_len;
4515 enc = rb_enc_check(str, sub);
4516 if (is_broken_string(sub))
return -1;
4518 str_ptr = RSTRING_PTR(str);
4520 str_len = RSTRING_LEN(str);
4521 sub_ptr = RSTRING_PTR(sub);
4522 sub_len = RSTRING_LEN(sub);
4524 if (str_len < sub_len)
return -1;
4527 long str_len_char, sub_len_char;
4528 int single_byte = single_byte_optimizable(str);
4529 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4530 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4532 offset += str_len_char;
4533 if (offset < 0)
return -1;
4535 if (str_len_char - offset < sub_len_char)
return -1;
4536 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4539 if (sub_len == 0)
return offset;
4542 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4555rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4562 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4563 long slen = str_strlen(str, enc);
4565 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4577 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4578 enc, single_byte_optimizable(str));
4589 pos = rb_str_index(str, sub, pos);
4603str_ensure_byte_pos(
VALUE str,
long pos)
4605 if (!single_byte_optimizable(str)) {
4606 const char *s = RSTRING_PTR(str);
4608 const char *p = s + pos;
4609 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4611 "offset %ld does not land on character boundary", pos);
4684rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4690 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4691 long slen = RSTRING_LEN(str);
4693 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4704 str_ensure_byte_pos(str, pos);
4716 pos = rb_str_byteindex(str, sub, pos);
4717 if (pos >= 0)
return LONG2NUM(pos);
4724memrchr(
const char *search_str,
int chr,
long search_len)
4726 const char *ptr = search_str + search_len;
4727 while (ptr > search_str) {
4728 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4738 char *hit, *adjusted;
4740 long slen, searchlen;
4743 sbeg = RSTRING_PTR(str);
4744 slen = RSTRING_LEN(sub);
4745 if (slen == 0)
return s - sbeg;
4747 t = RSTRING_PTR(sub);
4749 searchlen = s - sbeg + 1;
4751 if (memcmp(s, t, slen) == 0) {
4756 hit = memrchr(sbeg, c, searchlen);
4759 if (hit != adjusted) {
4760 searchlen = adjusted - sbeg;
4763 if (memcmp(hit, t, slen) == 0)
4765 searchlen = adjusted - sbeg;
4766 }
while (searchlen > 0);
4780 enc = rb_enc_check(str, sub);
4781 if (is_broken_string(sub))
return -1;
4782 singlebyte = single_byte_optimizable(str);
4783 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4784 slen = str_strlen(sub, enc);
4787 if (
len < slen)
return -1;
4788 if (
len - pos < slen) pos =
len - slen;
4789 if (
len == 0)
return pos;
4791 sbeg = RSTRING_PTR(str);
4794 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4800 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4801 return str_rindex(str, sub, s, enc);
4813rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4818 long pos,
len = str_strlen(str, enc);
4820 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4822 if (pos < 0 && (pos +=
len) < 0) {
4828 if (pos >
len) pos =
len;
4836 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4837 enc, single_byte_optimizable(str));
4848 pos = rb_str_rindex(str, sub, pos);
4858rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4864 enc = rb_enc_check(str, sub);
4865 if (is_broken_string(sub))
return -1;
4866 len = RSTRING_LEN(str);
4867 slen = RSTRING_LEN(sub);
4870 if (
len < slen)
return -1;
4871 if (
len - pos < slen) pos =
len - slen;
4872 if (
len == 0)
return pos;
4874 sbeg = RSTRING_PTR(str);
4877 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4884 return str_rindex(str, sub, s, enc);
4974rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4978 long pos,
len = RSTRING_LEN(str);
4980 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4982 if (pos < 0 && (pos +=
len) < 0) {
4988 if (pos >
len) pos =
len;
4994 str_ensure_byte_pos(str, pos);
5006 pos = rb_str_byterindex(str, sub, pos);
5007 if (pos >= 0)
return LONG2NUM(pos);
5046 switch (OBJ_BUILTIN_TYPE(y)) {
5100rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5107 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5138rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5142 re = get_pat(argv[0]);
5143 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5152static enum neighbor_char
5158 if (rb_enc_mbminlen(enc) > 1) {
5160 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5162 return NEIGHBOR_NOT_CHAR;
5164 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5166 if (!l)
return NEIGHBOR_NOT_CHAR;
5167 if (l !=
len)
return NEIGHBOR_WRAPPED;
5168 rb_enc_mbcput(c, p, enc);
5169 r = rb_enc_precise_mbclen(p, p +
len, enc);
5171 return NEIGHBOR_NOT_CHAR;
5173 return NEIGHBOR_FOUND;
5176 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5179 return NEIGHBOR_WRAPPED;
5180 ++((
unsigned char*)p)[i];
5181 l = rb_enc_precise_mbclen(p, p+
len, enc);
5185 return NEIGHBOR_FOUND;
5188 memset(p+l, 0xff,
len-l);
5194 for (len2 =
len-1; 0 < len2; len2--) {
5195 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5199 memset(p+len2+1, 0xff,
len-(len2+1));
5204static enum neighbor_char
5209 if (rb_enc_mbminlen(enc) > 1) {
5211 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5213 return NEIGHBOR_NOT_CHAR;
5215 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5216 if (!c)
return NEIGHBOR_NOT_CHAR;
5219 if (!l)
return NEIGHBOR_NOT_CHAR;
5220 if (l !=
len)
return NEIGHBOR_WRAPPED;
5221 rb_enc_mbcput(c, p, enc);
5222 r = rb_enc_precise_mbclen(p, p +
len, enc);
5224 return NEIGHBOR_NOT_CHAR;
5226 return NEIGHBOR_FOUND;
5229 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5232 return NEIGHBOR_WRAPPED;
5233 --((
unsigned char*)p)[i];
5234 l = rb_enc_precise_mbclen(p, p+
len, enc);
5238 return NEIGHBOR_FOUND;
5241 memset(p+l, 0,
len-l);
5247 for (len2 =
len-1; 0 < len2; len2--) {
5248 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5252 memset(p+len2+1, 0,
len-(len2+1));
5266static enum neighbor_char
5267enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5269 enum neighbor_char ret;
5273 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5277 const int max_gaps = 1;
5279 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5281 ctype = ONIGENC_CTYPE_DIGIT;
5283 ctype = ONIGENC_CTYPE_ALPHA;
5285 return NEIGHBOR_NOT_CHAR;
5288 for (
try = 0;
try <= max_gaps; ++
try) {
5289 ret = enc_succ_char(p,
len, enc);
5290 if (ret == NEIGHBOR_FOUND) {
5291 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5293 return NEIGHBOR_FOUND;
5300 ret = enc_pred_char(p,
len, enc);
5301 if (ret == NEIGHBOR_FOUND) {
5302 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5315 return NEIGHBOR_NOT_CHAR;
5318 if (ctype != ONIGENC_CTYPE_DIGIT) {
5320 return NEIGHBOR_WRAPPED;
5324 enc_succ_char(carry,
len, enc);
5325 return NEIGHBOR_WRAPPED;
5343 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5344 rb_enc_cr_str_copy_for_substr(str, orig);
5345 return str_succ(str);
5352 char *sbeg, *s, *e, *last_alnum = 0;
5353 int found_alnum = 0;
5355 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5356 long carry_pos = 0, carry_len = 1;
5357 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5359 slen = RSTRING_LEN(str);
5360 if (slen == 0)
return str;
5362 enc = STR_ENC_GET(str);
5363 sbeg = RSTRING_PTR(str);
5364 s = e = sbeg + slen;
5366 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5367 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5373 l = rb_enc_precise_mbclen(s, e, enc);
5374 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5375 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5376 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5378 case NEIGHBOR_NOT_CHAR:
5380 case NEIGHBOR_FOUND:
5382 case NEIGHBOR_WRAPPED:
5387 carry_pos = s - sbeg;
5392 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5393 enum neighbor_char neighbor;
5394 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5395 l = rb_enc_precise_mbclen(s, e, enc);
5396 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5397 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5399 neighbor = enc_succ_char(tmp, l, enc);
5401 case NEIGHBOR_FOUND:
5405 case NEIGHBOR_WRAPPED:
5408 case NEIGHBOR_NOT_CHAR:
5411 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5413 enc_succ_char(s, l, enc);
5415 if (!rb_enc_asciicompat(enc)) {
5416 MEMCPY(carry, s,
char, l);
5419 carry_pos = s - sbeg;
5423 RESIZE_CAPA(str, slen + carry_len);
5424 sbeg = RSTRING_PTR(str);
5425 s = sbeg + carry_pos;
5426 memmove(s + carry_len, s, slen - carry_pos);
5427 memmove(s, carry, carry_len);
5429 STR_SET_LEN(str, slen);
5430 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5446rb_str_succ_bang(
VALUE str)
5454all_digits_p(
const char *s,
long len)
5482 VALUE end, exclusive;
5486 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5492 VALUE current, after_end;
5499 enc = rb_enc_check(beg, end);
5500 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5502 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5503 char c = RSTRING_PTR(beg)[0];
5504 char e = RSTRING_PTR(end)[0];
5506 if (c > e || (excl && c == e))
return beg;
5508 VALUE str = rb_enc_str_new(&c, 1, enc);
5510 if ((*each)(str, arg))
break;
5511 if (!excl && c == e)
break;
5513 if (excl && c == e)
break;
5518 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5519 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5520 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5525 b = rb_str_to_inum(beg, 10, FALSE);
5526 e = rb_str_to_inum(end, 10, FALSE);
5533 if (excl && bi == ei)
break;
5534 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5539 ID op = excl ?
'<' : idLE;
5540 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5545 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5546 b = rb_funcallv(b, succ, 0, 0);
5553 if (n > 0 || (excl && n == 0))
return beg;
5555 after_end = rb_funcallv(end, succ, 0, 0);
5560 next = rb_funcallv(current, succ, 0, 0);
5561 if ((*each)(current, arg))
break;
5562 if (
NIL_P(next))
break;
5566 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5581 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5582 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5583 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5585 b = rb_str_to_inum(beg, 10, FALSE);
5591 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5599 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5600 b = rb_funcallv(b, succ, 0, 0);
5606 VALUE next = rb_funcallv(current, succ, 0, 0);
5607 if ((*each)(current, arg))
break;
5610 if (RSTRING_LEN(current) == 0)
5621 if (!
rb_equal(str, *argp))
return 0;
5635 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5636 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5637 rb_enc_asciicompat(STR_ENC_GET(val))) {
5638 const char *bp = RSTRING_PTR(beg);
5639 const char *ep = RSTRING_PTR(end);
5640 const char *vp = RSTRING_PTR(val);
5641 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5642 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5650 if (b <= v && v < e)
return Qtrue;
5651 return RBOOL(!
RTEST(exclusive) && v == e);
5658 all_digits_p(bp, RSTRING_LEN(beg)) &&
5659 all_digits_p(ep, RSTRING_LEN(end))) {
5664 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5666 return RBOOL(
NIL_P(val));
5689 return rb_str_subpat(str, indx,
INT2FIX(0));
5692 if (rb_str_index(str, indx, 0) != -1)
5698 long beg,
len = str_strlen(str, NULL);
5710 return str_substr(str, idx, 1, FALSE);
5727rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5731 return rb_str_subpat(str, argv[0], argv[1]);
5734 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5738 return rb_str_aref(str, argv[0]);
5744 char *ptr = RSTRING_PTR(str);
5745 long olen = RSTRING_LEN(str), nlen;
5747 str_modifiable(str);
5748 if (
len > olen)
len = olen;
5750 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5752 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5754 ptr =
RSTRING(str)->as.embed.ary;
5755 memmove(ptr, oldptr +
len, nlen);
5756 if (fl == STR_NOEMBED)
xfree(oldptr);
5759 if (!STR_SHARED_P(str)) {
5761 rb_enc_cr_str_exact_copy(shared, str);
5766 STR_SET_LEN(str, nlen);
5768 if (!SHARABLE_MIDDLE_SUBSTRING) {
5769 TERM_FILL(ptr + nlen, TERM_LEN(str));
5776rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5782 if (beg == 0 && vlen == 0) {
5787 str_modify_keep_cr(str);
5791 RESIZE_CAPA(str, slen + vlen -
len);
5792 sptr = RSTRING_PTR(str);
5801 memmove(sptr + beg + vlen,
5803 slen - (beg +
len));
5805 if (vlen < beg &&
len < 0) {
5809 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5812 STR_SET_LEN(str, slen);
5813 TERM_FILL(&sptr[slen], TERM_LEN(str));
5820 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5829 int singlebyte = single_byte_optimizable(str);
5835 enc = rb_enc_check(str, val);
5836 slen = str_strlen(str, enc);
5838 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5847 if (
len > slen - beg) {
5850 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5855 beg = p - RSTRING_PTR(str);
5857 rb_str_update_0(str, beg,
len, val);
5858 rb_enc_associate(str, enc);
5869 long start, end,
len;
5879 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5883 nth += regs->num_regs;
5893 enc = rb_enc_check_str(str, val);
5894 rb_str_update_0(str, start,
len, val);
5895 rb_enc_associate(str, enc);
5903 switch (
TYPE(indx)) {
5905 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5909 beg = rb_str_index(str, indx, 0);
5948rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5952 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5960 return rb_str_aset(str, argv[0], argv[1]);
6012rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6020 str_modify_keep_cr(str);
6028 if ((nth += regs->num_regs) <= 0)
return Qnil;
6030 else if (nth >= regs->num_regs)
return Qnil;
6032 len = END(nth) - beg;
6035 else if (argc == 2) {
6044 beg = p - RSTRING_PTR(str);
6048 beg = rb_str_index(str, indx, 0);
6049 if (beg == -1)
return Qnil;
6050 len = RSTRING_LEN(indx);
6062 beg = p - RSTRING_PTR(str);
6071 beg = p - RSTRING_PTR(str);
6075 rb_enc_cr_str_copy_for_substr(result, str);
6083 char *sptr = RSTRING_PTR(str);
6084 long slen = RSTRING_LEN(str);
6085 if (beg +
len > slen)
6089 slen - (beg +
len));
6091 STR_SET_LEN(str, slen);
6092 TERM_FILL(&sptr[slen], TERM_LEN(str));
6103 switch (OBJ_BUILTIN_TYPE(pat)) {
6122get_pat_quoted(
VALUE pat,
int check)
6126 switch (OBJ_BUILTIN_TYPE(pat)) {
6140 if (check && is_broken_string(pat)) {
6147rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6150 pos = rb_str_byteindex(str, pat, pos);
6151 if (set_backref_str) {
6153 str = rb_str_new_frozen_String(str);
6154 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6156 *match = match_data;
6166 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6171rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6173 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6191rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6205 hash = rb_check_hash_type(argv[1]);
6211 pat = get_pat_quoted(argv[0], 1);
6213 str_modifiable(str);
6214 beg = rb_pat_search(pat, str, 0, 1);
6228 end0 = beg0 + RSTRING_LEN(pat);
6237 if (iter || !
NIL_P(hash)) {
6238 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6244 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6247 str_mod_check(str, p,
len);
6248 rb_check_frozen(str);
6254 enc = rb_enc_compatible(str, repl);
6257 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6261 rb_enc_inspect_name(str_enc),
6262 rb_enc_inspect_name(STR_ENC_GET(repl)));
6264 enc = STR_ENC_GET(repl);
6267 rb_enc_associate(str, enc);
6277 rlen = RSTRING_LEN(repl);
6278 len = RSTRING_LEN(str);
6280 RESIZE_CAPA(str,
len + rlen - plen);
6282 p = RSTRING_PTR(str);
6284 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6286 rp = RSTRING_PTR(repl);
6287 memmove(p + beg0, rp, rlen);
6289 STR_SET_LEN(str,
len);
6290 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6313 rb_str_sub_bang(argc, argv, str);
6318str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6321 long beg, beg0, end0;
6322 long offset, blen, slen,
len, last;
6323 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6325 int need_backref_str = -1;
6335 hash = rb_check_hash_type(argv[1]);
6339 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6347 rb_error_arity(argc, 1, 2);
6350 pat = get_pat_quoted(argv[0], 1);
6351 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6354 if (bang)
return Qnil;
6359 blen = RSTRING_LEN(str) + 30;
6361 sp = RSTRING_PTR(str);
6362 slen = RSTRING_LEN(str);
6364 str_enc = STR_ENC_GET(str);
6365 rb_enc_associate(dest, str_enc);
6372 end0 = beg0 + RSTRING_LEN(pat);
6386 struct RString fake_str = {RBASIC_INIT};
6388 if (mode == FAST_MAP) {
6397 val = rb_hash_aref(hash, key);
6400 str_mod_check(str, sp, slen);
6405 else if (need_backref_str) {
6407 if (need_backref_str < 0) {
6408 need_backref_str = val != repl;
6415 len = beg0 - offset;
6429 if (RSTRING_LEN(str) <= end0)
break;
6430 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6432 offset = end0 +
len;
6434 cp = RSTRING_PTR(str) + offset;
6435 if (offset > RSTRING_LEN(str))
break;
6438 if (mode != FAST_MAP && mode != STR) {
6441 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6446 if (RSTRING_LEN(str) > offset) {
6449 rb_pat_search0(pat, str, last, 1, &match);
6451 str_shared_replace(str, dest);
6476rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6478 str_modify_keep_cr(str);
6479 return str_gsub(argc, argv, str, 1);
6529 return str_gsub(argc, argv, str, 0);
6549 str_modifiable(str);
6550 if (str == str2)
return str;
6554 return str_replace(str, str2);
6571rb_str_clear(
VALUE str)
6575 STR_SET_LEN(str, 0);
6576 RSTRING_PTR(str)[0] = 0;
6577 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6593rb_str_chr(
VALUE str)
6611 pos += RSTRING_LEN(str);
6612 if (pos < 0 || RSTRING_LEN(str) <= pos)
6615 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6635 long len = RSTRING_LEN(str);
6636 char *
ptr, *head, *left = 0;
6640 if (pos < -
len ||
len <= pos)
6647 char byte = (char)(
NUM2INT(w) & 0xFF);
6649 if (!str_independent(str))
6650 str_make_independent(str);
6651 enc = STR_ENC_GET(str);
6652 head = RSTRING_PTR(str);
6654 if (!STR_EMBED_P(str)) {
6661 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6669 width = rb_enc_precise_mbclen(left, head+
len, enc);
6671 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6687str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6689 long n = RSTRING_LEN(str);
6691 if (beg > n ||
len < 0)
return Qnil;
6694 if (beg < 0)
return Qnil;
6699 if (!empty)
return Qnil;
6703 VALUE str2 = str_subseq(str, beg,
len);
6705 str_enc_copy_direct(str2, str);
6707 if (RSTRING_LEN(str2) == 0) {
6708 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6742 long beg,
len = RSTRING_LEN(str);
6750 return str_byte_substr(str, beg,
len, TRUE);
6755 return str_byte_substr(str, idx, 1, FALSE);
6767rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6772 return str_byte_substr(str, beg,
len, TRUE);
6775 return str_byte_aref(str, argv[0]);
6779str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6781 long end, slen = RSTRING_LEN(str);
6784 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6793 if (*
len > slen - *beg) {
6797 str_ensure_byte_pos(str, *beg);
6798 str_ensure_byte_pos(str, end);
6812rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6814 long beg,
len, vbeg, vlen;
6819 if (!(argc == 2 || argc == 3 || argc == 5)) {
6820 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6824 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6825 rb_builtin_class_name(argv[0]));
6832 vlen = RSTRING_LEN(val);
6837 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6838 rb_builtin_class_name(argv[2]));
6850 vlen = RSTRING_LEN(val);
6858 str_check_beg_len(str, &beg, &
len);
6859 str_check_beg_len(val, &vbeg, &vlen);
6860 str_modify_keep_cr(str);
6863 rb_enc_associate(str, rb_enc_check(str, val));
6866 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6888rb_str_reverse(
VALUE str)
6895 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6896 enc = STR_ENC_GET(str);
6902 if (RSTRING_LEN(str) > 1) {
6903 if (single_byte_optimizable(str)) {
6910 int clen = rb_enc_fast_mbclen(s, e, enc);
6918 cr = rb_enc_asciicompat(enc) ?
6921 int clen = rb_enc_mbclen(s, e, enc);
6930 STR_SET_LEN(rev, RSTRING_LEN(str));
6931 str_enc_copy_direct(rev, str);
6953rb_str_reverse_bang(
VALUE str)
6955 if (RSTRING_LEN(str) > 1) {
6956 if (single_byte_optimizable(str)) {
6959 str_modify_keep_cr(str);
6960 s = RSTRING_PTR(str);
6969 str_shared_replace(str, rb_str_reverse(str));
6973 str_modify_keep_cr(str);
7002 i = rb_str_index(str, arg, 0);
7004 return RBOOL(i != -1);
7048 rb_raise(rb_eArgError,
"invalid radix %d", base);
7050 return rb_str_to_inum(str, base, FALSE);
7075rb_str_to_f(
VALUE str)
7092rb_str_to_s(
VALUE str)
7104 char s[RUBY_MAX_CHAR_LEN];
7105 int n = rb_enc_codelen(c, enc);
7107 rb_enc_mbcput(c, s, enc);
/* Size bound passed to snprintf() when one escape sequence is formatted;
 * the buffers filled this way are declared with CHAR_ESC_LEN + 1 bytes. */
7112#define CHAR_ESC_LEN 13
7115rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7117 char buf[CHAR_ESC_LEN + 1];
7125 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7127 else if (c < 0x10000) {
7128 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7131 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7136 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7139 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7142 l = (int)strlen(buf);
/*
 * Maps a character code to the string literal spelling its backslash escape:
 *   NUL -> \0, LF -> \n, CR -> \r, TAB -> \t, FF -> \f, VT (013) -> \v,
 *   BS (010) -> \b, BEL (007) -> \a, ESC (033) -> \e, DEL (0x7f) -> \c?
 * Each returned string starts with a literal backslash character.
 * NOTE(review): the result for any other value of c is not visible in this
 * excerpt -- confirm the fallback before relying on it.
 */
7148ruby_escaped_char(
int c)
7151 case '\0':
return "\\0";
7152 case '\n':
return "\\n";
7153 case '\r':
return "\\r";
7154 case '\t':
return "\\t";
7155 case '\f':
return "\\f";
7156 case '\013':
return "\\v";
7157 case '\010':
return "\\b";
7158 case '\007':
return "\\a";
7159 case '\033':
return "\\e";
7160 case '\x7f':
return "\\c?";
7166rb_str_escape(
VALUE str)
7170 const char *p = RSTRING_PTR(str);
7172 const char *prev = p;
7173 char buf[CHAR_ESC_LEN + 1];
7175 int unicode_p = rb_enc_unicode_p(enc);
7176 int asciicompat = rb_enc_asciicompat(enc);
7181 int n = rb_enc_precise_mbclen(p, pend, enc);
7183 if (p > prev) str_buf_cat(result, prev, p - prev);
7184 n = rb_enc_mbminlen(enc);
7186 n = (int)(pend - p);
7188 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7189 str_buf_cat(result, buf, strlen(buf));
7195 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7197 cc = ruby_escaped_char(c);
7199 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7200 str_buf_cat(result, cc, strlen(cc));
7203 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7206 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7207 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7211 if (p > prev) str_buf_cat(result, prev, p - prev);
7230 const char *p, *pend, *prev;
7231 char buf[CHAR_ESC_LEN + 1];
7233 rb_encoding *resenc = rb_default_internal_encoding();
7234 int unicode_p = rb_enc_unicode_p(enc);
7235 int asciicompat = rb_enc_asciicompat(enc);
7237 if (resenc == NULL) resenc = rb_default_external_encoding();
7238 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7239 rb_enc_associate(result, resenc);
7240 str_buf_cat2(result,
"\"");
7248 n = rb_enc_precise_mbclen(p, pend, enc);
7250 if (p > prev) str_buf_cat(result, prev, p - prev);
7251 n = rb_enc_mbminlen(enc);
7253 n = (int)(pend - p);
7255 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7256 str_buf_cat(result, buf, strlen(buf));
7262 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7264 if ((asciicompat || unicode_p) &&
7265 (c ==
'"'|| c ==
'\\' ||
7270 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7271 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7272 str_buf_cat2(result,
"\\");
7273 if (asciicompat || enc == resenc) {
7279 case '\n': cc =
'n';
break;
7280 case '\r': cc =
'r';
break;
7281 case '\t': cc =
't';
break;
7282 case '\f': cc =
'f';
break;
7283 case '\013': cc =
'v';
break;
7284 case '\010': cc =
'b';
break;
7285 case '\007': cc =
'a';
break;
7286 case 033: cc =
'e';
break;
7287 default: cc = 0;
break;
7290 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7293 str_buf_cat(result, buf, 2);
7306 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7310 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7311 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7316 if (p > prev) str_buf_cat(result, prev, p - prev);
7317 str_buf_cat2(result,
"\"");
/* True when p is still before e and the byte at p is '$', '@' or '{'.
 * The dump code applies it to the byte that follows a '#' to decide whether
 * an extra backslash is needed. The argument p is evaluated several times,
 * so it must be free of side effects. */
7322#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7335 int encidx = rb_enc_get_index(str);
7338 const char *p, *pend;
7341 int u8 = (encidx == rb_utf8_encindex());
7342 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7345 if (!rb_enc_asciicompat(enc)) {
7347 len += strlen(enc->name);
7350 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7353 unsigned char c = *p++;
7356 case '"':
case '\\':
7357 case '\n':
case '\r':
7358 case '\t':
case '\f':
7359 case '\013':
case '\010':
case '\007':
case '\033':
7364 clen = IS_EVSTR(p, pend) ? 2 : 1;
7372 if (u8 && c > 0x7F) {
7373 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7375 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7378 else if (cc <= 0xFFFFF)
7391 if (clen > LONG_MAX -
len) {
7398 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7399 q = RSTRING_PTR(result); qend = q +
len + 1;
7403 unsigned char c = *p++;
7405 if (c ==
'"' || c ==
'\\') {
7409 else if (c ==
'#') {
7410 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7413 else if (c ==
'\n') {
7417 else if (c ==
'\r') {
7421 else if (c ==
'\t') {
7425 else if (c ==
'\f') {
7429 else if (c ==
'\013') {
7433 else if (c ==
'\010') {
7437 else if (c ==
'\007') {
7441 else if (c ==
'\033') {
7451 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7453 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7456 snprintf(q, qend-q,
"u%04X", cc);
7458 snprintf(q, qend-q,
"u{%X}", cc);
7463 snprintf(q, qend-q,
"x%02X", c);
7469 if (!rb_enc_asciicompat(enc)) {
7470 snprintf(q, qend-q, nonascii_suffix, enc->name);
7471 encidx = rb_ascii8bit_encindex();
7474 rb_enc_associate_index(result, encidx);
7480unescape_ascii(
unsigned int c)
7504undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7506 const char *s = *ss;
7510 unsigned char buf[6];
7528 *buf = unescape_ascii(*s);
7540 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7541 if (*penc != enc_utf8) {
7543 rb_enc_associate(undumped, enc_utf8);
7560 if (hexlen == 0 || hexlen > 6) {
7566 if (0xd800 <= c && c <= 0xdfff) {
7569 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7579 if (0xd800 <= c && c <= 0xdfff) {
7582 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
/* Forward declaration so that the call made while undumping a string
 * compiles; the definition is not part of this excerpt. */
7612static VALUE rb_str_is_ascii_only_p(
VALUE str);
7624str_undump(
VALUE str)
7626 const char *s = RSTRING_PTR(str);
7629 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7631 bool binary =
false;
7635 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7638 if (!str_null_check(str, &w)) {
7641 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7642 if (*s !=
'"')
goto invalid_format;
7660 static const char force_encoding_suffix[] =
".force_encoding(\"";
7661 static const char dup_suffix[] =
".dup";
7662 const char *encname;
7667 size =
sizeof(dup_suffix) - 1;
7668 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7670 size =
sizeof(force_encoding_suffix) - 1;
7671 if (s_end - s <= size)
goto invalid_format;
7672 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7676 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7680 s = memchr(s,
'"', s_end-s);
7682 if (!s)
goto invalid_format;
7683 if (s_end - s != 2)
goto invalid_format;
7684 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7686 encidx = rb_enc_find_index2(encname, (
long)size);
7690 rb_enc_associate_index(undumped, encidx);
7700 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7711 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7717 if (rb_enc_dummy_p(enc)) {
7724str_true_enc(
VALUE str)
7727 rb_str_check_dummy_enc(enc);
7731static OnigCaseFoldType
7732check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7737 rb_raise(rb_eArgError,
"too many options");
7738 if (argv[0]==sym_turkic) {
7739 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7741 if (argv[1]==sym_lithuanian)
7742 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7744 rb_raise(rb_eArgError,
"invalid second option");
7747 else if (argv[0]==sym_lithuanian) {
7748 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7750 if (argv[1]==sym_turkic)
7751 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7753 rb_raise(rb_eArgError,
"invalid second option");
7757 rb_raise(rb_eArgError,
"too many options");
7758 else if (argv[0]==sym_ascii)
7759 flags |= ONIGENC_CASE_ASCII_ONLY;
7760 else if (argv[0]==sym_fold) {
7761 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7762 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7764 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7767 rb_raise(rb_eArgError,
"invalid option");
7774 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/* Extra bytes added to the capacity of every case-mapping buffer, on top of
 * the amount derived from the remaining source length. */
7780#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/* Debug switch, 0 unless defined beforehand; when nonzero the case-mapping
 * code prints buffer diagnostics to stderr. */
7781#ifndef CASEMAP_DEBUG
7782# define CASEMAP_DEBUG 0
7790 OnigUChar space[FLEX_ARY_LEN];
7794mapping_buffer_free(
void *p)
7798 while (current_buffer) {
7799 previous_buffer = current_buffer;
7800 current_buffer = current_buffer->next;
7801 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7807 {0, mapping_buffer_free,},
7808 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7816 const OnigUChar *source_current, *source_end;
7817 int target_length = 0;
7818 VALUE buffer_anchor;
7821 size_t buffer_count = 0;
7822 int buffer_length_or_invalid;
7824 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7826 source_current = (OnigUChar*)RSTRING_PTR(source);
7831 while (source_current < source_end) {
7833 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7834 if (CASEMAP_DEBUG) {
7835 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7838 *pre_buffer = current_buffer;
7839 pre_buffer = ¤t_buffer->next;
7840 current_buffer->next = NULL;
7841 current_buffer->capa =
capa;
7842 buffer_length_or_invalid = enc->case_map(flags,
7843 &source_current, source_end,
7844 current_buffer->space,
7845 current_buffer->space+current_buffer->capa,
7847 if (buffer_length_or_invalid < 0) {
7848 current_buffer =
DATA_PTR(buffer_anchor);
7850 mapping_buffer_free(current_buffer);
7851 rb_raise(rb_eArgError,
"input string invalid");
7853 target_length += current_buffer->used = buffer_length_or_invalid;
7855 if (CASEMAP_DEBUG) {
7856 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7859 if (buffer_count==1) {
7860 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7863 char *target_current;
7866 target_current = RSTRING_PTR(target);
7867 current_buffer =
DATA_PTR(buffer_anchor);
7868 while (current_buffer) {
7869 memcpy(target_current, current_buffer->space, current_buffer->used);
7870 target_current += current_buffer->used;
7871 current_buffer = current_buffer->next;
7874 current_buffer =
DATA_PTR(buffer_anchor);
7876 mapping_buffer_free(current_buffer);
7881 str_enc_copy_direct(target, source);
7890 const OnigUChar *source_current, *source_end;
7891 OnigUChar *target_current, *target_end;
7892 long old_length = RSTRING_LEN(source);
7893 int length_or_invalid;
7895 if (old_length == 0)
return Qnil;
7897 source_current = (OnigUChar*)RSTRING_PTR(source);
7899 if (source == target) {
7900 target_current = (OnigUChar*)source_current;
7901 target_end = (OnigUChar*)source_end;
7904 target_current = (OnigUChar*)RSTRING_PTR(target);
7908 length_or_invalid = onigenc_ascii_only_case_map(flags,
7909 &source_current, source_end,
7910 target_current, target_end, enc);
7911 if (length_or_invalid < 0)
7912 rb_raise(rb_eArgError,
"input string invalid");
7913 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7914 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7915 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7916 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7917 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7920 str_enc_copy(target, source);
7926upcase_single(
VALUE str)
7928 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7929 bool modified =
false;
7932 unsigned int c = *(
unsigned char*)s;
7934 if (
'a' <= c && c <=
'z') {
7935 *s =
'A' + (c -
'a');
7956rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7959 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7961 flags = check_case_options(argc, argv, flags);
7962 str_modify_keep_cr(str);
7963 enc = str_true_enc(str);
7964 if (case_option_single_p(flags, enc, str)) {
7965 if (upcase_single(str))
7966 flags |= ONIGENC_CASE_MODIFIED;
7968 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7969 rb_str_ascii_casemap(str, str, &flags, enc);
7971 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7973 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7986rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7989 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7992 flags = check_case_options(argc, argv, flags);
7993 enc = str_true_enc(str);
7994 if (case_option_single_p(flags, enc, str)) {
7995 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7996 str_enc_copy_direct(ret, str);
7999 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8001 rb_str_ascii_casemap(str, ret, &flags, enc);
8004 ret = rb_str_casemap(str, &flags, enc);
8011downcase_single(
VALUE str)
8013 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8014 bool modified =
false;
8017 unsigned int c = *(
unsigned char*)s;
8019 if (
'A' <= c && c <=
'Z') {
8020 *s =
'a' + (c -
'A');
8042rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8045 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8047 flags = check_case_options(argc, argv, flags);
8048 str_modify_keep_cr(str);
8049 enc = str_true_enc(str);
8050 if (case_option_single_p(flags, enc, str)) {
8051 if (downcase_single(str))
8052 flags |= ONIGENC_CASE_MODIFIED;
8054 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8055 rb_str_ascii_casemap(str, str, &flags, enc);
8057 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8059 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8073rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8076 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8079 flags = check_case_options(argc, argv, flags);
8080 enc = str_true_enc(str);
8081 if (case_option_single_p(flags, enc, str)) {
8082 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8083 str_enc_copy_direct(ret, str);
8084 downcase_single(ret);
8086 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8088 rb_str_ascii_casemap(str, ret, &flags, enc);
8091 ret = rb_str_casemap(str, &flags, enc);
8111rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8114 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8116 flags = check_case_options(argc, argv, flags);
8117 str_modify_keep_cr(str);
8118 enc = str_true_enc(str);
8119 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8120 if (flags&ONIGENC_CASE_ASCII_ONLY)
8121 rb_str_ascii_casemap(str, str, &flags, enc);
8123 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8125 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8139rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8142 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8145 flags = check_case_options(argc, argv, flags);
8146 enc = str_true_enc(str);
8147 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8148 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8150 rb_str_ascii_casemap(str, ret, &flags, enc);
8153 ret = rb_str_casemap(str, &flags, enc);
8172rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8175 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8177 flags = check_case_options(argc, argv, flags);
8178 str_modify_keep_cr(str);
8179 enc = str_true_enc(str);
8180 if (flags&ONIGENC_CASE_ASCII_ONLY)
8181 rb_str_ascii_casemap(str, str, &flags, enc);
8183 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8185 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8199rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8202 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8205 flags = check_case_options(argc, argv, flags);
8206 enc = str_true_enc(str);
8207 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8208 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8210 rb_str_ascii_casemap(str, ret, &flags, enc);
8213 ret = rb_str_casemap(str, &flags, enc);
8218typedef unsigned char *USTR;
8222 unsigned int now, max;
8234 if (t->p == t->pend)
return -1;
8235 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8238 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8240 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8242 if (t->p < t->pend) {
8243 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8246 if (t->now < 0x80 && c < 0x80) {
8247 rb_raise(rb_eArgError,
8248 "invalid range \"%c-%c\" in string transliteration",
8252 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8256 else if (t->now < c) {
8265 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8266 if (t->now == t->max) {
8271 if (t->now < t->max) {
8287 const unsigned int errc = -1;
8288 unsigned int trans[256];
8290 struct tr trsrc, trrepl;
8292 unsigned int c, c0, last = 0;
8293 int modify = 0, i, l;
8294 unsigned char *s, *send;
8296 int singlebyte = single_byte_optimizable(str);
8300#define CHECK_IF_ASCII(c) \
8301 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8302 (cr = ENC_CODERANGE_VALID) : 0)
8306 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8307 if (RSTRING_LEN(repl) == 0) {
8308 return rb_str_delete_bang(1, &src, str);
8312 e1 = rb_enc_check(str, src);
8313 e2 = rb_enc_check(str, repl);
8318 enc = rb_enc_check(src, repl);
8320 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8321 if (RSTRING_LEN(src) > 1 &&
8322 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8323 trsrc.p + l < trsrc.pend) {
8327 trrepl.p = RSTRING_PTR(repl);
8328 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8329 trsrc.gen = trrepl.gen = 0;
8330 trsrc.now = trrepl.now = 0;
8331 trsrc.max = trrepl.max = 0;
8334 for (i=0; i<256; i++) {
8337 while ((c = trnext(&trsrc, enc)) != errc) {
8342 if (!hash) hash = rb_hash_new();
8346 while ((c = trnext(&trrepl, enc)) != errc)
8349 for (i=0; i<256; i++) {
8350 if (trans[i] != errc) {
8358 for (i=0; i<256; i++) {
8361 while ((c = trnext(&trsrc, enc)) != errc) {
8362 r = trnext(&trrepl, enc);
8363 if (r == errc) r = trrepl.now;
8366 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8369 if (!hash) hash = rb_hash_new();
8377 str_modify_keep_cr(str);
8378 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8379 termlen = rb_enc_mbminlen(enc);
8382 long offset, max = RSTRING_LEN(str);
8383 unsigned int save = -1;
8384 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8389 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8392 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8395 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8397 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8406 if (cflag) c = last;
8409 else if (cflag) c = errc;
8415 if (c != (
unsigned int)-1) {
8421 tlen = rb_enc_codelen(c, enc);
8427 if (enc != e1) may_modify = 1;
8429 if ((offset = t - buf) + tlen > max) {
8430 size_t MAYBE_UNUSED(old) = max + termlen;
8431 max = offset + tlen + (send - s);
8432 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8435 rb_enc_mbcput(c, t, enc);
8436 if (may_modify && memcmp(s, t, tlen) != 0) {
8442 if (!STR_EMBED_P(str)) {
8443 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8445 TERM_FILL((
char *)t, termlen);
8446 RSTRING(str)->as.heap.ptr = (
char *)buf;
8447 STR_SET_LEN(str, t - buf);
8448 STR_SET_NOEMBED(str);
8449 RSTRING(str)->as.heap.aux.capa = max;
8453 c = (
unsigned char)*s;
8454 if (trans[c] != errc) {
8471 long offset, max = (long)((send - s) * 1.2);
8472 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8477 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8480 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8483 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8485 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8493 if (cflag) c = last;
8496 else if (cflag) c = errc;
8500 c = cflag ? last : errc;
8503 tlen = rb_enc_codelen(c, enc);
8508 if (enc != e1) may_modify = 1;
8510 if ((offset = t - buf) + tlen > max) {
8511 size_t MAYBE_UNUSED(old) = max + termlen;
8512 max = offset + tlen + (long)((send - s) * 1.2);
8513 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8517 rb_enc_mbcput(c, t, enc);
8518 if (may_modify && memcmp(s, t, tlen) != 0) {
8526 if (!STR_EMBED_P(str)) {
8527 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8529 TERM_FILL((
char *)t, termlen);
8530 RSTRING(str)->as.heap.ptr = (
char *)buf;
8531 STR_SET_LEN(str, t - buf);
8532 STR_SET_NOEMBED(str);
8533 RSTRING(str)->as.heap.aux.capa = max;
8539 rb_enc_associate(str, enc);
8561 return tr_trans(str, src, repl, 0);
8606 tr_trans(str, src, repl, 0);
/* Number of table entries addressable by one byte value (0..UCHAR_MAX). */
8610#define TR_TABLE_MAX (UCHAR_MAX+1)
/* Full table size: one extra slot at index TR_TABLE_MAX. tr_setup_table()
 * stores cflag there, and tr_find() returns TRUE/FALSE from that slot on its
 * last return path, for code points that are not below TR_TABLE_MAX. */
8611#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8613tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8616 const unsigned int errc = -1;
8617 char buf[TR_TABLE_MAX];
8620 VALUE table = 0, ptable = 0;
8621 int i, l, cflag = 0;
8623 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8624 tr.gen =
tr.now =
tr.max = 0;
8626 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8631 for (i=0; i<TR_TABLE_MAX; i++) {
8634 stable[TR_TABLE_MAX] = cflag;
8636 else if (stable[TR_TABLE_MAX] && !cflag) {
8637 stable[TR_TABLE_MAX] = 0;
8639 for (i=0; i<TR_TABLE_MAX; i++) {
8643 while ((c = trnext(&
tr, enc)) != errc) {
8644 if (c < TR_TABLE_MAX) {
8645 buf[(
unsigned char)c] = !cflag;
8650 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8653 table = ptable ? ptable : rb_hash_new();
8657 table = rb_hash_new();
8662 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8663 rb_hash_aset(table, key,
Qtrue);
8667 for (i=0; i<TR_TABLE_MAX; i++) {
8668 stable[i] = stable[i] && buf[i];
8670 if (!table && !cflag) {
8677tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8679 if (c < TR_TABLE_MAX) {
8680 return table[c] != 0;
8686 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8687 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8691 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8694 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8709rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8711 char squeez[TR_TABLE_SIZE];
8714 VALUE del = 0, nodel = 0;
8716 int i, ascompat, cr;
8718 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8720 for (i=0; i<argc; i++) {
8724 enc = rb_enc_check(str, s);
8725 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8728 str_modify_keep_cr(str);
8729 ascompat = rb_enc_asciicompat(enc);
8730 s = t = RSTRING_PTR(str);
8737 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8748 c = rb_enc_codepoint_len(s, send, &clen, enc);
8750 if (tr_find(c, squeez, del, nodel)) {
8754 if (t != s) rb_enc_mbcput(c, t, enc);
8761 TERM_FILL(t, TERM_LEN(str));
8762 STR_SET_LEN(str, t - RSTRING_PTR(str));
8765 if (modify)
return str;
8779rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8782 rb_str_delete_bang(argc, argv, str);
8800rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8802 char squeez[TR_TABLE_SIZE];
8804 VALUE del = 0, nodel = 0;
8805 unsigned char *s, *send, *t;
8807 int ascompat, singlebyte = single_byte_optimizable(str);
8811 enc = STR_ENC_GET(str);
8814 for (i=0; i<argc; i++) {
8818 enc = rb_enc_check(str, s);
8819 if (singlebyte && !single_byte_optimizable(s))
8821 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8825 str_modify_keep_cr(str);
8826 s = t = (
unsigned char *)RSTRING_PTR(str);
8827 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8830 ascompat = rb_enc_asciicompat(enc);
8834 unsigned int c = *s++;
8835 if (c != save || (argc > 0 && !squeez[c])) {
8845 if (ascompat && (c = *s) < 0x80) {
8846 if (c != save || (argc > 0 && !squeez[c])) {
8852 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8854 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8855 if (t != s) rb_enc_mbcput(c, t, enc);
8864 TERM_FILL((
char *)t, TERM_LEN(str));
8865 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8866 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8870 if (modify)
return str;
8884rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8887 rb_str_squeeze_bang(argc, argv, str);
8907 return tr_trans(str, src, repl, 1);
8935 tr_trans(str, src, repl, 1);
8948rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8950 char table[TR_TABLE_SIZE];
8952 VALUE del = 0, nodel = 0, tstr;
8962 enc = rb_enc_check(str, tstr);
8965 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8966 (ptstr = RSTRING_PTR(tstr),
8967 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8968 !is_broken_string(str)) {
8970 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8972 s = RSTRING_PTR(str);
8973 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8976 if (*(
unsigned char*)s++ == c) n++;
8982 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8983 for (i=1; i<argc; i++) {
8986 enc = rb_enc_check(str, tstr);
8987 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8990 s = RSTRING_PTR(str);
8991 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8993 ascompat = rb_enc_asciicompat(enc);
8997 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9005 c = rb_enc_codepoint_len(s, send, &clen, enc);
9006 if (tr_find(c, table, del, nodel)) {
9017rb_fs_check(
VALUE val)
9021 if (
NIL_P(val))
return 0;
/*
 * Byte-indexed lookup table with one entry per possible unsigned char value.
 * An entry is 1 for the bytes 0x09..0x0D (TAB, LF, VT, FF, CR) and for 0x20
 * (space); every other entry, including NUL and all bytes >= 0x80, is 0.
 * Each row below covers 16 consecutive byte values.
 */
9026static const char isspacetable[256] = {
9027 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9028 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9029 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9030 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9031 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9032 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9033 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9034 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9035 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9036 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9037 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9038 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9039 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9040 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9041 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9042 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Non-zero when the byte c is flagged in isspacetable.  The argument is cast
 * to unsigned char before indexing, so a negative plain-char value cannot
 * produce a negative index.
 */
9045#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9048split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9050 if (empty_count >= 0 &&
len == 0) {
9051 return empty_count + 1;
9053 if (empty_count > 0) {
9058 }
while (--empty_count > 0);
9062 rb_yield(str_new_empty_String(str));
9063 }
while (--empty_count > 0);
9077 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9081literal_split_pattern(
VALUE spat, split_type_t default_type)
9089 return SPLIT_TYPE_CHARS;
9091 else if (rb_enc_asciicompat(enc)) {
9092 if (
len == 1 && ptr[0] ==
' ') {
9093 return SPLIT_TYPE_AWK;
9098 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9099 return SPLIT_TYPE_AWK;
9102 return default_type;
9115rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9120 split_type_t split_type;
9121 long beg, end, i = 0, empty_count = -1;
9126 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9128 if (lim <= 0) limit =
Qnil;
9129 else if (lim == 1) {
9130 if (RSTRING_LEN(str) == 0)
9141 if (
NIL_P(limit) && !lim) empty_count = 0;
9143 enc = STR_ENC_GET(str);
9144 split_type = SPLIT_TYPE_REGEXP;
9146 spat = get_pat_quoted(spat, 0);
9148 else if (
NIL_P(spat = rb_fs)) {
9149 split_type = SPLIT_TYPE_AWK;
9151 else if (!(spat = rb_fs_check(spat))) {
9152 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9157 if (split_type != SPLIT_TYPE_AWK) {
9162 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9163 if (split_type == SPLIT_TYPE_AWK) {
9165 split_type = SPLIT_TYPE_STRING;
9170 mustnot_broken(spat);
9171 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Hands the piece of str described by (beg, len) to split_string() and stores
 * the value it returns back into empty_count, then runs
 * str_mod_check(str, str_start, str_len).
 *
 * Expands to a single comma expression and is not self-contained: it relies
 * on the locals result, str, empty_count, str_start and str_len being in
 * scope where it is used.
 * NOTE(review): the str_mod_check() call presumably guards against str being
 * changed while a piece is being delivered -- confirm against its definition.
 */
9179#define SPLIT_STR(beg, len) ( \
9180 empty_count = split_string(result, str, beg, len, empty_count), \
9181 str_mod_check(str, str_start, str_len))
9184 char *ptr = RSTRING_PTR(str);
9185 char *
const str_start = ptr;
9186 const long str_len = RSTRING_LEN(str);
9187 char *
const eptr = str_start + str_len;
9188 if (split_type == SPLIT_TYPE_AWK) {
9195 if (is_ascii_string(str)) {
9196 while (ptr < eptr) {
9197 c = (
unsigned char)*ptr++;
9199 if (ascii_isspace(c)) {
9205 if (!
NIL_P(limit) && lim <= i)
break;
9208 else if (ascii_isspace(c)) {
9209 SPLIT_STR(beg, end-beg);
9212 if (!
NIL_P(limit)) ++i;
9220 while (ptr < eptr) {
9223 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9232 if (!
NIL_P(limit) && lim <= i)
break;
9236 SPLIT_STR(beg, end-beg);
9239 if (!
NIL_P(limit)) ++i;
9247 else if (split_type == SPLIT_TYPE_STRING) {
9248 char *substr_start = ptr;
9249 char *sptr = RSTRING_PTR(spat);
9250 long slen = RSTRING_LEN(spat);
9253 mustnot_broken(str);
9254 enc = rb_enc_check(str, spat);
9255 while (ptr < eptr &&
9256 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9259 if (t != ptr + end) {
9263 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9264 str_mod_check(spat, sptr, slen);
9267 if (!
NIL_P(limit) && lim <= ++i)
break;
9269 beg = ptr - str_start;
9271 else if (split_type == SPLIT_TYPE_CHARS) {
9275 mustnot_broken(str);
9276 enc = rb_enc_get(str);
9277 while (ptr < eptr &&
9278 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9279 SPLIT_STR(ptr - str_start, n);
9281 if (!
NIL_P(limit) && lim <= ++i)
break;
9283 beg = ptr - str_start;
9287 long len = RSTRING_LEN(str);
9295 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9300 if (start == end && BEG(0) == END(0)) {
9305 else if (last_null == 1) {
9306 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9313 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9319 SPLIT_STR(beg, end-beg);
9320 beg = start = END(0);
9324 for (idx=1; idx < regs->num_regs; idx++) {
9325 if (BEG(idx) == -1)
continue;
9326 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9328 if (!
NIL_P(limit) && lim <= ++i)
break;
9330 if (match) rb_match_unbusy(match);
9332 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9333 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9336 return result ? result : str;
9346 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to 0 when rb_block_given_p() is true, otherwise to a fresh array
 * obtained from rb_ary_new_capa(size).  The first argument, m, is accepted
 * but not used by the expansion (callers pass a method-name string literal).
 */
9349#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9364#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9367chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9369 const char *prev = rb_enc_prev_char(p, e, e, enc);
9372 prev = rb_enc_prev_char(p, e, e, enc);
9373 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9385 RSTRING_LEN(rs) != 1 ||
9386 RSTRING_PTR(rs)[0] !=
'\n')) {
9392#define rb_rs get_rs()
9399 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9400 long pos,
len, rslen;
9406 static ID keywords[1];
9411 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9415 if (!ENUM_ELEM(ary, str)) {
9423 if (!RSTRING_LEN(str))
goto end;
9425 ptr = subptr = RSTRING_PTR(str);
9427 len = RSTRING_LEN(str);
9429 rslen = RSTRING_LEN(rs);
9432 enc = rb_enc_get(str);
9434 enc = rb_enc_check(str, rs);
9439 const char *eol = NULL;
9441 while (subend < pend) {
9442 long chomp_rslen = 0;
9444 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9446 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9448 if (eol == subend)
break;
9452 chomp_rslen = -rslen;
9456 if (!subptr) subptr = subend;
9460 }
while (subend < pend);
9462 if (rslen == 0) chomp_rslen = 0;
9464 subend - subptr + (chomp ? chomp_rslen : rslen));
9465 if (ENUM_ELEM(ary, line)) {
9466 str_mod_check(str, ptr,
len);
9468 subptr = eol = NULL;
9473 rsptr = RSTRING_PTR(rs);
9474 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9483 rsptr = RSTRING_PTR(rs);
9484 rslen = RSTRING_LEN(rs);
9487 while (subptr < pend) {
9488 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9492 if (hit != adjusted) {
9496 subend = hit += rslen;
9499 subend = chomp_newline(subptr, subend, enc);
9506 if (ENUM_ELEM(ary, line)) {
9507 str_mod_check(str, ptr,
len);
9512 if (subptr != pend) {
9515 pend = chomp_newline(subptr, pend, enc);
9517 else if (pend - subptr >= rslen &&
9518 memcmp(pend - rslen, rsptr, rslen) == 0) {
9523 ENUM_ELEM(ary, line);
9544rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9547 return rb_str_enumerate_lines(argc, argv, str, 0);
9602rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9604 VALUE ary = WANTARRAY(
"lines", 0);
9605 return rb_str_enumerate_lines(argc, argv, str, ary);
9619 for (i=0; i<RSTRING_LEN(str); i++) {
9620 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9638rb_str_each_byte(
VALUE str)
9641 return rb_str_enumerate_bytes(str, 0);
9653rb_str_bytes(
VALUE str)
9655 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9656 return rb_str_enumerate_bytes(str, ary);
9674 ptr = RSTRING_PTR(str);
9675 len = RSTRING_LEN(str);
9676 enc = rb_enc_get(str);
9679 for (i = 0; i <
len; i += n) {
9680 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9685 for (i = 0; i <
len; i += n) {
9686 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9707rb_str_each_char(
VALUE str)
9710 return rb_str_enumerate_chars(str, 0);
9722rb_str_chars(
VALUE str)
9725 return rb_str_enumerate_chars(str, ary);
9729rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9734 const char *ptr, *end;
9737 if (single_byte_optimizable(str))
9738 return rb_str_enumerate_bytes(str, ary);
9741 ptr = RSTRING_PTR(str);
9743 enc = STR_ENC_GET(str);
9746 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9767rb_str_each_codepoint(
VALUE str)
9770 return rb_str_enumerate_codepoints(str, 0);
9782rb_str_codepoints(
VALUE str)
9785 return rb_str_enumerate_codepoints(str, ary);
9791 int encidx = rb_enc_to_index(enc);
9793 const OnigUChar source_ascii[] =
"\\X";
9794 const OnigUChar *source = source_ascii;
9795 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Byte-list helpers: each expands to a comma-separated list of OnigUChar
 * values taken from x, suitable for use inside an array initializer.
 *   CHARS_16BE: bits 8..15 first, then the low byte.
 *   CHARS_16LE: the low byte first, then bits 8..15.
 *   CHARS_32BE: the upper 16 bits (as 16BE) followed by the lower 16 bits.
 *   CHARS_32LE: the lower 16 bits (as 16LE) followed by the upper 16 bits.
 * Note that x is evaluated more than once in every form.
 */
9798#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9799#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9800#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9801#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9802#define CASE_UTF(e) \
9803 case ENCINDEX_UTF_##e: { \
9804 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9805 source = source_UTF_##e; \
9806 source_len = sizeof(source_UTF_##e); \
9809 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9817 regex_t *reg_grapheme_cluster;
/*
 * Compile the pattern bytes in [source, source + source_len) for enc.
 * onig_new() hands the compiled regex back through its first argument, so it
 * must be given the address of the local reg_grapheme_cluster pointer; the
 * previous text had that "&reg" prefix mis-decoded into a (R) sign.
 * A non-zero r is reported through einfo by the error path that follows.
 */
9819 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9820 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9822 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9823 onig_error_code_to_str(message, r, &einfo);
9824 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9827 return reg_grapheme_cluster;
9833 int encidx = rb_enc_to_index(enc);
9834 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9836 if (encidx == rb_utf8_encindex()) {
9837 if (!reg_grapheme_cluster_utf8) {
9838 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9841 return reg_grapheme_cluster_utf8;
9850 size_t grapheme_cluster_count = 0;
9852 const char *ptr, *end;
9854 if (!rb_enc_unicode_p(enc)) {
9858 bool cached_reg_grapheme_cluster =
true;
9859 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9860 if (!reg_grapheme_cluster) {
9861 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9862 cached_reg_grapheme_cluster =
false;
9865 ptr = RSTRING_PTR(str);
9869 OnigPosition
len = onig_match(reg_grapheme_cluster,
9870 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9871 (
const OnigUChar *)ptr, NULL, 0);
9872 if (
len <= 0)
break;
9873 grapheme_cluster_count++;
9877 if (!cached_reg_grapheme_cluster) {
9878 onig_free(reg_grapheme_cluster);
9881 return SIZET2NUM(grapheme_cluster_count);
9885rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9889 const char *ptr0, *ptr, *end;
9891 if (!rb_enc_unicode_p(enc)) {
9892 return rb_str_enumerate_chars(str, ary);
9897 bool cached_reg_grapheme_cluster =
true;
9898 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9899 if (!reg_grapheme_cluster) {
9900 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9901 cached_reg_grapheme_cluster =
false;
9904 ptr0 = ptr = RSTRING_PTR(str);
9908 OnigPosition
len = onig_match(reg_grapheme_cluster,
9909 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9910 (
const OnigUChar *)ptr, NULL, 0);
9911 if (
len <= 0)
break;
9916 if (!cached_reg_grapheme_cluster) {
9917 onig_free(reg_grapheme_cluster);
9937rb_str_each_grapheme_cluster(
VALUE str)
9940 return rb_str_enumerate_grapheme_clusters(str, 0);
9952rb_str_grapheme_clusters(
VALUE str)
9955 return rb_str_enumerate_grapheme_clusters(str, ary);
9959chopped_length(
VALUE str)
9962 const char *p, *p2, *beg, *end;
9964 beg = RSTRING_PTR(str);
9965 end = beg + RSTRING_LEN(str);
9966 if (beg >= end)
return 0;
9967 p = rb_enc_prev_char(beg, end, end, enc);
9969 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9970 p2 = rb_enc_prev_char(beg, p, end, enc);
9971 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9989rb_str_chop_bang(
VALUE str)
9991 str_modify_keep_cr(str);
9992 if (RSTRING_LEN(str) > 0) {
9994 len = chopped_length(str);
9995 STR_SET_LEN(str,
len);
9996 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10015rb_str_chop(
VALUE str)
10021smart_chomp(
VALUE str,
const char *e,
const char *p)
10024 if (rb_enc_mbminlen(enc) > 1) {
10029 pp = e - rb_enc_mbminlen(enc);
10032 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10040 if (--e > p && *(e-1) ==
'\r') {
10057 char *pp, *e, *rsptr;
10059 char *
const p = RSTRING_PTR(str);
10060 long len = RSTRING_LEN(str);
10062 if (
len == 0)
return 0;
10065 return smart_chomp(str, e, p);
10068 enc = rb_enc_get(str);
10071 if (rb_enc_mbminlen(enc) > 1) {
10076 pp -= rb_enc_mbminlen(enc);
10079 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10086 while (e > p && *(e-1) ==
'\n') {
10088 if (e > p && *(e-1) ==
'\r')
10094 if (rslen >
len)
return len;
10096 enc = rb_enc_get(rs);
10097 newline = rsptr[rslen-1];
10098 if (rslen == rb_enc_mbminlen(enc)) {
10100 if (newline ==
'\n')
10101 return smart_chomp(str, e, p);
10105 return smart_chomp(str, e, p);
10109 enc = rb_enc_check(str, rs);
10110 if (is_broken_string(rs)) {
10114 if (p[
len-1] == newline &&
10116 memcmp(rsptr, pp, rslen) == 0)) {
10117 if (at_char_boundary(p, pp, e, enc))
10118 return len - rslen;
10130chomp_rs(
int argc,
const VALUE *argv)
10134 VALUE rs = argv[0];
10146 long olen = RSTRING_LEN(str);
10147 long len = chompped_length(str, rs);
10148 if (
len >= olen)
return Qnil;
10149 str_modify_keep_cr(str);
10150 STR_SET_LEN(str,
len);
10151 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10171rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10174 str_modifiable(str);
10175 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10176 rs = chomp_rs(argc, argv);
10178 return rb_str_chomp_string(str, rs);
10191rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10193 VALUE rs = chomp_rs(argc, argv);
10199tr_setup_table_multi(
char table[TR_TABLE_SIZE],
VALUE *tablep,
VALUE *ctablep,
10200 VALUE str,
int num_selectors,
VALUE *selectors)
10204 for (i=0; i<num_selectors; i++) {
10205 VALUE selector = selectors[i];
10209 enc = rb_enc_check(str, selector);
10210 tr_setup_table(selector, table, i==0, tablep, ctablep, enc);
10217 const char *
const start = s;
10219 if (!s || s >= e)
return 0;
10222 if (single_byte_optimizable(str)) {
10223 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10228 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10238lstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10239 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10241 const char *
const start = s;
10243 if (!s || s >= e)
return 0;
10248 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10250 if (!tr_find(cc, table, del, nodel))
break;
10269rb_str_lstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10273 long olen, loffset;
10275 str_modify_keep_cr(str);
10276 enc = STR_ENC_GET(str);
10279 char table[TR_TABLE_SIZE];
10280 VALUE del = 0, nodel = 0;
10282 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10283 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10286 loffset = lstrip_offset(str, start, start+olen, enc);
10290 long len = olen-loffset;
10291 s = start + loffset;
10292 memmove(start, s,
len);
10293 STR_SET_LEN(str,
len);
10294 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10329rb_str_lstrip(
int argc,
VALUE *argv,
VALUE str)
10336 char table[TR_TABLE_SIZE];
10337 VALUE del = 0, nodel = 0;
10339 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10340 loffset = lstrip_offset_table(str, start, start+
len, STR_ENC_GET(str), table, del, nodel);
10343 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10345 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10354 rb_str_check_dummy_enc(enc);
10358 if (!s || s >= e)
return 0;
10362 if (single_byte_optimizable(str)) {
10364 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10369 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10379rstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10380 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10385 rb_str_check_dummy_enc(enc);
10389 if (!s || s >= e)
return 0;
10393 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10395 if (!tr_find(c, table, del, nodel))
break;
10415rb_str_rstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10419 long olen, roffset;
10421 str_modify_keep_cr(str);
10422 enc = STR_ENC_GET(str);
10425 char table[TR_TABLE_SIZE];
10426 VALUE del = 0, nodel = 0;
10428 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10429 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10432 roffset = rstrip_offset(str, start, start+olen, enc);
10435 long len = olen - roffset;
10437 STR_SET_LEN(str,
len);
10438 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10472rb_str_rstrip(
int argc,
VALUE *argv,
VALUE str)
10476 long olen, roffset;
10478 enc = STR_ENC_GET(str);
10481 char table[TR_TABLE_SIZE];
10482 VALUE del = 0, nodel = 0;
10484 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10485 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10488 roffset = rstrip_offset(str, start, start+olen, enc);
10490 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10508rb_str_strip_bang(
int argc,
VALUE *argv,
VALUE str)
10511 long olen, loffset, roffset;
10514 str_modify_keep_cr(str);
10515 enc = STR_ENC_GET(str);
10519 char table[TR_TABLE_SIZE];
10520 VALUE del = 0, nodel = 0;
10522 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10523 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10524 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10527 loffset = lstrip_offset(str, start, start+olen, enc);
10528 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10531 if (loffset > 0 || roffset > 0) {
10532 long len = olen-roffset;
10535 memmove(start, start + loffset,
len);
10537 STR_SET_LEN(str,
len);
10538 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10573rb_str_strip(
int argc,
VALUE *argv,
VALUE str)
10576 long olen, loffset, roffset;
10582 char table[TR_TABLE_SIZE];
10583 VALUE del = 0, nodel = 0;
10585 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10586 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10587 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10590 loffset = lstrip_offset(str, start, start+olen, enc);
10591 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10594 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10599scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10602 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10608 end = pos + RSTRING_LEN(pat);
10622 if (RSTRING_LEN(str) > end)
10623 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10632 if (!regs || regs->num_regs == 1) {
10638 for (
int i = 1; i < regs->num_regs; i++) {
10669 long last = -1, prev = 0;
10670 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10672 pat = get_pat_quoted(pat, 1);
10673 mustnot_broken(str);
10677 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10682 if (last >= 0) rb_pat_search(pat, str, last, 1);
10687 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10691 str_mod_check(str, p,
len);
10693 if (last >= 0) rb_pat_search(pat, str, last, 1);
10745rb_str_hex(
VALUE str)
10747 return rb_str_to_inum(str, 16, FALSE);
10831rb_str_oct(
VALUE str)
10833 return rb_str_to_inum(str, -8, FALSE);
10836#ifndef HAVE_CRYPT_R
10841 rb_nativethread_lock_t lock;
10842} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10911# define CRYPT_END() ALLOCV_END(databuf)
10914 extern char *crypt(
const char *,
const char *);
10915# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10918 const char *s, *saltp;
10921 char salt_8bit_clean[3];
10925 mustnot_wchar(str);
10926 mustnot_wchar(salt);
10928 saltp = RSTRING_PTR(salt);
10929 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10930 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10934 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10935 salt_8bit_clean[0] = saltp[0] & 0x7f;
10936 salt_8bit_clean[1] = saltp[1] & 0x7f;
10937 salt_8bit_clean[2] =
'\0';
10938 saltp = salt_8bit_clean;
10943# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10944 data->initialized = 0;
10946 res = crypt_r(s, saltp, data);
10949 res = crypt(s, saltp);
10964 size_t res_size = strlen(res)+1;
10965 tmp_buf =
ALLOCA_N(
char, res_size);
10966 memcpy(tmp_buf, res, res_size);
11003 char *ptr, *p, *pend;
11006 unsigned long sum0 = 0;
11011 ptr = p = RSTRING_PTR(str);
11012 len = RSTRING_LEN(str);
11018 str_mod_check(str, ptr,
len);
11021 sum0 += (
unsigned char)*p;
11032 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11033 sum0 &= (((
unsigned long)1)<<bits)-1;
11053rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11057 long width,
len, flen = 1, fclen = 1;
11060 const char *f =
" ";
11061 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11063 int singlebyte = 1, cr;
11067 enc = STR_ENC_GET(str);
11068 termlen = rb_enc_mbminlen(enc);
11072 enc = rb_enc_check(str, pad);
11073 f = RSTRING_PTR(pad);
11074 flen = RSTRING_LEN(pad);
11075 fclen = str_strlen(pad, enc);
11076 singlebyte = single_byte_optimizable(pad);
11077 if (flen == 0 || fclen == 0) {
11078 rb_raise(rb_eArgError,
"zero width padding");
11081 len = str_strlen(str, enc);
11082 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11084 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11088 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11089 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11091 size = RSTRING_LEN(str);
11092 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11093 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11094 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11095 rb_raise(rb_eArgError,
"argument too big");
11099 p = RSTRING_PTR(res);
11101 memset(p, *f, llen);
11105 while (llen >= fclen) {
11111 memcpy(p, f, llen2);
11115 memcpy(p, RSTRING_PTR(str), size);
11118 memset(p, *f, rlen);
11122 while (rlen >= fclen) {
11128 memcpy(p, f, rlen2);
11132 TERM_FILL(p, termlen);
11133 STR_SET_LEN(res, p-RSTRING_PTR(res));
11154rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11156 return rb_str_justify(argc, argv, str,
'l');
11168rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11170 return rb_str_justify(argc, argv, str,
'r');
11183rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11185 return rb_str_justify(argc, argv, str,
'c');
11201 sep = get_pat_quoted(sep, 0);
11213 pos = rb_str_index(str, sep, 0);
11214 if (pos < 0)
goto failed;
11219 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11222 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11236 long pos = RSTRING_LEN(str);
11238 sep = get_pat_quoted(sep, 0);
11251 pos = rb_str_rindex(str, sep, pos);
11260 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11262 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11274rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11278 for (i=0; i<argc; i++) {
11279 VALUE tmp = argv[i];
11281 if (rb_reg_start_with_p(tmp, str))
11285 const char *p, *s, *e;
11290 enc = rb_enc_check(str, tmp);
11291 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11292 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11293 p = RSTRING_PTR(str);
11296 if (!at_char_right_boundary(p, s, e, enc))
11298 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11314rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11318 for (i=0; i<argc; i++) {
11319 VALUE tmp = argv[i];
11320 const char *p, *s, *e;
11325 enc = rb_enc_check(str, tmp);
11326 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11327 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11328 p = RSTRING_PTR(str);
11331 if (!at_char_boundary(p, s, e, enc))
11333 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11349deleted_prefix_length(
VALUE str,
VALUE prefix)
11351 const char *strptr, *prefixptr;
11352 long olen, prefixlen;
11357 if (!is_broken_string(prefix) ||
11358 !rb_enc_asciicompat(enc) ||
11359 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11360 enc = rb_enc_check(str, prefix);
11364 prefixlen = RSTRING_LEN(prefix);
11365 if (prefixlen <= 0)
return 0;
11366 olen = RSTRING_LEN(str);
11367 if (olen < prefixlen)
return 0;
11368 strptr = RSTRING_PTR(str);
11369 prefixptr = RSTRING_PTR(prefix);
11370 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11371 if (is_broken_string(prefix)) {
11372 if (!is_broken_string(str)) {
11376 const char *strend = strptr + olen;
11377 const char *after_prefix = strptr + prefixlen;
11378 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11399rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11402 str_modify_keep_cr(str);
11404 prefixlen = deleted_prefix_length(str, prefix);
11405 if (prefixlen <= 0)
return Qnil;
11419rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11423 prefixlen = deleted_prefix_length(str, prefix);
11424 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11426 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11439deleted_suffix_length(
VALUE str,
VALUE suffix)
11441 const char *strptr, *suffixptr;
11442 long olen, suffixlen;
11446 if (is_broken_string(suffix))
return 0;
11447 enc = rb_enc_check(str, suffix);
11450 suffixlen = RSTRING_LEN(suffix);
11451 if (suffixlen <= 0)
return 0;
11452 olen = RSTRING_LEN(str);
11453 if (olen < suffixlen)
return 0;
11454 strptr = RSTRING_PTR(str);
11455 suffixptr = RSTRING_PTR(suffix);
11456 const char *strend = strptr + olen;
11457 const char *before_suffix = strend - suffixlen;
11458 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11459 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11475rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11477 long olen, suffixlen,
len;
11478 str_modifiable(str);
11480 suffixlen = deleted_suffix_length(str, suffix);
11481 if (suffixlen <= 0)
return Qnil;
11483 olen = RSTRING_LEN(str);
11484 str_modify_keep_cr(str);
11485 len = olen - suffixlen;
11486 STR_SET_LEN(str,
len);
11487 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11503rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11507 suffixlen = deleted_suffix_length(str, suffix);
11508 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11510 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11517 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11523nil_setter_warning(
ID id)
11525 rb_warn_deprecated(
"non-nil '%"PRIsVALUE
"'", NULL, rb_id2str(
id));
11532 if (!
NIL_P(*var)) {
11533 nil_setter_warning(
id);
11540 val = rb_fs_check(val);
11543 "value of %"PRIsVALUE
" must be String or Regexp",
11547 nil_setter_warning(
id);
11564 str_modifiable(str);
11567 int idx = rb_enc_to_index(encoding);
11574 rb_enc_associate_index(str, idx);
11598 if (STR_EMBED_P(str)) {
11599 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11604 str_replace_shared_without_enc(str2, str);
11606 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11636rb_str_valid_encoding_p(
VALUE str)
11656rb_str_is_ascii_only_p(
VALUE str)
11666 static const char ellipsis[] =
"...";
11667 const long ellipsislen =
sizeof(ellipsis) - 1;
11669 const long blen = RSTRING_LEN(str);
11670 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11671 VALUE estr, ret = 0;
11674 if (
len * rb_enc_mbminlen(enc) >= blen ||
11678 else if (
len <= ellipsislen ||
11680 if (rb_enc_asciicompat(enc)) {
11682 rb_enc_associate(ret, enc);
11689 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11694 rb_enc_from_encoding(enc), 0,
Qnil);
11707 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11713 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11732 if (enc == STR_ENC_GET(str)) {
11737 return enc_str_scrub(enc, str, repl, cr);
11745 const char *rep, *p, *e, *p1, *sp;
11751 rb_raise(rb_eArgError,
"both of block and replacement given");
11758 if (!
NIL_P(repl)) {
11759 repl = str_compat_and_valid(repl, enc);
11762 if (rb_enc_dummy_p(enc)) {
11765 encidx = rb_enc_to_index(enc);
11767#define DEFAULT_REPLACE_CHAR(str) do { \
11768 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11769 rep = replace; replen = (int)sizeof(replace); \
11772 slen = RSTRING_LEN(str);
11773 p = RSTRING_PTR(str);
11778 if (rb_enc_asciicompat(enc)) {
11784 else if (!
NIL_P(repl)) {
11785 rep = RSTRING_PTR(repl);
11786 replen = RSTRING_LEN(repl);
11789 else if (encidx == rb_utf8_encindex()) {
11790 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11794 DEFAULT_REPLACE_CHAR(
"?");
11799 p = search_nonascii(p, e);
11804 int ret = rb_enc_precise_mbclen(p, e, enc);
11823 if (e - p < clen) clen = e - p;
11830 for (; clen > 1; clen--) {
11831 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11842 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11843 str_mod_check(str, sp, slen);
11844 repl = str_compat_and_valid(repl, enc);
11851 p = search_nonascii(p, e);
11877 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11878 str_mod_check(str, sp, slen);
11879 repl = str_compat_and_valid(repl, enc);
11888 long mbminlen = rb_enc_mbminlen(enc);
11892 else if (!
NIL_P(repl)) {
11893 rep = RSTRING_PTR(repl);
11894 replen = RSTRING_LEN(repl);
11896 else if (encidx == ENCINDEX_UTF_16BE) {
11897 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11899 else if (encidx == ENCINDEX_UTF_16LE) {
11900 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11902 else if (encidx == ENCINDEX_UTF_32BE) {
11903 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11905 else if (encidx == ENCINDEX_UTF_32LE) {
11906 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11909 DEFAULT_REPLACE_CHAR(
"?");
11913 int ret = rb_enc_precise_mbclen(p, e, enc);
11926 if (e - p < clen) clen = e - p;
11927 if (clen <= mbminlen * 2) {
11932 for (; clen > mbminlen; clen-=mbminlen) {
11933 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11943 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11944 str_mod_check(str, sp, slen);
11945 repl = str_compat_and_valid(repl, enc);
11970 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11971 str_mod_check(str, sp, slen);
11972 repl = str_compat_and_valid(repl, enc);
12012str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
12020static ID id_normalize;
12021static ID id_normalized_p;
12022static VALUE mUnicodeNormalize;
12025unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12027 static int UnicodeNormalizeRequired = 0;
12030 if (!UnicodeNormalizeRequired) {
12031 rb_require(
"unicode_normalize/normalize.rb");
12032 UnicodeNormalizeRequired = 1;
12036 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12047rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12049 return unicode_normalize_common(argc, argv, str, id_normalize);
12063rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12065 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12092rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12094 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12226#define sym_equal rb_obj_equal
12229sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12233 int c = rb_enc_precise_mbclen(s, send, enc);
12237 c = rb_enc_mbc_to_codepoint(s, send, enc);
12245rb_str_symname_p(
VALUE sym)
12250 rb_encoding *resenc = rb_default_internal_encoding();
12252 if (resenc == NULL) resenc = rb_default_external_encoding();
12253 enc = STR_ENC_GET(sym);
12254 ptr = RSTRING_PTR(sym);
12255 len = RSTRING_LEN(sym);
12256 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12264rb_str_quote_unprintable(
VALUE str)
12272 resenc = rb_default_internal_encoding();
12273 if (resenc == NULL) resenc = rb_default_external_encoding();
12274 enc = STR_ENC_GET(str);
12275 ptr = RSTRING_PTR(str);
12276 len = RSTRING_LEN(str);
12277 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12278 !sym_printable(ptr, ptr +
len, enc)) {
12279 return rb_str_escape(str);
12285rb_id_quote_unprintable(
ID id)
12287 VALUE str = rb_id2str(
id);
12288 if (!rb_str_symname_p(str)) {
12289 return rb_str_escape(str);
12307sym_inspect(
VALUE sym)
12314 if (!rb_str_symname_p(str)) {
12316 len = RSTRING_LEN(str);
12317 rb_str_resize(str,
len + 1);
12318 dest = RSTRING_PTR(str);
12319 memmove(dest + 1, dest,
len);
12323 VALUE orig_str = str;
12325 len = RSTRING_LEN(orig_str);
12326 str = rb_enc_str_new(0,
len + 1, enc);
12329 ptr = RSTRING_PTR(orig_str);
12330 dest = RSTRING_PTR(str);
12331 memcpy(dest + 1, ptr,
len);
12351rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12356 rb_raise(rb_eArgError,
"no receiver given");
12459 return rb_str_match(
rb_sym2str(sym), other);
12474sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12476 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12489sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12491 return rb_str_match_m_p(argc, argv, sym);
12509 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12520sym_length(
VALUE sym)
12534sym_empty(
VALUE sym)
12568sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12584sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12600sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12614sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12616 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12629sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12631 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12643sym_encoding(
VALUE sym)
12649string_for_symbol(
VALUE name)
12654 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12668 name = string_for_symbol(name);
12669 return rb_intern_str(name);
12678 name = string_for_symbol(name);
12702 return rb_fstring(str);
12708 struct RString fake_str = {RBASIC_INIT};
12709 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12721 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12722 rb_enc_autoload(enc);
12725 struct RString fake_str = {RBASIC_INIT};
12726 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12732 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12733 rb_enc_autoload(enc);
12736 struct RString fake_str = {RBASIC_INIT};
12737 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12748#if USE_YJIT || USE_ZJIT
12750rb_jit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12755 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12756 rb_str_buf_cat_byte(str, (
char) code);
12766fstring_set_class_i(
VALUE *str,
void *data)
12770 return ST_CONTINUE;
12778 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12945 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern_str(), except it tries to convert the parameter object to an instance of rb_cS...
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously been classified as shareable or not.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
union RString::@51::@52::@54 aux
Auxiliary info.
long len
Length of the string, not including terminating NUL character.
struct RString::@51::@53 embed
Embedded contents.
VALUE shared
Parent of the string.
union RString::@51 as
String's specific fields.
char * ptr
Pointer to the contents of the string.
struct RString::@51::@52 heap
Strings that use separated memory region for contents use this pattern.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.