14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
49#include "ruby_assert.h"
54#if defined HAVE_CRYPT_R
55# if defined HAVE_CRYPT_H
58#elif !defined HAVE_CRYPT
59# include "missing/crypt.h"
60# define HAVE_CRYPT_R 1
/*
 * Shorthands for the `no`-th entry of the beg[] and end[] arrays reached
 * through `regs`.  Both macros pick up a variable named `regs` from the
 * expansion site, so they can only be used where such a pointer is in scope.
 */
63#define BEG(no) (regs->beg[(no)])
64#define END(no) (regs->end[(no)])
67#undef rb_usascii_str_new
71#undef rb_usascii_str_new_cstr
72#undef rb_utf8_str_new_cstr
73#undef rb_enc_str_new_cstr
74#undef rb_external_str_new_cstr
75#undef rb_locale_str_new_cstr
76#undef rb_str_dup_frozen
77#undef rb_str_buf_new_cstr
/* NOTE(review): not referenced in the visible part of this file; presumably
 * an upper bound on the byte length of a single character -- confirm. */
131#define RUBY_MAX_CHAR_LEN 16
/* Set by str_store_precomputed_hash() after it has copied the hash value
 * behind the string's terminator; size computations in this file add
 * sizeof(st_index_t) when the flag is present. */
132#define STR_PRECOMPUTED_HASH FL_USER4
/* Set by STR_SET_SHARED() on the string whose buffer another string uses. */
133#define STR_SHARED_ROOT FL_USER5
/* Additionally set by STR_SET_SHARED() on a shared root whose class is 0. */
134#define STR_BORROWED FL_USER6
/* Member of STR_UNMODIFIABLE_MASK; rb_check_lockedtmp() tests for it. */
135#define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); str_discard() releases the heap buffer only when
 * neither this flag nor STR_SHARED is set. */
136#define STR_NOFREE FL_USER18
/* STR_SET_SHARED() checks that this flag is absent before touching `str`. */
137#define STR_FAKESTR FL_USER19
/*
 * Marks `str` as not embedded: sets STR_NOEMBED and clears STR_SHARED,
 * STR_SHARED_ROOT and STR_BORROWED.
 */
139#define STR_SET_NOEMBED(str) do {\
140 FL_SET((str), STR_NOEMBED);\
141 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Clears STR_NOEMBED, STR_SHARED and STR_NOFREE with a single FL_UNSET(). */
143#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
/* Stores `n` directly into the len field of `str`. */
145#define STR_SET_LEN(str, n) do { \
146 RSTRING(str)->len = (n); \
/*
 * Width of the terminator of `str` in bytes: 1 when rb_str_enc_fastpath(str)
 * holds, otherwise the minimum character length of the string's encoding.
 */
149#define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
/*
 * Writes a zero terminator at `ptr`.  Both arguments are evaluated once into
 * locals.  The first byte is always cleared with a plain store; memset() over
 * the full `termlen` bytes is used only when the terminator is wider than one
 * byte.
 */
150#define TERM_FILL(ptr, termlen) do {\
151 char *const term_fill_ptr = (ptr);\
152 const int term_fill_len = (termlen);\
153 *term_fill_ptr = '\0';\
154 if (UNLIKELY(term_fill_len > 1))\
155 memset(term_fill_ptr, 0, term_fill_len);\
/*
 * Convenience form of RESIZE_CAPA_TERM() that takes the terminator length
 * from the string itself via TERM_LEN().
 */
158#define RESIZE_CAPA(str,capacity) do {\
159 const int termlen = TERM_LEN(str);\
160 RESIZE_CAPA_TERM(str,capacity,termlen);\
/*
 * Gives `str` room for `capacity` bytes plus a `termlen`-byte terminator.
 *
 * Embedded string whose embed area is too small: a buffer of
 * capacity + termlen bytes is allocated, str_embed_capa(str) bytes are copied
 * into it from the current contents, the heap pointer and length are stored,
 * the string is flagged via STR_SET_NOEMBED() and the new capacity recorded.
 *
 * The remaining visible lines assert that the string is not STR_SHARED,
 * resize the heap buffer with SIZED_REALLOC_N() and record the new capacity.
 *
 * Caveats visible in the expansion: `str` and `capacity` are evaluated
 * several times, and `capacity` is not parenthesized in the size comparison.
 * NOTE(review): the lines joining the two paths are missing from this
 * excerpt.
 */
162#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
163 if (STR_EMBED_P(str)) {\
164 if (str_embed_capa(str) < capacity + termlen) {\
165 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
166 const long tlen = RSTRING_LEN(str);\
167 memcpy(tmp, RSTRING_PTR(str), str_embed_capa(str));\
168 RSTRING(str)->as.heap.ptr = tmp;\
169 RSTRING(str)->len = tlen;\
170 STR_SET_NOEMBED(str);\
171 RSTRING(str)->as.heap.aux.capa = (capacity);\
175 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
176 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
177 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
178 RSTRING(str)->as.heap.aux.capa = (capacity);\
/*
 * Records that `str` uses the buffer of `shared_str`.  Nothing below runs
 * when `str` carries STR_FAKESTR.  Otherwise:
 *   - asserts that str's pointer lies between the start and the end of
 *     shared_str's contents (inclusive);
 *   - stores shared_str in as.heap.aux.shared through RB_OBJ_WRITE();
 *   - sets STR_SHARED on str and passes str to rb_gc_register_pinning_obj();
 *   - sets STR_SHARED_ROOT on shared_str, plus STR_BORROWED when shared_str
 *     has class 0.
 */
182#define STR_SET_SHARED(str, shared_str) do { \
183 if (!FL_TEST(str, STR_FAKESTR)) { \
184 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
185 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
186 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
187 FL_SET((str), STR_SHARED); \
188 rb_gc_register_pinning_obj(str); \
189 FL_SET((shared_str), STR_SHARED_ROOT); \
190 if (RBASIC_CLASS((shared_str)) == 0) \
191 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Reads as.heap.ptr directly; no check that the string is non-embedded. */
195#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Size of the heap buffer in bytes: recorded capacity plus terminator width. */
196#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Encoding of `str`, obtained through the file-local get_encoding(). */
199#define STR_ENC_GET(str) get_encoding(str)
/*
 * Returns false as soon as a non-zero byte is read through `s` (the pointer
 * is advanced by each read).
 * NOTE(review): the loop over `n` and the final return are not visible in
 * this excerpt; presumably the first n bytes are checked -- confirm.
 */
202zero_filled(
const char *s,
int n)
205 if (*s++)
return false;
/* Defaults to 0 unless the build has already defined it. */
210#if !defined SHARABLE_MIDDLE_SUBSTRING
211# define SHARABLE_MIDDLE_SUBSTRING 0
/*
 * Decides whether the byte range [beg, beg + len) of `str` qualifies.  In the
 * visible branch the answer is true when the range ends exactly at the end of
 * `str`, or when the TERM_LEN(str) bytes following the range are all zero
 * according to zero_filled().
 * NOTE(review): the branch taken when SHARABLE_MIDDLE_SUBSTRING is non-zero
 * is not visible in this excerpt.
 */
215SHARABLE_SUBSTRING_P(
VALUE str,
long beg,
long len)
217#if SHARABLE_MIDDLE_SUBSTRING
220 long end = beg +
len;
221 long source_len = RSTRING_LEN(str);
222 return end == source_len || zero_filled(RSTRING_PTR(str) + end, TERM_LEN(str));
/*
 * Number of bytes available for embedded contents: the GC slot size of `str`
 * minus the offset of as.embed.ary inside struct RString.
 */
227str_embed_capa(
VALUE str)
229 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
/* True when none of STR_NOFREE, STR_SHARED_ROOT and STR_SHARED is set on `str`. */
233rb_str_reembeddable_p(
VALUE str)
235 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
239rb_str_embed_size(
long capa,
long termlen)
/*
 * Works out real_size for `str`.  Embedded strings, and heap strings for
 * which rb_str_reembeddable_p() holds, use
 * rb_str_embed_size(capa, TERM_LEN(str)), with sizeof(st_index_t) added to
 * capa first when STR_PRECOMPUTED_HASH is set.  The last visible assignment
 * uses plain sizeof(struct RString).
 * NOTE(review): the initialisation of capa in each branch and the return
 * statement are not visible in this excerpt.
 */
247rb_str_size_as_embedded(
VALUE str)
250 if (STR_EMBED_P(str)) {
252 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
254 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
258 else if (rb_str_reembeddable_p(str)) {
260 if (
FL_TEST_RAW(str, STR_PRECOMPUTED_HASH))
capa +=
sizeof(st_index_t);
262 real_size = rb_str_embed_size(
capa, TERM_LEN(str));
265 real_size =
sizeof(
struct RString);
/*
 * Asks the GC whether an object of rb_str_embed_size(len, termlen) bytes can
 * be allocated, i.e. whether `len` bytes plus terminator fit an embedded
 * string.
 */
272STR_EMBEDDABLE_P(
long len,
long termlen)
274 return rb_gc_size_allocatable_p(rb_str_embed_size(
len, termlen));
/* Forward declarations of file-local helpers that are defined further down. */
279static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
280static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
282static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
283static inline void str_modifiable(
VALUE str);
/*
 * Calls str_make_independent_expand() with the string's current length, an
 * expansion of zero and the terminator width from TERM_LEN().
 */
288str_make_independent(
VALUE str)
290 long len = RSTRING_LEN(str);
291 int termlen = TERM_LEN(str);
292 str_make_independent_expand((str),
len, 0L, termlen);
295static inline int str_dependent_p(
VALUE str);
/* Calls str_make_independent() only when str_dependent_p(str) holds. */
298rb_str_make_independent(
VALUE str)
300 if (str_dependent_p(str)) {
301 str_make_independent(str);
/*
 * Copies the contents of a heap string to wherever RSTRING_PTR(str) points
 * after the length is set, then releases the old heap buffer.  The old
 * pointer and its allocated size (capacity + terminator) are saved up front
 * because SIZED_FREE_N() needs both.
 * NOTE(review): the declaration of `len` and the flag change that presumably
 * makes RSTRING_PTR() refer to the embedded area are not visible in this
 * excerpt.
 */
306rb_str_make_embedded(
VALUE str)
311 int termlen = TERM_LEN(str);
312 char *buf =
RSTRING(str)->as.heap.ptr;
313 long old_capa =
RSTRING(str)->as.heap.aux.capa + termlen;
317 STR_SET_LEN(str,
len);
320 memcpy(RSTRING_PTR(str), buf,
len);
321 SIZED_FREE_N(buf, old_capa);
/*
 * Prints a diagnostic to stderr saying that a function is returning NULL and
 * that a SIGSEGV is expected to follow.
 * NOTE(review): the argument list of fprintf() is cut off in this excerpt;
 * presumably `func` fills the %s -- confirm.
 */
328rb_debug_rstring_null_ptr(
const char *func)
330 fprintf(stderr,
"%s is returning NULL!! "
331 "SIGSEGV is highly expected to follow immediately.\n"
332 "If you could reproduce, attach your debugger here, "
333 "and look at the passed string.\n",
338static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
341get_encoding(
VALUE str)
/*
 * Raises ArgumentError naming the string's encoding when
 * is_broken_string(str) reports true.
 */
347mustnot_broken(
VALUE str)
349 if (is_broken_string(str)) {
350 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
/*
 * Raises ArgumentError when the minimum character length of `enc` is larger
 * than one byte.
 * NOTE(review): `enc` is declared on a line not visible in this excerpt;
 * presumably it is the encoding of `str` -- confirm.
 */
355mustnot_wchar(
VALUE str)
358 if (rb_enc_mbminlen(enc) > 1) {
359 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
363static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
365#if SIZEOF_LONG == SIZEOF_VOIDP
366#define PRECOMPUTED_FAKESTR_HASH 1
371BARE_STRING_P(
VALUE str)
376static inline st_index_t
377str_do_hash(
VALUE str)
379 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
381 if (e && !is_ascii_string(str)) {
/*
 * Copies `hash` into the bytes directly after the terminator of `str` and
 * sets STR_PRECOMPUTED_HASH.  used_bytes is contents plus terminator;
 * free_bytes is what remains of the embed area.
 * NOTE(review): whatever consumes free_bytes (presumably a size check or
 * assertion) is not visible in this excerpt.
 */
388str_store_precomputed_hash(
VALUE str, st_index_t hash)
394 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
395 size_t free_bytes = str_embed_capa(str) - used_bytes;
399 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
401 FL_SET(str, STR_PRECOMPUTED_HASH);
414 if (
FL_TEST(str, RSTRING_FSTR))
417 bare = BARE_STRING_P(str);
419 if (STR_EMBED_P(str)) {
424 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
431 rb_str_resize(str, RSTRING_LEN(str));
433 fstr = register_fstring(str,
false,
false);
436 str_replace_shared_without_enc(str, fstr);
443static VALUE fstring_table_obj;
446fstring_concurrent_set_hash(
VALUE str)
448#ifdef PRECOMPUTED_FAKESTR_HASH
452 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
469 const char *aptr, *bptr;
476 return (alen == blen &&
478 memcmp(aptr, bptr, alen) == 0);
483 bool force_precompute_hash;
487fstring_concurrent_set_create(
VALUE str,
void *data)
497 long len = RSTRING_LEN(str);
498 long capa =
len +
sizeof(st_index_t);
499 int term_len = TERM_LEN(str);
501 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
503 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
504 STR_SET_LEN(new_str, RSTRING_LEN(str));
506 rb_enc_copy(new_str, str);
507 str_store_precomputed_hash(new_str, str_do_hash(str));
511 rb_enc_copy(new_str, str);
512#ifdef PRECOMPUTED_FAKESTR_HASH
513 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
514 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
528 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
531 if (STR_SHARED_P(str)) {
533 str_make_independent(str);
536 if (!BARE_STRING_P(str)) {
542 RBASIC(str)->flags |= RSTRING_FSTR;
544 RB_OBJ_SET_SHAREABLE(str);
558 .hash = fstring_concurrent_set_hash,
559 .cmp = fstring_concurrent_set_cmp,
560 .create = fstring_concurrent_set_create,
/*
 * Creates the concurrent set stored in fstring_table_obj, using
 * fstring_concurrent_set_funcs, and registers the variable's address with
 * the GC.
 * NOTE(review): 8192 is the second argument of rb_concurrent_set_new();
 * presumably the initial capacity -- confirm.
 */
565Init_fstring_table(
void)
567 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
568 rb_gc_register_address(&fstring_table_obj);
572register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
576 .force_precompute_hash = force_precompute_hash
579#if SIZEOF_VOIDP == SIZEOF_LONG
583 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
587 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
589 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
/* Identity test: is `obj` the object held in fstring_table_obj? */
601rb_obj_is_fstring_table(
VALUE obj)
605 return obj == fstring_table_obj;
/*
 * Removes `obj` from the fstring table by identity and bumps the
 * obj_str_fstr debug counter.  ASSERT_vm_locking_with_barrier() is invoked
 * first.
 */
609rb_gc_free_fstring(
VALUE obj)
611 ASSERT_vm_locking_with_barrier();
617 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
619 RB_DEBUG_COUNTER_INC(obj_str_fstr);
/*
 * Hands `callback` and `data` to rb_concurrent_set_foreach_with_replace() for
 * the fstring table.  Does nothing while fstring_table_obj is still zero.
 */
625rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
627 if (fstring_table_obj) {
628 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
633setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
636 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
649 return (
VALUE)fake_str;
658 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
/*
 * Looks up or registers the `len` bytes at `ptr` as a US-ASCII fstring.  A
 * struct RString on the stack is prepared by setup_fake_str() and passed to
 * register_fstring() with both of its bool arguments (copy,
 * force_precompute_hash) false.
 */
667rb_fstring_new(
const char *ptr,
long len)
669 struct RString fake_str = {RBASIC_INIT};
670 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
676 struct RString fake_str = {RBASIC_INIT};
677 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
/* rb_fstring_new() for a NUL-terminated C string; the length comes from strlen(). */
681rb_fstring_cstr(
const char *
ptr)
683 return rb_fstring_new(
ptr, strlen(
ptr));
687single_byte_optimizable(
VALUE str)
691 case ENCINDEX_ASCII_8BIT:
692 case ENCINDEX_US_ASCII:
714static inline const char *
715search_nonascii(
const char *p,
const char *e)
719#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
720# if SIZEOF_UINTPTR_T == 8
721# define NONASCII_MASK UINT64_C(0x8080808080808080)
722# elif SIZEOF_UINTPTR_T == 4
723# define NONASCII_MASK UINT32_C(0x80808080)
725# error "don't know what to do."
728# if SIZEOF_UINTPTR_T == 8
729# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
730# elif SIZEOF_UINTPTR_T == 4
731# define NONASCII_MASK 0x80808080UL
733# error "don't know what to do."
737 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
738#if !UNALIGNED_WORD_ACCESS
739 if ((uintptr_t)p % SIZEOF_VOIDP) {
740 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
745 case 7:
if (p[-7]&0x80)
return p-7;
746 case 6:
if (p[-6]&0x80)
return p-6;
747 case 5:
if (p[-5]&0x80)
return p-5;
748 case 4:
if (p[-4]&0x80)
return p-4;
750 case 3:
if (p[-3]&0x80)
return p-3;
751 case 2:
if (p[-2]&0x80)
return p-2;
752 case 1:
if (p[-1]&0x80)
return p-1;
757#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
758#define aligned_ptr(value) \
759 __builtin_assume_aligned((value), sizeof(uintptr_t))
761#define aligned_ptr(value) (value)
764 t = (e - (SIZEOF_VOIDP-1));
766 for (;s < t; s +=
sizeof(uintptr_t)) {
768 memcpy(&word, s,
sizeof(word));
769 if (word & NONASCII_MASK) {
770#ifdef WORDS_BIGENDIAN
771 return (
const char *)s + (nlz_intptr(word&NONASCII_MASK)>>3);
773 return (
const char *)s + (ntz_intptr(word&NONASCII_MASK)>>3);
783 case 7:
if (e[-7]&0x80)
return e-7;
784 case 6:
if (e[-6]&0x80)
return e-6;
785 case 5:
if (e[-5]&0x80)
return e-5;
786 case 4:
if (e[-4]&0x80)
return e-4;
788 case 3:
if (e[-3]&0x80)
return e-3;
789 case 2:
if (e[-2]&0x80)
return e-2;
790 case 1:
if (e[-1]&0x80)
return e-1;
798 const char *e = p +
len;
800 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
802 p = search_nonascii(p, e);
806 if (rb_enc_asciicompat(enc)) {
807 p = search_nonascii(p, e);
810 int ret = rb_enc_precise_mbclen(p, e, enc);
814 p = search_nonascii(p, e);
820 int ret = rb_enc_precise_mbclen(p, e, enc);
836 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
839 p = search_nonascii(p, e);
843 else if (rb_enc_asciicompat(enc)) {
844 p = search_nonascii(p, e);
850 int ret = rb_enc_precise_mbclen(p, e, enc);
857 p = search_nonascii(p, e);
863 int ret = rb_enc_precise_mbclen(p, e, enc);
888 rb_enc_set_index(str1, rb_enc_get_index(str2));
896rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
901 str_enc_copy(dest, src);
902 if (RSTRING_LEN(dest) == 0) {
903 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
914 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
915 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
926rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
928 str_enc_copy(dest, src);
935 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
941 return enc_coderange_scan(str, enc);
945rbimpl_enc_str_coderange_scan(
VALUE str)
947 int cr = enc_coderange_scan(str, get_encoding(str));
952#undef rb_enc_str_coderange
959 cr = rbimpl_enc_str_coderange_scan(str);
963#define rb_enc_str_coderange rb_enc_str_coderange_inline
966rb_enc_str_asciicompat(
VALUE str)
969 return rb_str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
977 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
/*
 * Compares the current buffer pointer and length of `s` with `p` and `len`
 * (presumably values the caller read earlier -- confirm).
 * NOTE(review): the action taken when they differ is not visible in this
 * excerpt.
 */
986str_mod_check(
VALUE s,
const char *p,
long len)
988 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
/*
 * Capacity of `str` not counting a `termlen`-byte terminator.  Embedded
 * strings report the embed area minus termlen.  Strings with STR_SHARED or
 * STR_NOFREE have their own branch (body not visible in this excerpt).  The
 * last visible return yields the recorded as.heap.aux.capa.
 */
994str_capacity(
VALUE str,
const int termlen)
996 if (STR_EMBED_P(str)) {
997 return str_embed_capa(str) - termlen;
999 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
1003 return RSTRING(str)->as.heap.aux.capa;
1010 return str_capacity(str, TERM_LEN(str));
/*
 * Raises ArgumentError with the message "NULL pointer given".
 * NOTE(review): the condition on `ptr` guarding the raise is not visible in
 * this excerpt.
 */
1014must_not_null(
const char *
ptr)
1017 rb_raise(rb_eArgError,
"NULL pointer given");
/*
 * Allocates a struct RString of class `klass` with NEWOBJ_OF() and stores 0
 * in the first byte of the embedded area.  `size` is
 * rb_str_embed_size(capa, 0).
 * NOTE(review): the remaining NEWOBJ_OF() arguments and the return statement
 * are not visible in this excerpt.
 */
1022str_alloc_embed(
VALUE klass,
size_t capa)
1024 size_t size = rb_str_embed_size(
capa, 0);
1028 NEWOBJ_OF(str,
struct RString, klass,
1032 str->as.embed.ary[0] = 0;
/*
 * Allocates a struct RString of class `klass` with NEWOBJ_OF() and clears
 * the heap fields: capacity 0, pointer NULL.
 * NOTE(review): the remaining NEWOBJ_OF() arguments and the return statement
 * are not visible in this excerpt.
 */
1038str_alloc_heap(
VALUE klass)
1040 NEWOBJ_OF(str,
struct RString, klass,
1044 str->as.heap.aux.capa = 0;
1045 str->as.heap.ptr = NULL;
/*
 * Fires the string-creation DTrace hook with length 0, allocates an embedded
 * string with capacity 0 and zero-fills its entire embed area.
 */
1051empty_str_alloc(
VALUE klass)
1053 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1054 VALUE str = str_alloc_embed(klass, 0);
1055 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1066 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1070 enc = rb_ascii8bit_encoding();
1073 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1075 int termlen = rb_enc_mbminlen(enc);
1077 if (STR_EMBEDDABLE_P(
len, termlen)) {
1078 str = str_alloc_embed(klass,
len + termlen);
1084 str = str_alloc_heap(klass);
1090 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1093 rb_enc_raw_set(str, enc);
1096 memcpy(RSTRING_PTR(str),
ptr,
len);
1099 memset(RSTRING_PTR(str), 0,
len);
1102 STR_SET_LEN(str,
len);
1103 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1110 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1145 __msan_unpoison_string(
ptr);
1165 if (rb_enc_mbminlen(enc) != 1) {
1166 rb_raise(rb_eArgError,
"wchar encoding given");
1168 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
/*
 * Builds a string of class `klass` for the `len` bytes at `ptr` with
 * encoding index `encindex`.  Visible paths:
 *   - an ArgumentError "negative string size (or size too big)";
 *   - creation through str_enc_new() with the encoding looked up from
 *     encindex;
 *   - a heap string from str_alloc_heap() that gets STR_NOFREE set and the
 *     encoding associated by index.
 * NOTE(review): the conditions selecting between these paths and the
 * assignments of pointer/length on the STR_NOFREE path are not visible in
 * this excerpt.
 */
1172str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1177 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1181 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1184 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1185 str = str_alloc_heap(klass);
1189 RBASIC(str)->flags |= STR_NOFREE;
1190 rb_enc_associate_index(str, encindex);
1219static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1221 int ecflags,
VALUE ecopts);
1226 int encidx = rb_enc_to_index(enc);
1227 if (rb_enc_get_index(str) == encidx)
1228 return is_ascii_string(str);
1239 if (!to)
return str;
1240 if (!from) from = rb_enc_get(str);
1241 if (from == to)
return str;
1242 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1243 rb_is_ascii8bit_enc(to)) {
1244 if (STR_ENC_GET(str) != to) {
1246 rb_enc_associate(str, to);
1253 from, to, ecflags, ecopts);
1254 if (
NIL_P(newstr)) {
1262rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1267 olen = RSTRING_LEN(newstr);
1268 if (ofs < -olen || olen < ofs)
1270 if (ofs < 0) ofs += olen;
1272 STR_SET_LEN(newstr, ofs);
1276 rb_str_modify(newstr);
1277 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1285 STR_SET_LEN(str, 0);
1286 rb_enc_associate(str, enc);
1292str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1294 int ecflags,
VALUE ecopts)
1299 VALUE econv_wrapper;
1300 const unsigned char *start, *sp;
1301 unsigned char *dest, *dp;
1302 size_t converted_output = (size_t)ofs;
1307 RBASIC_CLEAR_CLASS(econv_wrapper);
1309 if (!ec)
return Qnil;
1312 sp = (
unsigned char*)
ptr;
1314 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1315 (dp = dest + converted_output),
1319 size_t converted_input = sp - start;
1320 size_t rest =
len - converted_input;
1321 converted_output = dp - dest;
1323 if (converted_input && converted_output &&
1324 rest < (LONG_MAX / converted_output)) {
1325 rest = (rest * converted_output) / converted_input;
1330 olen += rest < 2 ? 2 : rest;
1331 rb_str_resize(newstr, olen);
1338 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1340 rb_enc_associate(newstr, to);
1359 const int eidx = rb_enc_to_index(eenc);
1362 return rb_enc_str_new(
ptr,
len, eenc);
1366 if ((eidx == rb_ascii8bit_encindex()) ||
1367 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1371 ienc = rb_default_internal_encoding();
1372 if (!ienc || eenc == ienc) {
1373 return rb_enc_str_new(
ptr,
len, eenc);
1377 if ((eidx == rb_ascii8bit_encindex()) ||
1378 (eidx == rb_usascii_encindex()) ||
1379 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1380 return rb_enc_str_new(
ptr,
len, ienc);
1383 str = rb_enc_str_new(NULL, 0, ienc);
1386 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1387 rb_str_initialize(str,
ptr,
len, eenc);
1395 int eidx = rb_enc_to_index(eenc);
1396 if (eidx == rb_usascii_encindex() &&
1397 !is_ascii_string(str)) {
1398 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1401 rb_enc_associate_index(str, eidx);
1460str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1462 const int termlen = TERM_LEN(str);
1467 if (str_embed_capa(str2) >=
len + termlen) {
1468 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1469 STR_SET_EMBED(str2);
1470 memcpy(ptr2, RSTRING_PTR(str),
len);
1471 TERM_FILL(ptr2+
len, termlen);
1475 if (STR_SHARED_P(str)) {
1476 root =
RSTRING(str)->as.heap.aux.shared;
1485 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1487 rb_fatal(
"about to free a possible shared root");
1489 char *ptr2 = STR_HEAP_PTR(str2);
1491 SIZED_FREE_N(ptr2, STR_HEAP_SIZE(str2));
1494 FL_SET(str2, STR_NOEMBED);
1496 STR_SET_SHARED(str2, root);
1499 STR_SET_LEN(str2,
len);
1507 str_replace_shared_without_enc(str2, str);
1508 rb_enc_cr_str_exact_copy(str2, str);
1515 return str_replace_shared(str_alloc_heap(klass), str);
1532rb_str_new_frozen_String(
VALUE orig)
/*
 * Fast path: `orig` itself is returned when BARE_STRING_P(orig) holds and it
 * is already frozen.
 * NOTE(review): the path taken otherwise is not visible in this excerpt.
 */
1540rb_str_frozen_bare_string(
VALUE orig)
1542 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
1547rb_str_tmp_frozen_acquire(
VALUE orig)
1550 return str_new_frozen_buffer(0, orig, FALSE);
1554rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1556 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1557 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1559 VALUE str = str_alloc_heap(0);
1562 FL_SET(str, STR_SHARED_ROOT);
1564 size_t capa = str_capacity(orig, TERM_LEN(orig));
1570 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1571 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1578 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1579 RBASIC(orig)->flags &= ~STR_NOFREE;
1580 STR_SET_SHARED(orig, str);
1582 RB_OBJ_SET_SHAREABLE(str);
1588 RSTRING(str)->as.heap.aux.capa =
capa + (TERM_LEN(orig) - TERM_LEN(str));
1594rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1599 if (STR_EMBED_P(tmp)) {
1602 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1608 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1612 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1613 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1618 STR_SET_LEN(tmp, 0);
1626 return str_new_frozen_buffer(klass, orig, TRUE);
1636 VALUE str = str_alloc_heap(klass);
1637 STR_SET_LEN(str, RSTRING_LEN(orig));
1638 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1639 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1640 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1641 RBASIC(orig)->flags &= ~STR_NOFREE;
1642 STR_SET_SHARED(orig, str);
1649str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1653 long len = RSTRING_LEN(orig);
1654 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1655 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1657 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1658 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1664 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1665 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1671 if ((ofs > 0) || (rest > 0) ||
1674 str = str_new_shared(klass,
shared);
1676 RSTRING(str)->as.heap.ptr += ofs;
1677 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1685 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1686 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1688 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1689 STR_SET_LEN(str, RSTRING_LEN(orig));
1695 str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
1698 str = heap_str_make_shared(klass, orig);
1703 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1715str_new_empty_String(
VALUE str)
1718 rb_enc_copy(v, str);
/* Lower bound applied to a requested capacity (see the
 * `capa < STR_BUF_MIN_SIZE` clamp further down in this file). */
1722#define STR_BUF_MIN_SIZE 63
1727 if (STR_EMBEDDABLE_P(
capa, 1)) {
1735 RSTRING(str)->as.heap.ptr[0] =
'\0';
1755 return str_new(0, 0,
len);
1761 if (STR_EMBED_P(str)) {
1762 RB_DEBUG_COUNTER_INC(obj_str_embed);
1764 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1765 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1766 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1769 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1770 SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/*
 * Returns STR_HEAP_SIZE(str) for strings that have STR_NOEMBED set but
 * neither STR_SHARED nor STR_NOFREE.
 * NOTE(review): the value returned for all other strings is not visible in
 * this excerpt.
 */
1775rb_str_memsize(
VALUE str)
1777 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1778 return STR_HEAP_SIZE(str);
1788 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1791static inline void str_discard(
VALUE str);
1792static void str_shared_replace(
VALUE str,
VALUE str2);
1797 if (str != str2) str_shared_replace(str, str2);
1808 enc = STR_ENC_GET(str2);
1811 termlen = rb_enc_mbminlen(enc);
1813 STR_SET_LEN(str, RSTRING_LEN(str2));
1815 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1817 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1818 rb_enc_associate(str, enc);
1822 if (STR_EMBED_P(str2)) {
1824 long len = RSTRING_LEN(str2);
1827 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1828 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1829 RSTRING(str2)->as.heap.ptr = new_ptr;
1830 STR_SET_LEN(str2,
len);
1832 STR_SET_NOEMBED(str2);
1835 STR_SET_NOEMBED(str);
1837 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1839 if (
FL_TEST(str2, STR_SHARED)) {
1841 STR_SET_SHARED(str,
shared);
1844 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1848 STR_SET_EMBED(str2);
1849 RSTRING_PTR(str2)[0] = 0;
1850 STR_SET_LEN(str2, 0);
1851 rb_enc_associate(str, enc);
1865 return rb_obj_as_string_result(str, obj);
1881 len = RSTRING_LEN(str2);
1882 if (STR_SHARED_P(str2)) {
1885 STR_SET_NOEMBED(str);
1886 STR_SET_LEN(str,
len);
1887 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1888 STR_SET_SHARED(str,
shared);
1889 rb_enc_cr_str_exact_copy(str, str2);
1892 str_replace_shared(str, str2);
1901 size_t size = rb_str_embed_size(
capa, 0);
1905 NEWOBJ_OF(str,
struct RString, klass,
1916 NEWOBJ_OF(str,
struct RString, klass,
1919 str->as.heap.aux.capa = 0;
1920 str->as.heap.ptr = NULL;
1930 encidx = rb_enc_get_index(str);
1931 flags &= ~ENCODING_MASK;
1934 if (encidx) rb_enc_associate_index(dup, encidx);
1944 long len = RSTRING_LEN(str);
1949 STR_SET_LEN(dup, RSTRING_LEN(str));
1950 return str_duplicate_setup_encoding(str, dup, flags);
1959 root =
RSTRING(str)->as.heap.aux.shared;
1962 root = str = str_new_frozen(klass, str);
1968 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1970 STR_SET_SHARED(dup, root);
1971 flags |= RSTRING_NOEMBED | STR_SHARED;
1973 STR_SET_LEN(dup, RSTRING_LEN(str));
1974 return str_duplicate_setup_encoding(str, dup, flags);
1980 if (STR_EMBED_P(str)) {
1981 return str_duplicate_setup_embed(klass, str, dup);
1984 return str_duplicate_setup_heap(klass, str, dup);
1992 if (STR_EMBED_P(str)) {
1993 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1996 dup = str_alloc_heap(klass);
1999 return str_duplicate_setup(klass, str, dup);
2010rb_str_dup_m(
VALUE str)
2012 if (LIKELY(BARE_STRING_P(str))) {
2023 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2030 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2034 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2035 str_duplicate_setup_embed(klass, str, new_str);
2038 new_str = ec_str_alloc_heap(ec, klass);
2039 str_duplicate_setup_heap(klass, str, new_str);
2048rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2050 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2074 static ID keyword_ids[2];
2075 VALUE orig, opt, venc, vcapa;
2080 if (!keyword_ids[0]) {
2081 keyword_ids[0] = rb_id_encoding();
2082 CONST_ID(keyword_ids[1],
"capacity");
2090 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2091 enc = rb_to_encoding(venc);
2093 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2096 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2098 if (
capa < STR_BUF_MIN_SIZE) {
2099 capa = STR_BUF_MIN_SIZE;
2103 len = RSTRING_LEN(orig);
2107 if (orig == str) n = 0;
2109 str_modifiable(str);
2110 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2112 const size_t size = (size_t)
capa + termlen;
2113 const char *
const old_ptr = RSTRING_PTR(str);
2114 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2115 char *new_ptr =
ALLOC_N(
char, size);
2116 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2117 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2119 RSTRING(str)->as.heap.ptr = new_ptr;
2121 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2122 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2123 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2125 STR_SET_LEN(str,
len);
2128 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2129 rb_enc_cr_str_exact_copy(str, orig);
2131 FL_SET(str, STR_NOEMBED);
2138 rb_enc_associate(str, enc);
2150rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2156 static ID keyword_ids[2];
2166 keyword_ids[0] = rb_id_encoding();
2167 CONST_ID(keyword_ids[1],
"capacity");
2169 encoding = kwargs[0];
2170 capacity = kwargs[1];
2179 if (UNDEF_P(encoding)) {
2181 encoding = rb_obj_encoding(orig);
2185 if (!UNDEF_P(encoding)) {
2186 enc = rb_to_encoding(encoding);
2190 if (UNDEF_P(capacity)) {
2192 VALUE empty_str = str_new(klass,
"", 0);
2194 rb_enc_associate(empty_str, enc);
2198 VALUE copy = str_duplicate(klass, orig);
2199 rb_enc_associate(copy, enc);
2212 if (orig_capa >
capa) {
2217 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2218 STR_SET_LEN(str, 0);
/* True unless the top two bits of `c` are 10, i.e. for every byte that is not
 * a UTF-8 continuation byte. */
2229#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2244static inline uintptr_t
2245count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2250 d = (d>>6) | (~d>>7);
2251 d &= NONASCII_MASK >> 7;
2254#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2256 return rb_popcount_intptr(d);
2260# if SIZEOF_VOIDP == 8
2269enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2275 long diff = (long)(e - p);
2276 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2281 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2282 const uintptr_t *s, *t;
2283 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2284 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2285 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2286 while (p < (
const char *)s) {
2287 if (is_utf8_lead_byte(*p))
len++;
2291 len += count_utf8_lead_bytes_with_word(s);
2294 p = (
const char *)s;
2297 if (is_utf8_lead_byte(*p))
len++;
2303 else if (rb_enc_asciicompat(enc)) {
2308 q = search_nonascii(p, e);
2314 p += rb_enc_fast_mbclen(p, e, enc);
2321 q = search_nonascii(p, e);
2327 p += rb_enc_mbclen(p, e, enc);
2334 for (c=0; p<e; c++) {
2335 p += rb_enc_mbclen(p, e, enc);
2350rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2358 long diff = (long)(e - p);
2359 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2361 else if (rb_enc_asciicompat(enc)) {
2365 q = search_nonascii(p, e);
2373 ret = rb_enc_precise_mbclen(p, e, enc);
2388 for (c=0; p<e; c++) {
2389 ret = rb_enc_precise_mbclen(p, e, enc);
2396 if (p + rb_enc_mbminlen(enc) <= e)
2397 p += rb_enc_mbminlen(enc);
2413 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2414 if (!enc) enc = STR_ENC_GET(str);
2415 p = RSTRING_PTR(str);
2420 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2425 return enc_strlen(p, e, enc, cr);
2432 return str_strlen(str, NULL);
2446 return LONG2NUM(str_strlen(str, NULL));
2458rb_str_bytesize(
VALUE str)
/* Returns RBOOL() of whether the byte length of `str` is zero. */
2477rb_str_empty(
VALUE str)
2479 return RBOOL(RSTRING_LEN(str) == 0);
2498 char *ptr1, *ptr2, *ptr3;
2503 enc = rb_enc_check_str(str1, str2);
2506 termlen = rb_enc_mbminlen(enc);
2507 if (len1 > LONG_MAX - len2) {
2508 rb_raise(rb_eArgError,
"string size too big");
2510 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2511 ptr3 = RSTRING_PTR(str3);
2512 memcpy(ptr3, ptr1, len1);
2513 memcpy(ptr3+len1, ptr2, len2);
2514 TERM_FILL(&ptr3[len1+len2], termlen);
2530 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2533 int enc1 = rb_enc_get_index(str1);
2534 int enc2 = rb_enc_get_index(str2);
2539 else if (enc2 < 0) {
2542 else if (enc1 != enc2) {
2545 else if (len1 > LONG_MAX - len2) {
2579 rb_enc_copy(str2, str);
2584 rb_raise(rb_eArgError,
"negative argument");
2586 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2587 if (STR_EMBEDDABLE_P(
len, 1)) {
2589 memset(RSTRING_PTR(str2), 0,
len + 1);
2596 STR_SET_LEN(str2,
len);
2597 rb_enc_copy(str2, str);
2600 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2601 rb_raise(rb_eArgError,
"argument too big");
2604 len *= RSTRING_LEN(str);
2605 termlen = TERM_LEN(str);
2607 ptr2 = RSTRING_PTR(str2);
2609 n = RSTRING_LEN(str);
2610 memcpy(ptr2, RSTRING_PTR(str), n);
2611 while (n <=
len/2) {
2612 memcpy(ptr2 + n, ptr2, n);
2615 memcpy(ptr2 + n, ptr2,
len-n);
2617 STR_SET_LEN(str2,
len);
2618 TERM_FILL(&ptr2[
len], termlen);
2619 rb_enc_cr_str_copy_for_substr(str2, str);
/*
 * Tests whether `str` has STR_TMPLOCK set.
 * NOTE(review): the body of the branch is not visible in this excerpt;
 * presumably it raises -- confirm.
 */
2658rb_check_lockedtmp(
VALUE str)
2660 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Flags that send str_modifiable() onto its slow path: frozen, temporarily
 * locked, or chilled. */
2667#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
/*
 * Guard to run before mutating `str`.  The common case is a single flag test
 * against STR_UNMODIFIABLE_MASK.  Only when one of those flags is set are the
 * detailed checks run: CHILLED_STRING_MUTATED() for chilled strings,
 * rb_check_lockedtmp() and rb_check_frozen().
 * NOTE(review): brace lines are missing from this excerpt, so the exact
 * nesting of the last two calls relative to the chilled check cannot be
 * confirmed here.
 */
2669str_modifiable(
VALUE str)
2673 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2674 if (CHILLED_STRING_P(str)) {
2675 CHILLED_STRING_MUTATED(str);
2677 rb_check_lockedtmp(str);
2678 rb_check_frozen(str);
/*
 * The visible condition singles out strings that are embedded or that carry
 * neither STR_SHARED nor STR_NOFREE.
 * NOTE(review): the values returned in each branch are not visible in this
 * excerpt; presumably those strings are the non-dependent ones -- confirm.
 */
2683str_dependent_p(
VALUE str)
2685 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK plus STR_SHARED and STR_NOFREE; str_independent()
 * takes its slow path when any of these is set. */
2695#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
/*
 * When any STR_DEPENDANT_MASK flag is set, runs the str_modifiable() guard
 * and returns the negation of str_dependent_p(str).
 * NOTE(review): the return value when no such flag is set is not visible in
 * this excerpt.
 */
2697str_independent(
VALUE str)
2701 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2702 str_modifiable(str);
2703 return !str_dependent_p(str);
/*
 * Gives `str` a buffer of its own holding its first `len` bytes.
 * Visible steps:
 *   - a special case for non-embedded strings whose contents would fit the
 *     embed area (capa + termlen <= str_embed_capa), which sets the length;
 *   - otherwise `len` bytes are copied from the old pointer into `ptr`, the
 *     old buffer is released only when the string is a plain heap string
 *     (STR_NOEMBED without STR_NOFREE/STR_SHARED), the string is flagged
 *     non-embedded with STR_SHARED/STR_NOFREE cleared, the terminator is
 *     written and the length stored.
 * NOTE(review): the computation of `capa` (presumably len + expand), the
 * allocation of `ptr` and the store of the new pointer/capacity are not
 * visible in this excerpt.
 */
2709str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2719 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2724 STR_SET_LEN(str,
len);
2729 oldptr = RSTRING_PTR(str);
2731 memcpy(
ptr, oldptr,
len);
2733 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2734 SIZED_FREE_N(oldptr, STR_HEAP_SIZE(str));
2736 STR_SET_NOEMBED(str);
2737 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2738 TERM_FILL(
ptr +
len, termlen);
2740 STR_SET_LEN(str,
len);
2747 if (!str_independent(str))
2748 str_make_independent(str);
2757 int termlen = TERM_LEN(str);
2758 long len = RSTRING_LEN(str);
2761 rb_raise(rb_eArgError,
"negative expanding string size");
2763 if (expand >= LONG_MAX -
len) {
2764 rb_raise(rb_eArgError,
"string size too big");
2767 if (!str_independent(str)) {
2768 str_make_independent_expand(str,
len, expand, termlen);
2770 else if (expand > 0) {
2771 RESIZE_CAPA_TERM(str,
len + expand, termlen);
/*
 * Prepares +str+ for in-place modification: if str_independent() reports
 * false, str_make_independent() is called on it.
 * NOTE(review): the name suggests the cached code range is left untouched --
 * confirm.
 */
2778str_modify_keep_cr(
VALUE str)
2780 if (!str_independent(str))
2781 str_make_independent(str);
/*
 * Releases the contents of +str+ after the str_modifiable() check.
 */
2788str_discard(
VALUE str)
2790 str_modifiable(str);
/* Free only a heap buffer that is neither shared nor marked STR_NOFREE. */
2791 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2792 SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/* Clear the pointer and length so the freed buffer is no longer referenced. */
2793 RSTRING(str)->as.heap.ptr = 0;
2794 STR_SET_LEN(str, 0);
2801 int encindex = rb_enc_get_index(str);
2803 if (RB_UNLIKELY(encindex == -1)) {
2807 if (RB_LIKELY(rb_str_encindex_fastpath(encindex))) {
2812 if (!rb_enc_asciicompat(enc)) {
2834 return RSTRING_PTR(str);
/*
 * Searches the +len+ bytes starting at +s+ for a position where
 * zero_filled(s, minlen) holds and returns that position.
 * The scan advances by rb_enc_mbclen() each step and stops once fewer than
 * +minlen+ bytes remain.
 */
2838str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2840 const char *e = s +
len;
2842 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2843 if (zero_filled(s, minlen))
return s;
/*
 * Makes sure +termlen+ zero bytes follow the +len+ content bytes at +s+.
 * The visible return yields RSTRING_PTR(str).
 */
2849str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
/*
 * Dependent string: if the terminator bytes are not already zero, call
 * str_make_independent_expand() with zero expansion and the wanted
 * terminator length.
 */
2854 if (str_dependent_p(str)) {
2855 if (!zero_filled(s +
len, termlen))
2856 str_make_independent_expand(str,
len, 0L, termlen);
/* TERM_FILL writes the terminator bytes in place. */
2859 TERM_FILL(s +
len, termlen);
2862 return RSTRING_PTR(str);
2866rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2868 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2869 long len = RSTRING_LEN(str);
2873 rb_check_lockedtmp(str);
2874 str_make_independent_expand(str,
len, 0L, termlen);
2876 else if (str_dependent_p(str)) {
2877 if (termlen > oldtermlen)
2878 str_make_independent_expand(str,
len, 0L, termlen);
2881 if (!STR_EMBED_P(str)) {
2886 if (termlen > oldtermlen) {
2887 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2895str_null_check(
VALUE str,
int *w)
2897 char *s = RSTRING_PTR(str);
2898 long len = RSTRING_LEN(str);
2901 if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
2903 minlen = rb_enc_mbminlen(enc);
2907 if (str_null_char(s,
len, minlen, enc)) {
2910 return str_fill_term(str, s,
len, minlen);
2915 if (!s || memchr(s, 0,
len)) {
2919 s = str_fill_term(str, s,
len, minlen);
2925rb_str_null_check(
VALUE str)
2933 if (RB_LIKELY(rb_str_enc_fastpath(str))) {
2934 if (!s || memchr(s, 0,
len)) {
2935 rb_raise(rb_eArgError,
"string contains null byte");
2940 const char *s = str_null_check(str, &w);
2943 rb_raise(rb_eArgError,
"string contains null char");
2945 rb_raise(rb_eArgError,
"string contains null byte");
/* Thin wrapper: returns whatever str_null_check() returns for +str+. */
2953rb_str_to_cstr(
VALUE str)
2956 return str_null_check(str, &w);
2964 char *s = str_null_check(str, &w);
2967 rb_raise(rb_eArgError,
"string contains null char");
2969 rb_raise(rb_eArgError,
"string contains null byte");
/*
 * Applies str_fill_term() to the whole of +str+, using +newminlen+ as the
 * terminator length, and returns its result.
 */
2975rb_str_fill_terminator(
VALUE str,
const int newminlen)
2977 char *s = RSTRING_PTR(str);
2978 long len = RSTRING_LEN(str);
2979 return str_fill_term(str, s,
len, newminlen);
2985 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
3011str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
3020 else if (rb_enc_asciicompat(enc)) {
3021 const char *p2, *e2;
3024 while (p < e && 0 < nth) {
3031 p2 = search_nonascii(p, e2);
3040 n = rb_enc_mbclen(p, e, enc);
3051 while (p < e && nth--) {
3052 p += rb_enc_mbclen(p, e, enc);
3063 return str_nth_len(p, e, &nth, enc);
3067str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3072 p = str_nth_len(p, e, &nth, enc);
/*
 * Converts the index +nth+ within [p, e) into a byte offset, using
 * str_nth() to locate the position.
 */
3081str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3083 const char *pp = str_nth(p, e, nth, enc, singlebyte);
/* str_nth() found no such position: answer with the full byte length of the range. */
3084 if (!pp)
return e - p;
3091 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3092 STR_ENC_GET(str), single_byte_optimizable(str));
3097str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3100 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3101 const uintptr_t *s, *t;
3102 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3103 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3104 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3105 while (p < (
const char *)s) {
3106 if (is_utf8_lead_byte(*p)) nth--;
3110 nth -= count_utf8_lead_bytes_with_word(s);
3112 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3116 if (is_utf8_lead_byte(*p)) {
3117 if (nth == 0)
break;
3127str_utf8_offset(
const char *p,
const char *e,
long nth)
3129 const char *pp = str_utf8_nth(p, e, &nth);
3138 if (single_byte_optimizable(str) || pos < 0)
3141 char *p = RSTRING_PTR(str);
3142 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3147str_subseq(
VALUE str,
long beg,
long len)
3155 const int termlen = TERM_LEN(str);
3156 if (!SHARABLE_SUBSTRING_P(str, beg,
len)) {
3157 str2 = rb_enc_str_new(RSTRING_PTR(str) + beg,
len, rb_str_enc_get(str));
3166 if (str_embed_capa(str2) >=
len + termlen) {
3167 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3168 STR_SET_EMBED(str2);
3169 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3170 TERM_FILL(ptr2+
len, termlen);
3172 STR_SET_LEN(str2,
len);
3180 str_replace_shared(str2, str);
3186 RSTRING(str2)->as.heap.ptr += beg;
3187 if (RSTRING_LEN(str2) >
len) {
3188 STR_SET_LEN(str2,
len);
3198 VALUE str2 = str_subseq(str, beg,
len);
3199 rb_enc_cr_str_copy_for_substr(str2, str);
3208 const long blen = RSTRING_LEN(str);
3210 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3212 if (
len < 0)
return 0;
3213 if (beg < 0 && -beg < 0)
return 0;
3217 if (single_byte_optimizable(str)) {
3218 if (beg > blen)
return 0;
3221 if (beg < 0)
return 0;
3223 if (
len > blen - beg)
3225 if (
len < 0)
return 0;
3230 if (
len > -beg)
len = -beg;
3234 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3237 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3243 slen = str_strlen(str, enc);
3245 if (beg < 0)
return 0;
3247 if (
len == 0)
goto end;
3250 else if (beg > 0 && beg > blen) {
3254 if (beg > str_strlen(str, enc))
return 0;
3259 enc == rb_utf8_encoding()) {
3260 p = str_utf8_nth(s, e, &beg);
3261 if (beg > 0)
return 0;
3262 len = str_utf8_offset(p, e,
len);
3268 p = s + beg * char_sz;
3272 else if (
len * char_sz > e - p)
3277 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3278 if (beg > 0)
return 0;
3282 len = str_offset(p, e,
len, enc, 0);
/*
 * Forward declaration; the definition appears later in this file.
 * When the resulting length is zero and +empty+ is zero, the definition
 * returns Qnil.
 */
3290static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3295 return str_substr(str, beg,
len, TRUE);
3305str_substr(
VALUE str,
long beg,
long len,
int empty)
3309 if (!p)
return Qnil;
3310 if (!
len && !empty)
return Qnil;
3312 beg = p - RSTRING_PTR(str);
3314 VALUE str2 = str_subseq(str, beg,
len);
3315 rb_enc_cr_str_copy_for_substr(str2, str);
3323 if (CHILLED_STRING_P(str)) {
3328 rb_str_resize(str, RSTRING_LEN(str));
3346 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3389str_uminus(
VALUE str)
3394 return rb_fstring(str);
/* From here on, rb_str_dup_frozen expands to rb_str_new_frozen. */
3398#define rb_str_dup_frozen rb_str_new_frozen
3403 rb_check_frozen(str);
3404 if (
FL_TEST(str, STR_TMPLOCK)) {
3407 FL_SET(str, STR_TMPLOCK);
3414 rb_check_frozen(str);
3415 if (!
FL_TEST(str, STR_TMPLOCK)) {
3435 const int termlen = TERM_LEN(str);
3437 str_modifiable(str);
3438 if (STR_SHARED_P(str)) {
3441 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3442 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3453 else if (
len > RSTRING_LEN(str)) {
3457 const char *
const new_end = RSTRING_PTR(str) +
len;
3467 else if (
len < RSTRING_LEN(str)) {
3475 STR_SET_LEN(str,
len);
3476 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3483 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3486 int independent = str_independent(str);
3487 long slen = RSTRING_LEN(str);
3488 const int termlen = TERM_LEN(str);
3490 if (slen >
len || (termlen != 1 && slen <
len)) {
3496 if (STR_EMBED_P(str)) {
3497 if (
len == slen)
return str;
3498 if (str_embed_capa(str) >=
len + termlen) {
3499 STR_SET_LEN(str,
len);
3503 str_make_independent_expand(str, slen,
len - slen, termlen);
3505 else if (str_embed_capa(str) >=
len + termlen) {
3507 char *
ptr = STR_HEAP_PTR(str);
3509 if (slen >
len) slen =
len;
3512 STR_SET_LEN(str,
len);
3514 SIZED_FREE_N(
ptr,
capa + termlen);
3518 else if (!independent) {
3519 if (
len == slen)
return str;
3520 str_make_independent_expand(str, slen,
len - slen, termlen);
3524 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3525 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3528 else if (
len == slen)
return str;
3529 STR_SET_LEN(str,
len);
3536str_ensure_available_capa(
VALUE str,
long len)
3538 str_modify_keep_cr(str);
3540 const int termlen = TERM_LEN(str);
3541 long olen = RSTRING_LEN(str);
3543 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3544 rb_raise(rb_eArgError,
"string sizes too big");
3547 long total = olen +
len;
3548 long capa = str_capacity(str, termlen);
3551 if (total >= LONG_MAX / 2) {
3554 while (total >
capa) {
3557 RESIZE_CAPA_TERM(str,
capa, termlen);
3562str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3565 str_modify_keep_cr(str);
3570 if (
len == 0)
return 0;
3572 long total, olen,
off = -1;
3574 const int termlen = TERM_LEN(str);
3577 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3581 long capa = str_capacity(str, termlen);
3583 if (olen > LONG_MAX -
len) {
3584 rb_raise(rb_eArgError,
"string sizes too big");
3588 if (total >= LONG_MAX / 2) {
3591 while (total >
capa) {
3594 RESIZE_CAPA_TERM(str,
capa, termlen);
3595 sptr = RSTRING_PTR(str);
3600 memcpy(sptr + olen,
ptr,
len);
3601 STR_SET_LEN(str, total);
3602 TERM_FILL(sptr + total, termlen);
/*
 * Shorthands for str_buf_cat4() with keep_cr fixed to false.
 * str_buf_cat substitutes `len` without parentheses.
 * str_buf_cat2 takes the length from rb_strlen_lit(ptr).
 * NOTE(review): presumably `ptr` must therefore be a string literal -- confirm.
 */
3607#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3608#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3613 if (
len == 0)
return str;
3615 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3617 return str_buf_cat(str,
ptr,
len);
3628rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3633 if (UNLIKELY(!str_independent(str))) {
3634 str_make_independent(str);
3637 long string_length = -1;
3638 const int null_terminator_length = 1;
3643 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3644 rb_raise(rb_eArgError,
"string sizes too big");
3647 long string_capacity = str_capacity(str, null_terminator_length);
3653 if (LIKELY(string_capacity >= string_length + 1)) {
3655 sptr[string_length] = byte;
3656 STR_SET_LEN(str, string_length + 1);
3657 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3661 str_buf_cat(str, (
char *)&
byte, 1);
3677 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3688rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3689 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3698 if (str_encindex == ptr_encindex) {
3700 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3704 str_enc = rb_enc_from_index(str_encindex);
3705 ptr_enc = rb_enc_from_index(ptr_encindex);
3706 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3709 if (RSTRING_LEN(str) == 0) {
3712 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3718 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3722 str_cr = rb_enc_str_coderange(str);
3727 *ptr_cr_ret = ptr_cr;
3729 if (str_encindex != ptr_encindex &&
3732 str_enc = rb_enc_from_index(str_encindex);
3733 ptr_enc = rb_enc_from_index(ptr_encindex);
3738 res_encindex = str_encindex;
3743 res_encindex = str_encindex;
3747 res_encindex = ptr_encindex;
3752 res_encindex = str_encindex;
3759 res_encindex = str_encindex;
3765 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3767 str_buf_cat(str,
ptr,
len);
3773 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3780 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3790 if (rb_enc_asciicompat(enc)) {
3791 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3797 unsigned int c = (
unsigned char)*
ptr;
3798 int len = rb_enc_codelen(c, enc);
3799 rb_enc_mbcput(c, buf, enc);
3800 rb_enc_cr_str_buf_cat(str, buf,
len,
3811 int str2_cr = rb_enc_str_coderange(str2);
3813 if (rb_str_enc_fastpath(str)) {
3817 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3823 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3834 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3850rb_str_concat_literals(
size_t num,
const VALUE *strary)
3854 unsigned long len = 1;
3859 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3861 str_enc_copy_direct(str, strary[0]);
3863 for (i = s; i < num; ++i) {
3864 const VALUE v = strary[i];
3868 if (encidx != ENCINDEX_US_ASCII) {
3870 rb_enc_set_index(str, encidx);
3883rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3885 str_modifiable(str);
3890 else if (argc > 1) {
3893 rb_enc_copy(arg_str, str);
3894 for (i = 0; i < argc; i++) {
3929rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3931 long needed_capacity = 0;
3935 for (
int index = 0; index < argc; index++) {
3936 VALUE obj = argv[index];
3944 needed_capacity += RSTRING_LEN(obj);
3949 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3956 str_ensure_available_capa(str, needed_capacity);
3959 for (
int index = 0; index < argc; index++) {
3960 VALUE obj = argv[index];
3965 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3966 char byte = (char)(
NUM2INT(obj) & 0xFF);
3980 rb_bug(
"append_as_bytes arguments should have been validated");
3984 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3985 TERM_FILL(sptr, TERM_LEN(str));
3990 for (
int index = 0; index < argc; index++) {
3991 VALUE obj = argv[index];
4008 rb_bug(
"append_as_bytes arguments should have been validated");
4087 if (rb_num_to_uint(str2, &code) == 0) {
4100 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4103 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4106 long pos = RSTRING_LEN(str1);
4111 switch (
len = rb_enc_codelen(code, enc)) {
4112 case ONIGERR_INVALID_CODE_POINT_VALUE:
4113 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4115 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4121 rb_enc_mbcput(code, buf, enc);
4122 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4123 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4125 rb_str_resize(str1, pos+
len);
4126 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
/*
 * Chooses an encoding index for appending +code+ as a single byte to a
 * string in +enc+.  Only the binary and US-ASCII encodings enter the
 * visible branch.
 */
4139rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4141 int encidx = rb_enc_to_index(enc);
4143 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
/* A code above 127 appended to a US-ASCII string selects the binary encoding index. */
4148 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4149 return ENCINDEX_ASCII_8BIT;
4171rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4173 str_modifiable(str);
4178 else if (argc > 1) {
4181 rb_enc_copy(arg_str, str);
4182 for (i = 0; i < argc; i++) {
4195 st_index_t precomputed_hash;
4196 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4198 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4199 return precomputed_hash;
4202 return str_do_hash(str);
4209 const char *ptr1, *ptr2;
4212 return (len1 != len2 ||
4214 memcmp(ptr1, ptr2, len1) != 0);
4226rb_str_hash_m(
VALUE str)
/* Smaller of a and b.  Each argument appears twice in the expansion, so pass only side-effect-free expressions. */
4232#define lesser(a,b) (((a)>(b))?(b):(a))
4240 if (RSTRING_LEN(str1) == 0)
return TRUE;
4241 if (RSTRING_LEN(str2) == 0)
return TRUE;
4244 if (idx1 == idx2)
return TRUE;
4245 rc1 = rb_enc_str_coderange(str1);
4246 rc2 = rb_enc_str_coderange(str2);
4249 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4253 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4263 const char *ptr1, *ptr2;
4266 if (str1 == str2)
return 0;
4269 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4278 if (len1 > len2)
return 1;
4281 if (retval > 0)
return 1;
4315 if (str1 == str2)
return Qtrue;
4322 return rb_str_eql_internal(str1, str2);
4336 if (str1 == str2)
return Qtrue;
4338 return rb_str_eql_internal(str1, str2);
4376 return rb_invcmp(str1, str2);
4418 return str_casecmp(str1, s);
4426 const char *p1, *p1end, *p2, *p2end;
4428 enc = rb_enc_compatible(str1, str2);
4433 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4434 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4435 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4436 while (p1 < p1end && p2 < p2end) {
4438 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4439 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4441 return INT2FIX(c1 < c2 ? -1 : 1);
4448 while (p1 < p1end && p2 < p2end) {
4449 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4450 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4452 if (0 <= c1 && 0 <= c2) {
4456 return INT2FIX(c1 < c2 ? -1 : 1);
4460 l1 = rb_enc_mbclen(p1, p1end, enc);
4461 l2 = rb_enc_mbclen(p2, p2end, enc);
4462 len = l1 < l2 ? l1 : l2;
4463 r = memcmp(p1, p2,
len);
4465 return INT2FIX(r < 0 ? -1 : 1);
4467 return INT2FIX(l1 < l2 ? -1 : 1);
4473 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4474 if (p1 == p1end)
return INT2FIX(-1);
4507 return str_casecmp_p(str1, s);
4514 VALUE folded_str1, folded_str2;
4515 VALUE fold_opt = sym_fold;
4517 enc = rb_enc_compatible(str1, str2);
4522 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4523 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4525 return rb_str_eql(folded_str1, folded_str2);
/*
 * Substring search: looks for the +sub_len+ bytes at +sub_ptr+ with
 * rb_memsearch(), retrying while a hit does not coincide with `t`.
 * Returns a negative value when nothing is found.
 */
4529strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4530 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4532 const char *search_start = str_ptr;
4533 long pos, search_len = str_len - offset;
4537 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
/* No byte-level hit: pass the negative result straight through. */
4538 if (pos < 0)
return pos;
/* NOTE(review): `t` is presumably the hit adjusted to a character boundary -- confirm. */
4540 if (t == search_start + pos)
break;
/* Otherwise shrink the remaining window and keep `offset` in step with it. */
4541 search_len -= t - search_start;
4542 if (search_len <= 0)
return -1;
4543 offset += t - search_start;
/* `pos` is relative to the current window; adding `offset` rebases it. */
4546 return pos + offset;
/*
 * Front ends to rb_strseq_index(); the last argument is its in_byte flag
 * (0 for rb_str_index, 1 for rb_str_byteindex).
 */
4550#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4551#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
/*
 * Finds +sub+ inside +str+ starting from +offset+ and returns the result of
 * strseq_core(), or -1 when no match is possible.
 * When +in_byte+ is zero, +offset+ is converted with str_offset() before
 * the search; when non-zero it is used as given.
 */
4554rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4556 const char *str_ptr, *str_ptr_end, *sub_ptr;
4557 long str_len, sub_len;
4560 enc = rb_enc_check(str, sub);
/* A needle for which is_broken_string() holds is reported as not found. */
4561 if (is_broken_string(sub))
return -1;
4563 str_ptr = RSTRING_PTR(str);
4565 str_len = RSTRING_LEN(str);
4566 sub_ptr = RSTRING_PTR(sub);
4567 sub_len = RSTRING_LEN(sub);
/* A needle longer (in bytes) than the haystack cannot match. */
4569 if (str_len < sub_len)
return -1;
4572 long str_len_char, sub_len_char;
4573 int single_byte = single_byte_optimizable(str);
/* Use byte lengths when byte-addressed (or single-byte for +str+); otherwise take lengths from str_strlen(). */
4574 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4575 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
/* NOTE(review): presumably applied only to a negative offset, which then counts back from the end -- confirm. */
4577 offset += str_len_char;
4578 if (offset < 0)
return -1;
/* Not enough room left after `offset` for the needle. */
4580 if (str_len_char - offset < sub_len_char)
return -1;
4581 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
/* An empty needle matches at the starting offset. */
4584 if (sub_len == 0)
return offset;
4587 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4600rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4607 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4608 long slen = str_strlen(str, enc);
4610 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4622 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4623 enc, single_byte_optimizable(str));
4634 pos = rb_str_index(str, sub, pos);
/*
 * Validates that byte offset +pos+ falls on a character boundary of +str+.
 * The check is skipped when single_byte_optimizable() holds.
 */
4648str_ensure_byte_pos(
VALUE str,
long pos)
4650 if (!single_byte_optimizable(str)) {
4651 const char *s = RSTRING_PTR(str);
4653 const char *p = s + pos;
/* An offset off a boundary is reported with the message below. */
4654 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4656 "offset %ld does not land on character boundary", pos);
4729rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4735 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4736 long slen = RSTRING_LEN(str);
4738 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4749 str_ensure_byte_pos(str, pos);
4761 pos = rb_str_byteindex(str, sub, pos);
4762 if (pos >= 0)
return LONG2NUM(pos);
/*
 * Reverse byte search: walks backwards from search_str + search_len and
 * returns a pointer to the last byte equal to +chr+.
 * Each byte is compared as unsigned char, so +chr+ should be in 0..255.
 */
4769memrchr(
const char *search_str,
int chr,
long search_len)
4771 const char *ptr = search_str + search_len;
/* Pre-decrement so the first byte examined is the last one in the range. */
4772 while (ptr > search_str) {
4773 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4783 char *hit, *adjusted;
4785 long slen, searchlen;
4788 sbeg = RSTRING_PTR(str);
4789 slen = RSTRING_LEN(sub);
4790 if (slen == 0)
return s - sbeg;
4792 t = RSTRING_PTR(sub);
4794 searchlen = s - sbeg + 1;
4796 if (memcmp(s, t, slen) == 0) {
4801 hit = memrchr(sbeg, c, searchlen);
4804 if (hit != adjusted) {
4805 searchlen = adjusted - sbeg;
4808 if (memcmp(hit, t, slen) == 0)
4810 searchlen = adjusted - sbeg;
4811 }
while (searchlen > 0);
4825 enc = rb_enc_check(str, sub);
4826 if (is_broken_string(sub))
return -1;
4827 singlebyte = single_byte_optimizable(str);
4828 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4829 slen = str_strlen(sub, enc);
4832 if (
len < slen)
return -1;
4833 if (
len - pos < slen) pos =
len - slen;
4834 if (
len == 0)
return pos;
4836 sbeg = RSTRING_PTR(str);
4839 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4845 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4846 return str_rindex(str, sub, s, enc);
4858rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4863 long pos,
len = str_strlen(str, enc);
4865 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4867 if (pos < 0 && (pos +=
len) < 0) {
4873 if (pos >
len) pos =
len;
4881 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4882 enc, single_byte_optimizable(str));
4893 pos = rb_str_rindex(str, sub, pos);
4903rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4909 enc = rb_enc_check(str, sub);
4910 if (is_broken_string(sub))
return -1;
4911 len = RSTRING_LEN(str);
4912 slen = RSTRING_LEN(sub);
4915 if (
len < slen)
return -1;
4916 if (
len - pos < slen) pos =
len - slen;
4917 if (
len == 0)
return pos;
4919 sbeg = RSTRING_PTR(str);
4922 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4929 return str_rindex(str, sub, s, enc);
5019rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
5023 long pos,
len = RSTRING_LEN(str);
5025 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
5027 if (pos < 0 && (pos +=
len) < 0) {
5033 if (pos >
len) pos =
len;
5039 str_ensure_byte_pos(str, pos);
5051 pos = rb_str_byterindex(str, sub, pos);
5052 if (pos >= 0)
return LONG2NUM(pos);
5094 switch (OBJ_BUILTIN_TYPE(y)) {
5148rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5155 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5186rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5190 re = get_pat(argv[0]);
5191 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5200static enum neighbor_char
5206 if (rb_enc_mbminlen(enc) > 1) {
5208 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5210 return NEIGHBOR_NOT_CHAR;
5212 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5214 if (!l)
return NEIGHBOR_NOT_CHAR;
5215 if (l !=
len)
return NEIGHBOR_WRAPPED;
5216 rb_enc_mbcput(c, p, enc);
5217 r = rb_enc_precise_mbclen(p, p +
len, enc);
5219 return NEIGHBOR_NOT_CHAR;
5221 return NEIGHBOR_FOUND;
5224 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5227 return NEIGHBOR_WRAPPED;
5228 ++((
unsigned char*)p)[i];
5229 l = rb_enc_precise_mbclen(p, p+
len, enc);
5233 return NEIGHBOR_FOUND;
5236 memset(p+l, 0xff,
len-l);
5242 for (len2 =
len-1; 0 < len2; len2--) {
5243 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5247 memset(p+len2+1, 0xff,
len-(len2+1));
5252static enum neighbor_char
5257 if (rb_enc_mbminlen(enc) > 1) {
5259 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5261 return NEIGHBOR_NOT_CHAR;
5263 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5264 if (!c)
return NEIGHBOR_NOT_CHAR;
5267 if (!l)
return NEIGHBOR_NOT_CHAR;
5268 if (l !=
len)
return NEIGHBOR_WRAPPED;
5269 rb_enc_mbcput(c, p, enc);
5270 r = rb_enc_precise_mbclen(p, p +
len, enc);
5272 return NEIGHBOR_NOT_CHAR;
5274 return NEIGHBOR_FOUND;
5277 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5280 return NEIGHBOR_WRAPPED;
5281 --((
unsigned char*)p)[i];
5282 l = rb_enc_precise_mbclen(p, p+
len, enc);
5286 return NEIGHBOR_FOUND;
5289 memset(p+l, 0,
len-l);
5295 for (len2 =
len-1; 0 < len2; len2--) {
5296 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5300 memset(p+len2+1, 0,
len-(len2+1));
5314static enum neighbor_char
5315enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5317 enum neighbor_char ret;
5321 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5325 const int max_gaps = 1;
5327 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5329 ctype = ONIGENC_CTYPE_DIGIT;
5331 ctype = ONIGENC_CTYPE_ALPHA;
5333 return NEIGHBOR_NOT_CHAR;
5336 for (
try = 0;
try <= max_gaps; ++
try) {
5337 ret = enc_succ_char(p,
len, enc);
5338 if (ret == NEIGHBOR_FOUND) {
5339 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5341 return NEIGHBOR_FOUND;
5348 ret = enc_pred_char(p,
len, enc);
5349 if (ret == NEIGHBOR_FOUND) {
5350 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5363 return NEIGHBOR_NOT_CHAR;
5366 if (ctype != ONIGENC_CTYPE_DIGIT) {
5368 return NEIGHBOR_WRAPPED;
5372 enc_succ_char(carry,
len, enc);
5373 return NEIGHBOR_WRAPPED;
5391 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5392 rb_enc_cr_str_copy_for_substr(str, orig);
5393 return str_succ(str);
5400 char *sbeg, *s, *e, *last_alnum = 0;
5401 int found_alnum = 0;
5403 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5404 long carry_pos = 0, carry_len = 1;
5405 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5407 slen = RSTRING_LEN(str);
5408 if (slen == 0)
return str;
5410 enc = STR_ENC_GET(str);
5411 sbeg = RSTRING_PTR(str);
5412 s = e = sbeg + slen;
5414 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5415 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5421 l = rb_enc_precise_mbclen(s, e, enc);
5422 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5423 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5424 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5426 case NEIGHBOR_NOT_CHAR:
5428 case NEIGHBOR_FOUND:
5430 case NEIGHBOR_WRAPPED:
5435 carry_pos = s - sbeg;
5440 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5441 enum neighbor_char neighbor;
5442 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5443 l = rb_enc_precise_mbclen(s, e, enc);
5444 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5445 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5447 neighbor = enc_succ_char(tmp, l, enc);
5449 case NEIGHBOR_FOUND:
5453 case NEIGHBOR_WRAPPED:
5456 case NEIGHBOR_NOT_CHAR:
5459 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5461 enc_succ_char(s, l, enc);
5463 if (!rb_enc_asciicompat(enc)) {
5464 MEMCPY(carry, s,
char, l);
5467 carry_pos = s - sbeg;
5471 RESIZE_CAPA(str, slen + carry_len);
5472 sbeg = RSTRING_PTR(str);
5473 s = sbeg + carry_pos;
5474 memmove(s + carry_len, s, slen - carry_pos);
5475 memmove(s, carry, carry_len);
5477 STR_SET_LEN(str, slen);
5478 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5479 rb_enc_str_coderange(str);
5494rb_str_succ_bang(
VALUE str)
5502all_digits_p(
const char *s,
long len)
5530 VALUE end, exclusive;
5534 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5540 VALUE current, after_end;
5547 enc = rb_enc_check(beg, end);
5548 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5550 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5551 char c = RSTRING_PTR(beg)[0];
5552 char e = RSTRING_PTR(end)[0];
5554 if (c > e || (excl && c == e))
return beg;
5556 VALUE str = rb_enc_str_new(&c, 1, enc);
5558 if ((*each)(str, arg))
break;
5559 if (!excl && c == e)
break;
5561 if (excl && c == e)
break;
5566 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5567 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5568 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5573 b = rb_str_to_inum(beg, 10, FALSE);
5574 e = rb_str_to_inum(end, 10, FALSE);
5581 if (excl && bi == ei)
break;
5582 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5587 ID op = excl ?
'<' : idLE;
5588 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5593 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5594 b = rb_funcallv(b, succ, 0, 0);
5601 if (n > 0 || (excl && n == 0))
return beg;
5603 after_end = rb_funcallv(end, succ, 0, 0);
5608 next = rb_funcallv(current, succ, 0, 0);
5609 if ((*each)(current, arg))
break;
5610 if (
NIL_P(next))
break;
5614 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5629 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5630 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5631 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5633 b = rb_str_to_inum(beg, 10, FALSE);
5639 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5647 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5648 b = rb_funcallv(b, succ, 0, 0);
5654 VALUE next = rb_funcallv(current, succ, 0, 0);
5655 if ((*each)(current, arg))
break;
5658 if (RSTRING_LEN(current) == 0)
5669 if (!
rb_equal(str, *argp))
return 0;
5683 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5684 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5685 rb_enc_asciicompat(STR_ENC_GET(val))) {
5686 const char *bp = RSTRING_PTR(beg);
5687 const char *ep = RSTRING_PTR(end);
5688 const char *vp = RSTRING_PTR(val);
5689 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5690 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5698 if (b <= v && v < e)
return Qtrue;
5699 return RBOOL(!
RTEST(exclusive) && v == e);
5706 all_digits_p(bp, RSTRING_LEN(beg)) &&
5707 all_digits_p(ep, RSTRING_LEN(end))) {
5712 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5714 return RBOOL(
NIL_P(val));
5737 return rb_str_subpat(str, indx,
INT2FIX(0));
5740 if (rb_str_index(str, indx, 0) != -1)
5746 long beg,
len = str_strlen(str, NULL);
5758 return str_substr(str, idx, 1, FALSE);
5775rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5779 return rb_str_subpat(str, argv[0], argv[1]);
5782 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5786 return rb_str_aref(str, argv[0]);
5792 char *ptr = RSTRING_PTR(str);
5793 long olen = RSTRING_LEN(str), nlen;
5795 str_modifiable(str);
5796 if (
len > olen)
len = olen;
5798 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5800 size_t old_capa =
RSTRING(str)->as.heap.aux.capa + TERM_LEN(str);
5801 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5803 ptr =
RSTRING(str)->as.embed.ary;
5804 memmove(ptr, oldptr +
len, nlen);
5805 if (fl == STR_NOEMBED) {
5806 SIZED_FREE_N(oldptr, old_capa);
5810 if (!STR_SHARED_P(str)) {
5812 rb_enc_cr_str_exact_copy(shared, str);
5817 STR_SET_LEN(str, nlen);
5819 if (!SHARABLE_MIDDLE_SUBSTRING) {
5820 TERM_FILL(ptr + nlen, TERM_LEN(str));
5827rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5833 if (beg == 0 && vlen == 0) {
5838 str_modify_keep_cr(str);
5842 RESIZE_CAPA(str, slen + vlen -
len);
5843 sptr = RSTRING_PTR(str);
5847 cr = rb_enc_str_coderange(val);
5852 memmove(sptr + beg + vlen,
5854 slen - (beg +
len));
5856 if (vlen < beg &&
len < 0) {
5860 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5863 STR_SET_LEN(str, slen);
5864 TERM_FILL(&sptr[slen], TERM_LEN(str));
5871 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5880 int singlebyte = single_byte_optimizable(str);
5886 enc = rb_enc_check(str, val);
5887 slen = str_strlen(str, enc);
5889 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5898 if (
len > slen - beg) {
5901 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5906 beg = p - RSTRING_PTR(str);
5908 rb_str_update_0(str, beg,
len, val);
5909 rb_enc_associate(str, enc);
5920 long start, end,
len;
5930 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5934 nth += regs->num_regs;
5944 enc = rb_enc_check_str(str, val);
5945 rb_str_update_0(str, start,
len, val);
5946 rb_enc_associate(str, enc);
5954 switch (
TYPE(indx)) {
5956 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5960 beg = rb_str_index(str, indx, 0);
5999rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
6003 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
6011 return rb_str_aset(str, argv[0], argv[1]);
6063rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6071 str_modify_keep_cr(str);
6079 if ((nth += regs->num_regs) <= 0)
return Qnil;
6081 else if (nth >= regs->num_regs)
return Qnil;
6083 len = END(nth) - beg;
6086 else if (argc == 2) {
6095 beg = p - RSTRING_PTR(str);
6099 beg = rb_str_index(str, indx, 0);
6100 if (beg == -1)
return Qnil;
6101 len = RSTRING_LEN(indx);
6113 beg = p - RSTRING_PTR(str);
6122 beg = p - RSTRING_PTR(str);
6126 rb_enc_cr_str_copy_for_substr(result, str);
6134 char *sptr = RSTRING_PTR(str);
6135 long slen = RSTRING_LEN(str);
6136 if (beg +
len > slen)
6140 slen - (beg +
len));
6142 STR_SET_LEN(str, slen);
6143 TERM_FILL(&sptr[slen], TERM_LEN(str));
6154 switch (OBJ_BUILTIN_TYPE(pat)) {
6173get_pat_quoted(
VALUE pat,
int check)
6177 switch (OBJ_BUILTIN_TYPE(pat)) {
6191 if (check && is_broken_string(pat)) {
6198rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6201 pos = rb_str_byteindex(str, pat, pos);
6202 if (set_backref_str) {
6204 str = rb_str_new_frozen_String(str);
6205 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6207 *match = match_data;
6217 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
6222rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6224 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6242rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6257 hash = rb_check_hash_type(repl);
6264 pat = get_pat_quoted(argv[0], 1);
6266 str_modifiable(str);
6267 beg = rb_pat_search(pat, str, 0, 1);
6281 end0 = beg0 + RSTRING_LEN(pat);
6290 if (iter || !
NIL_P(hash)) {
6291 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6297 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6300 str_mod_check(str, p,
len);
6301 rb_check_frozen(str);
6307 enc = rb_enc_compatible(str, repl);
6310 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6314 rb_enc_inspect_name(str_enc),
6315 rb_enc_inspect_name(STR_ENC_GET(repl)));
6317 enc = STR_ENC_GET(repl);
6320 rb_enc_associate(str, enc);
6330 rlen = RSTRING_LEN(repl);
6331 len = RSTRING_LEN(str);
6333 RESIZE_CAPA(str,
len + rlen - plen);
6335 p = RSTRING_PTR(str);
6337 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6339 rp = RSTRING_PTR(repl);
6340 memmove(p + beg0, rp, rlen);
6342 STR_SET_LEN(str,
len);
6343 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6366 rb_str_sub_bang(argc, argv, str);
6371str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6374 long beg, beg0, end0;
6375 long offset, blen, slen,
len, last;
6376 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6378 int need_backref_str = -1;
6389 hash = rb_check_hash_type(repl);
6393 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6402 rb_error_arity(argc, 1, 2);
6405 pat = get_pat_quoted(argv[0], 1);
6406 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6409 if (bang)
return Qnil;
6414 blen = RSTRING_LEN(str) + 30;
6416 sp = RSTRING_PTR(str);
6417 slen = RSTRING_LEN(str);
6419 str_enc = STR_ENC_GET(str);
6420 rb_enc_associate(dest, str_enc);
6427 end0 = beg0 + RSTRING_LEN(pat);
6441 struct RString fake_str = {RBASIC_INIT};
6443 if (mode == FAST_MAP) {
6452 val = rb_hash_aref(hash, key);
6455 str_mod_check(str, sp, slen);
6460 else if (need_backref_str) {
6462 if (need_backref_str < 0) {
6463 need_backref_str = val != repl;
6470 len = beg0 - offset;
6484 if (RSTRING_LEN(str) <= end0)
break;
6485 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6487 offset = end0 +
len;
6489 cp = RSTRING_PTR(str) + offset;
6490 if (offset > RSTRING_LEN(str))
break;
6493 if (mode != FAST_MAP && mode != STR) {
6496 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6501 if (RSTRING_LEN(str) > offset) {
6504 rb_pat_search0(pat, str, last, 1, &match);
6506 str_shared_replace(str, dest);
6531rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6533 str_modify_keep_cr(str);
6534 return str_gsub(argc, argv, str, 1);
6584 return str_gsub(argc, argv, str, 0);
6604 str_modifiable(str);
6605 if (str == str2)
return str;
6609 return str_replace(str, str2);
6626rb_str_clear(
VALUE str)
6630 STR_SET_LEN(str, 0);
6631 RSTRING_PTR(str)[0] = 0;
6632 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6648rb_str_chr(
VALUE str)
6666 pos += RSTRING_LEN(str);
6667 if (pos < 0 || RSTRING_LEN(str) <= pos)
6670 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6690 long len = RSTRING_LEN(str);
6691 char *
ptr, *head, *left = 0;
6695 if (pos < -
len ||
len <= pos)
6702 char byte = (char)(
NUM2INT(w) & 0xFF);
6704 if (!str_independent(str))
6705 str_make_independent(str);
6706 enc = STR_ENC_GET(str);
6707 head = RSTRING_PTR(str);
6709 if (!STR_EMBED_P(str)) {
6716 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6724 width = rb_enc_precise_mbclen(left, head+
len, enc);
6726 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6742str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6744 long n = RSTRING_LEN(str);
6746 if (beg > n ||
len < 0)
return Qnil;
6749 if (beg < 0)
return Qnil;
6754 if (!empty)
return Qnil;
6758 VALUE str2 = str_subseq(str, beg,
len);
6760 str_enc_copy_direct(str2, str);
6762 if (RSTRING_LEN(str2) == 0) {
6763 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6797 long beg,
len = RSTRING_LEN(str);
6805 return str_byte_substr(str, beg,
len, TRUE);
6810 return str_byte_substr(str, idx, 1, FALSE);
6822rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6827 return str_byte_substr(str, beg,
len, TRUE);
6830 return str_byte_aref(str, argv[0]);
6834str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6836 long end, slen = RSTRING_LEN(str);
6839 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6848 if (*
len > slen - *beg) {
6852 str_ensure_byte_pos(str, *beg);
6853 str_ensure_byte_pos(str, end);
6867rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6869 long beg,
len, vbeg, vlen;
6874 if (!(argc == 2 || argc == 3 || argc == 5)) {
6875 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6879 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6880 rb_builtin_class_name(argv[0]));
6887 vlen = RSTRING_LEN(val);
6892 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6893 rb_builtin_class_name(argv[2]));
6905 vlen = RSTRING_LEN(val);
6913 str_check_beg_len(str, &beg, &
len);
6914 str_check_beg_len(val, &vbeg, &vlen);
6915 str_modify_keep_cr(str);
6918 rb_enc_associate(str, rb_enc_check(str, val));
6921 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6943rb_str_reverse(
VALUE str)
6950 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6951 enc = STR_ENC_GET(str);
6957 if (RSTRING_LEN(str) > 1) {
6958 if (single_byte_optimizable(str)) {
6965 int clen = rb_enc_fast_mbclen(s, e, enc);
6973 cr = rb_enc_asciicompat(enc) ?
6976 int clen = rb_enc_mbclen(s, e, enc);
6985 STR_SET_LEN(rev, RSTRING_LEN(str));
6986 str_enc_copy_direct(rev, str);
7008rb_str_reverse_bang(
VALUE str)
7010 if (RSTRING_LEN(str) > 1) {
7011 if (single_byte_optimizable(str)) {
7014 str_modify_keep_cr(str);
7015 s = RSTRING_PTR(str);
7024 str_shared_replace(str, rb_str_reverse(str));
7028 str_modify_keep_cr(str);
7057 i = rb_str_index(str, arg, 0);
7059 return RBOOL(i != -1);
7103 rb_raise(rb_eArgError,
"invalid radix %d", base);
7105 return rb_str_to_inum(str, base, FALSE);
7130rb_str_to_f(
VALUE str)
7147rb_str_to_s(
VALUE str)
7159 char s[RUBY_MAX_CHAR_LEN];
7160 int n = rb_enc_codelen(c, enc);
7162 rb_enc_mbcput(c, s, enc);
/*
 * Size limit passed to snprintf() when formatting a single escaped
 * character ("\\xXX", "\\uXXXX", "\\u{...}", "\\x{...}").  The scratch
 * buffers that receive the output are declared with CHAR_ESC_LEN + 1 bytes.
 */
7167#define CHAR_ESC_LEN 13
7170rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7172 char buf[CHAR_ESC_LEN + 1];
7180 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7182 else if (c < 0x10000) {
7183 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7186 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7191 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7194 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7197 l = (int)strlen(buf);
7203ruby_escaped_char(
int c)
7206 case '\0':
return "\\0";
7207 case '\n':
return "\\n";
7208 case '\r':
return "\\r";
7209 case '\t':
return "\\t";
7210 case '\f':
return "\\f";
7211 case '\013':
return "\\v";
7212 case '\010':
return "\\b";
7213 case '\007':
return "\\a";
7214 case '\033':
return "\\e";
7215 case '\x7f':
return "\\c?";
7221rb_str_escape(
VALUE str)
7225 const char *p = RSTRING_PTR(str);
7227 const char *prev = p;
7228 char buf[CHAR_ESC_LEN + 1];
7230 int unicode_p = rb_enc_unicode_p(enc);
7231 int asciicompat = rb_enc_asciicompat(enc);
7236 int n = rb_enc_precise_mbclen(p, pend, enc);
7238 if (p > prev) str_buf_cat(result, prev, p - prev);
7239 n = rb_enc_mbminlen(enc);
7241 n = (int)(pend - p);
7243 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7244 str_buf_cat(result, buf, strlen(buf));
7250 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7252 cc = ruby_escaped_char(c);
7254 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7255 str_buf_cat(result, cc, strlen(cc));
7258 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7261 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7262 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7266 if (p > prev) str_buf_cat(result, prev, p - prev);
7285 const char *p, *pend, *prev;
7286 char buf[CHAR_ESC_LEN + 1];
7288 rb_encoding *resenc = rb_default_internal_encoding();
7289 int unicode_p = rb_enc_unicode_p(enc);
7290 int asciicompat = rb_enc_asciicompat(enc);
7292 if (resenc == NULL) resenc = rb_default_external_encoding();
7293 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7294 rb_enc_associate(result, resenc);
7295 str_buf_cat2(result,
"\"");
7303 n = rb_enc_precise_mbclen(p, pend, enc);
7305 if (p > prev) str_buf_cat(result, prev, p - prev);
7306 n = rb_enc_mbminlen(enc);
7308 n = (int)(pend - p);
7310 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7311 str_buf_cat(result, buf, strlen(buf));
7317 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7319 if ((asciicompat || unicode_p) &&
7320 (c ==
'"'|| c ==
'\\' ||
7325 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7326 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7327 str_buf_cat2(result,
"\\");
7328 if (asciicompat || enc == resenc) {
7334 case '\n': cc =
'n';
break;
7335 case '\r': cc =
'r';
break;
7336 case '\t': cc =
't';
break;
7337 case '\f': cc =
'f';
break;
7338 case '\013': cc =
'v';
break;
7339 case '\010': cc =
'b';
break;
7340 case '\007': cc =
'a';
break;
7341 case 033: cc =
'e';
break;
7342 default: cc = 0;
break;
7345 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7348 str_buf_cat(result, buf, 2);
7361 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7365 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7366 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7371 if (p > prev) str_buf_cat(result, prev, p - prev);
7372 str_buf_cat2(result,
"\"");
/*
 * True when p has not reached e and the byte at p is '$', '@' or '{'.
 * The dump code evaluates it on the byte following a '#' to decide whether
 * an extra backslash (and therefore an extra output byte) is needed.
 * Note: both arguments are evaluated more than once.
 */
7377#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7390 int encidx = rb_enc_get_index(str);
7393 const char *p, *pend;
7396 int u8 = (encidx == rb_utf8_encindex());
7397 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7400 if (!rb_enc_asciicompat(enc)) {
7402 len += strlen(enc->name);
7405 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7408 unsigned char c = *p++;
7411 case '"':
case '\\':
7412 case '\n':
case '\r':
7413 case '\t':
case '\f':
7414 case '\013':
case '\010':
case '\007':
case '\033':
7419 clen = IS_EVSTR(p, pend) ? 2 : 1;
7427 if (u8 && c > 0x7F) {
7428 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7430 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7433 else if (cc <= 0xFFFFF)
7446 if (clen > LONG_MAX -
len) {
7453 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7454 q = RSTRING_PTR(result); qend = q +
len + 1;
7458 unsigned char c = *p++;
7460 if (c ==
'"' || c ==
'\\') {
7464 else if (c ==
'#') {
7465 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7468 else if (c ==
'\n') {
7472 else if (c ==
'\r') {
7476 else if (c ==
'\t') {
7480 else if (c ==
'\f') {
7484 else if (c ==
'\013') {
7488 else if (c ==
'\010') {
7492 else if (c ==
'\007') {
7496 else if (c ==
'\033') {
7506 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7508 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7511 snprintf(q, qend-q,
"u%04X", cc);
7513 snprintf(q, qend-q,
"u{%X}", cc);
7518 snprintf(q, qend-q,
"x%02X", c);
7524 if (!rb_enc_asciicompat(enc)) {
7525 snprintf(q, qend-q, nonascii_suffix, enc->name);
7526 encidx = rb_ascii8bit_encindex();
7529 rb_enc_associate_index(result, encidx);
7535unescape_ascii(
unsigned int c)
7559undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7561 const char *s = *ss;
7565 unsigned char buf[6];
7583 *buf = unescape_ascii(*s);
7595 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7596 if (*penc != enc_utf8) {
7598 rb_enc_associate(undumped, enc_utf8);
7615 if (hexlen == 0 || hexlen > 6) {
7621 if (0xd800 <= c && c <= 0xdfff) {
7624 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7634 if (0xd800 <= c && c <= 0xdfff) {
7637 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
/*
 * Forward declaration: str_undump() below calls this and compares the
 * result against Qfalse before the definition has been seen.
 */
7667static VALUE rb_str_is_ascii_only_p(
VALUE str);
7679str_undump(
VALUE str)
7681 const char *s = RSTRING_PTR(str);
7684 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7686 bool binary =
false;
7690 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7693 if (!str_null_check(str, &w)) {
7696 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7697 if (*s !=
'"')
goto invalid_format;
7715 static const char force_encoding_suffix[] =
".force_encoding(\"";
7716 static const char dup_suffix[] =
".dup";
7717 const char *encname;
7722 size =
sizeof(dup_suffix) - 1;
7723 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7725 size =
sizeof(force_encoding_suffix) - 1;
7726 if (s_end - s <= size)
goto invalid_format;
7727 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7731 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7735 s = memchr(s,
'"', s_end-s);
7737 if (!s)
goto invalid_format;
7738 if (s_end - s != 2)
goto invalid_format;
7739 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7741 encidx = rb_enc_find_index2(encname, (
long)size);
7745 rb_enc_associate_index(undumped, encidx);
7755 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7766 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7772 if (rb_enc_dummy_p(enc)) {
7779str_true_enc(
VALUE str)
7782 rb_str_check_dummy_enc(enc);
7786static OnigCaseFoldType
7787check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7792 rb_raise(rb_eArgError,
"too many options");
7793 if (argv[0]==sym_turkic) {
7794 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7796 if (argv[1]==sym_lithuanian)
7797 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7799 rb_raise(rb_eArgError,
"invalid second option");
7802 else if (argv[0]==sym_lithuanian) {
7803 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7805 if (argv[1]==sym_turkic)
7806 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7808 rb_raise(rb_eArgError,
"invalid second option");
7812 rb_raise(rb_eArgError,
"too many options");
7813 else if (argv[0]==sym_ascii)
7814 flags |= ONIGENC_CASE_ASCII_ONLY;
7815 else if (argv[0]==sym_fold) {
7816 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7817 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7819 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7822 rb_raise(rb_eArgError,
"invalid option");
7829 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/*
 * Constant slack (in bytes) added to the capacity computed for each
 * case-mapping buffer, on top of the term derived from the remaining
 * source length and the buffer count.
 */
7835#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7836#ifndef CASEMAP_DEBUG
7837# define CASEMAP_DEBUG 0
7845 OnigUChar space[FLEX_ARY_LEN];
7849mapping_buffer_free(
void *p)
7853 while (current_buffer) {
7854 previous_buffer = current_buffer;
7855 current_buffer = current_buffer->next;
7856 ruby_xfree_sized(previous_buffer, offsetof(
mapping_buffer, space) + previous_buffer->capa);
7862 {0, mapping_buffer_free,},
7871 const OnigUChar *source_current, *source_end;
7872 int target_length = 0;
7873 VALUE buffer_anchor;
7876 size_t buffer_count = 0;
7877 int buffer_length_or_invalid;
7879 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7881 source_current = (OnigUChar*)RSTRING_PTR(source);
7886 while (source_current < source_end) {
7888 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7889 if (CASEMAP_DEBUG) {
7890 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7893 *pre_buffer = current_buffer;
7894 pre_buffer = ¤t_buffer->next;
7895 current_buffer->next = NULL;
7896 current_buffer->capa =
capa;
7897 buffer_length_or_invalid = enc->case_map(flags,
7898 &source_current, source_end,
7899 current_buffer->space,
7900 current_buffer->space+current_buffer->capa,
7902 if (buffer_length_or_invalid < 0) {
7903 current_buffer =
DATA_PTR(buffer_anchor);
7905 mapping_buffer_free(current_buffer);
7906 rb_raise(rb_eArgError,
"input string invalid");
7908 target_length += current_buffer->used = buffer_length_or_invalid;
7910 if (CASEMAP_DEBUG) {
7911 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7914 if (buffer_count==1) {
7915 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7918 char *target_current;
7921 target_current = RSTRING_PTR(target);
7922 current_buffer =
DATA_PTR(buffer_anchor);
7923 while (current_buffer) {
7924 memcpy(target_current, current_buffer->space, current_buffer->used);
7925 target_current += current_buffer->used;
7926 current_buffer = current_buffer->next;
7929 current_buffer =
DATA_PTR(buffer_anchor);
7931 mapping_buffer_free(current_buffer);
7936 str_enc_copy_direct(target, source);
7945 const OnigUChar *source_current, *source_end;
7946 OnigUChar *target_current, *target_end;
7947 long old_length = RSTRING_LEN(source);
7948 int length_or_invalid;
7950 if (old_length == 0)
return Qnil;
7952 source_current = (OnigUChar*)RSTRING_PTR(source);
7954 if (source == target) {
7955 target_current = (OnigUChar*)source_current;
7956 target_end = (OnigUChar*)source_end;
7959 target_current = (OnigUChar*)RSTRING_PTR(target);
7963 length_or_invalid = onigenc_ascii_only_case_map(flags,
7964 &source_current, source_end,
7965 target_current, target_end, enc);
7966 if (length_or_invalid < 0)
7967 rb_raise(rb_eArgError,
"input string invalid");
7968 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7969 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7970 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7971 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7972 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7975 str_enc_copy(target, source);
7981upcase_single(
VALUE str)
7983 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7984 bool modified =
false;
7987 unsigned int c = *(
unsigned char*)s;
7989 if (
'a' <= c && c <=
'z') {
7990 *s =
'A' + (c -
'a');
8011rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
8014 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8016 flags = check_case_options(argc, argv, flags);
8017 str_modify_keep_cr(str);
8018 enc = str_true_enc(str);
8019 if (case_option_single_p(flags, enc, str)) {
8020 if (upcase_single(str))
8021 flags |= ONIGENC_CASE_MODIFIED;
8023 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8024 rb_str_ascii_casemap(str, str, &flags, enc);
8026 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8028 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8041rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
8044 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8047 flags = check_case_options(argc, argv, flags);
8048 enc = str_true_enc(str);
8049 if (case_option_single_p(flags, enc, str)) {
8050 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8051 str_enc_copy_direct(ret, str);
8054 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8056 rb_str_ascii_casemap(str, ret, &flags, enc);
8059 ret = rb_str_casemap(str, &flags, enc);
8066downcase_single(
VALUE str)
8068 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8069 bool modified =
false;
8072 unsigned int c = *(
unsigned char*)s;
8074 if (
'A' <= c && c <=
'Z') {
8075 *s =
'a' + (c -
'A');
8097rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8100 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8102 flags = check_case_options(argc, argv, flags);
8103 str_modify_keep_cr(str);
8104 enc = str_true_enc(str);
8105 if (case_option_single_p(flags, enc, str)) {
8106 if (downcase_single(str))
8107 flags |= ONIGENC_CASE_MODIFIED;
8109 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8110 rb_str_ascii_casemap(str, str, &flags, enc);
8112 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8114 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8128rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8131 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8134 flags = check_case_options(argc, argv, flags);
8135 enc = str_true_enc(str);
8136 if (case_option_single_p(flags, enc, str)) {
8137 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8138 str_enc_copy_direct(ret, str);
8139 downcase_single(ret);
8141 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8143 rb_str_ascii_casemap(str, ret, &flags, enc);
8146 ret = rb_str_casemap(str, &flags, enc);
8166rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8169 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8171 flags = check_case_options(argc, argv, flags);
8172 str_modify_keep_cr(str);
8173 enc = str_true_enc(str);
8174 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8175 if (flags&ONIGENC_CASE_ASCII_ONLY)
8176 rb_str_ascii_casemap(str, str, &flags, enc);
8178 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8180 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8194rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8197 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8200 flags = check_case_options(argc, argv, flags);
8201 enc = str_true_enc(str);
8202 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8203 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8205 rb_str_ascii_casemap(str, ret, &flags, enc);
8208 ret = rb_str_casemap(str, &flags, enc);
8227rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8230 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8232 flags = check_case_options(argc, argv, flags);
8233 str_modify_keep_cr(str);
8234 enc = str_true_enc(str);
8235 if (flags&ONIGENC_CASE_ASCII_ONLY)
8236 rb_str_ascii_casemap(str, str, &flags, enc);
8238 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8240 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8254rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8257 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8260 flags = check_case_options(argc, argv, flags);
8261 enc = str_true_enc(str);
8262 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8263 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8265 rb_str_ascii_casemap(str, ret, &flags, enc);
8268 ret = rb_str_casemap(str, &flags, enc);
/* Shorthand for a pointer to unsigned bytes. */
8273typedef unsigned char *USTR;
8277 unsigned int now, max;
8289 if (t->p == t->pend)
return -1;
8290 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8293 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8295 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8297 if (t->p < t->pend) {
8298 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8301 if (t->now < 0x80 && c < 0x80) {
8302 rb_raise(rb_eArgError,
8303 "invalid range \"%c-%c\" in string transliteration",
8307 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8311 else if (t->now < c) {
8320 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8321 if (t->now == t->max) {
8326 if (t->now < t->max) {
8342 const unsigned int errc = -1;
8343 unsigned int trans[256];
8345 struct tr trsrc, trrepl;
8347 unsigned int c, c0, last = 0;
8348 int modify = 0, i, l;
8349 unsigned char *s, *send;
8351 int singlebyte = single_byte_optimizable(str);
/*
 * Updates the local variable `cr`: if it is currently ENC_CODERANGE_7BIT
 * and c is not ASCII (per rb_isascii), it becomes ENC_CODERANGE_VALID;
 * otherwise `cr` is left untouched.  The (void) cast discards the value of
 * the conditional expression so the macro is used as a statement.
 */
8355#define CHECK_IF_ASCII(c) \
8356 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8357 (cr = ENC_CODERANGE_VALID) : 0)
8361 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8362 if (RSTRING_LEN(repl) == 0) {
8363 return rb_str_delete_bang(1, &src, str);
8367 e1 = rb_enc_check(str, src);
8368 e2 = rb_enc_check(str, repl);
8373 enc = rb_enc_check(src, repl);
8375 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8376 if (RSTRING_LEN(src) > 1 &&
8377 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8378 trsrc.p + l < trsrc.pend) {
8382 trrepl.p = RSTRING_PTR(repl);
8383 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8384 trsrc.gen = trrepl.gen = 0;
8385 trsrc.now = trrepl.now = 0;
8386 trsrc.max = trrepl.max = 0;
8389 for (i=0; i<256; i++) {
8392 while ((c = trnext(&trsrc, enc)) != errc) {
8397 if (!hash) hash = rb_hash_new();
8401 while ((c = trnext(&trrepl, enc)) != errc)
8404 for (i=0; i<256; i++) {
8405 if (trans[i] != errc) {
8413 for (i=0; i<256; i++) {
8416 while ((c = trnext(&trsrc, enc)) != errc) {
8417 r = trnext(&trrepl, enc);
8418 if (r == errc) r = trrepl.now;
8421 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8424 if (!hash) hash = rb_hash_new();
8432 str_modify_keep_cr(str);
8433 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8434 termlen = rb_enc_mbminlen(enc);
8437 long offset, max = RSTRING_LEN(str);
8438 unsigned int save = -1;
8439 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8444 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8446 SIZED_FREE_N(buf, max + termlen);
8447 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8450 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8452 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8461 if (cflag) c = last;
8464 else if (cflag) c = errc;
8470 if (c != (
unsigned int)-1) {
8476 tlen = rb_enc_codelen(c, enc);
8482 if (enc != e1) may_modify = 1;
8484 if ((offset = t - buf) + tlen > max) {
8485 size_t MAYBE_UNUSED(old) = max + termlen;
8486 max = offset + tlen + (send - s);
8487 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8490 rb_enc_mbcput(c, t, enc);
8491 if (may_modify && memcmp(s, t, tlen) != 0) {
8497 if (!STR_EMBED_P(str)) {
8498 SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8500 TERM_FILL((
char *)t, termlen);
8501 RSTRING(str)->as.heap.ptr = (
char *)buf;
8502 STR_SET_LEN(str, t - buf);
8503 STR_SET_NOEMBED(str);
8504 RSTRING(str)->as.heap.aux.capa = max;
8508 c = (
unsigned char)*s;
8509 if (trans[c] != errc) {
8526 long offset, max = (long)((send - s) * 1.2);
8527 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8532 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8534 SIZED_FREE_N(buf, max + termlen);
8535 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8538 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8540 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8548 if (cflag) c = last;
8551 else if (cflag) c = errc;
8555 c = cflag ? last : errc;
8558 tlen = rb_enc_codelen(c, enc);
8563 if (enc != e1) may_modify = 1;
8565 if ((offset = t - buf) + tlen > max) {
8566 size_t MAYBE_UNUSED(old) = max + termlen;
8567 max = offset + tlen + (long)((send - s) * 1.2);
8568 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8572 rb_enc_mbcput(c, t, enc);
8573 if (may_modify && memcmp(s, t, tlen) != 0) {
8581 if (!STR_EMBED_P(str)) {
8582 SIZED_FREE_N(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8584 TERM_FILL((
char *)t, termlen);
8585 RSTRING(str)->as.heap.ptr = (
char *)buf;
8586 STR_SET_LEN(str, t - buf);
8587 STR_SET_NOEMBED(str);
8588 RSTRING(str)->as.heap.aux.capa = max;
8594 rb_enc_associate(str, enc);
8616 return tr_trans(str, src, repl, 0);
8661 tr_trans(str, src, repl, 0);
/*
 * TR_TABLE_MAX is the number of distinct unsigned char values; code points
 * below it are looked up directly in the per-byte table.
 *
 * TR_TABLE_SIZE reserves one extra slot at index TR_TABLE_MAX.
 * tr_setup_table() stores its cflag there, and tr_find() falls back to that
 * slot for code points that no earlier branch has decided.
 */
8665#define TR_TABLE_MAX (UCHAR_MAX+1)
8666#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8668tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8671 const unsigned int errc = -1;
8672 char buf[TR_TABLE_MAX];
8675 VALUE table = 0, ptable = 0;
8676 int i, l, cflag = 0;
8678 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8679 tr.gen =
tr.now =
tr.max = 0;
8681 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8686 for (i=0; i<TR_TABLE_MAX; i++) {
8689 stable[TR_TABLE_MAX] = cflag;
8691 else if (stable[TR_TABLE_MAX] && !cflag) {
8692 stable[TR_TABLE_MAX] = 0;
8694 for (i=0; i<TR_TABLE_MAX; i++) {
8698 while ((c = trnext(&
tr, enc)) != errc) {
8699 if (c < TR_TABLE_MAX) {
8700 buf[(
unsigned char)c] = !cflag;
8705 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8708 table = ptable ? ptable : rb_hash_new();
8712 table = rb_hash_new();
8717 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8718 rb_hash_aset(table, key,
Qtrue);
8722 for (i=0; i<TR_TABLE_MAX; i++) {
8723 stable[i] = stable[i] && buf[i];
8725 if (!table && !cflag) {
8732tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8734 if (c < TR_TABLE_MAX) {
8735 return table[c] != 0;
8741 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8742 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8746 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8749 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8764rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8766 char squeez[TR_TABLE_SIZE];
8769 VALUE del = 0, nodel = 0;
8771 int i, ascompat, cr;
8773 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8775 for (i=0; i<argc; i++) {
8779 enc = rb_enc_check(str, s);
8780 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8783 str_modify_keep_cr(str);
8784 ascompat = rb_enc_asciicompat(enc);
8785 s = t = RSTRING_PTR(str);
8792 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8803 c = rb_enc_codepoint_len(s, send, &clen, enc);
8805 if (tr_find(c, squeez, del, nodel)) {
8809 if (t != s) rb_enc_mbcput(c, t, enc);
8816 TERM_FILL(t, TERM_LEN(str));
8817 STR_SET_LEN(str, t - RSTRING_PTR(str));
8820 if (modify)
return str;
8834rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8837 rb_str_delete_bang(argc, argv, str);
8855rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8857 char squeez[TR_TABLE_SIZE];
8859 VALUE del = 0, nodel = 0;
8860 unsigned char *s, *send, *t;
8862 int ascompat, singlebyte = single_byte_optimizable(str);
8866 enc = STR_ENC_GET(str);
8869 for (i=0; i<argc; i++) {
8873 enc = rb_enc_check(str, s);
8874 if (singlebyte && !single_byte_optimizable(s))
8876 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8880 str_modify_keep_cr(str);
8881 s = t = (
unsigned char *)RSTRING_PTR(str);
8882 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8885 ascompat = rb_enc_asciicompat(enc);
8889 unsigned int c = *s++;
8890 if (c != save || (argc > 0 && !squeez[c])) {
8900 if (ascompat && (c = *s) < 0x80) {
8901 if (c != save || (argc > 0 && !squeez[c])) {
8907 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8909 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8910 if (t != s) rb_enc_mbcput(c, t, enc);
8919 TERM_FILL((
char *)t, TERM_LEN(str));
8920 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8921 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8925 if (modify)
return str;
8939rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8942 rb_str_squeeze_bang(argc, argv, str);
8962 return tr_trans(str, src, repl, 1);
8990 tr_trans(str, src, repl, 1);
9003rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9005 char table[TR_TABLE_SIZE];
9007 VALUE del = 0, nodel = 0, tstr;
9017 enc = rb_enc_check(str, tstr);
9020 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9021 (ptstr = RSTRING_PTR(tstr),
9022 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9023 !is_broken_string(str)) {
9025 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9027 s = RSTRING_PTR(str);
9028 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9031 if (*(
unsigned char*)s++ == c) n++;
9037 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9038 for (i=1; i<argc; i++) {
9041 enc = rb_enc_check(str, tstr);
9042 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9045 s = RSTRING_PTR(str);
9046 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9048 ascompat = rb_enc_asciicompat(enc);
9052 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9060 c = rb_enc_codepoint_len(s, send, &clen, enc);
9061 if (tr_find(c, table, del, nodel)) {
9072rb_fs_check(
VALUE val)
9076 if (
NIL_P(val))
return 0;
/*
 * Byte-indexed lookup table used by ascii_isspace().  Entries are 1 for
 * indices 9..13 (0x09-0x0D) and for index 32 (0x20); every other one of the
 * 256 entries is 0.
 */
9081static const char isspacetable[256] = {
9082 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9083 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9084 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9085 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9086 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9087 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9088 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9089 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9090 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9091 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9092 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9093 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9094 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9095 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9096 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9097 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Table-driven whitespace test: looks `c` up in isspacetable.  The argument
 * is cast to unsigned char first, so a negative `char` value cannot produce
 * a negative index into the 256-entry table.
 */
9100#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9103split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9105 if (empty_count >= 0 &&
len == 0) {
9106 return empty_count + 1;
9108 if (empty_count > 0) {
9113 }
while (--empty_count > 0);
9117 rb_yield(str_new_empty_String(str));
9118 }
while (--empty_count > 0);
9132 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9136literal_split_pattern(
VALUE spat, split_type_t default_type)
9144 return SPLIT_TYPE_CHARS;
9146 else if (rb_enc_asciicompat(enc)) {
9147 if (
len == 1 && ptr[0] ==
' ') {
9148 return SPLIT_TYPE_AWK;
9153 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9154 return SPLIT_TYPE_AWK;
9157 return default_type;
9170rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9175 split_type_t split_type;
9176 long beg, end, i = 0, empty_count = -1;
9181 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9183 if (lim <= 0) limit =
Qnil;
9184 else if (lim == 1) {
9185 if (RSTRING_LEN(str) == 0)
9196 if (
NIL_P(limit) && !lim) empty_count = 0;
9198 enc = STR_ENC_GET(str);
9199 split_type = SPLIT_TYPE_REGEXP;
9201 spat = get_pat_quoted(spat, 0);
9203 else if (
NIL_P(spat = rb_fs)) {
9204 split_type = SPLIT_TYPE_AWK;
9206 else if (!(spat = rb_fs_check(spat))) {
9207 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9212 if (split_type != SPLIT_TYPE_AWK) {
9217 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9218 if (split_type == SPLIT_TYPE_AWK) {
9220 split_type = SPLIT_TYPE_STRING;
9225 mustnot_broken(spat);
9226 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Emits one piece of the split: passes the substring of `str` described by
 * (beg, len) to split_string() and stores its return value back into
 * `empty_count`, then calls str_mod_check() with the start pointer and length
 * captured before splitting began.
 *
 * Relies on `result`, `str`, `empty_count`, `str_start` and `str_len` being
 * in scope at the expansion site.  split_string() can call rb_yield(), so
 * the re-check after each piece is presumably there to detect a block that
 * modified `str` -- NOTE(review): confirm against str_mod_check().
 */
9234#define SPLIT_STR(beg, len) ( \
9235 empty_count = split_string(result, str, beg, len, empty_count), \
9236 str_mod_check(str, str_start, str_len))
9239 char *ptr = RSTRING_PTR(str);
9240 char *
const str_start = ptr;
9241 const long str_len = RSTRING_LEN(str);
9242 char *
const eptr = str_start + str_len;
9243 if (split_type == SPLIT_TYPE_AWK) {
9250 if (is_ascii_string(str)) {
9251 while (ptr < eptr) {
9252 c = (
unsigned char)*ptr++;
9254 if (ascii_isspace(c)) {
9260 if (!
NIL_P(limit) && lim <= i)
break;
9263 else if (ascii_isspace(c)) {
9264 SPLIT_STR(beg, end-beg);
9267 if (!
NIL_P(limit)) ++i;
9275 while (ptr < eptr) {
9278 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9287 if (!
NIL_P(limit) && lim <= i)
break;
9291 SPLIT_STR(beg, end-beg);
9294 if (!
NIL_P(limit)) ++i;
9302 else if (split_type == SPLIT_TYPE_STRING) {
9303 char *substr_start = ptr;
9304 char *sptr = RSTRING_PTR(spat);
9305 long slen = RSTRING_LEN(spat);
9308 mustnot_broken(str);
9309 enc = rb_enc_check(str, spat);
9310 while (ptr < eptr &&
9311 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9314 if (t != ptr + end) {
9318 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9319 str_mod_check(spat, sptr, slen);
9322 if (!
NIL_P(limit) && lim <= ++i)
break;
9324 beg = ptr - str_start;
9326 else if (split_type == SPLIT_TYPE_CHARS) {
9330 mustnot_broken(str);
9331 enc = rb_enc_get(str);
9332 while (ptr < eptr &&
9333 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9334 SPLIT_STR(ptr - str_start, n);
9336 if (!
NIL_P(limit) && lim <= ++i)
break;
9338 beg = ptr - str_start;
9342 long len = RSTRING_LEN(str);
9350 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9355 if (start == end && BEG(0) == END(0)) {
9360 else if (last_null == 1) {
9361 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9368 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9374 SPLIT_STR(beg, end-beg);
9375 beg = start = END(0);
9379 for (idx=1; idx < regs->num_regs; idx++) {
9380 if (BEG(idx) == -1)
continue;
9381 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9383 if (!
NIL_P(limit) && lim <= ++i)
break;
9385 if (match) rb_match_unbusy(match);
9387 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9388 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9391 return result ? result : str;
9401 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to a new array created with rb_ary_new_capa(size) when no block
 * is given, and to 0 when a block is given.  The `m` argument (callers pass
 * a method-name string such as "lines" or "bytes") is not used in the
 * expansion.
 */
9404#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
/*
 * Shorthand for enumerator_element(ary, e).  Callers test its result and run
 * str_mod_check() when it is true.
 * NOTE(review): presumably it appends `e` to `ary` when an array was
 * requested and yields `e` otherwise -- confirm against
 * enumerator_element().
 */
9419#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9422chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9424 const char *prev = rb_enc_prev_char(p, e, e, enc);
9427 prev = rb_enc_prev_char(p, e, e, enc);
9428 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9440 RSTRING_LEN(rs) != 1 ||
9441 RSTRING_PTR(rs)[0] !=
'\n')) {
/* From here on, every use of `rb_rs` in this file expands to a get_rs() call. */
9447#define rb_rs get_rs()
9454 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9455 long pos,
len, rslen;
9461 static ID keywords[1];
9466 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9470 if (!ENUM_ELEM(ary, str)) {
9478 if (!RSTRING_LEN(str))
goto end;
9480 ptr = subptr = RSTRING_PTR(str);
9482 len = RSTRING_LEN(str);
9484 rslen = RSTRING_LEN(rs);
9487 enc = rb_enc_get(str);
9489 enc = rb_enc_check(str, rs);
9494 const char *eol = NULL;
9496 while (subend < pend) {
9497 long chomp_rslen = 0;
9499 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9501 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9503 if (eol == subend)
break;
9507 chomp_rslen = -rslen;
9511 if (!subptr) subptr = subend;
9515 }
while (subend < pend);
9517 if (rslen == 0) chomp_rslen = 0;
9519 subend - subptr + (chomp ? chomp_rslen : rslen));
9520 if (ENUM_ELEM(ary, line)) {
9521 str_mod_check(str, ptr,
len);
9523 subptr = eol = NULL;
9528 rsptr = RSTRING_PTR(rs);
9529 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9538 rsptr = RSTRING_PTR(rs);
9539 rslen = RSTRING_LEN(rs);
9542 while (subptr < pend) {
9543 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9547 if (hit != adjusted) {
9551 subend = hit += rslen;
9554 subend = chomp_newline(subptr, subend, enc);
9561 if (ENUM_ELEM(ary, line)) {
9562 str_mod_check(str, ptr,
len);
9567 if (subptr != pend) {
9570 pend = chomp_newline(subptr, pend, enc);
9572 else if (pend - subptr >= rslen &&
9573 memcmp(pend - rslen, rsptr, rslen) == 0) {
9578 ENUM_ELEM(ary, line);
9599rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9602 return rb_str_enumerate_lines(argc, argv, str, 0);
9657rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9659 VALUE ary = WANTARRAY(
"lines", 0);
9660 return rb_str_enumerate_lines(argc, argv, str, ary);
9674 for (i=0; i<RSTRING_LEN(str); i++) {
9675 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9693rb_str_each_byte(
VALUE str)
9696 return rb_str_enumerate_bytes(str, 0);
9708rb_str_bytes(
VALUE str)
9710 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9711 return rb_str_enumerate_bytes(str, ary);
9729 ptr = RSTRING_PTR(str);
9730 len = RSTRING_LEN(str);
9731 enc = rb_enc_get(str);
9734 for (i = 0; i <
len; i += n) {
9735 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9740 for (i = 0; i <
len; i += n) {
9741 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9762rb_str_each_char(
VALUE str)
9765 return rb_str_enumerate_chars(str, 0);
9777rb_str_chars(
VALUE str)
9780 return rb_str_enumerate_chars(str, ary);
9784rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9789 const char *ptr, *end;
9792 if (single_byte_optimizable(str))
9793 return rb_str_enumerate_bytes(str, ary);
9796 ptr = RSTRING_PTR(str);
9798 enc = STR_ENC_GET(str);
9801 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9822rb_str_each_codepoint(
VALUE str)
9825 return rb_str_enumerate_codepoints(str, 0);
9837rb_str_codepoints(
VALUE str)
9840 return rb_str_enumerate_codepoints(str, ary);
9846 int encidx = rb_enc_to_index(enc);
9848 const OnigUChar source_ascii[] =
"\\X";
9849 const OnigUChar *source = source_ascii;
9850 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Helpers that expand a code unit `x` into a comma-separated list of
 * OnigUChar-cast values, suitable for an array initializer (CASE_UTF uses
 * them to spell the pattern characters '\\' and 'X'):
 *   CHARS_16BE(x): (x)>>8 first, then x
 *   CHARS_16LE(x): x first, then (x)>>8
 *   CHARS_32BE(x): CHARS_16BE of (x)>>16, then CHARS_16BE of x
 *   CHARS_32LE(x): CHARS_16LE of x, then CHARS_16LE of (x)>>16
 * This yields big-/little-endian byte order assuming OnigUChar is an 8-bit
 * type, so each cast keeps only the low byte -- TODO confirm.
 * `x` is evaluated more than once; pass side-effect-free expressions.
 */
9853#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9854#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9855#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9856#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9857#define CASE_UTF(e) \
9858 case ENCINDEX_UTF_##e: { \
9859 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9860 source = source_UTF_##e; \
9861 source_len = sizeof(source_UTF_##e); \
9864 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9872 regex_t *reg_grapheme_cluster;
/*
 * Compile the grapheme-cluster pattern for `enc`.  The first argument must be
 * the address of the local `reg_grapheme_cluster` pointer so onig_new() can
 * store the compiled regex there; `r` receives the status code that is passed
 * to onig_error_code_to_str() on failure.
 */
9874 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9875 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9877 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9878 onig_error_code_to_str(message, r, &einfo);
9879 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9882 return reg_grapheme_cluster;
9888 int encidx = rb_enc_to_index(enc);
9889 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9891 if (encidx == rb_utf8_encindex()) {
9892 if (!reg_grapheme_cluster_utf8) {
9893 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9896 return reg_grapheme_cluster_utf8;
9905 size_t grapheme_cluster_count = 0;
9907 const char *ptr, *end;
9909 if (!rb_enc_unicode_p(enc)) {
9913 bool cached_reg_grapheme_cluster =
true;
9914 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9915 if (!reg_grapheme_cluster) {
9916 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9917 cached_reg_grapheme_cluster =
false;
9920 ptr = RSTRING_PTR(str);
9924 OnigPosition
len = onig_match(reg_grapheme_cluster,
9925 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9926 (
const OnigUChar *)ptr, NULL, 0);
9927 if (
len <= 0)
break;
9928 grapheme_cluster_count++;
9932 if (!cached_reg_grapheme_cluster) {
9933 onig_free(reg_grapheme_cluster);
9936 return SIZET2NUM(grapheme_cluster_count);
9940rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9944 const char *ptr0, *ptr, *end;
9946 if (!rb_enc_unicode_p(enc)) {
9947 return rb_str_enumerate_chars(str, ary);
9952 bool cached_reg_grapheme_cluster =
true;
9953 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9954 if (!reg_grapheme_cluster) {
9955 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9956 cached_reg_grapheme_cluster =
false;
9959 ptr0 = ptr = RSTRING_PTR(str);
9963 OnigPosition
len = onig_match(reg_grapheme_cluster,
9964 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9965 (
const OnigUChar *)ptr, NULL, 0);
9966 if (
len <= 0)
break;
9971 if (!cached_reg_grapheme_cluster) {
9972 onig_free(reg_grapheme_cluster);
9992rb_str_each_grapheme_cluster(
VALUE str)
9995 return rb_str_enumerate_grapheme_clusters(str, 0);
10007rb_str_grapheme_clusters(
VALUE str)
10010 return rb_str_enumerate_grapheme_clusters(str, ary);
10014chopped_length(
VALUE str)
10017 const char *p, *p2, *beg, *end;
10019 beg = RSTRING_PTR(str);
10020 end = beg + RSTRING_LEN(str);
10021 if (beg >= end)
return 0;
10022 p = rb_enc_prev_char(beg, end, end, enc);
10024 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10025 p2 = rb_enc_prev_char(beg, p, end, enc);
10026 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10044rb_str_chop_bang(
VALUE str)
10046 str_modify_keep_cr(str);
10047 if (RSTRING_LEN(str) > 0) {
10049 len = chopped_length(str);
10050 STR_SET_LEN(str,
len);
10051 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10070rb_str_chop(
VALUE str)
10076smart_chomp(
VALUE str,
const char *e,
const char *p)
10079 if (rb_enc_mbminlen(enc) > 1) {
10084 pp = e - rb_enc_mbminlen(enc);
10087 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10095 if (--e > p && *(e-1) ==
'\r') {
10112 char *pp, *e, *rsptr;
10114 char *
const p = RSTRING_PTR(str);
10115 long len = RSTRING_LEN(str);
10117 if (
len == 0)
return 0;
10120 return smart_chomp(str, e, p);
10123 enc = rb_enc_get(str);
10126 if (rb_enc_mbminlen(enc) > 1) {
10131 pp -= rb_enc_mbminlen(enc);
10134 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10141 while (e > p && *(e-1) ==
'\n') {
10143 if (e > p && *(e-1) ==
'\r')
10149 if (rslen >
len)
return len;
10151 enc = rb_enc_get(rs);
10152 newline = rsptr[rslen-1];
10153 if (rslen == rb_enc_mbminlen(enc)) {
10155 if (newline ==
'\n')
10156 return smart_chomp(str, e, p);
10160 return smart_chomp(str, e, p);
10164 enc = rb_enc_check(str, rs);
10165 if (is_broken_string(rs)) {
10169 if (p[
len-1] == newline &&
10171 memcmp(rsptr, pp, rslen) == 0)) {
10172 if (at_char_boundary(p, pp, e, enc))
10173 return len - rslen;
10185chomp_rs(
int argc,
const VALUE *argv)
10189 VALUE rs = argv[0];
10201 long olen = RSTRING_LEN(str);
10202 long len = chompped_length(str, rs);
10203 if (
len >= olen)
return Qnil;
10204 str_modify_keep_cr(str);
10205 STR_SET_LEN(str,
len);
10206 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10226rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10229 str_modifiable(str);
10230 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10231 rs = chomp_rs(argc, argv);
10233 return rb_str_chomp_string(str, rs);
10246rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10248 VALUE rs = chomp_rs(argc, argv);
10254tr_setup_table_multi(
char table[TR_TABLE_SIZE],
VALUE *tablep,
VALUE *ctablep,
10255 VALUE str,
int num_selectors,
VALUE *selectors)
10259 for (i=0; i<num_selectors; i++) {
10260 VALUE selector = selectors[i];
10264 enc = rb_enc_check(str, selector);
10265 tr_setup_table(selector, table, i==0, tablep, ctablep, enc);
10272 const char *
const start = s;
10274 if (!s || s >= e)
return 0;
10277 if (single_byte_optimizable(str)) {
10278 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10283 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10293lstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10294 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10296 const char *
const start = s;
10298 if (!s || s >= e)
return 0;
10303 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10305 if (!tr_find(cc, table, del, nodel))
break;
10324rb_str_lstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10328 long olen, loffset;
10330 str_modify_keep_cr(str);
10331 enc = STR_ENC_GET(str);
10334 char table[TR_TABLE_SIZE];
10335 VALUE del = 0, nodel = 0;
10337 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10338 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10341 loffset = lstrip_offset(str, start, start+olen, enc);
10345 long len = olen-loffset;
10346 s = start + loffset;
10347 memmove(start, s,
len);
10348 STR_SET_LEN(str,
len);
10349 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10384rb_str_lstrip(
int argc,
VALUE *argv,
VALUE str)
10391 char table[TR_TABLE_SIZE];
10392 VALUE del = 0, nodel = 0;
10394 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10395 loffset = lstrip_offset_table(str, start, start+
len, STR_ENC_GET(str), table, del, nodel);
10398 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10400 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10409 rb_str_check_dummy_enc(enc);
10413 if (!s || s >= e)
return 0;
10417 if (single_byte_optimizable(str)) {
10419 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10424 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10434rstrip_offset_table(
VALUE str,
const char *s,
const char *e,
rb_encoding *enc,
10435 char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
10440 rb_str_check_dummy_enc(enc);
10444 if (!s || s >= e)
return 0;
10448 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10450 if (!tr_find(c, table, del, nodel))
break;
10470rb_str_rstrip_bang(
int argc,
VALUE *argv,
VALUE str)
10474 long olen, roffset;
10476 str_modify_keep_cr(str);
10477 enc = STR_ENC_GET(str);
10480 char table[TR_TABLE_SIZE];
10481 VALUE del = 0, nodel = 0;
10483 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10484 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10487 roffset = rstrip_offset(str, start, start+olen, enc);
10490 long len = olen - roffset;
10492 STR_SET_LEN(str,
len);
10493 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10527rb_str_rstrip(
int argc,
VALUE *argv,
VALUE str)
10531 long olen, roffset;
10533 enc = STR_ENC_GET(str);
10536 char table[TR_TABLE_SIZE];
10537 VALUE del = 0, nodel = 0;
10539 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10540 roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10543 roffset = rstrip_offset(str, start, start+olen, enc);
10545 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10563rb_str_strip_bang(
int argc,
VALUE *argv,
VALUE str)
10566 long olen, loffset, roffset;
10569 str_modify_keep_cr(str);
10570 enc = STR_ENC_GET(str);
10574 char table[TR_TABLE_SIZE];
10575 VALUE del = 0, nodel = 0;
10577 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10578 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10579 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10582 loffset = lstrip_offset(str, start, start+olen, enc);
10583 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10586 if (loffset > 0 || roffset > 0) {
10587 long len = olen-roffset;
10590 memmove(start, start + loffset,
len);
10592 STR_SET_LEN(str,
len);
10593 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10628rb_str_strip(
int argc,
VALUE *argv,
VALUE str)
10631 long olen, loffset, roffset;
10637 char table[TR_TABLE_SIZE];
10638 VALUE del = 0, nodel = 0;
10640 tr_setup_table_multi(table, &del, &nodel, str, argc, argv);
10641 loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel);
10642 roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel);
10645 loffset = lstrip_offset(str, start, start+olen, enc);
10646 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10649 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10654scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10657 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10663 end = pos + RSTRING_LEN(pat);
10677 if (RSTRING_LEN(str) > end)
10678 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10687 if (!regs || regs->num_regs == 1) {
10693 for (
int i = 1; i < regs->num_regs; i++) {
10724 long last = -1, prev = 0;
10725 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10727 pat = get_pat_quoted(pat, 1);
10728 mustnot_broken(str);
10732 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10737 if (last >= 0) rb_pat_search(pat, str, last, 1);
10742 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10746 str_mod_check(str, p,
len);
10748 if (last >= 0) rb_pat_search(pat, str, last, 1);
10800rb_str_hex(
VALUE str)
10802 return rb_str_to_inum(str, 16, FALSE);
10886rb_str_oct(
VALUE str)
10888 return rb_str_to_inum(str, -8, FALSE);
10891#ifndef HAVE_CRYPT_R
10896 rb_nativethread_lock_t lock;
10897} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10966# define CRYPT_END() ALLOCV_END(databuf)
10969 extern char *crypt(
const char *,
const char *);
10970# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10973 const char *s, *saltp;
10976 char salt_8bit_clean[3];
10980 mustnot_wchar(str);
10981 mustnot_wchar(salt);
10983 saltp = RSTRING_PTR(salt);
10984 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10985 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10989 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10990 salt_8bit_clean[0] = saltp[0] & 0x7f;
10991 salt_8bit_clean[1] = saltp[1] & 0x7f;
10992 salt_8bit_clean[2] =
'\0';
10993 saltp = salt_8bit_clean;
10998# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10999 data->initialized = 0;
11001 res = crypt_r(s, saltp, data);
11004 res = crypt(s, saltp);
11019 size_t res_size = strlen(res)+1;
11020 tmp_buf =
ALLOCA_N(
char, res_size);
11021 memcpy(tmp_buf, res, res_size);
11058 char *ptr, *p, *pend;
11061 unsigned long sum0 = 0;
11066 ptr = p = RSTRING_PTR(str);
11067 len = RSTRING_LEN(str);
11073 str_mod_check(str, ptr,
len);
11076 sum0 += (
unsigned char)*p;
11087 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11088 sum0 &= (((
unsigned long)1)<<bits)-1;
11108rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11112 long width,
len, flen = 1, fclen = 1;
11115 const char *f =
" ";
11116 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11118 int singlebyte = 1, cr;
11122 enc = STR_ENC_GET(str);
11123 termlen = rb_enc_mbminlen(enc);
11127 enc = rb_enc_check(str, pad);
11128 f = RSTRING_PTR(pad);
11129 flen = RSTRING_LEN(pad);
11130 fclen = str_strlen(pad, enc);
11131 singlebyte = single_byte_optimizable(pad);
11132 if (flen == 0 || fclen == 0) {
11133 rb_raise(rb_eArgError,
"zero width padding");
11136 len = str_strlen(str, enc);
11137 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11139 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11143 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11144 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11146 size = RSTRING_LEN(str);
11147 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11148 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11149 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11150 rb_raise(rb_eArgError,
"argument too big");
11154 p = RSTRING_PTR(res);
11156 memset(p, *f, llen);
11160 while (llen >= fclen) {
11166 memcpy(p, f, llen2);
11170 memcpy(p, RSTRING_PTR(str), size);
11173 memset(p, *f, rlen);
11177 while (rlen >= fclen) {
11183 memcpy(p, f, rlen2);
11187 TERM_FILL(p, termlen);
11188 STR_SET_LEN(res, p-RSTRING_PTR(res));
11209rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11211 return rb_str_justify(argc, argv, str,
'l');
11223rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11225 return rb_str_justify(argc, argv, str,
'r');
11238rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11240 return rb_str_justify(argc, argv, str,
'c');
11256 sep = get_pat_quoted(sep, 0);
11268 pos = rb_str_index(str, sep, 0);
11269 if (pos < 0)
goto failed;
11274 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11277 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11291 long pos = RSTRING_LEN(str);
11293 sep = get_pat_quoted(sep, 0);
11306 pos = rb_str_rindex(str, sep, pos);
11315 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11317 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11329rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11333 for (i=0; i<argc; i++) {
11334 VALUE tmp = argv[i];
11336 if (rb_reg_start_with_p(tmp, str))
11340 const char *p, *s, *e;
11345 enc = rb_enc_check(str, tmp);
11346 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11347 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11348 p = RSTRING_PTR(str);
11351 if (!at_char_right_boundary(p, s, e, enc))
11353 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11369rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11373 for (i=0; i<argc; i++) {
11374 VALUE tmp = argv[i];
11375 const char *p, *s, *e;
11380 enc = rb_enc_check(str, tmp);
11381 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11382 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11383 p = RSTRING_PTR(str);
11386 if (!at_char_boundary(p, s, e, enc))
11388 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11404deleted_prefix_length(
VALUE str,
VALUE prefix)
11406 const char *strptr, *prefixptr;
11407 long olen, prefixlen;
11412 if (!is_broken_string(prefix) ||
11413 !rb_enc_asciicompat(enc) ||
11414 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11415 enc = rb_enc_check(str, prefix);
11419 prefixlen = RSTRING_LEN(prefix);
11420 if (prefixlen <= 0)
return 0;
11421 olen = RSTRING_LEN(str);
11422 if (olen < prefixlen)
return 0;
11423 strptr = RSTRING_PTR(str);
11424 prefixptr = RSTRING_PTR(prefix);
11425 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11426 if (is_broken_string(prefix)) {
11427 if (!is_broken_string(str)) {
11431 const char *strend = strptr + olen;
11432 const char *after_prefix = strptr + prefixlen;
11433 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11454rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11457 str_modify_keep_cr(str);
11459 prefixlen = deleted_prefix_length(str, prefix);
11460 if (prefixlen <= 0)
return Qnil;
11474rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11478 prefixlen = deleted_prefix_length(str, prefix);
11479 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11481 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11494deleted_suffix_length(
VALUE str,
VALUE suffix)
11496 const char *strptr, *suffixptr;
11497 long olen, suffixlen;
11501 if (is_broken_string(suffix))
return 0;
11502 enc = rb_enc_check(str, suffix);
11505 suffixlen = RSTRING_LEN(suffix);
11506 if (suffixlen <= 0)
return 0;
11507 olen = RSTRING_LEN(str);
11508 if (olen < suffixlen)
return 0;
11509 strptr = RSTRING_PTR(str);
11510 suffixptr = RSTRING_PTR(suffix);
11511 const char *strend = strptr + olen;
11512 const char *before_suffix = strend - suffixlen;
11513 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11514 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11530rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11532 long olen, suffixlen,
len;
11533 str_modifiable(str);
11535 suffixlen = deleted_suffix_length(str, suffix);
11536 if (suffixlen <= 0)
return Qnil;
11538 olen = RSTRING_LEN(str);
11539 str_modify_keep_cr(str);
11540 len = olen - suffixlen;
11541 STR_SET_LEN(str,
len);
11542 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11558rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11562 suffixlen = deleted_suffix_length(str, suffix);
11563 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11565 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11572 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11578nil_setter_warning(
ID id)
11580 rb_warn_deprecated(
"non-nil '%"PRIsVALUE
"'", NULL, rb_id2str(
id));
11587 if (!
NIL_P(*var)) {
11588 nil_setter_warning(
id);
11595 val = rb_fs_check(val);
11598 "value of %"PRIsVALUE
" must be String or Regexp",
11602 nil_setter_warning(
id);
11619 str_modifiable(str);
11622 int idx = rb_enc_to_index(encoding);
11629 rb_enc_associate_index(str, idx);
11653 if (STR_EMBED_P(str)) {
11654 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11659 str_replace_shared_without_enc(str2, str);
11661 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11691rb_str_valid_encoding_p(
VALUE str)
11693 int cr = rb_enc_str_coderange(str);
11711rb_str_is_ascii_only_p(
VALUE str)
11713 int cr = rb_enc_str_coderange(str);
11721 static const char ellipsis[] =
"...";
11722 const long ellipsislen =
sizeof(ellipsis) - 1;
11724 const long blen = RSTRING_LEN(str);
11725 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11726 VALUE estr, ret = 0;
11729 if (
len * rb_enc_mbminlen(enc) >= blen ||
11733 else if (
len <= ellipsislen ||
11735 if (rb_enc_asciicompat(enc)) {
11737 rb_enc_associate(ret, enc);
11744 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11749 rb_enc_from_encoding(enc), 0,
Qnil);
11760 cr = rb_enc_str_coderange(str);
11762 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11768 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11787 if (enc == STR_ENC_GET(str)) {
11792 return enc_str_scrub(enc, str, repl, cr);
11800 const char *rep, *p, *e, *p1, *sp;
11806 rb_raise(rb_eArgError,
"both of block and replacement given");
11813 if (!
NIL_P(repl)) {
11814 repl = str_compat_and_valid(repl, enc);
11817 if (rb_enc_dummy_p(enc)) {
11820 encidx = rb_enc_to_index(enc);
11822#define DEFAULT_REPLACE_CHAR(str) do { \
11823 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11824 rep = replace; replen = (int)sizeof(replace); \
11827 slen = RSTRING_LEN(str);
11828 p = RSTRING_PTR(str);
11833 if (rb_enc_asciicompat(enc)) {
11839 else if (!
NIL_P(repl)) {
11840 rep = RSTRING_PTR(repl);
11841 replen = RSTRING_LEN(repl);
11844 else if (encidx == rb_utf8_encindex()) {
11845 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11849 DEFAULT_REPLACE_CHAR(
"?");
11854 p = search_nonascii(p, e);
11859 int ret = rb_enc_precise_mbclen(p, e, enc);
11878 if (e - p < clen) clen = e - p;
11885 for (; clen > 1; clen--) {
11886 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11897 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11898 str_mod_check(str, sp, slen);
11899 repl = str_compat_and_valid(repl, enc);
11906 p = search_nonascii(p, e);
11932 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11933 str_mod_check(str, sp, slen);
11934 repl = str_compat_and_valid(repl, enc);
11943 long mbminlen = rb_enc_mbminlen(enc);
11947 else if (!
NIL_P(repl)) {
11948 rep = RSTRING_PTR(repl);
11949 replen = RSTRING_LEN(repl);
11951 else if (encidx == ENCINDEX_UTF_16BE) {
11952 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11954 else if (encidx == ENCINDEX_UTF_16LE) {
11955 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11957 else if (encidx == ENCINDEX_UTF_32BE) {
11958 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11960 else if (encidx == ENCINDEX_UTF_32LE) {
11961 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11964 DEFAULT_REPLACE_CHAR(
"?");
11968 int ret = rb_enc_precise_mbclen(p, e, enc);
11981 if (e - p < clen) clen = e - p;
11982 if (clen <= mbminlen * 2) {
11987 for (; clen > mbminlen; clen-=mbminlen) {
11988 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11998 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11999 str_mod_check(str, sp, slen);
12000 repl = str_compat_and_valid(repl, enc);
12025 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
12026 str_mod_check(str, sp, slen);
12027 repl = str_compat_and_valid(repl, enc);
12067str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
/*
 * File-scope state for the Unicode normalization entry points:
 *   id_normalize      - method ID passed by rb_str_unicode_normalize() and
 *                       rb_str_unicode_normalize_bang()
 *   id_normalized_p   - method ID passed by rb_str_unicode_normalized_p()
 *   mUnicodeNormalize - receiver that unicode_normalize_common() invokes the
 *                       chosen method on via rb_funcallv()
 * Their initialization is not visible in this part of the file.
 */
12075static ID id_normalize;
12076static ID id_normalized_p;
12077static VALUE mUnicodeNormalize;
12080unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12082 static int UnicodeNormalizeRequired = 0;
12085 if (!UnicodeNormalizeRequired) {
12086 rb_require(
"unicode_normalize/normalize.rb");
12087 UnicodeNormalizeRequired = 1;
12091 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12102rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12104 return unicode_normalize_common(argc, argv, str, id_normalize);
12118rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12120 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12147rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12149 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12281#define sym_equal rb_obj_equal
12284sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12288 int c = rb_enc_precise_mbclen(s, send, enc);
12292 c = rb_enc_mbc_to_codepoint(s, send, enc);
12300rb_str_symname_p(
VALUE sym)
12305 rb_encoding *resenc = rb_default_internal_encoding();
12307 if (resenc == NULL) resenc = rb_default_external_encoding();
12308 enc = STR_ENC_GET(sym);
12309 ptr = RSTRING_PTR(sym);
12310 len = RSTRING_LEN(sym);
12311 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12319rb_str_quote_unprintable(
VALUE str)
12327 resenc = rb_default_internal_encoding();
12328 if (resenc == NULL) resenc = rb_default_external_encoding();
12329 enc = STR_ENC_GET(str);
12330 ptr = RSTRING_PTR(str);
12331 len = RSTRING_LEN(str);
12332 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12333 !sym_printable(ptr, ptr +
len, enc)) {
12334 return rb_str_escape(str);
12340rb_id_quote_unprintable(
ID id)
12342 VALUE str = rb_id2str(
id);
12343 if (!rb_str_symname_p(str)) {
12344 return rb_str_escape(str);
12362sym_inspect(
VALUE sym)
12369 if (!rb_str_symname_p(str)) {
12371 len = RSTRING_LEN(str);
12372 rb_str_resize(str,
len + 1);
12373 dest = RSTRING_PTR(str);
12374 memmove(dest + 1, dest,
len);
12378 VALUE orig_str = str;
12380 len = RSTRING_LEN(orig_str);
12381 str = rb_enc_str_new(0,
len + 1, enc);
12384 ptr = RSTRING_PTR(orig_str);
12385 dest = RSTRING_PTR(str);
12386 memcpy(dest + 1, ptr,
len);
12406rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12411 rb_raise(rb_eArgError,
"no receiver given");
12514 return rb_str_match(
rb_sym2str(sym), other);
12529sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12531 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12544sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12546 return rb_str_match_m_p(argc, argv, sym);
12564 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12575sym_length(
VALUE sym)
12589sym_empty(
VALUE sym)
12623sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12639sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12655sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12669sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12671 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12684sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12686 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12698sym_encoding(
VALUE sym)
12704string_for_symbol(
VALUE name)
12709 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12723 name = string_for_symbol(name);
12724 return rb_intern_str(name);
12733 name = string_for_symbol(name);
12757 return rb_fstring(str);
12763 struct RString fake_str = {RBASIC_INIT};
12764 int encidx = ENCINDEX_US_ASCII;
12767 encidx = ENCINDEX_ASCII_8BIT;
12770 VALUE str = setup_fake_str(&fake_str,
ptr,
len, encidx);
12772 return register_fstring(str,
true,
false);
12784 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12785 rb_enc_autoload(enc);
12788 struct RString fake_str = {RBASIC_INIT};
12789 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12795 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12796 rb_enc_autoload(enc);
12799 struct RString fake_str = {RBASIC_INIT};
12800 VALUE str = register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12811#if USE_YJIT || USE_ZJIT
12813rb_jit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12818 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12819 rb_str_buf_cat_byte(str, (
char) code);
12829fstring_set_class_i(
VALUE *str,
void *data)
12833 return ST_CONTINUE;
12841 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
13008 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that is not affected by RUBY_DEBUG; the assertion is always checked.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references, according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_cObject
Object class.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should alloc...
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern_str(), except it tries to convert the parameter object to an instance of rb_cS...
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
#define RB_OBJ_SHAREABLE_P(obj)
Queries whether the passed object has previously been classified as shareable.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Define a function-backended global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define RUBY_TYPED_FREE_IMMEDIATELY
Macros to see if each corresponding flag is defined.
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RString::@53::@55 embed
Embedded contents.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
long len
Length of the string, not including terminating NUL character.
union RString::@53 as
String's specific fields.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
struct RString::@53::@54 heap
Strings that use separated memory region for contents use this pattern.
union RString::@53::@54::@56 aux
Auxiliary info.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises an exception when the type check fails.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.