14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/concurrent_set.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/hash.h"
36#include "internal/numeric.h"
37#include "internal/object.h"
38#include "internal/proc.h"
39#include "internal/re.h"
40#include "internal/sanitizers.h"
41#include "internal/string.h"
42#include "internal/transcode.h"
48#include "ruby_assert.h"
53#if defined HAVE_CRYPT_R
54# if defined HAVE_CRYPT_H
57#elif !defined HAVE_CRYPT
58# include "missing/crypt.h"
59# define HAVE_CRYPT_R 1
62#define BEG(no) (regs->beg[(no)])
63#define END(no) (regs->end[(no)])
66#undef rb_usascii_str_new
70#undef rb_usascii_str_new_cstr
71#undef rb_utf8_str_new_cstr
72#undef rb_enc_str_new_cstr
73#undef rb_external_str_new_cstr
74#undef rb_locale_str_new_cstr
75#undef rb_str_dup_frozen
76#undef rb_str_buf_new_cstr
/*
 * String-local constants.  RUBY_MAX_CHAR_LEN is the literal 16 (presumably
 * an upper bound on the byte length of one character -- TODO confirm).
 * Each STR_* name aliases one of the generic FL_USER* flag bits; the macros
 * and functions in this file test and set them on string objects.
 */
130#define RUBY_MAX_CHAR_LEN 16
131#define STR_PRECOMPUTED_HASH FL_USER4
132#define STR_SHARED_ROOT FL_USER5
133#define STR_BORROWED FL_USER6
134#define STR_TMPLOCK FL_USER7
135#define STR_NOFREE FL_USER18
136#define STR_FAKESTR FL_USER19
/*
 * STR_SET_NOEMBED(str): sets STR_NOEMBED and clears STR_SHARED,
 *   STR_SHARED_ROOT and STR_BORROWED on str.
 * STR_SET_EMBED(str): clears STR_NOEMBED, STR_SHARED and STR_NOFREE.
 * STR_SET_LEN(str, n): stores n into RSTRING(str)->len.
 */
138#define STR_SET_NOEMBED(str) do {\
139 FL_SET((str), STR_NOEMBED);\
140 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
142#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
144#define STR_SET_LEN(str, n) do { \
145 RSTRING(str)->len = (n); \
149str_encindex_fastpath(
int encindex)
153 case ENCINDEX_ASCII_8BIT:
155 case ENCINDEX_US_ASCII:
163str_enc_fastpath(
VALUE str)
/*
 * TERM_LEN(str): terminator width in bytes.  It is 1 when
 *   str_enc_fastpath(str) is true; otherwise it is rb_enc_mbminlen() of the
 *   encoding looked up from ENCODING_GET(str).
 * TERM_FILL(ptr, termlen): evaluates each argument once into a local, always
 *   stores a single '\0' at ptr, and only when termlen > 1 (hinted UNLIKELY)
 *   zero-fills termlen bytes starting at ptr.
 */
168#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
169#define TERM_FILL(ptr, termlen) do {\
170 char *const term_fill_ptr = (ptr);\
171 const int term_fill_len = (termlen);\
172 *term_fill_ptr = '\0';\
173 if (UNLIKELY(term_fill_len > 1))\
174 memset(term_fill_ptr, 0, term_fill_len);\
/*
 * RESIZE_CAPA(str, capacity): RESIZE_CAPA_TERM with termlen = TERM_LEN(str).
 *
 * RESIZE_CAPA_TERM(str, capacity, termlen):
 *  - Embedded string whose embed capacity is below capacity + termlen:
 *    allocate a (capacity + termlen)-byte buffer, copy the current
 *    RSTRING_LEN(str) bytes into it, install it as the heap pointer, restore
 *    the length, switch flags with STR_SET_NOEMBED, and record capacity in
 *    as.heap.aux.capa.
 *  - The second visible group (presumably the non-embedded arm; the lines
 *    between the two groups are not visible here) asserts the string is not
 *    STR_SHARED, reallocates the heap buffer to capacity + termlen bytes
 *    (passing STR_HEAP_SIZE(str) as the old size) and updates capa.
 *
 * NOTE(review): `capacity` and `termlen` are not parenthesized in the
 * embed-capacity comparison, unlike in the allocation sizes, so callers
 * should pass expressions that are safe next to `+` and `<`.
 */
177#define RESIZE_CAPA(str,capacity) do {\
178 const int termlen = TERM_LEN(str);\
179 RESIZE_CAPA_TERM(str,capacity,termlen);\
181#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
182 if (STR_EMBED_P(str)) {\
183 if (str_embed_capa(str) < capacity + termlen) {\
184 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
185 const long tlen = RSTRING_LEN(str);\
186 memcpy(tmp, RSTRING_PTR(str), tlen);\
187 RSTRING(str)->as.heap.ptr = tmp;\
188 RSTRING(str)->len = tlen;\
189 STR_SET_NOEMBED(str);\
190 RSTRING(str)->as.heap.aux.capa = (capacity);\
194 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
195 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
196 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
197 RSTRING(str)->as.heap.aux.capa = (capacity);\
/*
 * STR_SET_SHARED(str, shared_str): record shared_str as the shared root of
 * str.  Does nothing when str carries STR_FAKESTR.  Otherwise it asserts
 * that RSTRING_PTR(str) lies within [ptr, ptr + len] of shared_str, stores
 * shared_str into as.heap.aux.shared through RB_OBJ_WRITE, sets STR_SHARED
 * on str and STR_SHARED_ROOT on shared_str, and additionally sets
 * STR_BORROWED (raw flag write) on shared_str when its RBASIC_CLASS is 0.
 */
201#define STR_SET_SHARED(str, shared_str) do { \
202 if (!FL_TEST(str, STR_FAKESTR)) { \
203 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
204 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
205 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
206 FL_SET((str), STR_SHARED); \
207 FL_SET((shared_str), STR_SHARED_ROOT); \
208 if (RBASIC_CLASS((shared_str)) == 0) \
209 FL_SET_RAW((shared_str), STR_BORROWED); \
/*
 * STR_HEAP_PTR(str): the heap buffer pointer (as.heap.ptr).
 * STR_HEAP_SIZE(str): as.heap.aux.capa plus TERM_LEN(str), as a size_t.
 * STR_ENC_GET(str): shorthand for get_encoding(str).
 */
213#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
214#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
217#define STR_ENC_GET(str) get_encoding(str)
/*
 * SHARABLE_MIDDLE_SUBSTRING defaults to 0 when not predefined.  With it
 * off, SHARABLE_SUBSTRING_P(beg, len, end) holds only when beg + len == end,
 * i.e. the substring runs up to `end`; the alternative definition is the
 * constant 1.
 */
219#if !defined SHARABLE_MIDDLE_SUBSTRING
220# define SHARABLE_MIDDLE_SUBSTRING 0
222#if !SHARABLE_MIDDLE_SUBSTRING
223#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
225#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/*
 * Number of bytes available for embedded string data in str: the GC slot
 * size of the object minus the offset of as.embed.ary inside struct RString.
 */
230str_embed_capa(
VALUE str)
232 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
/*
 * True when none of STR_NOFREE, STR_SHARED_ROOT and STR_SHARED is set on
 * str.
 */
236rb_str_reembeddable_p(
VALUE str)
238 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
242rb_str_embed_size(
long capa)
/*
 * Computes real_size, a byte size for str:
 *  - already embedded: rb_str_embed_size(len) + TERM_LEN(str);
 *  - heap string that rb_str_reembeddable_p() accepts:
 *    rb_str_embed_size(capa) + TERM_LEN(str);
 *  - otherwise: sizeof(struct RString).
 * sizeof(st_index_t) is then added under a condition that is not visible
 * here (presumably when a precomputed hash is stored -- TODO confirm).
 */
248rb_str_size_as_embedded(
VALUE str)
251 if (STR_EMBED_P(str)) {
252 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
256 else if (rb_str_reembeddable_p(str)) {
257 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
260 real_size =
sizeof(
struct RString);
264 real_size +=
sizeof(st_index_t);
/*
 * True when the GC can allocate a slot of rb_str_embed_size(len + termlen)
 * bytes, i.e. a string of len bytes plus its terminator can be embedded.
 */
271STR_EMBEDDABLE_P(
long len,
long termlen)
273 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
278static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
279static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
281static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
282static inline void str_modifiable(
VALUE str);
/*
 * Convenience wrapper: calls str_make_independent_expand() with the
 * string's current length, no extra room (expand = 0) and its own
 * terminator width.
 */
287str_make_independent(
VALUE str)
289 long len = RSTRING_LEN(str);
290 int termlen = TERM_LEN(str);
291 str_make_independent_expand((str),
len, 0L, termlen);
294static inline int str_dependent_p(
VALUE str);
/*
 * Exported entry point: makes str independent only when str_dependent_p()
 * reports that it currently depends on another buffer.
 */
297rb_str_make_independent(
VALUE str)
299 if (str_dependent_p(str)) {
300 str_make_independent(str);
/*
 * Moves a heap string's bytes into the object's embedded storage.  Visible
 * steps: remember the heap pointer in buf, set the length, copy len bytes
 * from buf to RSTRING_PTR(str), then write the terminator right after the
 * data in as.embed.ary.
 * NOTE(review): the flag switch to the embedded representation and the
 * release of buf are on lines not visible here -- confirm before editing.
 */
305rb_str_make_embedded(
VALUE str)
/* Capture the heap pointer before the representation changes. */
310 char *buf =
RSTRING(str)->as.heap.ptr;
314 STR_SET_LEN(str,
len);
317 memcpy(RSTRING_PTR(str), buf,
len);
321 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
/*
 * Debug aid: prints a warning to stderr that a function (named through the
 * leading %s; `func` is presumably the argument) is returning NULL and that
 * a crash is expected to follow.
 */
325rb_debug_rstring_null_ptr(
const char *func)
327 fprintf(stderr,
"%s is returning NULL!! "
328 "SIGSEGV is highly expected to follow immediately.\n"
329 "If you could reproduce, attach your debugger here, "
330 "and look at the passed string.\n",
335static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
338get_encoding(
VALUE str)
/*
 * Raises ArgumentError ("invalid byte sequence in <encoding name>") when
 * is_broken_string(str) is true.
 */
344mustnot_broken(
VALUE str)
346 if (is_broken_string(str)) {
347 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
/*
 * Raises ArgumentError ("wide char encoding: <name>") when the minimum
 * character length of `enc` exceeds one byte.  `enc` is obtained on a line
 * not visible here (presumably the encoding of str).
 */
352mustnot_wchar(
VALUE str)
355 if (rb_enc_mbminlen(enc) > 1) {
356 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
360static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
362#if SIZEOF_LONG == SIZEOF_VOIDP
363#define PRECOMPUTED_FAKESTR_HASH 1
368BARE_STRING_P(
VALUE str)
/*
 * Hash of a string's content.  The base value is rb_memhash() over the
 * string's bytes.  When `e` is non-zero and the string is not ASCII-only,
 * an extra step runs; both the definition of `e` and that step are on
 * lines not visible here (presumably encoding-dependent mixing).
 */
373static inline st_index_t
374str_do_hash(
VALUE str)
376 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
378 if (e && !is_ascii_string(str)) {
/*
 * Stores `hash` inside the embedded buffer of str, immediately after the
 * terminator (RSTRING_END(str) + TERM_LEN(str)), and sets
 * STR_PRECOMPUTED_HASH.  used_bytes/free_bytes measure the occupied and
 * remaining embed capacity; how free_bytes is checked is not visible here.
 */
385str_store_precomputed_hash(
VALUE str, st_index_t hash)
391 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
392 size_t free_bytes = str_embed_capa(str) - used_bytes;
/* memcpy avoids assuming the destination is aligned for st_index_t. */
396 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
398 FL_SET(str, STR_PRECOMPUTED_HASH);
411 if (
FL_TEST(str, RSTRING_FSTR))
414 bare = BARE_STRING_P(str);
416 if (STR_EMBED_P(str)) {
421 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
428 rb_str_resize(str, RSTRING_LEN(str));
430 fstr = register_fstring(str,
false,
false);
433 str_replace_shared_without_enc(str, fstr);
440static VALUE fstring_table_obj;
/*
 * Hash callback for the fstring table.  When PRECOMPUTED_FAKESTR_HASH is
 * defined, h can be read back from as.heap.aux.capa (register_fstring()
 * stores str_do_hash(str) there).  The condition selecting this path is not
 * visible here -- presumably the string is a fake string; confirm.
 */
443fstring_concurrent_set_hash(
VALUE str)
445#ifdef PRECOMPUTED_FAKESTR_HASH
449 h = (st_index_t)
RSTRING(str)->as.heap.aux.capa;
466 const char *aptr, *bptr;
473 return (alen == blen &&
475 memcmp(aptr, bptr, alen) == 0);
480 bool force_precompute_hash;
/*
 * Create callback for the fstring table: produces the string object that is
 * stored when `str` is not yet present.  `data` carries the creation
 * arguments (`arg`, including force_precompute_hash).  Several branch
 * headers and allocations are on lines not visible here; comments below
 * describe only the visible statements.
 */
484fstring_concurrent_set_create(
VALUE str,
void *data)
494 long len = RSTRING_LEN(str);
/* Room for the bytes plus a trailing precomputed hash. */
495 long capa =
len +
sizeof(st_index_t);
496 int term_len = TERM_LEN(str);
/* Embeddable copy: duplicate bytes and encoding, then hash eagerly. */
498 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
500 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
501 STR_SET_LEN(new_str, RSTRING_LEN(str));
503 rb_enc_copy(new_str, str);
504 str_store_precomputed_hash(new_str, str_do_hash(str));
508 rb_enc_copy(new_str, str);
/* Reuse the hash cached in aux.capa when the new string has spare room. */
509#ifdef PRECOMPUTED_FAKESTR_HASH
510 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
511 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
/* Checks on the input string: unfrozen or chilled, shared, not bare. */
525 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
528 if (STR_SHARED_P(str)) {
530 str_make_independent(str);
533 if (!BARE_STRING_P(str)) {
/* Mark the resulting object as an fstring. */
539 RBASIC(str)->flags |= RSTRING_FSTR;
552 .hash = fstring_concurrent_set_hash,
553 .cmp = fstring_concurrent_set_cmp,
554 .create = fstring_concurrent_set_create,
/*
 * Creates the global fstring table as a concurrent set using
 * fstring_concurrent_set_funcs (second argument 8192, presumably the
 * initial capacity -- confirm) and registers the variable's address with
 * the GC.
 */
559Init_fstring_table(
void)
561 fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192);
562 rb_gc_register_address(&fstring_table_obj);
/*
 * Looks `str` up in the fstring table, inserting it when absent, and
 * yields the table's entry in `result`.  `args` forwards
 * force_precompute_hash (and presumably `copy`; that initializer is not
 * visible) to the create callback.
 */
566register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
570 .force_precompute_hash = force_precompute_hash
/*
 * When pointers and longs have the same size, cache the hash in aux.capa so
 * the hash callback can read it back.  The guard around this store is not
 * visible here.
 */
573#if SIZEOF_VOIDP == SIZEOF_LONG
577 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
581 VALUE result = rb_concurrent_set_find_or_insert(&fstring_table_obj, str, &args);
/* The returned entry must not be an object the GC considers garbage. */
583 RUBY_ASSERT(!rb_objspace_garbage_object_p(result));
/* Identity test: is obj the global fstring table object? */
593rb_obj_is_fstring_table(
VALUE obj)
597 return obj == fstring_table_obj;
/*
 * Removes obj from the fstring table by identity and bumps the
 * obj_str_fstr debug counter.
 */
601rb_gc_free_fstring(
VALUE obj)
606 rb_concurrent_set_delete_by_identity(fstring_table_obj, obj);
608 RB_DEBUG_COUNTER_INC(obj_str_fstr);
/*
 * Passes `callback` and `data` to rb_concurrent_set_foreach_with_replace()
 * over the fstring table.  Does nothing when the table has not been created
 * (fstring_table_obj is zero).
 */
614rb_fstring_foreach_with_replace(
int (*callback)(
VALUE *str,
void *data),
void *data)
616 if (fstring_table_obj) {
617 rb_concurrent_set_foreach_with_replace(fstring_table_obj, callback, data);
622setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
625 RBASIC_SET_SHAPE_ID((
VALUE)fake_str, ROOT_SHAPE_ID);
638 return (
VALUE)fake_str;
647 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
/*
 * Returns the fstring-table entry for the len bytes at ptr, tagged
 * US-ASCII.  The lookup key is a stack-allocated fake string set up over
 * ptr; it is registered with copy = false and force_precompute_hash = false.
 */
656rb_fstring_new(
const char *ptr,
long len)
658 struct RString fake_str = {RBASIC_INIT};
659 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
false,
false);
665 struct RString fake_str = {RBASIC_INIT};
666 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
false,
false);
/*
 * NUL-terminated convenience form of rb_fstring_new(): the length is taken
 * with strlen(), so ptr must be a valid C string.
 */
670rb_fstring_cstr(
const char *
ptr)
672 return rb_fstring_new(
ptr, strlen(
ptr));
676single_byte_optimizable(
VALUE str)
680 case ENCINDEX_ASCII_8BIT:
681 case ENCINDEX_US_ASCII:
/*
 * Scans [p, e) for a byte with the high bit (0x80) set and returns a
 * pointer to it.  Visible strategy: when the range is at least one pointer
 * word long (or unaligned word access is allowed), the unaligned leading
 * bytes are tested one by one (only when !UNALIGNED_WORD_ACCESS), whole
 * uintptr_t words are tested against NONASCII_MASK, and the remaining tail
 * bytes are tested one by one.  The return value for "no such byte" is on
 * lines not visible here.
 */
703static inline const char *
704search_nonascii(
const char *p,
const char *e)
706 const uintptr_t *s, *t;
/* NONASCII_MASK: 0x80 replicated into every byte of a uintptr_t. */
708#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
709# if SIZEOF_UINTPTR_T == 8
710# define NONASCII_MASK UINT64_C(0x8080808080808080)
711# elif SIZEOF_UINTPTR_T == 4
712# define NONASCII_MASK UINT32_C(0x80808080)
714# error "don't know what to do."
717# if SIZEOF_UINTPTR_T == 8
718# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
719# elif SIZEOF_UINTPTR_T == 4
720# define NONASCII_MASK 0x80808080UL
722# error "don't know what to do."
726 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
727#if !UNALIGNED_WORD_ACCESS
/* l = number of bytes up to the next word-aligned address. */
728 if ((uintptr_t)p % SIZEOF_VOIDP) {
729 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
/* Byte-wise checks at negative offsets from p. */
734 case 7:
if (p[-7]&0x80)
return p-7;
735 case 6:
if (p[-6]&0x80)
return p-6;
736 case 5:
if (p[-5]&0x80)
return p-5;
737 case 4:
if (p[-4]&0x80)
return p-4;
739 case 3:
if (p[-3]&0x80)
return p-3;
740 case 2:
if (p[-2]&0x80)
return p-2;
741 case 1:
if (p[-1]&0x80)
return p-1;
746#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
747#define aligned_ptr(value) \
748 __builtin_assume_aligned((value), sizeof(uintptr_t))
750#define aligned_ptr(value) (uintptr_t *)(value)
753 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
/*
 * Word hit: the byte offset inside the word is the count of leading zero
 * bits (big-endian) or trailing zero bits (little-endian), divided by 8.
 */
756 if (*s & NONASCII_MASK) {
757#ifdef WORDS_BIGENDIAN
758 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
760 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
/* Tail: byte-wise checks at negative offsets from e. */
770 case 7:
if (e[-7]&0x80)
return e-7;
771 case 6:
if (e[-6]&0x80)
return e-6;
772 case 5:
if (e[-5]&0x80)
return e-5;
773 case 4:
if (e[-4]&0x80)
return e-4;
775 case 3:
if (e[-3]&0x80)
return e-3;
776 case 2:
if (e[-2]&0x80)
return e-2;
777 case 1:
if (e[-1]&0x80)
return e-1;
785 const char *e = p +
len;
787 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
789 p = search_nonascii(p, e);
793 if (rb_enc_asciicompat(enc)) {
794 p = search_nonascii(p, e);
797 int ret = rb_enc_precise_mbclen(p, e, enc);
801 p = search_nonascii(p, e);
807 int ret = rb_enc_precise_mbclen(p, e, enc);
823 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
826 p = search_nonascii(p, e);
830 else if (rb_enc_asciicompat(enc)) {
831 p = search_nonascii(p, e);
837 int ret = rb_enc_precise_mbclen(p, e, enc);
844 p = search_nonascii(p, e);
850 int ret = rb_enc_precise_mbclen(p, e, enc);
875 rb_enc_set_index(str1, rb_enc_get_index(str2));
/*
 * Copies src's encoding onto dest (str_enc_copy) and then settles dest's
 * code range.  The visible decisions depend on whether dest is empty,
 * whether src's encoding is ASCII-compatible, and whether dest contains a
 * non-ASCII byte.  The code-range values assigned in each case are on lines
 * not visible here.
 */
883rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
888 str_enc_copy(dest, src);
889 if (RSTRING_LEN(dest) == 0) {
890 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
901 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
902 search_nonascii(RSTRING_PTR(dest),
RSTRING_END(dest)))
/*
 * Copies src's encoding onto dest via str_enc_copy().  The name suggests
 * the code range is copied as well; that statement is not visible here.
 */
913rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
915 str_enc_copy(dest, src);
922 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
928 return enc_coderange_scan(str, enc);
937 cr = enc_coderange_scan(str, get_encoding(str));
/*
 * True when the string's encoding index takes the fast path
 * (str_encindex_fastpath) or, failing that, when the encoding looked up
 * from the index is ASCII-compatible.  `encindex` is obtained on a line not
 * visible here.
 */
944rb_enc_str_asciicompat(
VALUE str)
947 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
955 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
/*
 * Detects whether string s was modified behind the caller's back: the
 * condition is true when its current pointer differs from p or its current
 * length differs from len.  The reaction (presumably raising) is on a line
 * not visible here.
 */
964str_mod_check(
VALUE s,
const char *p,
long len)
966 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
/*
 * Capacity of str in bytes, excluding the terminator:
 *  - embedded: embed capacity minus termlen;
 *  - STR_SHARED or STR_NOFREE set: value returned on a line not visible
 *    here;
 *  - otherwise: the recorded heap capacity (as.heap.aux.capa).
 */
972str_capacity(
VALUE str,
const int termlen)
974 if (STR_EMBED_P(str)) {
975 return str_embed_capa(str) - termlen;
977 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
981 return RSTRING(str)->as.heap.aux.capa;
988 return str_capacity(str, TERM_LEN(str));
/*
 * Raises ArgumentError "NULL pointer given".  The guarding condition is not
 * visible here (presumably a null check on ptr).
 */
992must_not_null(
const char *
ptr)
995 rb_raise(rb_eArgError,
"NULL pointer given");
/*
 * Allocates a string object of class klass sized by
 * rb_str_embed_size(capa) and sets the first embedded byte to 0.  The
 * remaining NEWOBJ_OF arguments and the length initialization are on lines
 * not visible here.
 */
1000str_alloc_embed(
VALUE klass,
size_t capa)
1002 size_t size = rb_str_embed_size(
capa);
1006 NEWOBJ_OF(str,
struct RString, klass,
1010 str->as.embed.ary[0] = 0;
/*
 * Allocates a string object of class klass whose heap fields start out
 * empty: capacity 0 and a NULL buffer pointer.  Callers are expected to
 * install a buffer afterwards.
 */
1016str_alloc_heap(
VALUE klass)
1018 NEWOBJ_OF(str,
struct RString, klass,
1022 str->as.heap.aux.capa = 0;
1023 str->as.heap.ptr = NULL;
/*
 * Allocates an empty embedded string of class klass: fires the DTrace
 * string-creation hook with length 0, allocates with capacity 0, and
 * zero-fills the whole embedded area (str_embed_capa(str) bytes).
 */
1029empty_str_alloc(
VALUE klass)
1031 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1032 VALUE str = str_alloc_embed(klass, 0);
1033 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1044 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1048 enc = rb_ascii8bit_encoding();
1051 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1053 int termlen = rb_enc_mbminlen(enc);
1055 if (STR_EMBEDDABLE_P(
len, termlen)) {
1056 str = str_alloc_embed(klass,
len + termlen);
1062 str = str_alloc_heap(klass);
1068 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1071 rb_enc_raw_set(str, enc);
1074 memcpy(RSTRING_PTR(str),
ptr,
len);
1077 memset(RSTRING_PTR(str), 0,
len);
1080 STR_SET_LEN(str,
len);
1081 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
1088 return str_enc_new(klass,
ptr,
len, rb_ascii8bit_encoding());
1123 __msan_unpoison_string(
ptr);
1143 if (rb_enc_mbminlen(enc) != 1) {
1144 rb_raise(rb_eArgError,
"wchar encoding given");
1146 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
/*
 * Creates a string of class klass with encoding index encindex.  Two
 * construction paths are visible; the conditions choosing between them are
 * not:
 *  - a regular copy through str_enc_new();
 *  - a heap string flagged STR_NOFREE, so the buffer is not released with
 *    the object (presumably it wraps the caller's static `ptr` -- the
 *    pointer assignment is not visible).
 * Raises ArgumentError "negative string size (or size too big)" under a
 * length check that is not visible here.
 */
1150str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1155 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1159 str = str_enc_new(klass,
ptr,
len, rb_enc_from_index(encindex));
1162 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1163 str = str_alloc_heap(klass);
1167 RBASIC(str)->flags |= STR_NOFREE;
1168 rb_enc_associate_index(str, encindex);
1197static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1199 int ecflags,
VALUE ecopts);
1204 int encidx = rb_enc_to_index(enc);
1205 if (rb_enc_get_index(str) == encidx)
1206 return is_ascii_string(str);
1217 if (!to)
return str;
1218 if (!from) from = rb_enc_get(str);
1219 if (from == to)
return str;
1220 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1221 rb_is_ascii8bit_enc(to)) {
1222 if (STR_ENC_GET(str) != to) {
1224 rb_enc_associate(str, to);
1231 from, to, ecflags, ecopts);
1232 if (
NIL_P(newstr)) {
1240rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1245 olen = RSTRING_LEN(newstr);
1246 if (ofs < -olen || olen < ofs)
1248 if (ofs < 0) ofs += olen;
1250 STR_SET_LEN(newstr, ofs);
1254 rb_str_modify(newstr);
1255 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1263 STR_SET_LEN(str, 0);
1264 rb_enc_associate(str, enc);
/*
 * Transcodes the len bytes at ptr and writes the result into newstr
 * starting at byte offset ofs, growing newstr as needed; finally associates
 * newstr with encoding `to`.  Returns Qnil when no converter could be
 * opened (`ec` is NULL).  The `from`/`to` parameters, the converter setup
 * and the loop condition tail are on lines not visible here.
 */
1270str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1272 int ecflags,
VALUE ecopts)
1277 VALUE econv_wrapper;
1278 const unsigned char *start, *sp;
1279 unsigned char *dest, *dp;
/* Output so far begins at the requested offset. */
1280 size_t converted_output = (size_t)ofs;
1285 RBASIC_CLEAR_CLASS(econv_wrapper);
1287 if (!ec)
return Qnil;
1290 sp = (
unsigned char*)
ptr;
1292 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1293 (dp = dest + converted_output),
1297 size_t converted_input = sp - start;
1298 size_t rest =
len - converted_input;
1299 converted_output = dp - dest;
/*
 * Estimate remaining output from the output/input ratio seen so far.  The
 * bound on `rest` keeps the multiplication from overflowing.
 */
1301 if (converted_input && converted_output &&
1302 rest < (LONG_MAX / converted_output)) {
1303 rest = (rest * converted_output) / converted_input;
/* Always grow by at least 2 bytes. */
1308 olen += rest < 2 ? 2 : rest;
1309 rb_str_resize(newstr, olen);
/* Final length = distance from buffer start to the write cursor. */
1316 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1318 rb_enc_associate(newstr, to);
1337 const int eidx = rb_enc_to_index(eenc);
1340 return rb_enc_str_new(
ptr,
len, eenc);
1344 if ((eidx == rb_ascii8bit_encindex()) ||
1345 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1349 ienc = rb_default_internal_encoding();
1350 if (!ienc || eenc == ienc) {
1351 return rb_enc_str_new(
ptr,
len, eenc);
1355 if ((eidx == rb_ascii8bit_encindex()) ||
1356 (eidx == rb_usascii_encindex()) ||
1357 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1358 return rb_enc_str_new(
ptr,
len, ienc);
1361 str = rb_enc_str_new(NULL, 0, ienc);
1364 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1365 rb_str_initialize(str,
ptr,
len, eenc);
1373 int eidx = rb_enc_to_index(eenc);
1374 if (eidx == rb_usascii_encindex() &&
1375 !is_ascii_string(str)) {
1376 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1379 rb_enc_associate_index(str, eidx);
/*
 * Makes str2 hold the same content as str without touching encoding
 * information.
 *  - When str2's embedded area can hold len + termlen bytes, the bytes are
 *    copied into it and terminated.
 *  - Otherwise str2 becomes a sharer of a root string: str's own shared
 *    root when str is already shared (the alternative choice of root is not
 *    visible).  A heap buffer that str2 owns exclusively is released first.
 * `len` and `root` are declared on lines not visible here.
 */
1438str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1440 const int termlen = TERM_LEN(str);
1445 if (str_embed_capa(str2) >=
len + termlen) {
1446 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1447 STR_SET_EMBED(str2);
1448 memcpy(ptr2, RSTRING_PTR(str),
len);
1449 TERM_FILL(ptr2+
len, termlen);
1453 if (STR_SHARED_P(str)) {
1454 root =
RSTRING(str)->as.heap.aux.shared;
/* str2 owns its heap buffer only if it is neither shared nor no-free. */
1463 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1465 rb_fatal(
"about to free a possible shared root");
1467 char *ptr2 = STR_HEAP_PTR(str2);
1469 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1472 FL_SET(str2, STR_NOEMBED);
1474 STR_SET_SHARED(str2, root);
1477 STR_SET_LEN(str2,
len);
1485 str_replace_shared_without_enc(str2, str);
1486 rb_enc_cr_str_exact_copy(str2, str);
1493 return str_replace_shared(str_alloc_heap(klass), str);
1510rb_str_new_frozen_String(
VALUE orig)
/*
 * Fast path: returns orig itself when it is already a bare string
 * (BARE_STRING_P) and frozen.  The slow path is on lines not visible here.
 */
1518rb_str_frozen_bare_string(
VALUE orig)
1520 if (RB_LIKELY(BARE_STRING_P(orig) &&
OBJ_FROZEN_RAW(orig)))
return orig;
/*
 * Returns a frozen buffer string for orig built by str_new_frozen_buffer()
 * with class 0 (hidden) and without copying the encoding.  Any early return
 * before this call is not visible here.
 */
1525rb_str_tmp_frozen_acquire(
VALUE orig)
1528 return str_new_frozen_buffer(0, orig, FALSE);
/*
 * Like rb_str_tmp_frozen_acquire(), but the result is never embedded.
 *  - A frozen, non-embedded, non-re-embeddable orig is returned as is.
 *  - A shared orig whose root is not embedded defers to
 *    rb_str_tmp_frozen_acquire().
 *  - Otherwise a hidden heap string (class 0) flagged STR_SHARED_ROOT is
 *    built.  For an embedded or sharing orig a fresh capa + TERM_LEN buffer
 *    is allocated.  On the other visible path STR_NOFREE moves from orig to
 *    the new string and orig is re-pointed at it via STR_SET_SHARED.
 * Buffer copying and the final return are on lines not visible here.
 */
1532rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1534 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1535 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1537 VALUE str = str_alloc_heap(0);
1540 FL_SET(str, STR_SHARED_ROOT);
1542 size_t capa = str_capacity(orig, TERM_LEN(orig));
1548 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1549 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
/* Transfer buffer ownership: the no-free bit follows the buffer. */
1556 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1557 RBASIC(orig)->flags &= ~STR_NOFREE;
1558 STR_SET_SHARED(orig, str);
/*
 * Releases a temporary frozen string `tmp` obtained for `orig`.  For an
 * embedded tmp the action is not visible here.  When orig is temp-locked
 * but not shared (and further conditions not visible hold), the buffer is
 * handed back: lengths are asserted equal, orig takes over tmp's capacity
 * and STR_NOFREE bit, and tmp's length is reset to 0.
 */
1568rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1573 if (STR_EMBED_P(tmp)) {
1576 else if (
FL_TEST_RAW(orig, STR_SHARED | STR_TMPLOCK) == STR_TMPLOCK &&
1582 RUBY_ASSERT(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1586 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1587 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1592 STR_SET_LEN(tmp, 0);
1600 return str_new_frozen_buffer(klass, orig, TRUE);
1609 VALUE str = str_alloc_heap(klass);
1610 STR_SET_LEN(str, RSTRING_LEN(orig));
1611 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1612 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1613 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1614 RBASIC(orig)->flags &= ~STR_NOFREE;
1615 STR_SET_SHARED(orig, str);
/*
 * Builds a string of class klass with orig's content.  With copy_encoding
 * false, the encoding is ASCII-8BIT and termlen is 1; otherwise orig's
 * encoding and terminator width are used.
 *  - Embedded or embeddable content is copied through str_enc_new().
 *  - For a sharing orig (`shared` is its root; that branch header is not
 *    visible), ofs/rest are the unused bytes before and after orig's window
 *    in the root.  On one visible path a new sharer of the root is created
 *    and narrowed to that window.
 *  - Otherwise embeddable content is copied into a new embedded string, or
 *    orig's heap buffer is turned into a shared root
 *    (heap_str_make_shared()).
 * Encoding and code range are copied at the end only when copy_encoding is
 * set.
 */
1622str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1626 long len = RSTRING_LEN(orig);
1627 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1628 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1630 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1631 str = str_enc_new(klass, RSTRING_PTR(orig),
len, enc);
1637 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1638 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1644 if ((ofs > 0) || (rest > 0) ||
1647 str = str_new_shared(klass,
shared);
/* Narrow the new sharer to orig's window inside the root buffer. */
1649 RSTRING(str)->as.heap.ptr += ofs;
1650 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1658 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1659 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1661 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1662 STR_SET_LEN(str, RSTRING_LEN(orig));
1667 str = heap_str_make_shared(klass, orig);
1671 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1683str_new_empty_String(
VALUE str)
1686 rb_enc_copy(v, str);
1690#define STR_BUF_MIN_SIZE 63
1695 if (STR_EMBEDDABLE_P(
capa, 1)) {
1703 RSTRING(str)->as.heap.ptr[0] =
'\0';
1723 return str_new(0, 0,
len);
1729 if (STR_EMBED_P(str)) {
1730 RB_DEBUG_COUNTER_INC(obj_str_embed);
1732 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1733 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1734 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1737 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1738 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/*
 * Heap memory attributable to str.  A non-embedded string that is neither
 * shared nor no-free owns its buffer, so STR_HEAP_SIZE(str) is reported.
 * The value for all other strings is on a line not visible here.
 */
1743rb_str_memsize(
VALUE str)
1745 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1746 return STR_HEAP_SIZE(str);
1756 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1759static inline void str_discard(
VALUE str);
1760static void str_shared_replace(
VALUE str,
VALUE str2);
1765 if (str != str2) str_shared_replace(str, str2);
1776 enc = STR_ENC_GET(str2);
1779 termlen = rb_enc_mbminlen(enc);
1781 STR_SET_LEN(str, RSTRING_LEN(str2));
1783 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1785 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1786 rb_enc_associate(str, enc);
1790 if (STR_EMBED_P(str2)) {
1792 long len = RSTRING_LEN(str2);
1795 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1796 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1797 RSTRING(str2)->as.heap.ptr = new_ptr;
1798 STR_SET_LEN(str2,
len);
1800 STR_SET_NOEMBED(str2);
1803 STR_SET_NOEMBED(str);
1805 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1807 if (
FL_TEST(str2, STR_SHARED)) {
1809 STR_SET_SHARED(str,
shared);
1812 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1816 STR_SET_EMBED(str2);
1817 RSTRING_PTR(str2)[0] = 0;
1818 STR_SET_LEN(str2, 0);
1819 rb_enc_associate(str, enc);
1833 return rb_obj_as_string_result(str, obj);
1849 len = RSTRING_LEN(str2);
1850 if (STR_SHARED_P(str2)) {
1853 STR_SET_NOEMBED(str);
1854 STR_SET_LEN(str,
len);
1855 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1856 STR_SET_SHARED(str,
shared);
1857 rb_enc_cr_str_exact_copy(str, str2);
1860 str_replace_shared(str, str2);
1869 size_t size = rb_str_embed_size(
capa);
1873 NEWOBJ_OF(str,
struct RString, klass,
1884 NEWOBJ_OF(str,
struct RString, klass,
1887 str->as.heap.aux.capa = 0;
1888 str->as.heap.ptr = NULL;
1898 encidx = rb_enc_get_index(str);
1899 flags &= ~ENCODING_MASK;
1902 if (encidx) rb_enc_associate_index(dup, encidx);
1912 long len = RSTRING_LEN(str);
1917 STR_SET_LEN(dup, RSTRING_LEN(str));
1918 return str_duplicate_setup_encoding(str, dup, flags);
1927 root =
RSTRING(str)->as.heap.aux.shared;
1930 root = str = str_new_frozen(klass, str);
1936 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1937 FL_SET(root, STR_SHARED_ROOT);
1939 flags |= RSTRING_NOEMBED | STR_SHARED;
1941 STR_SET_LEN(dup, RSTRING_LEN(str));
1942 return str_duplicate_setup_encoding(str, dup, flags);
1948 if (STR_EMBED_P(str)) {
1949 return str_duplicate_setup_embed(klass, str, dup);
1952 return str_duplicate_setup_heap(klass, str, dup);
1960 if (STR_EMBED_P(str)) {
1961 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1964 dup = str_alloc_heap(klass);
1967 return str_duplicate_setup(klass, str, dup);
1978rb_str_dup_m(
VALUE str)
1980 if (LIKELY(BARE_STRING_P(str))) {
1991 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1998 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
2002 new_str = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
2003 str_duplicate_setup_embed(klass, str, new_str);
2006 new_str = ec_str_alloc_heap(ec, klass);
2007 str_duplicate_setup_heap(klass, str, new_str);
2016rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
2018 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
2042 static ID keyword_ids[2];
2043 VALUE orig, opt, venc, vcapa;
2048 if (!keyword_ids[0]) {
2049 keyword_ids[0] = rb_id_encoding();
2050 CONST_ID(keyword_ids[1],
"capacity");
2058 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2059 enc = rb_to_encoding(venc);
2061 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2064 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2066 if (
capa < STR_BUF_MIN_SIZE) {
2067 capa = STR_BUF_MIN_SIZE;
2071 len = RSTRING_LEN(orig);
2075 if (orig == str) n = 0;
2077 str_modifiable(str);
2078 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2080 const size_t size = (size_t)
capa + termlen;
2081 const char *
const old_ptr = RSTRING_PTR(str);
2082 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
2083 char *new_ptr =
ALLOC_N(
char, size);
2084 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2085 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2087 RSTRING(str)->as.heap.ptr = new_ptr;
2089 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2090 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2091 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2093 STR_SET_LEN(str,
len);
2096 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2097 rb_enc_cr_str_exact_copy(str, orig);
2099 FL_SET(str, STR_NOEMBED);
2106 rb_enc_associate(str, enc);
2118rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2124 static ID keyword_ids[2];
2134 keyword_ids[0] = rb_id_encoding();
2135 CONST_ID(keyword_ids[1],
"capacity");
2137 encoding = kwargs[0];
2138 capacity = kwargs[1];
2147 if (UNDEF_P(encoding)) {
2149 encoding = rb_obj_encoding(orig);
2153 if (!UNDEF_P(encoding)) {
2154 enc = rb_to_encoding(encoding);
2158 if (UNDEF_P(capacity)) {
2160 VALUE empty_str = str_new(klass,
"", 0);
2162 rb_enc_associate(empty_str, enc);
2166 VALUE copy = str_duplicate(klass, orig);
2167 rb_enc_associate(copy, enc);
2180 if (orig_capa >
capa) {
2185 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2186 STR_SET_LEN(str, 0);
/*
 * is_utf8_lead_byte(c): true unless c has the bit pattern 10xxxxxx, i.e.
 * unless it is a UTF-8 continuation byte.
 *
 * count_utf8_lead_bytes_with_word(s): counts the non-continuation bytes in
 * one machine word.  `d` presumably holds *s (its initialization is not
 * visible).  After the two bit operations below, the lowest bit of every
 * byte of d is 1 exactly when that byte's bit 6 is set or its bit 7 is
 * clear, which means it is not 10xxxxxx.  The set bits are then counted:
 * with a popcount builtin when available, otherwise by code on lines not
 * visible here.
 */
2197#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2212static inline uintptr_t
2213count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2218 d = (d>>6) | (~d>>7);
2219 d &= NONASCII_MASK >> 7;
2222#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2224 return rb_popcount_intptr(d);
2228# if SIZEOF_VOIDP == 8
/*
 * Counts the characters in the byte range [p, e) for encoding enc, given
 * the code range cr.  Several strategies are visible; the conditions that
 * select the first two are on lines not visible here:
 *  - division of the byte length by rb_enc_mbminlen(enc), rounded up;
 *  - UTF-8 lead-byte counting: byte-wise up to the first aligned word,
 *    word-at-a-time through count_utf8_lead_bytes_with_word(), then
 *    byte-wise for the tail;
 *  - for ASCII-compatible encodings, runs of ASCII are skipped with
 *    search_nonascii() and other characters are stepped over with
 *    rb_enc_fast_mbclen() or rb_enc_mbclen();
 *  - a generic loop stepping one character at a time with rb_enc_mbclen().
 */
2237enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2243 long diff = (long)(e - p);
2244 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
/* Use the word-wise path only for ranges longer than two words. */
2249 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2250 const uintptr_t *s, *t;
2251 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
/* s: p rounded up to word alignment; t: e rounded down. */
2252 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2253 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2254 while (p < (
const char *)s) {
2255 if (is_utf8_lead_byte(*p))
len++;
2259 len += count_utf8_lead_bytes_with_word(s);
2262 p = (
const char *)s;
2265 if (is_utf8_lead_byte(*p))
len++;
2271 else if (rb_enc_asciicompat(enc)) {
2276 q = search_nonascii(p, e);
2282 p += rb_enc_fast_mbclen(p, e, enc);
2289 q = search_nonascii(p, e);
2295 p += rb_enc_mbclen(p, e, enc);
2302 for (c=0; p<e; c++) {
2303 p += rb_enc_mbclen(p, e, enc);
/*
 * Counts the characters in [p, e) for encoding enc and reports a code range
 * through *cr (the assignments to *cr are on lines not visible here).
 * Visible strategies: the byte length divided by rb_enc_mbminlen(enc),
 * rounded up; ASCII-run skipping with search_nonascii() for
 * ASCII-compatible encodings; and validation of each character with
 * rb_enc_precise_mbclen().  In the generic loop, p advances by
 * rb_enc_mbminlen(enc) when that many bytes still fit before e (presumably
 * after an invalid sequence -- the enclosing branch is not visible).
 */
2318rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2326 long diff = (long)(e - p);
2327 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2329 else if (rb_enc_asciicompat(enc)) {
2333 q = search_nonascii(p, e);
2341 ret = rb_enc_precise_mbclen(p, e, enc);
2356 for (c=0; p<e; c++) {
2357 ret = rb_enc_precise_mbclen(p, e, enc);
2364 if (p + rb_enc_mbminlen(enc) <= e)
2365 p += rb_enc_mbminlen(enc);
2381 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2382 if (!enc) enc = STR_ENC_GET(str);
2383 p = RSTRING_PTR(str);
2388 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2393 return enc_strlen(p, e, enc, cr);
2400 return str_strlen(str, NULL);
2414 return LONG2NUM(str_strlen(str, NULL));
2426rb_str_bytesize(
VALUE str)
/* Returns the Ruby boolean for "byte length of str is zero". */
2445rb_str_empty(
VALUE str)
2447 return RBOOL(RSTRING_LEN(str) == 0);
2466 char *ptr1, *ptr2, *ptr3;
2471 enc = rb_enc_check_str(str1, str2);
2474 termlen = rb_enc_mbminlen(enc);
2475 if (len1 > LONG_MAX - len2) {
2476 rb_raise(rb_eArgError,
"string size too big");
2478 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2479 ptr3 = RSTRING_PTR(str3);
2480 memcpy(ptr3, ptr1, len1);
2481 memcpy(ptr3+len1, ptr2, len2);
2482 TERM_FILL(&ptr3[len1+len2], termlen);
2498 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2501 int enc1 = rb_enc_get_index(str1);
2502 int enc2 = rb_enc_get_index(str2);
2507 else if (enc2 < 0) {
2510 else if (enc1 != enc2) {
2513 else if (len1 > LONG_MAX - len2) {
2547 rb_enc_copy(str2, str);
2552 rb_raise(rb_eArgError,
"negative argument");
2554 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2555 if (STR_EMBEDDABLE_P(
len, 1)) {
2557 memset(RSTRING_PTR(str2), 0,
len + 1);
2564 STR_SET_LEN(str2,
len);
2565 rb_enc_copy(str2, str);
2568 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2569 rb_raise(rb_eArgError,
"argument too big");
2572 len *= RSTRING_LEN(str);
2573 termlen = TERM_LEN(str);
2575 ptr2 = RSTRING_PTR(str2);
2577 n = RSTRING_LEN(str);
2578 memcpy(ptr2, RSTRING_PTR(str), n);
2579 while (n <=
len/2) {
2580 memcpy(ptr2 + n, ptr2, n);
2583 memcpy(ptr2 + n, ptr2,
len-n);
2585 STR_SET_LEN(str2,
len);
2586 TERM_FILL(&ptr2[
len], termlen);
2587 rb_enc_cr_str_copy_for_substr(str2, str);
/*
 * Checks whether str is temporarily locked (STR_TMPLOCK).  The reaction
 * (presumably raising) is on a line not visible here.
 */
2624rb_check_lockedtmp(
VALUE str)
2626 if (
FL_TEST(str, STR_TMPLOCK)) {
/*
 * STR_UNMODIFIABLE_MASK: the flags that forbid in-place modification
 * (frozen, temp-locked, chilled).
 *
 * str_modifiable(str): the common case (no such flag set) costs a single
 * flag test.  Otherwise a chilled string goes through
 * CHILLED_STRING_MUTATED(), and the temp-lock and frozen checks run (each
 * presumably raising on violation).
 */
2633#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2635str_modifiable(
VALUE str)
2639 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2640 if (CHILLED_STRING_P(str)) {
2641 CHILLED_STRING_MUTATED(str);
2643 rb_check_lockedtmp(str);
2644 rb_check_frozen(str);
/*
 * Tests whether str depends on a buffer it does not own.  The visible
 * condition identifies the independent case: the string is embedded, or
 * neither STR_SHARED nor STR_NOFREE is set.  The returned values are on
 * lines not visible here.
 */
2649str_dependent_p(
VALUE str)
2651 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/*
 * STR_DEPENDANT_MASK: the unmodifiable flags plus STR_SHARED and
 * STR_NOFREE.
 *
 * str_independent(str): when any of those flags is set, first enforces
 * modifiability (str_modifiable), then reports whether the string owns its
 * buffer (!str_dependent_p).  The result for the flag-free fast path is on
 * a line not visible here.
 */
2661#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2663str_independent(
VALUE str)
2667 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2668 str_modifiable(str);
2669 return !str_dependent_p(str);
/*
 * Gives str its own buffer holding its first len bytes, with room for
 * `expand` more (`capa` is computed on lines not visible here).
 *  - When a non-embedded string's data fits the embedded area
 *    (capa + termlen), the visible statement sets the length; the copy into
 *    the embedded area is not visible.
 *  - Otherwise len bytes are copied from the old buffer into `ptr`
 *    (allocation not visible).  If the old buffer was exclusively owned
 *    (STR_NOEMBED set, neither STR_NOFREE nor STR_SHARED), it is handled on
 *    a line not visible here, presumably freed.  The string is then marked
 *    non-embedded, STR_SHARED/STR_NOFREE are cleared, the terminator is
 *    written and the length set.
 */
2675str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2685 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2690 STR_SET_LEN(str,
len);
2695 oldptr = RSTRING_PTR(str);
2697 memcpy(
ptr, oldptr,
len);
2699 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2702 STR_SET_NOEMBED(str);
2703 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2704 TERM_FILL(
ptr +
len, termlen);
2706 STR_SET_LEN(str,
len);
2713 if (!str_independent(str))
2714 str_make_independent(str);
2723 int termlen = TERM_LEN(str);
2724 long len = RSTRING_LEN(str);
2727 rb_raise(rb_eArgError,
"negative expanding string size");
2729 if (expand >= LONG_MAX -
len) {
2730 rb_raise(rb_eArgError,
"string size too big");
2733 if (!str_independent(str)) {
2734 str_make_independent_expand(str,
len, expand, termlen);
2736 else if (expand > 0) {
2737 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2744str_modify_keep_cr(
VALUE str)
2746 if (!str_independent(str))
2747 str_make_independent(str);
2754str_discard(
VALUE str)
2756 str_modifiable(str);
2757 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2758 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2759 RSTRING(str)->as.heap.ptr = 0;
2760 STR_SET_LEN(str, 0);
2767 int encindex = rb_enc_get_index(str);
2769 if (RB_UNLIKELY(encindex == -1)) {
2773 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2778 if (!rb_enc_asciicompat(enc)) {
2800 return RSTRING_PTR(str);
2804zero_filled(
const char *s,
int n)
2806 for (; n > 0; --n) {
2813str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2815 const char *e = s +
len;
2817 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2818 if (zero_filled(s, minlen))
return s;
2824str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2829 if (str_dependent_p(str)) {
2830 if (!zero_filled(s +
len, termlen))
2831 str_make_independent_expand(str,
len, 0L, termlen);
2834 TERM_FILL(s +
len, termlen);
2837 return RSTRING_PTR(str);
2841rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2843 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2844 long len = RSTRING_LEN(str);
2848 rb_check_lockedtmp(str);
2849 str_make_independent_expand(str,
len, 0L, termlen);
2851 else if (str_dependent_p(str)) {
2852 if (termlen > oldtermlen)
2853 str_make_independent_expand(str,
len, 0L, termlen);
2856 if (!STR_EMBED_P(str)) {
2861 if (termlen > oldtermlen) {
2862 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2870str_null_check(
VALUE str,
int *w)
2872 char *s = RSTRING_PTR(str);
2873 long len = RSTRING_LEN(str);
2875 const int minlen = rb_enc_mbminlen(enc);
2879 if (str_null_char(s,
len, minlen, enc)) {
2882 return str_fill_term(str, s,
len, minlen);
2885 if (!s || memchr(s, 0,
len)) {
2889 s = str_fill_term(str, s,
len, minlen);
2895rb_str_to_cstr(
VALUE str)
2898 return str_null_check(str, &w);
2906 char *s = str_null_check(str, &w);
2909 rb_raise(rb_eArgError,
"string contains null char");
2911 rb_raise(rb_eArgError,
"string contains null byte");
2917rb_str_fill_terminator(
VALUE str,
const int newminlen)
2919 char *s = RSTRING_PTR(str);
2920 long len = RSTRING_LEN(str);
2921 return str_fill_term(str, s,
len, newminlen);
2927 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2953str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2962 else if (rb_enc_asciicompat(enc)) {
2963 const char *p2, *e2;
2966 while (p < e && 0 < nth) {
2973 p2 = search_nonascii(p, e2);
2982 n = rb_enc_mbclen(p, e, enc);
2993 while (p < e && nth--) {
2994 p += rb_enc_mbclen(p, e, enc);
3005 return str_nth_len(p, e, &nth, enc);
3009str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3014 p = str_nth_len(p, e, &nth, enc);
3023str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
3025 const char *pp = str_nth(p, e, nth, enc, singlebyte);
3026 if (!pp)
return e - p;
3033 return str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
3034 STR_ENC_GET(str), single_byte_optimizable(str));
3039str_utf8_nth(
const char *p,
const char *e,
long *nthp)
3042 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
3043 const uintptr_t *s, *t;
3044 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
3045 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
3046 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
3047 while (p < (
const char *)s) {
3048 if (is_utf8_lead_byte(*p)) nth--;
3052 nth -= count_utf8_lead_bytes_with_word(s);
3054 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
3058 if (is_utf8_lead_byte(*p)) {
3059 if (nth == 0)
break;
3069str_utf8_offset(
const char *p,
const char *e,
long nth)
3071 const char *pp = str_utf8_nth(p, e, &nth);
3080 if (single_byte_optimizable(str) || pos < 0)
3083 char *p = RSTRING_PTR(str);
3084 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3089str_subseq(
VALUE str,
long beg,
long len)
3097 const int termlen = TERM_LEN(str);
3098 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
3105 if (str_embed_capa(str2) >=
len + termlen) {
3106 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3107 STR_SET_EMBED(str2);
3108 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
3109 TERM_FILL(ptr2+
len, termlen);
3111 STR_SET_LEN(str2,
len);
3115 str_replace_shared(str2, str);
3118 RSTRING(str2)->as.heap.ptr += beg;
3119 if (RSTRING_LEN(str2) >
len) {
3120 STR_SET_LEN(str2,
len);
3130 VALUE str2 = str_subseq(str, beg,
len);
3131 rb_enc_cr_str_copy_for_substr(str2, str);
3140 const long blen = RSTRING_LEN(str);
3142 char *p, *s = RSTRING_PTR(str), *e = s + blen;
3144 if (
len < 0)
return 0;
3145 if (beg < 0 && -beg < 0)
return 0;
3149 if (single_byte_optimizable(str)) {
3150 if (beg > blen)
return 0;
3153 if (beg < 0)
return 0;
3155 if (
len > blen - beg)
3157 if (
len < 0)
return 0;
3162 if (
len > -beg)
len = -beg;
3166 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3169 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3175 slen = str_strlen(str, enc);
3177 if (beg < 0)
return 0;
3179 if (
len == 0)
goto end;
3182 else if (beg > 0 && beg > blen) {
3186 if (beg > str_strlen(str, enc))
return 0;
3191 enc == rb_utf8_encoding()) {
3192 p = str_utf8_nth(s, e, &beg);
3193 if (beg > 0)
return 0;
3194 len = str_utf8_offset(p, e,
len);
3200 p = s + beg * char_sz;
3204 else if (
len * char_sz > e - p)
3209 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3210 if (beg > 0)
return 0;
3214 len = str_offset(p, e,
len, enc, 0);
3222static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3227 return str_substr(str, beg,
len, TRUE);
3237str_substr(
VALUE str,
long beg,
long len,
int empty)
3241 if (!p)
return Qnil;
3242 if (!
len && !empty)
return Qnil;
3244 beg = p - RSTRING_PTR(str);
3246 VALUE str2 = str_subseq(str, beg,
len);
3247 rb_enc_cr_str_copy_for_substr(str2, str);
3255 if (CHILLED_STRING_P(str)) {
3260 rb_str_resize(str, RSTRING_LEN(str));
3278 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3321str_uminus(
VALUE str)
3326 return rb_fstring(str);
3330#define rb_str_dup_frozen rb_str_new_frozen
3335 rb_check_frozen(str);
3336 if (
FL_TEST(str, STR_TMPLOCK)) {
3339 FL_SET(str, STR_TMPLOCK);
3346 rb_check_frozen(str);
3347 if (!
FL_TEST(str, STR_TMPLOCK)) {
3367 const int termlen = TERM_LEN(str);
3369 str_modifiable(str);
3370 if (STR_SHARED_P(str)) {
3373 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3374 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3385 else if (
len > RSTRING_LEN(str)) {
3389 const char *
const new_end = RSTRING_PTR(str) +
len;
3399 else if (
len < RSTRING_LEN(str)) {
3407 STR_SET_LEN(str,
len);
3408 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3415 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3418 int independent = str_independent(str);
3419 long slen = RSTRING_LEN(str);
3420 const int termlen = TERM_LEN(str);
3422 if (slen >
len || (termlen != 1 && slen <
len)) {
3428 if (STR_EMBED_P(str)) {
3429 if (
len == slen)
return str;
3430 if (str_embed_capa(str) >=
len + termlen) {
3431 STR_SET_LEN(str,
len);
3435 str_make_independent_expand(str, slen,
len - slen, termlen);
3437 else if (str_embed_capa(str) >=
len + termlen) {
3438 char *
ptr = STR_HEAP_PTR(str);
3440 if (slen >
len) slen =
len;
3443 STR_SET_LEN(str,
len);
3444 if (independent) ruby_xfree(
ptr);
3447 else if (!independent) {
3448 if (
len == slen)
return str;
3449 str_make_independent_expand(str, slen,
len - slen, termlen);
3453 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3454 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3457 else if (
len == slen)
return str;
3458 STR_SET_LEN(str,
len);
3465str_ensure_available_capa(
VALUE str,
long len)
3467 str_modify_keep_cr(str);
3469 const int termlen = TERM_LEN(str);
3470 long olen = RSTRING_LEN(str);
3472 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3473 rb_raise(rb_eArgError,
"string sizes too big");
3476 long total = olen +
len;
3477 long capa = str_capacity(str, termlen);
3480 if (total >= LONG_MAX / 2) {
3483 while (total >
capa) {
3486 RESIZE_CAPA_TERM(str,
capa, termlen);
3491str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3494 str_modify_keep_cr(str);
3499 if (
len == 0)
return 0;
3501 long total, olen,
off = -1;
3503 const int termlen = TERM_LEN(str);
3506 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3510 long capa = str_capacity(str, termlen);
3512 if (olen > LONG_MAX -
len) {
3513 rb_raise(rb_eArgError,
"string sizes too big");
3517 if (total >= LONG_MAX / 2) {
3520 while (total >
capa) {
3523 RESIZE_CAPA_TERM(str,
capa, termlen);
3524 sptr = RSTRING_PTR(str);
3529 memcpy(sptr + olen,
ptr,
len);
3530 STR_SET_LEN(str, total);
3531 TERM_FILL(sptr + total, termlen);
3536#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3537#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3542 if (
len == 0)
return str;
3544 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3546 return str_buf_cat(str,
ptr,
len);
3557rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3562 if (UNLIKELY(!str_independent(str))) {
3563 str_make_independent(str);
3566 long string_length = -1;
3567 const int null_terminator_length = 1;
3572 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3573 rb_raise(rb_eArgError,
"string sizes too big");
3576 long string_capacity = str_capacity(str, null_terminator_length);
3582 if (LIKELY(string_capacity >= string_length + 1)) {
3584 sptr[string_length] = byte;
3585 STR_SET_LEN(str, string_length + 1);
3586 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3590 str_buf_cat(str, (
char *)&
byte, 1);
3606 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3617rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3618 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3627 if (str_encindex == ptr_encindex) {
3629 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3633 str_enc = rb_enc_from_index(str_encindex);
3634 ptr_enc = rb_enc_from_index(ptr_encindex);
3635 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3638 if (RSTRING_LEN(str) == 0) {
3641 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3647 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3656 *ptr_cr_ret = ptr_cr;
3658 if (str_encindex != ptr_encindex &&
3661 str_enc = rb_enc_from_index(str_encindex);
3662 ptr_enc = rb_enc_from_index(ptr_encindex);
3667 res_encindex = str_encindex;
3672 res_encindex = str_encindex;
3676 res_encindex = ptr_encindex;
3681 res_encindex = str_encindex;
3688 res_encindex = str_encindex;
3694 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3696 str_buf_cat(str,
ptr,
len);
3702 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3709 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3719 if (rb_enc_asciicompat(enc)) {
3720 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3726 unsigned int c = (
unsigned char)*
ptr;
3727 int len = rb_enc_codelen(c, enc);
3728 rb_enc_mbcput(c, buf, enc);
3729 rb_enc_cr_str_buf_cat(str, buf,
len,
3742 if (str_enc_fastpath(str)) {
3746 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3752 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3763 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3779rb_str_concat_literals(
size_t num,
const VALUE *strary)
3783 unsigned long len = 1;
3788 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3790 str_enc_copy_direct(str, strary[0]);
3792 for (i = s; i < num; ++i) {
3793 const VALUE v = strary[i];
3797 if (encidx != ENCINDEX_US_ASCII) {
3799 rb_enc_set_index(str, encidx);
3812rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3814 str_modifiable(str);
3819 else if (argc > 1) {
3822 rb_enc_copy(arg_str, str);
3823 for (i = 0; i < argc; i++) {
3858rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3860 long needed_capacity = 0;
3864 for (
int index = 0; index < argc; index++) {
3865 VALUE obj = argv[index];
3873 needed_capacity += RSTRING_LEN(obj);
3878 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3885 str_ensure_available_capa(str, needed_capacity);
3888 for (
int index = 0; index < argc; index++) {
3889 VALUE obj = argv[index];
3894 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3895 char byte = (char)(
NUM2INT(obj) & 0xFF);
3909 rb_bug(
"append_as_bytes arguments should have been validated");
3913 STR_SET_LEN(str, RSTRING_LEN(str) + needed_capacity);
3914 TERM_FILL(sptr, TERM_LEN(str));
3919 for (
int index = 0; index < argc; index++) {
3920 VALUE obj = argv[index];
3937 rb_bug(
"append_as_bytes arguments should have been validated");
4016 if (rb_num_to_uint(str2, &code) == 0) {
4029 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
4032 rb_str_buf_cat_byte(str1, (
unsigned char)code);
4035 long pos = RSTRING_LEN(str1);
4040 switch (
len = rb_enc_codelen(code, enc)) {
4041 case ONIGERR_INVALID_CODE_POINT_VALUE:
4042 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4044 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
4050 rb_enc_mbcput(code, buf, enc);
4051 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
4052 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
4054 rb_str_resize(str1, pos+
len);
4055 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
4068rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
4070 int encidx = rb_enc_to_index(enc);
4072 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
4077 if (encidx == ENCINDEX_US_ASCII && code > 127) {
4078 return ENCINDEX_ASCII_8BIT;
4100rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4102 str_modifiable(str);
4107 else if (argc > 1) {
4110 rb_enc_copy(arg_str, str);
4111 for (i = 0; i < argc; i++) {
4124 st_index_t precomputed_hash;
4125 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4127 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4128 return precomputed_hash;
4131 return str_do_hash(str);
4138 const char *ptr1, *ptr2;
4141 return (len1 != len2 ||
4143 memcmp(ptr1, ptr2, len1) != 0);
4155rb_str_hash_m(
VALUE str)
4161#define lesser(a,b) (((a)>(b))?(b):(a))
4169 if (RSTRING_LEN(str1) == 0)
return TRUE;
4170 if (RSTRING_LEN(str2) == 0)
return TRUE;
4173 if (idx1 == idx2)
return TRUE;
4178 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4182 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4192 const char *ptr1, *ptr2;
4195 if (str1 == str2)
return 0;
4198 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4207 if (len1 > len2)
return 1;
4210 if (retval > 0)
return 1;
4244 if (str1 == str2)
return Qtrue;
4251 return rb_str_eql_internal(str1, str2);
4265 if (str1 == str2)
return Qtrue;
4267 return rb_str_eql_internal(str1, str2);
4299 return rb_invcmp(str1, str2);
4341 return str_casecmp(str1, s);
4349 const char *p1, *p1end, *p2, *p2end;
4351 enc = rb_enc_compatible(str1, str2);
4356 p1 = RSTRING_PTR(str1); p1end =
RSTRING_END(str1);
4357 p2 = RSTRING_PTR(str2); p2end =
RSTRING_END(str2);
4358 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4359 while (p1 < p1end && p2 < p2end) {
4361 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4362 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4364 return INT2FIX(c1 < c2 ? -1 : 1);
4371 while (p1 < p1end && p2 < p2end) {
4372 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4373 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4375 if (0 <= c1 && 0 <= c2) {
4379 return INT2FIX(c1 < c2 ? -1 : 1);
4383 l1 = rb_enc_mbclen(p1, p1end, enc);
4384 l2 = rb_enc_mbclen(p2, p2end, enc);
4385 len = l1 < l2 ? l1 : l2;
4386 r = memcmp(p1, p2,
len);
4388 return INT2FIX(r < 0 ? -1 : 1);
4390 return INT2FIX(l1 < l2 ? -1 : 1);
4396 if (p1 == p1end && p2 == p2end)
return INT2FIX(0);
4397 if (p1 == p1end)
return INT2FIX(-1);
4430 return str_casecmp_p(str1, s);
4437 VALUE folded_str1, folded_str2;
4438 VALUE fold_opt = sym_fold;
4440 enc = rb_enc_compatible(str1, str2);
4445 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4446 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4448 return rb_str_eql(folded_str1, folded_str2);
4452strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4453 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
4455 const char *search_start = str_ptr;
4456 long pos, search_len = str_len - offset;
4460 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4461 if (pos < 0)
return pos;
4463 if (t == search_start + pos)
break;
4464 search_len -= t - search_start;
4465 if (search_len <= 0)
return -1;
4466 offset += t - search_start;
4469 return pos + offset;
4473#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4474#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4477rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4479 const char *str_ptr, *str_ptr_end, *sub_ptr;
4480 long str_len, sub_len;
4483 enc = rb_enc_check(str, sub);
4484 if (is_broken_string(sub))
return -1;
4486 str_ptr = RSTRING_PTR(str);
4488 str_len = RSTRING_LEN(str);
4489 sub_ptr = RSTRING_PTR(sub);
4490 sub_len = RSTRING_LEN(sub);
4492 if (str_len < sub_len)
return -1;
4495 long str_len_char, sub_len_char;
4496 int single_byte = single_byte_optimizable(str);
4497 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4498 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4500 offset += str_len_char;
4501 if (offset < 0)
return -1;
4503 if (str_len_char - offset < sub_len_char)
return -1;
4504 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4507 if (sub_len == 0)
return offset;
4510 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4523rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4530 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4531 long slen = str_strlen(str, enc);
4533 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4545 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4546 enc, single_byte_optimizable(str));
4557 pos = rb_str_index(str, sub, pos);
4571str_ensure_byte_pos(
VALUE str,
long pos)
4573 if (!single_byte_optimizable(str)) {
4574 const char *s = RSTRING_PTR(str);
4576 const char *p = s + pos;
4577 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4579 "offset %ld does not land on character boundary", pos);
4652rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4658 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4659 long slen = RSTRING_LEN(str);
4661 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4672 str_ensure_byte_pos(str, pos);
4684 pos = rb_str_byteindex(str, sub, pos);
4685 if (pos >= 0)
return LONG2NUM(pos);
4692memrchr(
const char *search_str,
int chr,
long search_len)
4694 const char *ptr = search_str + search_len;
4695 while (ptr > search_str) {
4696 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4706 char *hit, *adjusted;
4708 long slen, searchlen;
4711 sbeg = RSTRING_PTR(str);
4712 slen = RSTRING_LEN(sub);
4713 if (slen == 0)
return s - sbeg;
4715 t = RSTRING_PTR(sub);
4717 searchlen = s - sbeg + 1;
4719 if (memcmp(s, t, slen) == 0) {
4724 hit = memrchr(sbeg, c, searchlen);
4727 if (hit != adjusted) {
4728 searchlen = adjusted - sbeg;
4731 if (memcmp(hit, t, slen) == 0)
4733 searchlen = adjusted - sbeg;
4734 }
while (searchlen > 0);
4748 enc = rb_enc_check(str, sub);
4749 if (is_broken_string(sub))
return -1;
4750 singlebyte = single_byte_optimizable(str);
4751 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4752 slen = str_strlen(sub, enc);
4755 if (
len < slen)
return -1;
4756 if (
len - pos < slen) pos =
len - slen;
4757 if (
len == 0)
return pos;
4759 sbeg = RSTRING_PTR(str);
4762 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4768 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4769 return str_rindex(str, sub, s, enc);
4830rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4835 long pos,
len = str_strlen(str, enc);
4837 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4839 if (pos < 0 && (pos +=
len) < 0) {
4845 if (pos >
len) pos =
len;
4853 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4854 enc, single_byte_optimizable(str));
4865 pos = rb_str_rindex(str, sub, pos);
4875rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4881 enc = rb_enc_check(str, sub);
4882 if (is_broken_string(sub))
return -1;
4883 len = RSTRING_LEN(str);
4884 slen = RSTRING_LEN(sub);
4887 if (
len < slen)
return -1;
4888 if (
len - pos < slen) pos =
len - slen;
4889 if (
len == 0)
return pos;
4891 sbeg = RSTRING_PTR(str);
4894 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4901 return str_rindex(str, sub, s, enc);
4991rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4995 long pos,
len = RSTRING_LEN(str);
4997 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4999 if (pos < 0 && (pos +=
len) < 0) {
5005 if (pos >
len) pos =
len;
5011 str_ensure_byte_pos(str, pos);
5023 pos = rb_str_byterindex(str, sub, pos);
5024 if (pos >= 0)
return LONG2NUM(pos);
5063 switch (OBJ_BUILTIN_TYPE(y)) {
5117rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
5124 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5155rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5159 re = get_pat(argv[0]);
5160 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5169static enum neighbor_char
5175 if (rb_enc_mbminlen(enc) > 1) {
5177 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5179 return NEIGHBOR_NOT_CHAR;
5181 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5183 if (!l)
return NEIGHBOR_NOT_CHAR;
5184 if (l !=
len)
return NEIGHBOR_WRAPPED;
5185 rb_enc_mbcput(c, p, enc);
5186 r = rb_enc_precise_mbclen(p, p +
len, enc);
5188 return NEIGHBOR_NOT_CHAR;
5190 return NEIGHBOR_FOUND;
5193 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5196 return NEIGHBOR_WRAPPED;
5197 ++((
unsigned char*)p)[i];
5198 l = rb_enc_precise_mbclen(p, p+
len, enc);
5202 return NEIGHBOR_FOUND;
5205 memset(p+l, 0xff,
len-l);
5211 for (len2 =
len-1; 0 < len2; len2--) {
5212 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5216 memset(p+len2+1, 0xff,
len-(len2+1));
5221static enum neighbor_char
5226 if (rb_enc_mbminlen(enc) > 1) {
5228 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5230 return NEIGHBOR_NOT_CHAR;
5232 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5233 if (!c)
return NEIGHBOR_NOT_CHAR;
5236 if (!l)
return NEIGHBOR_NOT_CHAR;
5237 if (l !=
len)
return NEIGHBOR_WRAPPED;
5238 rb_enc_mbcput(c, p, enc);
5239 r = rb_enc_precise_mbclen(p, p +
len, enc);
5241 return NEIGHBOR_NOT_CHAR;
5243 return NEIGHBOR_FOUND;
5246 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5249 return NEIGHBOR_WRAPPED;
5250 --((
unsigned char*)p)[i];
5251 l = rb_enc_precise_mbclen(p, p+
len, enc);
5255 return NEIGHBOR_FOUND;
5258 memset(p+l, 0,
len-l);
5264 for (len2 =
len-1; 0 < len2; len2--) {
5265 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5269 memset(p+len2+1, 0,
len-(len2+1));
5283static enum neighbor_char
5284enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
5286 enum neighbor_char ret;
5290 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5294 const int max_gaps = 1;
5296 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5298 ctype = ONIGENC_CTYPE_DIGIT;
5300 ctype = ONIGENC_CTYPE_ALPHA;
5302 return NEIGHBOR_NOT_CHAR;
5305 for (
try = 0;
try <= max_gaps; ++
try) {
5306 ret = enc_succ_char(p,
len, enc);
5307 if (ret == NEIGHBOR_FOUND) {
5308 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5310 return NEIGHBOR_FOUND;
5317 ret = enc_pred_char(p,
len, enc);
5318 if (ret == NEIGHBOR_FOUND) {
5319 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5332 return NEIGHBOR_NOT_CHAR;
5335 if (ctype != ONIGENC_CTYPE_DIGIT) {
5337 return NEIGHBOR_WRAPPED;
5341 enc_succ_char(carry,
len, enc);
5342 return NEIGHBOR_WRAPPED;
5410 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5411 rb_enc_cr_str_copy_for_substr(str, orig);
5412 return str_succ(str);
5419 char *sbeg, *s, *e, *last_alnum = 0;
5420 int found_alnum = 0;
5422 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5423 long carry_pos = 0, carry_len = 1;
5424 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5426 slen = RSTRING_LEN(str);
5427 if (slen == 0)
return str;
5429 enc = STR_ENC_GET(str);
5430 sbeg = RSTRING_PTR(str);
5431 s = e = sbeg + slen;
5433 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5434 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5440 l = rb_enc_precise_mbclen(s, e, enc);
5441 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5442 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5443 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5445 case NEIGHBOR_NOT_CHAR:
5447 case NEIGHBOR_FOUND:
5449 case NEIGHBOR_WRAPPED:
5454 carry_pos = s - sbeg;
5459 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5460 enum neighbor_char neighbor;
5461 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5462 l = rb_enc_precise_mbclen(s, e, enc);
5463 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5464 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5466 neighbor = enc_succ_char(tmp, l, enc);
5468 case NEIGHBOR_FOUND:
5472 case NEIGHBOR_WRAPPED:
5475 case NEIGHBOR_NOT_CHAR:
5478 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5480 enc_succ_char(s, l, enc);
5482 if (!rb_enc_asciicompat(enc)) {
5483 MEMCPY(carry, s,
char, l);
5486 carry_pos = s - sbeg;
5490 RESIZE_CAPA(str, slen + carry_len);
5491 sbeg = RSTRING_PTR(str);
5492 s = sbeg + carry_pos;
5493 memmove(s + carry_len, s, slen - carry_pos);
5494 memmove(s, carry, carry_len);
5496 STR_SET_LEN(str, slen);
5497 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5511rb_str_succ_bang(
VALUE str)
5519all_digits_p(
const char *s,
long len)
5573 VALUE end, exclusive;
5577 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5583 VALUE current, after_end;
5590 enc = rb_enc_check(beg, end);
5591 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5593 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5594 char c = RSTRING_PTR(beg)[0];
5595 char e = RSTRING_PTR(end)[0];
5597 if (c > e || (excl && c == e))
return beg;
5599 VALUE str = rb_enc_str_new(&c, 1, enc);
5601 if ((*each)(str, arg))
break;
5602 if (!excl && c == e)
break;
5604 if (excl && c == e)
break;
5609 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5610 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5611 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5616 b = rb_str_to_inum(beg, 10, FALSE);
5617 e = rb_str_to_inum(end, 10, FALSE);
5624 if (excl && bi == ei)
break;
5625 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5630 ID op = excl ?
'<' : idLE;
5631 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5636 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5637 b = rb_funcallv(b, succ, 0, 0);
5644 if (n > 0 || (excl && n == 0))
return beg;
5646 after_end = rb_funcallv(end, succ, 0, 0);
5651 next = rb_funcallv(current, succ, 0, 0);
5652 if ((*each)(current, arg))
break;
5653 if (
NIL_P(next))
break;
5657 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5672 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5673 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5674 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5676 b = rb_str_to_inum(beg, 10, FALSE);
5682 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5690 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5691 b = rb_funcallv(b, succ, 0, 0);
5697 VALUE next = rb_funcallv(current, succ, 0, 0);
5698 if ((*each)(current, arg))
break;
5701 if (RSTRING_LEN(current) == 0)
5712 if (!
rb_equal(str, *argp))
return 0;
5726 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5727 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5728 rb_enc_asciicompat(STR_ENC_GET(val))) {
5729 const char *bp = RSTRING_PTR(beg);
5730 const char *ep = RSTRING_PTR(end);
5731 const char *vp = RSTRING_PTR(val);
5732 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5733 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5741 if (b <= v && v < e)
return Qtrue;
5742 return RBOOL(!
RTEST(exclusive) && v == e);
5749 all_digits_p(bp, RSTRING_LEN(beg)) &&
5750 all_digits_p(ep, RSTRING_LEN(end))) {
5755 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5757 return RBOOL(
NIL_P(val));
5780 return rb_str_subpat(str, indx,
INT2FIX(0));
5783 if (rb_str_index(str, indx, 0) != -1)
5789 long beg,
len = str_strlen(str, NULL);
5801 return str_substr(str, idx, 1, FALSE);
5820rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5824 return rb_str_subpat(str, argv[0], argv[1]);
5827 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5831 return rb_str_aref(str, argv[0]);
5837 char *ptr = RSTRING_PTR(str);
5838 long olen = RSTRING_LEN(str), nlen;
5840 str_modifiable(str);
5841 if (
len > olen)
len = olen;
5843 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5845 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5847 ptr =
RSTRING(str)->as.embed.ary;
5848 memmove(ptr, oldptr +
len, nlen);
5849 if (fl == STR_NOEMBED)
xfree(oldptr);
5852 if (!STR_SHARED_P(str)) {
5854 rb_enc_cr_str_exact_copy(shared, str);
5859 STR_SET_LEN(str, nlen);
5861 if (!SHARABLE_MIDDLE_SUBSTRING) {
5862 TERM_FILL(ptr + nlen, TERM_LEN(str));
5869rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5875 if (beg == 0 && vlen == 0) {
5880 str_modify_keep_cr(str);
5884 RESIZE_CAPA(str, slen + vlen -
len);
5885 sptr = RSTRING_PTR(str);
5894 memmove(sptr + beg + vlen,
5896 slen - (beg +
len));
5898 if (vlen < beg &&
len < 0) {
5902 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5905 STR_SET_LEN(str, slen);
5906 TERM_FILL(&sptr[slen], TERM_LEN(str));
5913 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5922 int singlebyte = single_byte_optimizable(str);
5928 enc = rb_enc_check(str, val);
5929 slen = str_strlen(str, enc);
5931 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5940 if (
len > slen - beg) {
5943 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5948 beg = p - RSTRING_PTR(str);
5950 rb_str_update_0(str, beg,
len, val);
5951 rb_enc_associate(str, enc);
5962 long start, end,
len;
5972 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5976 nth += regs->num_regs;
5986 enc = rb_enc_check_str(str, val);
5987 rb_str_update_0(str, start,
len, val);
5988 rb_enc_associate(str, enc);
5996 switch (
TYPE(indx)) {
5998 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
6002 beg = rb_str_index(str, indx, 0);
6057rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
6061 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
6069 return rb_str_aset(str, argv[0], argv[1]);
6119rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6127 str_modify_keep_cr(str);
6135 if ((nth += regs->num_regs) <= 0)
return Qnil;
6137 else if (nth >= regs->num_regs)
return Qnil;
6139 len = END(nth) - beg;
6142 else if (argc == 2) {
6151 beg = p - RSTRING_PTR(str);
6155 beg = rb_str_index(str, indx, 0);
6156 if (beg == -1)
return Qnil;
6157 len = RSTRING_LEN(indx);
6169 beg = p - RSTRING_PTR(str);
6178 beg = p - RSTRING_PTR(str);
6182 rb_enc_cr_str_copy_for_substr(result, str);
6190 char *sptr = RSTRING_PTR(str);
6191 long slen = RSTRING_LEN(str);
6192 if (beg +
len > slen)
6196 slen - (beg +
len));
6198 STR_SET_LEN(str, slen);
6199 TERM_FILL(&sptr[slen], TERM_LEN(str));
6210 switch (OBJ_BUILTIN_TYPE(pat)) {
6229get_pat_quoted(
VALUE pat,
int check)
6233 switch (OBJ_BUILTIN_TYPE(pat)) {
6247 if (check && is_broken_string(pat)) {
/*
 * Searches `str` for `pat` starting at `pos` and returns the position found.
 *
 * Two strategies are visible:
 *   - a plain byte search through rb_str_byteindex(), after which (when
 *     set_backref_str is non-zero) a frozen copy of `str` is registered as
 *     the back-reference via rb_backref_set_string() and the resulting match
 *     object is stored through `match`;
 *   - a regexp search delegated to rb_reg_search0().
 * The test that selects between them is not visible in this excerpt
 * (presumably the type of `pat` -- confirm).
 *
 * NOTE(review): rb_pat_search() passes NULL for `match`; the store through
 * `match` below is presumably guarded on a line not shown -- confirm.
 */
6254rb_pat_search0(
VALUE pat,
VALUE str,
long pos,
int set_backref_str,
VALUE *match)
6257 pos = rb_str_byteindex(str, pat, pos);
6258 if (set_backref_str) {
/* The back-reference is given a frozen string, not the caller's `str`. */
6260 str = rb_str_new_frozen_String(str);
/* The matched span is `pos` .. `pos + RSTRING_LEN(pat)`. */
6261 VALUE match_data = rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6263 *match = match_data;
6273 return rb_reg_search0(pat, str, pos, 0, set_backref_str, match);
/*
 * Convenience wrapper around rb_pat_search0() for callers that do not need
 * the match object: forwards pat, str, pos and set_backref_str unchanged and
 * passes NULL for the `match` out-parameter.  Returns whatever
 * rb_pat_search0() returns.
 */
6278rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6280 return rb_pat_search0(pat, str, pos, set_backref_str, NULL);
6299rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6313 hash = rb_check_hash_type(argv[1]);
6319 pat = get_pat_quoted(argv[0], 1);
6321 str_modifiable(str);
6322 beg = rb_pat_search(pat, str, 0, 1);
6336 end0 = beg0 + RSTRING_LEN(pat);
6345 if (iter || !
NIL_P(hash)) {
6346 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6352 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6355 str_mod_check(str, p,
len);
6356 rb_check_frozen(str);
6362 enc = rb_enc_compatible(str, repl);
6365 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6369 rb_enc_inspect_name(str_enc),
6370 rb_enc_inspect_name(STR_ENC_GET(repl)));
6372 enc = STR_ENC_GET(repl);
6375 rb_enc_associate(str, enc);
6385 rlen = RSTRING_LEN(repl);
6386 len = RSTRING_LEN(str);
6388 RESIZE_CAPA(str,
len + rlen - plen);
6390 p = RSTRING_PTR(str);
6392 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6394 rp = RSTRING_PTR(repl);
6395 memmove(p + beg0, rp, rlen);
6397 STR_SET_LEN(str,
len);
6398 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6427 rb_str_sub_bang(argc, argv, str);
6432str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6435 long beg, beg0, end0;
6436 long offset, blen, slen,
len, last;
6437 enum {STR, ITER, FAST_MAP, MAP} mode = STR;
6439 int need_backref_str = -1;
6449 hash = rb_check_hash_type(argv[1]);
6453 else if (rb_hash_default_unredefined(hash) && !
FL_TEST_RAW(hash, RHASH_PROC_DEFAULT)) {
6461 rb_error_arity(argc, 1, 2);
6464 pat = get_pat_quoted(argv[0], 1);
6465 beg = rb_pat_search0(pat, str, 0, need_backref_str, &match);
6468 if (bang)
return Qnil;
6473 blen = RSTRING_LEN(str) + 30;
6475 sp = RSTRING_PTR(str);
6476 slen = RSTRING_LEN(str);
6478 str_enc = STR_ENC_GET(str);
6479 rb_enc_associate(dest, str_enc);
6486 end0 = beg0 + RSTRING_LEN(pat);
6500 struct RString fake_str = {RBASIC_INIT};
6502 if (mode == FAST_MAP) {
6511 val = rb_hash_aref(hash, key);
6514 str_mod_check(str, sp, slen);
6519 else if (need_backref_str) {
6521 if (need_backref_str < 0) {
6522 need_backref_str = val != repl;
6529 len = beg0 - offset;
6543 if (RSTRING_LEN(str) <= end0)
break;
6544 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6546 offset = end0 +
len;
6548 cp = RSTRING_PTR(str) + offset;
6549 if (offset > RSTRING_LEN(str))
break;
6552 if (mode != FAST_MAP && mode != STR) {
6555 beg = rb_pat_search0(pat, str, offset, need_backref_str, &match);
6560 if (RSTRING_LEN(str) > offset) {
6563 rb_pat_search0(pat, str, last, 1, &match);
6565 str_shared_replace(str, dest);
/*
 * Destructive entry point for global substitution: calls
 * str_modify_keep_cr(str) first, then delegates to str_gsub() with the
 * `bang` argument set to 1 and returns its result.
 */
6590rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6592 str_modify_keep_cr(str);
6593 return str_gsub(argc, argv, str, 1);
6643 return str_gsub(argc, argv, str, 0);
6663 str_modifiable(str);
6664 if (str == str2)
return str;
6668 return str_replace(str, str2);
6685rb_str_clear(
VALUE str)
6689 STR_SET_LEN(str, 0);
6690 RSTRING_PTR(str)[0] = 0;
6691 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6707rb_str_chr(
VALUE str)
6725 pos += RSTRING_LEN(str);
6726 if (pos < 0 || RSTRING_LEN(str) <= pos)
6729 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6748 long len = RSTRING_LEN(str);
6749 char *
ptr, *head, *left = 0;
6753 if (pos < -
len ||
len <= pos)
6760 char byte = (char)(
NUM2INT(w) & 0xFF);
6762 if (!str_independent(str))
6763 str_make_independent(str);
6764 enc = STR_ENC_GET(str);
6765 head = RSTRING_PTR(str);
6767 if (!STR_EMBED_P(str)) {
6774 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6782 width = rb_enc_precise_mbclen(left, head+
len, enc);
6784 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6800str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6802 long n = RSTRING_LEN(str);
6804 if (beg > n ||
len < 0)
return Qnil;
6807 if (beg < 0)
return Qnil;
6812 if (!empty)
return Qnil;
6816 VALUE str2 = str_subseq(str, beg,
len);
6818 str_enc_copy_direct(str2, str);
6820 if (RSTRING_LEN(str2) == 0) {
6821 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6855 long beg,
len = RSTRING_LEN(str);
6863 return str_byte_substr(str, beg,
len, TRUE);
6868 return str_byte_substr(str, idx, 1, FALSE);
6880rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6885 return str_byte_substr(str, beg,
len, TRUE);
6888 return str_byte_aref(str, argv[0]);
6892str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6894 long end, slen = RSTRING_LEN(str);
6897 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6906 if (*
len > slen - *beg) {
6910 str_ensure_byte_pos(str, *beg);
6911 str_ensure_byte_pos(str, end);
6925rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6927 long beg,
len, vbeg, vlen;
6932 if (!(argc == 2 || argc == 3 || argc == 5)) {
6933 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6937 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6938 rb_builtin_class_name(argv[0]));
6945 vlen = RSTRING_LEN(val);
6950 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6951 rb_builtin_class_name(argv[2]));
6963 vlen = RSTRING_LEN(val);
6971 str_check_beg_len(str, &beg, &
len);
6972 str_check_beg_len(val, &vbeg, &vlen);
6973 str_modify_keep_cr(str);
6976 rb_enc_associate(str, rb_enc_check(str, val));
6979 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
7001rb_str_reverse(
VALUE str)
7008 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
7009 enc = STR_ENC_GET(str);
7015 if (RSTRING_LEN(str) > 1) {
7016 if (single_byte_optimizable(str)) {
7023 int clen = rb_enc_fast_mbclen(s, e, enc);
7031 cr = rb_enc_asciicompat(enc) ?
7034 int clen = rb_enc_mbclen(s, e, enc);
7043 STR_SET_LEN(rev, RSTRING_LEN(str));
7044 str_enc_copy_direct(rev, str);
7066rb_str_reverse_bang(
VALUE str)
7068 if (RSTRING_LEN(str) > 1) {
7069 if (single_byte_optimizable(str)) {
7072 str_modify_keep_cr(str);
7073 s = RSTRING_PTR(str);
7082 str_shared_replace(str, rb_str_reverse(str));
7086 str_modify_keep_cr(str);
7115 i = rb_str_index(str, arg, 0);
7117 return RBOOL(i != -1);
7159 rb_raise(rb_eArgError,
"invalid radix %d", base);
7161 return rb_str_to_inum(str, base, FALSE);
7185rb_str_to_f(
VALUE str)
7200rb_str_to_s(
VALUE str)
7212 char s[RUBY_MAX_CHAR_LEN];
7213 int n = rb_enc_codelen(c, enc);
7215 rb_enc_mbcput(c, s, enc);
/*
 * Size bound handed to snprintf() when a single character is formatted as an
 * escape sequence.  The scratch buffers that receive the output are declared
 * as char buf[CHAR_ESC_LEN + 1].
 */
7220#define CHAR_ESC_LEN 13
/*
 * Formats the code point `c` as text into a local scratch buffer, using one
 * of five spellings:
 *   "%c"       the character itself
 *   "\u%04X"   four-digit form, chosen when c < 0x10000
 *   "\u{%X}"   braced Unicode form
 *   "\x%02X"   two-digit hex form
 *   "\x{%X}"   braced hex form
 * and then measures the text with strlen().
 *
 * NOTE(review): most of the conditions that pick a spelling, and the code
 * that appends the text to `result`, are not visible in this excerpt.
 * `unicode_p` presumably selects the \u forms over the \x forms -- confirm.
 */
7223rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
/* One byte larger than the bound passed to snprintf() below. */
7225 char buf[CHAR_ESC_LEN + 1];
7233 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7235 else if (c < 0x10000) {
7236 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7239 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7244 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7247 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
/* Length of whichever spelling was written above. */
7250 l = (int)strlen(buf);
/*
 * Maps a control character to the backslash escape used to display it.
 * Visible mappings:
 *   NUL -> "\0"    LF  -> "\n"    CR  -> "\r"    TAB -> "\t"
 *   FF  -> "\f"    VT (013) -> "\v"   BS (010) -> "\b"
 *   BEL (007) -> "\a"   ESC (033) -> "\e"   DEL (0x7f) -> "\c?"
 * Each returned literal spells a backslash followed by printable characters
 * (the C source doubles the backslash), not the control byte itself.
 * The result for any other value of `c` is not visible in this excerpt.
 */
7256ruby_escaped_char(
int c)
7259 case '\0':
return "\\0";
7260 case '\n':
return "\\n";
7261 case '\r':
return "\\r";
7262 case '\t':
return "\\t";
7263 case '\f':
return "\\f";
7264 case '\013':
return "\\v";
7265 case '\010':
return "\\b";
7266 case '\007':
return "\\a";
7267 case '\033':
return "\\e";
7268 case '\x7f':
return "\\c?";
7274rb_str_escape(
VALUE str)
7278 const char *p = RSTRING_PTR(str);
7280 const char *prev = p;
7281 char buf[CHAR_ESC_LEN + 1];
7283 int unicode_p = rb_enc_unicode_p(enc);
7284 int asciicompat = rb_enc_asciicompat(enc);
7289 int n = rb_enc_precise_mbclen(p, pend, enc);
7291 if (p > prev) str_buf_cat(result, prev, p - prev);
7292 n = rb_enc_mbminlen(enc);
7294 n = (int)(pend - p);
7296 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7297 str_buf_cat(result, buf, strlen(buf));
7303 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7305 cc = ruby_escaped_char(c);
7307 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7308 str_buf_cat(result, cc, strlen(cc));
7311 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7314 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7315 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7319 if (p > prev) str_buf_cat(result, prev, p - prev);
7338 const char *p, *pend, *prev;
7339 char buf[CHAR_ESC_LEN + 1];
7341 rb_encoding *resenc = rb_default_internal_encoding();
7342 int unicode_p = rb_enc_unicode_p(enc);
7343 int asciicompat = rb_enc_asciicompat(enc);
7345 if (resenc == NULL) resenc = rb_default_external_encoding();
7346 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
7347 rb_enc_associate(result, resenc);
7348 str_buf_cat2(result,
"\"");
7356 n = rb_enc_precise_mbclen(p, pend, enc);
7358 if (p > prev) str_buf_cat(result, prev, p - prev);
7359 n = rb_enc_mbminlen(enc);
7361 n = (int)(pend - p);
7363 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7364 str_buf_cat(result, buf, strlen(buf));
7370 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7372 if ((asciicompat || unicode_p) &&
7373 (c ==
'"'|| c ==
'\\' ||
7378 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7379 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7380 str_buf_cat2(result,
"\\");
7381 if (asciicompat || enc == resenc) {
7387 case '\n': cc =
'n';
break;
7388 case '\r': cc =
'r';
break;
7389 case '\t': cc =
't';
break;
7390 case '\f': cc =
'f';
break;
7391 case '\013': cc =
'v';
break;
7392 case '\010': cc =
'b';
break;
7393 case '\007': cc =
'a';
break;
7394 case 033: cc =
'e';
break;
7395 default: cc = 0;
break;
7398 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7401 str_buf_cat(result, buf, 2);
7414 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7418 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7419 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7424 if (p > prev) str_buf_cat(result, prev, p - prev);
7425 str_buf_cat2(result,
"\"");
/*
 * True when `p` has not reached the end pointer `e` and the byte at `p` is
 * '$', '@' or '{'.  The dump code in this file tests it on the byte that
 * follows a '#' to decide whether that '#' needs a backslash in front of it.
 * `p` is expanded four times, so pass an expression without side effects.
 */
7430#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7443 int encidx = rb_enc_get_index(str);
7446 const char *p, *pend;
7449 int u8 = (encidx == rb_utf8_encindex());
7450 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7453 if (!rb_enc_asciicompat(enc)) {
7455 len += strlen(enc->name);
7458 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7461 unsigned char c = *p++;
7464 case '"':
case '\\':
7465 case '\n':
case '\r':
7466 case '\t':
case '\f':
7467 case '\013':
case '\010':
case '\007':
case '\033':
7472 clen = IS_EVSTR(p, pend) ? 2 : 1;
7480 if (u8 && c > 0x7F) {
7481 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7483 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7486 else if (cc <= 0xFFFFF)
7499 if (clen > LONG_MAX -
len) {
7506 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7507 q = RSTRING_PTR(result); qend = q +
len + 1;
7511 unsigned char c = *p++;
7513 if (c ==
'"' || c ==
'\\') {
7517 else if (c ==
'#') {
7518 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7521 else if (c ==
'\n') {
7525 else if (c ==
'\r') {
7529 else if (c ==
'\t') {
7533 else if (c ==
'\f') {
7537 else if (c ==
'\013') {
7541 else if (c ==
'\010') {
7545 else if (c ==
'\007') {
7549 else if (c ==
'\033') {
7559 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7561 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7564 snprintf(q, qend-q,
"u%04X", cc);
7566 snprintf(q, qend-q,
"u{%X}", cc);
7571 snprintf(q, qend-q,
"x%02X", c);
7577 if (!rb_enc_asciicompat(enc)) {
7578 snprintf(q, qend-q, nonascii_suffix, enc->name);
7579 encidx = rb_ascii8bit_encindex();
7582 rb_enc_associate_index(result, encidx);
7588unescape_ascii(
unsigned int c)
7612undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7614 const char *s = *ss;
7618 unsigned char buf[6];
7636 *buf = unescape_ascii(*s);
7648 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7649 if (*penc != enc_utf8) {
7651 rb_enc_associate(undumped, enc_utf8);
7668 if (hexlen == 0 || hexlen > 6) {
7674 if (0xd800 <= c && c <= 0xdfff) {
7677 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7687 if (0xd800 <= c && c <= 0xdfff) {
7690 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7718static VALUE rb_str_is_ascii_only_p(
VALUE str);
7736str_undump(
VALUE str)
7738 const char *s = RSTRING_PTR(str);
7741 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7743 bool binary =
false;
7747 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7750 if (!str_null_check(str, &w)) {
7753 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7754 if (*s !=
'"')
goto invalid_format;
7772 static const char force_encoding_suffix[] =
".force_encoding(\"";
7773 static const char dup_suffix[] =
".dup";
7774 const char *encname;
7779 size =
sizeof(dup_suffix) - 1;
7780 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7782 size =
sizeof(force_encoding_suffix) - 1;
7783 if (s_end - s <= size)
goto invalid_format;
7784 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7788 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7792 s = memchr(s,
'"', s_end-s);
7794 if (!s)
goto invalid_format;
7795 if (s_end - s != 2)
goto invalid_format;
7796 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7798 encidx = rb_enc_find_index2(encname, (
long)size);
7802 rb_enc_associate_index(undumped, encidx);
7812 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7823 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7829 if (rb_enc_dummy_p(enc)) {
7836str_true_enc(
VALUE str)
7839 rb_str_check_dummy_enc(enc);
/*
 * Folds the option symbols in argv[] into the case-mapping `flags` and
 * returns the updated value.
 *
 * Visible rules:
 *   sym_turkic      sets ONIGENC_CASE_FOLD_TURKISH_AZERI; an accompanying
 *                   second option must be sym_lithuanian
 *   sym_lithuanian  sets ONIGENC_CASE_FOLD_LITHUANIAN; an accompanying
 *                   second option must be sym_turkic
 *   sym_ascii       sets ONIGENC_CASE_ASCII_ONLY
 *   sym_fold        accepted only when `flags` has DOWNCASE set and UPCASE
 *                   clear
 * Anything else raises ArgumentError with one of the messages below.
 *
 * NOTE(review): the argc tests that guard each rb_raise, and the final
 * return, are not visible in this excerpt.
 */
7843static OnigCaseFoldType
7844check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7849 rb_raise(rb_eArgError,
"too many options");
/* :turkic first, optionally followed by :lithuanian. */
7850 if (argv[0]==sym_turkic) {
7851 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7853 if (argv[1]==sym_lithuanian)
7854 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7856 rb_raise(rb_eArgError,
"invalid second option");
/* :lithuanian first, optionally followed by :turkic. */
7859 else if (argv[0]==sym_lithuanian) {
7860 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7862 if (argv[1]==sym_turkic)
7863 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7865 rb_raise(rb_eArgError,
"invalid second option");
7869 rb_raise(rb_eArgError,
"too many options");
7870 else if (argv[0]==sym_ascii)
7871 flags |= ONIGENC_CASE_ASCII_ONLY;
7872 else if (argv[0]==sym_fold) {
/* DOWNCASE is known to be set here, so the XOR clears it; it also toggles
 * ONIGENC_CASE_FOLD (turning it on if it was not already set). */
7873 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7874 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7876 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7879 rb_raise(rb_eArgError,
"invalid option");
7886 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/* Fixed slack, in bytes, added to the capacity computed for every
 * case-mapping buffer. */
7892#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/* Debug switch for the case-mapping code: defaults to 0 unless defined
 * beforehand; when non-zero the mapping routines print diagnostics to
 * stderr. */
7893#ifndef CASEMAP_DEBUG
7894# define CASEMAP_DEBUG 0
7902 OnigUChar space[FLEX_ARY_LEN];
/*
 * Frees every node of a chain of mapping buffers linked through `next`.
 * The successor is read before the node is released, so the walk never
 * touches freed memory.
 *
 * NOTE(review): the size reported to ruby_sized_xfree() is the node's `capa`
 * field, which elsewhere in this file bounds the `space` payload only
 * (space + capa is used as the end pointer).  Confirm that this is the size
 * the allocator expects for the whole node.
 */
7906mapping_buffer_free(
void *p)
7910 while (current_buffer) {
7911 previous_buffer = current_buffer;
7912 current_buffer = current_buffer->next;
7913 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7919 {0, mapping_buffer_free,},
7920 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
/*
 * Body of the general, encoding-driven case-mapping routine.  Its signature
 * is outside this excerpt; callers in this file invoke
 * rb_str_casemap(str, &flags, enc), which is presumably this function
 * (confirm).
 *
 * The input is converted chunk by chunk into a chain of mapping buffers via
 * enc->case_map(), then the chunks are joined into `target`:
 *   - one buffer:      target is built directly from that buffer's space
 *   - several buffers: each buffer's `used` bytes are copied in list order
 * An empty source short-circuits to str_duplicate(rb_cString, source).
 * A negative result from enc->case_map() frees the chain and raises
 * ArgumentError ("input string invalid").
 */
7928 const OnigUChar *source_current, *source_end;
7929 int target_length = 0;
7930 VALUE buffer_anchor;
7933 size_t buffer_count = 0;
7934 int buffer_length_or_invalid;
7936 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7938 source_current = (OnigUChar*)RSTRING_PTR(source);
/* One iteration per mapping buffer, until all input has been consumed. */
7943 while (source_current < source_end) {
/* Capacity: remaining input bytes times the (pre-incremented) buffer count,
 * plus a fixed slack, so every additional buffer is sized more generously. */
7945 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7946 if (CASEMAP_DEBUG) {
7947 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
/* Append the new buffer to the chain: store it through the tail link, then
 * make the tail link point at this node's own `next` field. */
7950 *pre_buffer = current_buffer;
7951 pre_buffer = &current_buffer->next;
7952 current_buffer->next = NULL;
7953 current_buffer->capa =
capa;
7954 buffer_length_or_invalid = enc->case_map(flags,
7955 &source_current, source_end,
7956 current_buffer->space,
7957 current_buffer->space+current_buffer->capa,
/* Negative means the encoding rejected the input: release the chain, then
 * raise. */
7959 if (buffer_length_or_invalid < 0) {
7960 current_buffer =
DATA_PTR(buffer_anchor);
7962 mapping_buffer_free(current_buffer);
7963 rb_raise(rb_eArgError,
"input string invalid");
/* Record the bytes produced in this buffer and add them to the total. */
7965 target_length += current_buffer->used = buffer_length_or_invalid;
7967 if (CASEMAP_DEBUG) {
7968 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
/* Single buffer: its contents are the whole result. */
7971 if (buffer_count==1) {
7972 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7975 char *target_current;
/* Several buffers: concatenate their used portions in chain order. */
7978 target_current = RSTRING_PTR(target);
7979 current_buffer =
DATA_PTR(buffer_anchor);
7980 while (current_buffer) {
7981 memcpy(target_current, current_buffer->space, current_buffer->used);
7982 target_current += current_buffer->used;
7983 current_buffer = current_buffer->next;
/* The chain reachable from the anchor is released here. */
7986 current_buffer =
DATA_PTR(buffer_anchor);
7988 mapping_buffer_free(current_buffer);
/* The result takes its encoding information from the source string. */
7993 str_enc_copy_direct(target, source);
/*
 * Body of the ASCII-only case-mapping routine (named rb_str_ascii_casemap in
 * its own diagnostics; the signature is outside this excerpt).
 *
 * Maps `source` into `target` with onigenc_ascii_only_case_map().  When
 * source and target are the same object the mapping is done in place.
 * Returns Qnil immediately for an empty source.  A negative result from the
 * mapper raises ArgumentError ("input string invalid").
 */
8002 const OnigUChar *source_current, *source_end;
8003 OnigUChar *target_current, *target_end;
8004 long old_length = RSTRING_LEN(source);
8005 int length_or_invalid;
8007 if (old_length == 0)
return Qnil;
8009 source_current = (OnigUChar*)RSTRING_PTR(source);
/* In-place case: the output range is the input range itself. */
8011 if (source == target) {
8012 target_current = (OnigUChar*)source_current;
8013 target_end = (OnigUChar*)source_end;
8016 target_current = (OnigUChar*)RSTRING_PTR(target);
8020 length_or_invalid = onigenc_ascii_only_case_map(flags,
8021 &source_current, source_end,
8022 target_current, target_end, enc);
8023 if (length_or_invalid < 0)
8024 rb_raise(rb_eArgError,
"input string invalid");
/* Debug-only sanity check: the mapped length is expected to equal the
 * original length; a mismatch is reported and raised as an error. */
8025 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
8026 fprintf(stderr,
"problem with rb_str_ascii_casemap"
8027 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
8028 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
8029 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
8032 str_enc_copy(target, source);
/*
 * Upcases `str` in place one byte at a time: a byte in 'a'..'z' is replaced
 * by the corresponding byte in 'A'..'Z'; other bytes are not touched by the
 * visible code.  Bytes are read as unsigned char so values >= 0x80 never
 * compare as negative.
 *
 * NOTE(review): the loop header and the code that sets and returns
 * `modified` are not visible in this excerpt; callers in this file treat the
 * return value as "something changed".
 */
8038upcase_single(
VALUE str)
8040 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8041 bool modified =
false;
8044 unsigned int c = *(
unsigned char*)s;
8046 if (
'a' <= c && c <=
'z') {
8047 *s =
'A' + (c -
'a');
/*
 * In-place upcase.  Starts from ONIGENC_CASE_UPCASE, lets
 * check_case_options() merge the caller's options, then picks one of three
 * implementations:
 *   1. case_option_single_p() true -> byte-wise upcase_single()
 *   2. ASCII-only flag set         -> rb_str_ascii_casemap() onto itself
 *   3. otherwise                   -> rb_str_casemap(), whose result
 *                                     replaces the receiver's contents
 * Returns `str` when ONIGENC_CASE_MODIFIED ended up set in `flags`; the
 * value returned otherwise is not visible in this excerpt.
 */
8075rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
8078 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8080 flags = check_case_options(argc, argv, flags);
/* Options are validated before the receiver is prepared for modification. */
8081 str_modify_keep_cr(str);
8082 enc = str_true_enc(str);
8083 if (case_option_single_p(flags, enc, str)) {
8084 if (upcase_single(str))
8085 flags |= ONIGENC_CASE_MODIFIED;
8087 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8088 rb_str_ascii_casemap(str, str, &flags, enc);
8090 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8092 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8114rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
8117 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
8120 flags = check_case_options(argc, argv, flags);
8121 enc = str_true_enc(str);
8122 if (case_option_single_p(flags, enc, str)) {
8123 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8124 str_enc_copy_direct(ret, str);
8127 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8129 rb_str_ascii_casemap(str, ret, &flags, enc);
8132 ret = rb_str_casemap(str, &flags, enc);
/*
 * Downcases `str` in place one byte at a time: a byte in 'A'..'Z' is
 * replaced by the corresponding byte in 'a'..'z'; other bytes are not
 * touched by the visible code.  Bytes are read as unsigned char so values
 * >= 0x80 never compare as negative.
 *
 * NOTE(review): the loop header and the code that sets and returns
 * `modified` are not visible in this excerpt; callers in this file treat the
 * return value as "something changed".
 */
8139downcase_single(
VALUE str)
8141 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8142 bool modified =
false;
8145 unsigned int c = *(
unsigned char*)s;
8147 if (
'A' <= c && c <=
'Z') {
8148 *s =
'a' + (c -
'A');
/*
 * In-place downcase.  Starts from ONIGENC_CASE_DOWNCASE, lets
 * check_case_options() merge the caller's options, then picks one of three
 * implementations:
 *   1. case_option_single_p() true -> byte-wise downcase_single()
 *   2. ASCII-only flag set         -> rb_str_ascii_casemap() onto itself
 *   3. otherwise                   -> rb_str_casemap(), whose result
 *                                     replaces the receiver's contents
 * Returns `str` when ONIGENC_CASE_MODIFIED ended up set in `flags`; the
 * value returned otherwise is not visible in this excerpt.
 */
8170rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8173 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8175 flags = check_case_options(argc, argv, flags);
/* Options are validated before the receiver is prepared for modification. */
8176 str_modify_keep_cr(str);
8177 enc = str_true_enc(str);
8178 if (case_option_single_p(flags, enc, str)) {
8179 if (downcase_single(str))
8180 flags |= ONIGENC_CASE_MODIFIED;
8182 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8183 rb_str_ascii_casemap(str, str, &flags, enc);
8185 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8187 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8201rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8204 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8207 flags = check_case_options(argc, argv, flags);
8208 enc = str_true_enc(str);
8209 if (case_option_single_p(flags, enc, str)) {
8210 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8211 str_enc_copy_direct(ret, str);
8212 downcase_single(ret);
8214 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8216 rb_str_ascii_casemap(str, ret, &flags, enc);
8219 ret = rb_str_casemap(str, &flags, enc);
8239rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8242 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8244 flags = check_case_options(argc, argv, flags);
8245 str_modify_keep_cr(str);
8246 enc = str_true_enc(str);
8247 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8248 if (flags&ONIGENC_CASE_ASCII_ONLY)
8249 rb_str_ascii_casemap(str, str, &flags, enc);
8251 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8253 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8286rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8289 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8292 flags = check_case_options(argc, argv, flags);
8293 enc = str_true_enc(str);
8294 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8295 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8297 rb_str_ascii_casemap(str, ret, &flags, enc);
8300 ret = rb_str_casemap(str, &flags, enc);
8327rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8330 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8332 flags = check_case_options(argc, argv, flags);
8333 str_modify_keep_cr(str);
8334 enc = str_true_enc(str);
8335 if (flags&ONIGENC_CASE_ASCII_ONLY)
8336 rb_str_ascii_casemap(str, str, &flags, enc);
8338 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8340 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8364rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8367 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8370 flags = check_case_options(argc, argv, flags);
8371 enc = str_true_enc(str);
8372 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8373 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8375 rb_str_ascii_casemap(str, ret, &flags, enc);
8378 ret = rb_str_casemap(str, &flags, enc);
8383typedef unsigned char *USTR;
8387 unsigned int now, max;
8399 if (t->p == t->pend)
return -1;
8400 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8403 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8405 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8407 if (t->p < t->pend) {
8408 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8411 if (t->now < 0x80 && c < 0x80) {
8412 rb_raise(rb_eArgError,
8413 "invalid range \"%c-%c\" in string transliteration",
8417 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8421 else if (t->now < c) {
8430 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8431 if (t->now == t->max) {
8436 if (t->now < t->max) {
8452 const unsigned int errc = -1;
8453 unsigned int trans[256];
8455 struct tr trsrc, trrepl;
8457 unsigned int c, c0, last = 0;
8458 int modify = 0, i, l;
8459 unsigned char *s, *send;
8461 int singlebyte = single_byte_optimizable(str);
8465#define CHECK_IF_ASCII(c) \
8466 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8467 (cr = ENC_CODERANGE_VALID) : 0)
8471 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8472 if (RSTRING_LEN(repl) == 0) {
8473 return rb_str_delete_bang(1, &src, str);
8477 e1 = rb_enc_check(str, src);
8478 e2 = rb_enc_check(str, repl);
8483 enc = rb_enc_check(src, repl);
8485 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8486 if (RSTRING_LEN(src) > 1 &&
8487 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8488 trsrc.p + l < trsrc.pend) {
8492 trrepl.p = RSTRING_PTR(repl);
8493 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8494 trsrc.gen = trrepl.gen = 0;
8495 trsrc.now = trrepl.now = 0;
8496 trsrc.max = trrepl.max = 0;
8499 for (i=0; i<256; i++) {
8502 while ((c = trnext(&trsrc, enc)) != errc) {
8507 if (!hash) hash = rb_hash_new();
8511 while ((c = trnext(&trrepl, enc)) != errc)
8514 for (i=0; i<256; i++) {
8515 if (trans[i] != errc) {
8523 for (i=0; i<256; i++) {
8526 while ((c = trnext(&trsrc, enc)) != errc) {
8527 r = trnext(&trrepl, enc);
8528 if (r == errc) r = trrepl.now;
8531 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8534 if (!hash) hash = rb_hash_new();
8542 str_modify_keep_cr(str);
8543 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8544 termlen = rb_enc_mbminlen(enc);
8547 long offset, max = RSTRING_LEN(str);
8548 unsigned int save = -1;
8549 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8554 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8557 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8560 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8562 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8571 if (cflag) c = last;
8574 else if (cflag) c = errc;
8580 if (c != (
unsigned int)-1) {
8586 tlen = rb_enc_codelen(c, enc);
8592 if (enc != e1) may_modify = 1;
8594 if ((offset = t - buf) + tlen > max) {
8595 size_t MAYBE_UNUSED(old) = max + termlen;
8596 max = offset + tlen + (send - s);
8597 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8600 rb_enc_mbcput(c, t, enc);
8601 if (may_modify && memcmp(s, t, tlen) != 0) {
8607 if (!STR_EMBED_P(str)) {
8608 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8610 TERM_FILL((
char *)t, termlen);
8611 RSTRING(str)->as.heap.ptr = (
char *)buf;
8612 STR_SET_LEN(str, t - buf);
8613 STR_SET_NOEMBED(str);
8614 RSTRING(str)->as.heap.aux.capa = max;
8618 c = (
unsigned char)*s;
8619 if (trans[c] != errc) {
8636 long offset, max = (long)((send - s) * 1.2);
8637 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8642 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8645 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8648 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8650 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8658 if (cflag) c = last;
8661 else if (cflag) c = errc;
8665 c = cflag ? last : errc;
8668 tlen = rb_enc_codelen(c, enc);
8673 if (enc != e1) may_modify = 1;
8675 if ((offset = t - buf) + tlen > max) {
8676 size_t MAYBE_UNUSED(old) = max + termlen;
8677 max = offset + tlen + (long)((send - s) * 1.2);
8678 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8682 rb_enc_mbcput(c, t, enc);
8683 if (may_modify && memcmp(s, t, tlen) != 0) {
8691 if (!STR_EMBED_P(str)) {
8692 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8694 TERM_FILL((
char *)t, termlen);
8695 RSTRING(str)->as.heap.ptr = (
char *)buf;
8696 STR_SET_LEN(str, t - buf);
8697 STR_SET_NOEMBED(str);
8698 RSTRING(str)->as.heap.aux.capa = max;
8704 rb_enc_associate(str, enc);
8723 return tr_trans(str, src, repl, 0);
8770 tr_trans(str, src, repl, 0);
/* Number of per-byte entries in a tr lookup table: one for every value an
 * unsigned char can hold. */
8774#define TR_TABLE_MAX (UCHAR_MAX+1)
/* Full table size: the per-byte entries plus one extra slot at index
 * TR_TABLE_MAX.  That slot holds the table-wide flag that tr_setup_table()
 * stores and that tr_find() returns for code points the table and hashes do
 * not settle. */
8775#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8777tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8780 const unsigned int errc = -1;
8781 char buf[TR_TABLE_MAX];
8784 VALUE table = 0, ptable = 0;
8785 int i, l, cflag = 0;
8787 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8788 tr.gen =
tr.now =
tr.max = 0;
8790 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8795 for (i=0; i<TR_TABLE_MAX; i++) {
8798 stable[TR_TABLE_MAX] = cflag;
8800 else if (stable[TR_TABLE_MAX] && !cflag) {
8801 stable[TR_TABLE_MAX] = 0;
8803 for (i=0; i<TR_TABLE_MAX; i++) {
8807 while ((c = trnext(&
tr, enc)) != errc) {
8808 if (c < TR_TABLE_MAX) {
8809 buf[(
unsigned char)c] = !cflag;
8814 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8817 table = ptable ? ptable : rb_hash_new();
8821 table = rb_hash_new();
8826 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8827 rb_hash_aset(table, key,
Qtrue);
8831 for (i=0; i<TR_TABLE_MAX; i++) {
8832 stable[i] = stable[i] && buf[i];
8834 if (!table && !cflag) {
/*
 * Decides whether code point `c` belongs to the character set described by
 * `table` and the optional hashes `del` and `nodel`.
 *
 *   - c < TR_TABLE_MAX: answered directly from table[c].
 *   - larger code points: looked up in `del` and `nodel` (a zero `nodel`
 *     means "no exclusion hash"); when neither branch below settles it, the
 *     extra slot table[TR_TABLE_MAX] supplies the answer.
 *
 * NOTE(review): the construction of the lookup key `v`, the guard around the
 * hash lookups and the return values inside the two hash branches are not
 * visible in this excerpt.
 */
8841tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8843 if (c < TR_TABLE_MAX) {
8844 return table[c] != 0;
/* Present in `del` and not present in `nodel`. */
8850 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8851 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
/* Present in `nodel`. */
8855 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8858 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8873rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8875 char squeez[TR_TABLE_SIZE];
8878 VALUE del = 0, nodel = 0;
8880 int i, ascompat, cr;
8882 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8884 for (i=0; i<argc; i++) {
8888 enc = rb_enc_check(str, s);
8889 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8892 str_modify_keep_cr(str);
8893 ascompat = rb_enc_asciicompat(enc);
8894 s = t = RSTRING_PTR(str);
8901 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8912 c = rb_enc_codepoint_len(s, send, &clen, enc);
8914 if (tr_find(c, squeez, del, nodel)) {
8918 if (t != s) rb_enc_mbcput(c, t, enc);
8925 TERM_FILL(t, TERM_LEN(str));
8926 STR_SET_LEN(str, t - RSTRING_PTR(str));
8929 if (modify)
return str;
8943rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8946 rb_str_delete_bang(argc, argv, str);
8960rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8962 char squeez[TR_TABLE_SIZE];
8964 VALUE del = 0, nodel = 0;
8965 unsigned char *s, *send, *t;
8967 int ascompat, singlebyte = single_byte_optimizable(str);
8971 enc = STR_ENC_GET(str);
8974 for (i=0; i<argc; i++) {
8978 enc = rb_enc_check(str, s);
8979 if (singlebyte && !single_byte_optimizable(s))
8981 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8985 str_modify_keep_cr(str);
8986 s = t = (
unsigned char *)RSTRING_PTR(str);
8987 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8990 ascompat = rb_enc_asciicompat(enc);
8994 unsigned int c = *s++;
8995 if (c != save || (argc > 0 && !squeez[c])) {
9005 if (ascompat && (c = *s) < 0x80) {
9006 if (c != save || (argc > 0 && !squeez[c])) {
9012 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
9014 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
9015 if (t != s) rb_enc_mbcput(c, t, enc);
9024 TERM_FILL((
char *)t, TERM_LEN(str));
9025 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
9026 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
9030 if (modify)
return str;
9053rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
9056 rb_str_squeeze_bang(argc, argv, str);
9074 return tr_trans(str, src, repl, 1);
9097 tr_trans(str, src, repl, 1);
9110rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9112 char table[TR_TABLE_SIZE];
9114 VALUE del = 0, nodel = 0, tstr;
9124 enc = rb_enc_check(str, tstr);
9127 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9128 (ptstr = RSTRING_PTR(tstr),
9129 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9130 !is_broken_string(str)) {
9132 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9134 s = RSTRING_PTR(str);
9135 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9138 if (*(
unsigned char*)s++ == c) n++;
9144 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9145 for (i=1; i<argc; i++) {
9148 enc = rb_enc_check(str, tstr);
9149 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9152 s = RSTRING_PTR(str);
9153 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9155 ascompat = rb_enc_asciicompat(enc);
9159 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9167 c = rb_enc_codepoint_len(s, send, &clen, enc);
9168 if (tr_find(c, table, del, nodel)) {
9179rb_fs_check(
VALUE val)
9183 if (
NIL_P(val))
return 0;
/*
 * 256-entry lookup table indexed by byte value.  An entry is 1 for the
 * ASCII whitespace bytes 9..13 (\t, \n, \v, \f, \r) and 32 (space), and 0
 * for every other byte, including NUL and all bytes >= 0x80.
 */
9188static const char isspacetable[256] = {
9189 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9191 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9192 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9193 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9194 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9195 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9197 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9198 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9199 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9201 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9202 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9204 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Table-driven whitespace test: evaluates to the isspacetable entry for `c`.
 * The (unsigned char) cast keeps the index non-negative when `c` is a
 * signed char holding a byte >= 0x80.
 */
9207#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9210split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9212 if (empty_count >= 0 &&
len == 0) {
9213 return empty_count + 1;
9215 if (empty_count > 0) {
9220 }
while (--empty_count > 0);
9224 rb_yield(str_new_empty_String(str));
9225 }
while (--empty_count > 0);
9239 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9243literal_split_pattern(
VALUE spat, split_type_t default_type)
9251 return SPLIT_TYPE_CHARS;
9253 else if (rb_enc_asciicompat(enc)) {
9254 if (
len == 1 && ptr[0] ==
' ') {
9255 return SPLIT_TYPE_AWK;
9260 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9261 return SPLIT_TYPE_AWK;
9264 return default_type;
9277rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9282 split_type_t split_type;
9283 long beg, end, i = 0, empty_count = -1;
9288 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9290 if (lim <= 0) limit =
Qnil;
9291 else if (lim == 1) {
9292 if (RSTRING_LEN(str) == 0)
9303 if (
NIL_P(limit) && !lim) empty_count = 0;
9305 enc = STR_ENC_GET(str);
9306 split_type = SPLIT_TYPE_REGEXP;
9308 spat = get_pat_quoted(spat, 0);
9310 else if (
NIL_P(spat = rb_fs)) {
9311 split_type = SPLIT_TYPE_AWK;
9313 else if (!(spat = rb_fs_check(spat))) {
9314 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9319 if (split_type != SPLIT_TYPE_AWK) {
9324 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9325 if (split_type == SPLIT_TYPE_AWK) {
9327 split_type = SPLIT_TYPE_STRING;
9332 mustnot_broken(spat);
9333 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/*
 * Emits one split piece: passes (beg, len) to split_string() together with
 * the enclosing function's `result`, `str` and `empty_count`, stores the
 * returned value back into `empty_count`, and then calls
 * str_mod_check(str, str_start, str_len).  The expansion is a single
 * parenthesized comma expression, so it can be used as one statement.
 * `result`, `str`, `empty_count`, `str_start` and `str_len` must all be in
 * scope where the macro is expanded.
 * NOTE(review): str_mod_check presumably detects that `str` was changed
 * while the piece was being emitted -- confirm against its definition.
 */
9341#define SPLIT_STR(beg, len) ( \
9342 empty_count = split_string(result, str, beg, len, empty_count), \
9343 str_mod_check(str, str_start, str_len))
9346 char *ptr = RSTRING_PTR(str);
9347 char *
const str_start = ptr;
9348 const long str_len = RSTRING_LEN(str);
9349 char *
const eptr = str_start + str_len;
9350 if (split_type == SPLIT_TYPE_AWK) {
9357 if (is_ascii_string(str)) {
9358 while (ptr < eptr) {
9359 c = (
unsigned char)*ptr++;
9361 if (ascii_isspace(c)) {
9367 if (!
NIL_P(limit) && lim <= i)
break;
9370 else if (ascii_isspace(c)) {
9371 SPLIT_STR(beg, end-beg);
9374 if (!
NIL_P(limit)) ++i;
9382 while (ptr < eptr) {
9385 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9394 if (!
NIL_P(limit) && lim <= i)
break;
9398 SPLIT_STR(beg, end-beg);
9401 if (!
NIL_P(limit)) ++i;
9409 else if (split_type == SPLIT_TYPE_STRING) {
9410 char *substr_start = ptr;
9411 char *sptr = RSTRING_PTR(spat);
9412 long slen = RSTRING_LEN(spat);
9415 mustnot_broken(str);
9416 enc = rb_enc_check(str, spat);
9417 while (ptr < eptr &&
9418 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9421 if (t != ptr + end) {
9425 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9426 str_mod_check(spat, sptr, slen);
9429 if (!
NIL_P(limit) && lim <= ++i)
break;
9431 beg = ptr - str_start;
9433 else if (split_type == SPLIT_TYPE_CHARS) {
9437 mustnot_broken(str);
9438 enc = rb_enc_get(str);
9439 while (ptr < eptr &&
9440 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9441 SPLIT_STR(ptr - str_start, n);
9443 if (!
NIL_P(limit) && lim <= ++i)
break;
9445 beg = ptr - str_start;
9449 long len = RSTRING_LEN(str);
9457 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9462 if (start == end && BEG(0) == END(0)) {
9467 else if (last_null == 1) {
9468 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9475 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9481 SPLIT_STR(beg, end-beg);
9482 beg = start = END(0);
9486 for (idx=1; idx < regs->num_regs; idx++) {
9487 if (BEG(idx) == -1)
continue;
9488 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9490 if (!
NIL_P(limit) && lim <= ++i)
break;
9492 if (match) rb_match_unbusy(match);
9494 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9495 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9498 return result ? result : str;
9508 return rb_str_split_m(1, &sep, str);
/*
 * Evaluates to rb_ary_new_capa(size) when rb_block_given_p() is false, and
 * to 0 when a block is given.  The `m` argument (a method-name string at
 * the call sites, e.g. "lines", "bytes") is not used by the expansion.
 */
9511#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
/*
 * Shorthand for enumerator_element(ary, e).  Some callers test the result
 * before calling str_mod_check().
 * NOTE(review): the meaning of the return value is defined by
 * enumerator_element(), which is not visible here -- confirm.
 */
9526#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9529chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9531 const char *prev = rb_enc_prev_char(p, e, e, enc);
9534 prev = rb_enc_prev_char(p, e, e, enc);
9535 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9547 RSTRING_LEN(rs) != 1 ||
9548 RSTRING_PTR(rs)[0] !=
'\n')) {
/* From this point on, the identifier rb_rs expands to a call to get_rs(). */
9554#define rb_rs get_rs()
9561 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9562 long pos,
len, rslen;
9568 static ID keywords[1];
9573 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9577 if (!ENUM_ELEM(ary, str)) {
9585 if (!RSTRING_LEN(str))
goto end;
9587 ptr = subptr = RSTRING_PTR(str);
9589 len = RSTRING_LEN(str);
9591 rslen = RSTRING_LEN(rs);
9594 enc = rb_enc_get(str);
9596 enc = rb_enc_check(str, rs);
9601 const char *eol = NULL;
9603 while (subend < pend) {
9604 long chomp_rslen = 0;
9606 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9608 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9610 if (eol == subend)
break;
9614 chomp_rslen = -rslen;
9618 if (!subptr) subptr = subend;
9622 }
while (subend < pend);
9624 if (rslen == 0) chomp_rslen = 0;
9626 subend - subptr + (chomp ? chomp_rslen : rslen));
9627 if (ENUM_ELEM(ary, line)) {
9628 str_mod_check(str, ptr,
len);
9630 subptr = eol = NULL;
9635 rsptr = RSTRING_PTR(rs);
9636 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9645 rsptr = RSTRING_PTR(rs);
9646 rslen = RSTRING_LEN(rs);
9649 while (subptr < pend) {
9650 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9654 if (hit != adjusted) {
9658 subend = hit += rslen;
9661 subend = chomp_newline(subptr, subend, enc);
9668 if (ENUM_ELEM(ary, line)) {
9669 str_mod_check(str, ptr,
len);
9674 if (subptr != pend) {
9677 pend = chomp_newline(subptr, pend, enc);
9679 else if (pend - subptr >= rslen &&
9680 memcmp(pend - rslen, rsptr, rslen) == 0) {
9685 ENUM_ELEM(ary, line);
9706rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9709 return rb_str_enumerate_lines(argc, argv, str, 0);
9764rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9766 VALUE ary = WANTARRAY(
"lines", 0);
9767 return rb_str_enumerate_lines(argc, argv, str, ary);
9781 for (i=0; i<RSTRING_LEN(str); i++) {
9782 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9800rb_str_each_byte(
VALUE str)
9803 return rb_str_enumerate_bytes(str, 0);
9815rb_str_bytes(
VALUE str)
9817 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9818 return rb_str_enumerate_bytes(str, ary);
9836 ptr = RSTRING_PTR(str);
9837 len = RSTRING_LEN(str);
9838 enc = rb_enc_get(str);
9841 for (i = 0; i <
len; i += n) {
9842 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9847 for (i = 0; i <
len; i += n) {
9848 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9869rb_str_each_char(
VALUE str)
9872 return rb_str_enumerate_chars(str, 0);
9884rb_str_chars(
VALUE str)
9887 return rb_str_enumerate_chars(str, ary);
9891rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9896 const char *ptr, *end;
9899 if (single_byte_optimizable(str))
9900 return rb_str_enumerate_bytes(str, ary);
9903 ptr = RSTRING_PTR(str);
9905 enc = STR_ENC_GET(str);
9908 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9929rb_str_each_codepoint(
VALUE str)
9932 return rb_str_enumerate_codepoints(str, 0);
9944rb_str_codepoints(
VALUE str)
9947 return rb_str_enumerate_codepoints(str, ary);
9953 int encidx = rb_enc_to_index(enc);
9955 const OnigUChar source_ascii[] =
"\\X";
9956 const OnigUChar *source = source_ascii;
9957 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Helpers that expand to comma-separated OnigUChar initializer elements
 * for one code unit `x`:
 *   CHARS_16BE: (x >> 8), then x
 *   CHARS_16LE: x, then (x >> 8)
 *   CHARS_32BE: CHARS_16BE of (x >> 16), then CHARS_16BE of x
 *   CHARS_32LE: CHARS_16LE of x, then CHARS_16LE of (x >> 16)
 * Each element is cast to OnigUChar; this assumes OnigUChar is an 8-bit
 * type so that the cast keeps only the low byte -- TODO confirm.
 * The expansions are intentionally not wrapped in parentheses because they
 * are spliced into a brace initializer list.
 */
9960#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9961#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9962#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9963#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9964#define CASE_UTF(e) \
9965 case ENCINDEX_UTF_##e: { \
9966 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9967 source = source_UTF_##e; \
9968 source_len = sizeof(source_UTF_##e); \
9971 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9979 regex_t *reg_grapheme_cluster;
9981 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9982 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9984 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9985 onig_error_code_to_str(message, r, &einfo);
9986 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9989 return reg_grapheme_cluster;
9995 int encidx = rb_enc_to_index(enc);
9996 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9998 if (encidx == rb_utf8_encindex()) {
9999 if (!reg_grapheme_cluster_utf8) {
10000 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
10003 return reg_grapheme_cluster_utf8;
10012 size_t grapheme_cluster_count = 0;
10014 const char *ptr, *end;
10016 if (!rb_enc_unicode_p(enc)) {
10020 bool cached_reg_grapheme_cluster =
true;
10021 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
10022 if (!reg_grapheme_cluster) {
10023 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
10024 cached_reg_grapheme_cluster =
false;
10027 ptr = RSTRING_PTR(str);
10030 while (ptr < end) {
10031 OnigPosition
len = onig_match(reg_grapheme_cluster,
10032 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
10033 (
const OnigUChar *)ptr, NULL, 0);
10034 if (
len <= 0)
break;
10035 grapheme_cluster_count++;
10039 if (!cached_reg_grapheme_cluster) {
10040 onig_free(reg_grapheme_cluster);
10043 return SIZET2NUM(grapheme_cluster_count);
10047rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
10051 const char *ptr0, *ptr, *end;
10053 if (!rb_enc_unicode_p(enc)) {
10054 return rb_str_enumerate_chars(str, ary);
10059 bool cached_reg_grapheme_cluster =
true;
10060 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
10061 if (!reg_grapheme_cluster) {
10062 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
10063 cached_reg_grapheme_cluster =
false;
10066 ptr0 = ptr = RSTRING_PTR(str);
10069 while (ptr < end) {
10070 OnigPosition
len = onig_match(reg_grapheme_cluster,
10071 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
10072 (
const OnigUChar *)ptr, NULL, 0);
10073 if (
len <= 0)
break;
10078 if (!cached_reg_grapheme_cluster) {
10079 onig_free(reg_grapheme_cluster);
10099rb_str_each_grapheme_cluster(
VALUE str)
10102 return rb_str_enumerate_grapheme_clusters(str, 0);
10114rb_str_grapheme_clusters(
VALUE str)
10117 return rb_str_enumerate_grapheme_clusters(str, ary);
10121chopped_length(
VALUE str)
10124 const char *p, *p2, *beg, *end;
10126 beg = RSTRING_PTR(str);
10127 end = beg + RSTRING_LEN(str);
10128 if (beg >= end)
return 0;
10129 p = rb_enc_prev_char(beg, end, end, enc);
10131 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10132 p2 = rb_enc_prev_char(beg, p, end, enc);
10133 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10151rb_str_chop_bang(
VALUE str)
10153 str_modify_keep_cr(str);
10154 if (RSTRING_LEN(str) > 0) {
10156 len = chopped_length(str);
10157 STR_SET_LEN(str,
len);
10158 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10177rb_str_chop(
VALUE str)
10183smart_chomp(
VALUE str,
const char *e,
const char *p)
10186 if (rb_enc_mbminlen(enc) > 1) {
10191 pp = e - rb_enc_mbminlen(enc);
10194 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10202 if (--e > p && *(e-1) ==
'\r') {
10219 char *pp, *e, *rsptr;
10221 char *
const p = RSTRING_PTR(str);
10222 long len = RSTRING_LEN(str);
10224 if (
len == 0)
return 0;
10227 return smart_chomp(str, e, p);
10230 enc = rb_enc_get(str);
10233 if (rb_enc_mbminlen(enc) > 1) {
10238 pp -= rb_enc_mbminlen(enc);
10241 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10248 while (e > p && *(e-1) ==
'\n') {
10250 if (e > p && *(e-1) ==
'\r')
10256 if (rslen >
len)
return len;
10258 enc = rb_enc_get(rs);
10259 newline = rsptr[rslen-1];
10260 if (rslen == rb_enc_mbminlen(enc)) {
10262 if (newline ==
'\n')
10263 return smart_chomp(str, e, p);
10267 return smart_chomp(str, e, p);
10271 enc = rb_enc_check(str, rs);
10272 if (is_broken_string(rs)) {
10276 if (p[
len-1] == newline &&
10278 memcmp(rsptr, pp, rslen) == 0)) {
10279 if (at_char_boundary(p, pp, e, enc))
10280 return len - rslen;
10292chomp_rs(
int argc,
const VALUE *argv)
10296 VALUE rs = argv[0];
10308 long olen = RSTRING_LEN(str);
10309 long len = chompped_length(str, rs);
10310 if (
len >= olen)
return Qnil;
10311 str_modify_keep_cr(str);
10312 STR_SET_LEN(str,
len);
10313 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10333rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10336 str_modifiable(str);
10337 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10338 rs = chomp_rs(argc, argv);
10340 return rb_str_chomp_string(str, rs);
10353rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10355 VALUE rs = chomp_rs(argc, argv);
10363 const char *
const start = s;
10365 if (!s || s >= e)
return 0;
10368 if (single_byte_optimizable(str)) {
10369 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10374 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10396rb_str_lstrip_bang(
VALUE str)
10400 long olen, loffset;
10402 str_modify_keep_cr(str);
10403 enc = STR_ENC_GET(str);
10405 loffset = lstrip_offset(str, start, start+olen, enc);
10407 long len = olen-loffset;
10408 s = start + loffset;
10409 memmove(start, s,
len);
10410 STR_SET_LEN(str,
len);
10411 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10435rb_str_lstrip(
VALUE str)
10440 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10441 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10450 rb_str_check_dummy_enc(enc);
10454 if (!s || s >= e)
return 0;
10458 if (single_byte_optimizable(str)) {
10460 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10465 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10485rb_str_rstrip_bang(
VALUE str)
10489 long olen, roffset;
10491 str_modify_keep_cr(str);
10492 enc = STR_ENC_GET(str);
10494 roffset = rstrip_offset(str, start, start+olen, enc);
10496 long len = olen - roffset;
10498 STR_SET_LEN(str,
len);
10499 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10522rb_str_rstrip(
VALUE str)
10526 long olen, roffset;
10528 enc = STR_ENC_GET(str);
10530 roffset = rstrip_offset(str, start, start+olen, enc);
10532 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10548rb_str_strip_bang(
VALUE str)
10551 long olen, loffset, roffset;
10554 str_modify_keep_cr(str);
10555 enc = STR_ENC_GET(str);
10557 loffset = lstrip_offset(str, start, start+olen, enc);
10558 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10560 if (loffset > 0 || roffset > 0) {
10561 long len = olen-roffset;
10564 memmove(start, start + loffset,
len);
10566 STR_SET_LEN(str,
len);
10567 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10590rb_str_strip(
VALUE str)
10593 long olen, loffset, roffset;
10597 loffset = lstrip_offset(str, start, start+olen, enc);
10598 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10600 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10605scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10608 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10614 end = pos + RSTRING_LEN(pat);
10628 if (RSTRING_LEN(str) > end)
10629 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10638 if (!regs || regs->num_regs == 1) {
10644 for (
int i = 1; i < regs->num_regs; i++) {
10705 long last = -1, prev = 0;
10706 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10708 pat = get_pat_quoted(pat, 1);
10709 mustnot_broken(str);
10713 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10718 if (last >= 0) rb_pat_search(pat, str, last, 1);
10723 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10727 str_mod_check(str, p,
len);
10729 if (last >= 0) rb_pat_search(pat, str, last, 1);
10781rb_str_hex(
VALUE str)
10783 return rb_str_to_inum(str, 16, FALSE);
10867rb_str_oct(
VALUE str)
10869 return rb_str_to_inum(str, -8, FALSE);
10872#ifndef HAVE_CRYPT_R
10877 rb_nativethread_lock_t lock;
10878} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10947# define CRYPT_END() ALLOCV_END(databuf)
10950 extern char *crypt(
const char *,
const char *);
10951# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10954 const char *s, *saltp;
10957 char salt_8bit_clean[3];
10961 mustnot_wchar(str);
10962 mustnot_wchar(salt);
10964 saltp = RSTRING_PTR(salt);
10965 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10966 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10970 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10971 salt_8bit_clean[0] = saltp[0] & 0x7f;
10972 salt_8bit_clean[1] = saltp[1] & 0x7f;
10973 salt_8bit_clean[2] =
'\0';
10974 saltp = salt_8bit_clean;
10979# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10980 data->initialized = 0;
10982 res = crypt_r(s, saltp, data);
10985 res = crypt(s, saltp);
11000 size_t res_size = strlen(res)+1;
11001 tmp_buf =
ALLOCA_N(
char, res_size);
11002 memcpy(tmp_buf, res, res_size);
11039 char *ptr, *p, *pend;
11042 unsigned long sum0 = 0;
11047 ptr = p = RSTRING_PTR(str);
11048 len = RSTRING_LEN(str);
11054 str_mod_check(str, ptr,
len);
11057 sum0 += (
unsigned char)*p;
11068 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
11069 sum0 &= (((
unsigned long)1)<<bits)-1;
11089rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
11093 long width,
len, flen = 1, fclen = 1;
11096 const char *f =
" ";
11097 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
11099 int singlebyte = 1, cr;
11103 enc = STR_ENC_GET(str);
11104 termlen = rb_enc_mbminlen(enc);
11108 enc = rb_enc_check(str, pad);
11109 f = RSTRING_PTR(pad);
11110 flen = RSTRING_LEN(pad);
11111 fclen = str_strlen(pad, enc);
11112 singlebyte = single_byte_optimizable(pad);
11113 if (flen == 0 || fclen == 0) {
11114 rb_raise(rb_eArgError,
"zero width padding");
11117 len = str_strlen(str, enc);
11118 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
11120 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
11124 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
11125 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
11127 size = RSTRING_LEN(str);
11128 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
11129 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
11130 (
len += llen2 + rlen2) >= LONG_MAX - size) {
11131 rb_raise(rb_eArgError,
"argument too big");
11135 p = RSTRING_PTR(res);
11137 memset(p, *f, llen);
11141 while (llen >= fclen) {
11147 memcpy(p, f, llen2);
11151 memcpy(p, RSTRING_PTR(str), size);
11154 memset(p, *f, rlen);
11158 while (rlen >= fclen) {
11164 memcpy(p, f, rlen2);
11168 TERM_FILL(p, termlen);
11169 STR_SET_LEN(res, p-RSTRING_PTR(res));
11190rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
11192 return rb_str_justify(argc, argv, str,
'l');
11206rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
11208 return rb_str_justify(argc, argv, str,
'r');
11221rb_str_center(
int argc,
VALUE *argv,
VALUE str)
11223 return rb_str_justify(argc, argv, str,
'c');
11239 sep = get_pat_quoted(sep, 0);
11251 pos = rb_str_index(str, sep, 0);
11252 if (pos < 0)
goto failed;
11257 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11260 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11274 long pos = RSTRING_LEN(str);
11276 sep = get_pat_quoted(sep, 0);
11289 pos = rb_str_rindex(str, sep, pos);
11298 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11300 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11312rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11316 for (i=0; i<argc; i++) {
11317 VALUE tmp = argv[i];
11319 if (rb_reg_start_with_p(tmp, str))
11323 const char *p, *s, *e;
11328 enc = rb_enc_check(str, tmp);
11329 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11330 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11331 p = RSTRING_PTR(str);
11334 if (!at_char_right_boundary(p, s, e, enc))
11336 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11352rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11356 for (i=0; i<argc; i++) {
11357 VALUE tmp = argv[i];
11358 const char *p, *s, *e;
11363 enc = rb_enc_check(str, tmp);
11364 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11365 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11366 p = RSTRING_PTR(str);
11369 if (!at_char_boundary(p, s, e, enc))
11371 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11387deleted_prefix_length(
VALUE str,
VALUE prefix)
11389 const char *strptr, *prefixptr;
11390 long olen, prefixlen;
11395 if (!is_broken_string(prefix) ||
11396 !rb_enc_asciicompat(enc) ||
11397 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11398 enc = rb_enc_check(str, prefix);
11402 prefixlen = RSTRING_LEN(prefix);
11403 if (prefixlen <= 0)
return 0;
11404 olen = RSTRING_LEN(str);
11405 if (olen < prefixlen)
return 0;
11406 strptr = RSTRING_PTR(str);
11407 prefixptr = RSTRING_PTR(prefix);
11408 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11409 if (is_broken_string(prefix)) {
11410 if (!is_broken_string(str)) {
11414 const char *strend = strptr + olen;
11415 const char *after_prefix = strptr + prefixlen;
11416 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11437rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11440 str_modify_keep_cr(str);
11442 prefixlen = deleted_prefix_length(str, prefix);
11443 if (prefixlen <= 0)
return Qnil;
11457rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11461 prefixlen = deleted_prefix_length(str, prefix);
11462 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11464 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11477deleted_suffix_length(
VALUE str,
VALUE suffix)
11479 const char *strptr, *suffixptr;
11480 long olen, suffixlen;
11484 if (is_broken_string(suffix))
return 0;
11485 enc = rb_enc_check(str, suffix);
11488 suffixlen = RSTRING_LEN(suffix);
11489 if (suffixlen <= 0)
return 0;
11490 olen = RSTRING_LEN(str);
11491 if (olen < suffixlen)
return 0;
11492 strptr = RSTRING_PTR(str);
11493 suffixptr = RSTRING_PTR(suffix);
11494 const char *strend = strptr + olen;
11495 const char *before_suffix = strend - suffixlen;
11496 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11497 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11513rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11515 long olen, suffixlen,
len;
11516 str_modifiable(str);
11518 suffixlen = deleted_suffix_length(str, suffix);
11519 if (suffixlen <= 0)
return Qnil;
11521 olen = RSTRING_LEN(str);
11522 str_modify_keep_cr(str);
11523 len = olen - suffixlen;
11524 STR_SET_LEN(str,
len);
11525 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11541rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11545 suffixlen = deleted_suffix_length(str, suffix);
11546 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11548 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11555 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11563 val = rb_fs_check(val);
11566 "value of %"PRIsVALUE
" must be String or Regexp",
11570 rb_warn_deprecated(
"'$;'", NULL);
11587 str_modifiable(str);
11590 int idx = rb_enc_to_index(encoding);
11597 rb_enc_associate_index(str, idx);
11621 if (STR_EMBED_P(str)) {
11622 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11627 str_replace_shared_without_enc(str2, str);
11629 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11662rb_str_valid_encoding_p(
VALUE str)
11682rb_str_is_ascii_only_p(
VALUE str)
11692 static const char ellipsis[] =
"...";
11693 const long ellipsislen =
sizeof(ellipsis) - 1;
11695 const long blen = RSTRING_LEN(str);
11696 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11697 VALUE estr, ret = 0;
11700 if (
len * rb_enc_mbminlen(enc) >= blen ||
11704 else if (
len <= ellipsislen ||
11706 if (rb_enc_asciicompat(enc)) {
11708 rb_enc_associate(ret, enc);
11715 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11720 rb_enc_from_encoding(enc), 0,
Qnil);
11733 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11739 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11758 if (enc == STR_ENC_GET(str)) {
11763 return enc_str_scrub(enc, str, repl, cr);
11771 const char *rep, *p, *e, *p1, *sp;
11777 rb_raise(rb_eArgError,
"both of block and replacement given");
11784 if (!
NIL_P(repl)) {
11785 repl = str_compat_and_valid(repl, enc);
11788 if (rb_enc_dummy_p(enc)) {
11791 encidx = rb_enc_to_index(enc);
11793#define DEFAULT_REPLACE_CHAR(str) do { \
11794 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11795 rep = replace; replen = (int)sizeof(replace); \
11798 slen = RSTRING_LEN(str);
11799 p = RSTRING_PTR(str);
11804 if (rb_enc_asciicompat(enc)) {
11810 else if (!
NIL_P(repl)) {
11811 rep = RSTRING_PTR(repl);
11812 replen = RSTRING_LEN(repl);
11815 else if (encidx == rb_utf8_encindex()) {
11816 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11820 DEFAULT_REPLACE_CHAR(
"?");
11825 p = search_nonascii(p, e);
11830 int ret = rb_enc_precise_mbclen(p, e, enc);
11849 if (e - p < clen) clen = e - p;
11856 for (; clen > 1; clen--) {
11857 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11868 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11869 str_mod_check(str, sp, slen);
11870 repl = str_compat_and_valid(repl, enc);
11877 p = search_nonascii(p, e);
11903 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11904 str_mod_check(str, sp, slen);
11905 repl = str_compat_and_valid(repl, enc);
11914 long mbminlen = rb_enc_mbminlen(enc);
11918 else if (!
NIL_P(repl)) {
11919 rep = RSTRING_PTR(repl);
11920 replen = RSTRING_LEN(repl);
11922 else if (encidx == ENCINDEX_UTF_16BE) {
11923 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11925 else if (encidx == ENCINDEX_UTF_16LE) {
11926 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11928 else if (encidx == ENCINDEX_UTF_32BE) {
11929 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11931 else if (encidx == ENCINDEX_UTF_32LE) {
11932 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11935 DEFAULT_REPLACE_CHAR(
"?");
11939 int ret = rb_enc_precise_mbclen(p, e, enc);
11952 if (e - p < clen) clen = e - p;
11953 if (clen <= mbminlen * 2) {
11958 for (; clen > mbminlen; clen-=mbminlen) {
11959 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11969 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11970 str_mod_check(str, sp, slen);
11971 repl = str_compat_and_valid(repl, enc);
11996 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11997 str_mod_check(str, sp, slen);
11998 repl = str_compat_and_valid(repl, enc);
12034str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
/*
 * File-scope state used by the Unicode normalization entry points:
 * id_normalize and id_normalized_p are the method IDs handed to
 * unicode_normalize_common(), and mUnicodeNormalize is the receiver of the
 * rb_funcallv() call made there.
 * NOTE(review): where these are initialized is not visible in this part of
 * the file -- confirm.
 */
12042static ID id_normalize;
12043static ID id_normalized_p;
12044static VALUE mUnicodeNormalize;
12047unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
12049 static int UnicodeNormalizeRequired = 0;
12052 if (!UnicodeNormalizeRequired) {
12053 rb_require(
"unicode_normalize/normalize.rb");
12054 UnicodeNormalizeRequired = 1;
12058 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
12095rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
12097 return unicode_normalize_common(argc, argv, str, id_normalize);
12111rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
12113 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
12140rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
12142 return unicode_normalize_common(argc, argv, str, id_normalized_p);
/* sym_equal is an alias for rb_obj_equal; no separate implementation exists. */
12274#define sym_equal rb_obj_equal
12277sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
12281 int c = rb_enc_precise_mbclen(s, send, enc);
12285 c = rb_enc_mbc_to_codepoint(s, send, enc);
12293rb_str_symname_p(
VALUE sym)
12298 rb_encoding *resenc = rb_default_internal_encoding();
12300 if (resenc == NULL) resenc = rb_default_external_encoding();
12301 enc = STR_ENC_GET(sym);
12302 ptr = RSTRING_PTR(sym);
12303 len = RSTRING_LEN(sym);
12304 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12312rb_str_quote_unprintable(
VALUE str)
12320 resenc = rb_default_internal_encoding();
12321 if (resenc == NULL) resenc = rb_default_external_encoding();
12322 enc = STR_ENC_GET(str);
12323 ptr = RSTRING_PTR(str);
12324 len = RSTRING_LEN(str);
12325 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12326 !sym_printable(ptr, ptr +
len, enc)) {
12327 return rb_str_escape(str);
12333rb_id_quote_unprintable(
ID id)
12335 VALUE str = rb_id2str(
id);
12336 if (!rb_str_symname_p(str)) {
12337 return rb_str_escape(str);
12355sym_inspect(
VALUE sym)
12362 if (!rb_str_symname_p(str)) {
12364 len = RSTRING_LEN(str);
12365 rb_str_resize(str,
len + 1);
12366 dest = RSTRING_PTR(str);
12367 memmove(dest + 1, dest,
len);
12371 VALUE orig_str = str;
12373 len = RSTRING_LEN(orig_str);
12374 str = rb_enc_str_new(0,
len + 1, enc);
12377 ptr = RSTRING_PTR(orig_str);
12378 dest = RSTRING_PTR(str);
12379 memcpy(dest + 1, ptr,
len);
12399rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12404 rb_raise(rb_eArgError,
"no receiver given");
12501 return rb_str_match(
rb_sym2str(sym), other);
12516sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12518 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12531sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12533 return rb_str_match_m_p(argc, argv, sym);
12551 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12562sym_length(
VALUE sym)
12576sym_empty(
VALUE sym)
12610sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12626sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12642sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12656sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12658 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12671sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12673 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12685sym_encoding(
VALUE sym)
12691string_for_symbol(
VALUE name)
12696 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12710 name = string_for_symbol(name);
12711 return rb_intern_str(name);
12720 name = string_for_symbol(name);
12744 return rb_fstring(str);
12750 struct RString fake_str = {RBASIC_INIT};
12751 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12763 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12764 rb_enc_autoload(enc);
12767 struct RString fake_str = {RBASIC_INIT};
12768 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
false);
12774 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12775 rb_enc_autoload(enc);
12778 struct RString fake_str = {RBASIC_INIT};
12779 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc),
true,
true);
12790rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12795 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12796 rb_str_buf_cat_byte(str, (
char) code);
12806fstring_set_class_i(
VALUE *str,
void *data)
12810 return ST_CONTINUE;
12818 rb_concurrent_set_foreach_with_replace(fstring_table_obj, fstring_set_class_i, NULL);
12985 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy; the check is performed only when RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Undefines the named method in a class.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords, if any, to the #initialize method.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the given array.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary".
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "binary".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Define a function-backended global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into the default external encoding.
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@50 as
String's specific fields.
struct RString::@50::@51 heap
Strings that use separated memory region for contents use this pattern.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@50::@52 embed
Embedded contents.
long len
Length of the string, not including terminating NUL character.
union RString::@50::@51::@53 aux
Auxiliary info.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.