Ruby  3.4.0dev (2024-12-06 revision 892c46283a5ea4179500d951c9d4866c0051f27b)
string.h
Go to the documentation of this file.
1 #ifndef RBIMPL_INTERN_STRING_H /*-*-C++-*-vi:se ft=cpp:*/
2 #define RBIMPL_INTERN_STRING_H
23 #include "ruby/internal/config.h"
24 
25 #ifdef STDC_HEADERS
26 # include <stddef.h>
27 #endif
28 
29 #ifdef HAVE_STRING_H
30 # include <string.h>
31 #endif
32 
33 #ifdef HAVE_STDINT_H
34 # include <stdint.h>
35 #endif
36 
42 #include "ruby/internal/value.h"
43 #include "ruby/internal/variable.h" /* rb_gvar_setter_t */
44 #include "ruby/st.h" /* st_index_t */
45 
47 
48 /* string.c */
49 
50 
63 VALUE rb_str_new(const char *ptr, long len);
64 
76 VALUE rb_str_new_cstr(const char *ptr);
77 
91 
103 
123 VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len);
124 
143 VALUE rb_external_str_new(const char *ptr, long len);
144 
164 VALUE rb_external_str_new_cstr(const char *ptr);
165 
186 VALUE rb_locale_str_new(const char *ptr, long len);
187 
207 VALUE rb_locale_str_new_cstr(const char *ptr);
208 
229 VALUE rb_filesystem_str_new(const char *ptr, long len);
230 
251 
262 
278 VALUE rb_str_buf_new_cstr(const char *ptr);
279 
288 VALUE rb_str_tmp_new(long len);
289 
303 VALUE rb_usascii_str_new(const char *ptr, long len);
304 
318 VALUE rb_usascii_str_new_cstr(const char *ptr);
319 
331 VALUE rb_utf8_str_new(const char *ptr, long len);
332 
346 VALUE rb_utf8_str_new_cstr(const char *ptr);
347 
375 VALUE rb_str_new_static(const char *ptr, long len);
376 
392 VALUE rb_usascii_str_new_static(const char *ptr, long len);
393 
409 VALUE rb_utf8_str_new_static(const char *ptr, long len);
410 
432 
452 VALUE rb_interned_str(const char *ptr, long len);
453 
468 VALUE rb_interned_str_cstr(const char *ptr);
469 
485 void rb_str_free(VALUE str);
486 
503 void rb_str_shared_replace(VALUE dst, VALUE src);
504 
521 
523 VALUE rb_str_buf_cat(VALUE, const char*, long);
524 
527 
542 VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src);
543 
553 
567 
582 void rb_must_asciicompat(VALUE obj);
583 
591 VALUE rb_str_dup(VALUE str);
592 
603 
616 
626 
629 
/** Alternate spelling: expands to rb_str_new_frozen. */
#define rb_str_dup_frozen  rb_str_new_frozen
632 
649 VALUE rb_str_plus(VALUE lhs, VALUE rhs);
650 
659 VALUE rb_str_times(VALUE str, VALUE num);
660 
672 long rb_str_sublen(VALUE str, long pos);
673 
724 VALUE rb_str_substr(VALUE str, long beg, long len);
725 
737 VALUE rb_str_subseq(VALUE str, long beg, long len);
738 
752 char *rb_str_subpos(VALUE str, long beg, long *len);
753 
765 void rb_str_modify(VALUE str);
766 
780 void rb_str_modify_expand(VALUE str, long capa);
781 
790 
814 void rb_str_set_len(VALUE str, long len);
815 
830 VALUE rb_str_resize(VALUE str, long len);
831 
843 VALUE rb_str_cat(VALUE dst, const char *src, long srclen);
844 
858 VALUE rb_str_cat_cstr(VALUE dst, const char *src);
859 
861 VALUE rb_str_cat2(VALUE, const char*);
862 
876 VALUE rb_str_append(VALUE dst, VALUE src);
877 
892 VALUE rb_str_concat(VALUE dst, VALUE src);
893 
894 /* random.c */
895 
908 st_index_t rb_memhash(const void *ptr, long len);
909 
936 st_index_t rb_hash_start(st_index_t i);
937 
/** Forwards both arguments to st_hash_uint32(). */
#define rb_hash_uint32(h, i)  st_hash_uint32((h), (i))

/** Forwards both arguments to st_hash_uint(). */
#define rb_hash_uint(h, i)    st_hash_uint((h), (i))

/** Forwards its argument to st_hash_end(). */
#define rb_hash_end(h)        st_hash_end(h)
946 
947 /* string.c */
948 
965 st_index_t rb_str_hash(VALUE str);
966 
983 int rb_str_hash_cmp(VALUE str1, VALUE str2);
984 
1001 int rb_str_comparable(VALUE str1, VALUE str2);
1002 
1015 int rb_str_cmp(VALUE lhs, VALUE rhs);
1016 
1030 VALUE rb_str_equal(VALUE str1, VALUE str2);
1031 
1048 VALUE rb_str_drop_bytes(VALUE str, long len);
1049 
1068 void rb_str_update(VALUE dst, long beg, long len, VALUE src);
1069 
1084 VALUE rb_str_replace(VALUE dst, VALUE src);
1085 
1108 
1121 VALUE rb_str_dump(VALUE str);
1122 
1139 VALUE rb_str_split(VALUE str, const char *delim);
1140 
1147 
1148 /* symbol.c */
1149 
1164 
1165 /* string.c */
1166 
1178 VALUE rb_sym_to_s(VALUE sym);
1179 
1189 long rb_str_strlen(VALUE str);
1190 
1198 
1208 long rb_str_offset(VALUE str, long pos);
1209 
1218 size_t rb_str_capacity(VALUE str);
1219 
1234 VALUE rb_str_ellipsize(VALUE str, long len);
1235 
1265 VALUE rb_str_scrub(VALUE str, VALUE repl);
1266 
1337 VALUE rb_str_succ(VALUE orig);
1338 
/**
 * Length of a NUL-terminated C string, expressed as `long`.
 *
 * strlen() yields a `size_t`; the `long`-taking string constructors declared
 * in this header need a signed length, hence the explicit conversion.
 *
 * @param[in]  str  A NUL-terminated C string (passed to strlen() as-is).
 * @return     The strlen() result converted to `long`.
 */
static inline long
rbimpl_strlen(const char *str)
{
    const size_t nbytes = strlen(str);

    return RBIMPL_CAST((long)nbytes);
}
1353 
1363 static inline VALUE
1364 rbimpl_str_new_cstr(const char *str)
1365 {
1366  long len = rbimpl_strlen(str);
1367  return rb_str_new_static(str, len);
1368 }
1369 
1379 static inline VALUE
1380 rbimpl_usascii_str_new_cstr(const char *str)
1381 {
1382  long len = rbimpl_strlen(str);
1383  return rb_usascii_str_new_static(str, len);
1384 }
1385 
1395 static inline VALUE
1396 rbimpl_utf8_str_new_cstr(const char *str)
1397 {
1398  long len = rbimpl_strlen(str);
1399  return rb_utf8_str_new_static(str, len);
1400 }
1401 
1411 static inline VALUE
1412 rbimpl_external_str_new_cstr(const char *str)
1413 {
1414  long len = rbimpl_strlen(str);
1415  return rb_external_str_new(str, len);
1416 }
1417 
1427 static inline VALUE
1428 rbimpl_locale_str_new_cstr(const char *str)
1429 {
1430  long len = rbimpl_strlen(str);
1431  return rb_locale_str_new(str, len);
1432 }
1433 
1443 static inline VALUE
1444 rbimpl_str_buf_new_cstr(const char *str)
1445 {
1446  long len = rbimpl_strlen(str);
1447  VALUE buf = rb_str_buf_new(len);
1448  return rb_str_buf_cat(buf, str, len);
1449 }
1450 
1461 static inline VALUE
1462 rbimpl_str_cat_cstr(VALUE buf, const char *str)
1463 {
1464  long len = rbimpl_strlen(str);
1465  return rb_str_cat(buf, str, len);
1466 }
1467 
1478 static inline VALUE
1479 rbimpl_exc_new_cstr(VALUE exc, const char *str)
1480 {
1481  long len = rbimpl_strlen(str);
1482  return rb_exc_new(exc, str, len);
1483 }
1484 
/**
 * Macro overlay for rb_str_new().  When RBIMPL_CONSTANT_P() holds for both
 * `str` and `len` the call goes to rb_str_new_static(); otherwise it goes to
 * the rb_str_new() function.  The inner `rb_str_new` names the function: a
 * macro is not re-expanded inside its own expansion.
 *
 * @param[in]  str  Pointer handed to the selected function.
 * @param[in]  len  Length handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_str_new(str, len)                              \
    ((RBIMPL_CONSTANT_P(str) && RBIMPL_CONSTANT_P(len)    \
        ? rb_str_new_static                               \
        : rb_str_new)((str), (len)))
1503 
/**
 * Macro overlay for rb_str_new_cstr().  When RBIMPL_CONSTANT_P(str) holds
 * the inline helper rbimpl_str_new_cstr() is called; otherwise the
 * rb_str_new_cstr() function is called.
 *
 * @param[in]  str  A C string handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_str_new_cstr(str)          \
    ((RBIMPL_CONSTANT_P(str)          \
        ? rbimpl_str_new_cstr         \
        : rb_str_new_cstr)(str))
1518 
/**
 * Macro overlay for rb_usascii_str_new().  When RBIMPL_CONSTANT_P() holds
 * for both `str` and `len` the call goes to rb_usascii_str_new_static();
 * otherwise it goes to the rb_usascii_str_new() function.
 *
 * @param[in]  str  Pointer handed to the selected function.
 * @param[in]  len  Length handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_usascii_str_new(str, len)                      \
    ((RBIMPL_CONSTANT_P(str) && RBIMPL_CONSTANT_P(len)    \
        ? rb_usascii_str_new_static                       \
        : rb_usascii_str_new)((str), (len)))
1537 
/**
 * Macro overlay for rb_utf8_str_new().  When RBIMPL_CONSTANT_P() holds for
 * both `str` and `len` the call goes to rb_utf8_str_new_static(); otherwise
 * it goes to the rb_utf8_str_new() function.
 *
 * @param[in]  str  Pointer handed to the selected function.
 * @param[in]  len  Length handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_utf8_str_new(str, len)                         \
    ((RBIMPL_CONSTANT_P(str) && RBIMPL_CONSTANT_P(len)    \
        ? rb_utf8_str_new_static                          \
        : rb_utf8_str_new)((str), (len)))
1554 
/**
 * Macro overlay for rb_usascii_str_new_cstr().  When RBIMPL_CONSTANT_P(str)
 * holds the inline helper rbimpl_usascii_str_new_cstr() is called; otherwise
 * the rb_usascii_str_new_cstr() function is called.
 *
 * @param[in]  str  A C string handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_usascii_str_new_cstr(str)          \
    ((RBIMPL_CONSTANT_P(str)                  \
        ? rbimpl_usascii_str_new_cstr         \
        : rb_usascii_str_new_cstr)(str))
1571 
/**
 * Macro overlay for rb_utf8_str_new_cstr().  When RBIMPL_CONSTANT_P(str)
 * holds the inline helper rbimpl_utf8_str_new_cstr() is called; otherwise
 * the rb_utf8_str_new_cstr() function is called.
 *
 * @param[in]  str  A C string handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_utf8_str_new_cstr(str)          \
    ((RBIMPL_CONSTANT_P(str)               \
        ? rbimpl_utf8_str_new_cstr         \
        : rb_utf8_str_new_cstr)(str))
1587 
/**
 * Macro overlay for rb_external_str_new_cstr().  When RBIMPL_CONSTANT_P(str)
 * holds the inline helper rbimpl_external_str_new_cstr() is called;
 * otherwise the rb_external_str_new_cstr() function is called.
 *
 * @param[in]  str  A C string handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_external_str_new_cstr(str)          \
    ((RBIMPL_CONSTANT_P(str)                   \
        ? rbimpl_external_str_new_cstr         \
        : rb_external_str_new_cstr)(str))
1608 
/**
 * Macro overlay for rb_locale_str_new_cstr().  When RBIMPL_CONSTANT_P(str)
 * holds the inline helper rbimpl_locale_str_new_cstr() is called; otherwise
 * the rb_locale_str_new_cstr() function is called.
 *
 * @param[in]  str  A C string handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_locale_str_new_cstr(str)          \
    ((RBIMPL_CONSTANT_P(str)                 \
        ? rbimpl_locale_str_new_cstr         \
        : rb_locale_str_new_cstr)(str))
1629 
/**
 * Macro overlay for rb_str_buf_new_cstr().  When RBIMPL_CONSTANT_P(str)
 * holds the inline helper rbimpl_str_buf_new_cstr() is called; otherwise the
 * rb_str_buf_new_cstr() function is called.
 *
 * @param[in]  str  A C string handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_str_buf_new_cstr(str)          \
    ((RBIMPL_CONSTANT_P(str)              \
        ? rbimpl_str_buf_new_cstr         \
        : rb_str_buf_new_cstr)(str))
1643 
/**
 * Macro overlay for rb_str_cat_cstr().  When RBIMPL_CONSTANT_P(str) holds
 * the inline helper rbimpl_str_cat_cstr() is called; otherwise the
 * rb_str_cat_cstr() function is called.  Only `str` is probed; `buf` is
 * passed through unchanged.
 *
 * @param[out]  buf  Destination object handed to the selected function.
 * @param[in]   str  A C string handed to the selected function.
 * @return      Whatever the selected function returns.
 */
#define rb_str_cat_cstr(buf, str)          \
    ((RBIMPL_CONSTANT_P(str)               \
        ? rbimpl_str_cat_cstr              \
        : rb_str_cat_cstr)((buf), (str)))
1660 
/**
 * Macro overlay for rb_exc_new_cstr().  When RBIMPL_CONSTANT_P(str) holds
 * the inline helper rbimpl_exc_new_cstr() is called; otherwise the
 * rb_exc_new_cstr() function is called.  Only `str` is probed; `exc` is
 * passed through unchanged.
 *
 * @param[in]  exc  First argument handed to the selected function.
 * @param[in]  str  A C string handed to the selected function.
 * @return     Whatever the selected function returns.
 */
#define rb_exc_new_cstr(exc, str)          \
    ((RBIMPL_CONSTANT_P(str)               \
        ? rbimpl_exc_new_cstr              \
        : rb_exc_new_cstr)((exc), (str)))
1674 
/*
 * Short legacy spellings.  Each one is an object-like macro that expands to
 * the name on its right, so any macro overlay defined for that name applies
 * to the alias as well.
 */
#define rb_str_new2          rb_str_new_cstr
#define rb_str_new3          rb_str_new_shared
#define rb_str_new4          rb_str_new_frozen
#define rb_str_new5          rb_str_new_with_class
#define rb_str_buf_new2      rb_str_buf_new_cstr
#define rb_usascii_str_new2  rb_usascii_str_new_cstr
#define rb_str_buf_cat       rb_str_cat
#define rb_str_buf_cat2      rb_str_cat_cstr
#define rb_str_cat2          rb_str_cat_cstr
/**
 * Number of elements in a string literal, excluding the terminating NUL,
 * computed with `sizeof` alone.
 *
 * Pasting `""` after `str` relies on adjacent-literal concatenation, so the
 * macro only compiles when `str` is a string literal, not a pointer.
 *
 * @param[in]  str  A string literal.
 * @return     Element count of `str` without its terminator, as `size_t`.
 */
#define rb_strlen_lit(str) \
    (sizeof(str "") / sizeof((str "")[0]) - 1)
1693 
/**
 * Calls rb_str_new_static() with `str` and its length as computed by
 * rb_strlen_lit().  `str` must be a string literal, as rb_strlen_lit()
 * requires.
 */
#define rb_str_new_lit(str) \
    rb_str_new_static((str), rb_strlen_lit(str))

/**
 * Calls rb_usascii_str_new_static() with `str` and its length as computed by
 * rb_strlen_lit().  `str` must be a string literal.
 */
#define rb_usascii_str_new_lit(str) \
    rb_usascii_str_new_static((str), rb_strlen_lit(str))

/**
 * Calls rb_utf8_str_new_static() with `str` and its length as computed by
 * rb_strlen_lit().  `str` must be a string literal.
 */
#define rb_utf8_str_new_lit(str) \
    rb_utf8_str_new_static((str), rb_strlen_lit(str))

/**
 * Calls rb_enc_str_new_static() with `str`, its length as computed by
 * rb_strlen_lit(), and `enc`.  `str` must be a string literal.
 * rb_enc_str_new_static() is not declared in this header.
 */
#define rb_enc_str_new_lit(str, enc) \
    rb_enc_str_new_static((str), rb_strlen_lit(str), (enc))
1749 
/* Long-form spellings; each forwards its arguments to the `_lit` macro. */
#define rb_str_new_literal(str)          rb_str_new_lit(str)
#define rb_usascii_str_new_literal(str)  rb_usascii_str_new_lit(str)
#define rb_utf8_str_new_literal(str)     rb_utf8_str_new_lit(str)
#define rb_enc_str_new_literal(str, enc) rb_enc_str_new_lit(str, enc)
1756 
1757 #endif /* RBIMPL_INTERN_STRING_H */
Defines RBIMPL_CONSTANT_P.
Defines RBIMPL_ATTR_DEPRECATED.
Tweaking visibility of C variables/functions.
#define RBIMPL_SYMBOL_EXPORT_END()
Counterpart of RBIMPL_SYMBOL_EXPORT_BEGIN.
Definition: dllexport.h:74
#define RBIMPL_SYMBOL_EXPORT_BEGIN()
Shortcut macro equivalent to RUBY_SYMBOL_EXPORT_BEGIN extern "C" {.
Definition: dllexport.h:65
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
Definition: string.h:1682
#define rb_str_cat2
Old name of rb_str_cat_cstr.
Definition: string.h:1683
VALUE rb_exc_new(VALUE etype, const char *ptr, long len)
Creates an instance of the passed exception class.
Definition: error.c:1446
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby's string instead of C's.
Definition: string.c:12500
void rb_str_free(VALUE str)
Destroys the given string for no reason.
Definition: string.c:1677
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
Definition: string.c:1455
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
Definition: string.c:2400
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
Definition: string.h:1583
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
Definition: string.c:3675
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
Definition: string.c:1366
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
Definition: string.c:12149
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
Definition: string.c:2472
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
Definition: string.c:1342
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
Definition: string.c:1671
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
Definition: string.c:2954
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
Definition: string.c:5267
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
Definition: string.c:4044
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
Definition: string.c:3051
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition: string.c:11448
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
Definition: random.c:1752
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition: string.h:1498
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
Definition: string.c:1713
#define rb_str_buf_cat
Just another name of rb_str_cat.
Definition: string.h:1681
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
Definition: string.c:1132
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
Definition: string.h:1532
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
Definition: string.c:954
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
Definition: string.c:1461
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition: string.c:1916
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
Definition: string.c:2640
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
Definition: string.c:4030
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
Definition: string.c:3443
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
Definition: string.c:2338
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
Definition: string.c:1934
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
Definition: string.h:1639
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
Definition: string.h:1567
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
Definition: string.c:6475
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
Definition: string.c:3059
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
Definition: string.h:1146
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C string.
Definition: string.c:12513
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
Definition: string.c:1372
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
Definition: string.h:1604
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
Definition: string.c:3641
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
Definition: string.c:3001
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
Definition: string.c:4146
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition: string.c:3267
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
Definition: random.c:1746
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
Definition: string.c:7196
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
Definition: string.c:2692
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
Definition: string.c:12506
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
Definition: string.c:4100
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
Definition: string.c:3917
int rb_str_comparable(VALUE str1, VALUE str2)
Checks whether two strings are comparable with each other.
Definition: string.c:4075
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string to be a NUL-terminated ...
Definition: string.c:3617
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
Definition: string.c:3176
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
Definition: string.c:5777
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
Definition: string.c:11506
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
Definition: string.h:1625
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
Definition: string.c:1627
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
Definition: string.h:631
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
Definition: string.c:2850
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
Definition: string.c:3148
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
Definition: string.h:1656
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
Definition: string.c:3250
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
Definition: string.c:3315
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
Definition: string.c:1144
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
Definition: string.h:1549
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition: string.c:2648
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
Definition: string.c:7310
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
Definition: string.c:1354
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition: string.c:1643
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
Definition: string.c:2352
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
Definition: string.h:1514
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
Definition: string.c:5695
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
Definition: string.c:9403
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
Definition: string.c:1138
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver is an instance of RString.
Definition: symbol.c:878
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
Definition: string.c:1775
Declares rb_define_variable().
void rb_gvar_setter_t(VALUE val, ID id, VALUE *data)
Type that represents a global variable setter function.
Definition: variable.h:46
int capa
Designed capacity of the buffer.
Definition: io.h:11
char * ptr
Pointer to the underlying memory region, of at least capa bytes.
Definition: io.h:2
RBIMPL_ATTR_PURE() int rb_io_read_pending(rb_io_t *fptr)
Queries if the passed IO has any pending reads.
int len
Length of the buffer.
Definition: io.h:8
Defines RBIMPL_ATTR_NONNULL.
#define RBIMPL_ATTR_NONNULL(list)
Wraps (or simulates) __attribute__((nonnull))
Definition: nonnull.h:27
#define inline
Old Visual Studio versions do not support the inline keyword, so we need to define it to be __inline.
Definition: defines.h:88
Defines RBIMPL_ATTR_PURE.
Defines VALUE and ID.
uintptr_t VALUE
Type that represents a Ruby object.
Definition: value.h:40