Ruby
3.4.0dev (2024-12-06 revision 892c46283a5ea4179500d951c9d4866c0051f27b)
|
Defines rb_encoding. More...
#include "ruby/oniguruma.h"
#include "ruby/internal/attr/const.h"
#include "ruby/internal/attr/deprecated.h"
#include "ruby/internal/attr/noalias.h"
#include "ruby/internal/attr/pure.h"
#include "ruby/internal/attr/returns_nonnull.h"
#include "ruby/internal/dllexport.h"
#include "ruby/internal/encoding/coderange.h"
#include "ruby/internal/value.h"
#include "ruby/internal/core/rbasic.h"
#include "ruby/internal/fl_type.h"
Go to the source code of this file.
Macros | |
#define | ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX |
Old name of RUBY_ENCODING_INLINE_MAX. More... | |
#define | ENCODING_SHIFT RUBY_ENCODING_SHIFT |
Old name of RUBY_ENCODING_SHIFT. More... | |
#define | ENCODING_MASK RUBY_ENCODING_MASK |
Old name of RUBY_ENCODING_MASK. More... | |
#define | ENCODING_SET_INLINED(obj, i) RB_ENCODING_SET_INLINED(obj,i) |
Old name of RB_ENCODING_SET_INLINED. More... | |
#define | ENCODING_SET(obj, i) RB_ENCODING_SET(obj,i) |
Old name of RB_ENCODING_SET. More... | |
#define | ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) |
Old name of RB_ENCODING_GET_INLINED. More... | |
#define | ENCODING_GET(obj) RB_ENCODING_GET(obj) |
Old name of RB_ENCODING_GET. More... | |
#define | ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) |
Old name of RB_ENCODING_IS_ASCII8BIT. More... | |
#define | ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN |
Old name of RUBY_ENCODING_MAXNAMELEN. More... | |
#define | MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) |
Old name of ONIGENC_MBCLEN_CHARFOUND_P. More... | |
#define | MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) |
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN. More... | |
#define | MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) |
Old name of ONIGENC_MBCLEN_INVALID_P. More... | |
#define | MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) |
Old name of ONIGENC_MBCLEN_NEEDMORE_P. More... | |
#define | MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) |
Old name of ONIGENC_MBCLEN_NEEDMORE_LEN. More... | |
Typedefs | |
typedef const OnigEncodingType | rb_encoding |
The type of encoding. More... | |
Functions | |
static void | RB_ENCODING_SET_INLINED (VALUE obj, int encindex) |
Destructively assigns the passed encoding to the passed object. More... | |
static int | RB_ENCODING_GET_INLINED (VALUE obj) |
Queries the encoding of the passed object. More... | |
int | rb_char_to_option_kcode (int c, int *option, int *kcode) |
Converts a character option to its encoding. More... | |
int | rb_define_dummy_encoding (const char *name) |
Creates a new "dummy" encoding. More... | |
int | rb_enc_dummy_p (rb_encoding *enc) |
Queries if the passed encoding is dummy. More... | |
int | rb_enc_to_index (rb_encoding *enc) |
Queries the index of the encoding. More... | |
int | rb_enc_get_index (VALUE obj) |
Queries the index of the encoding of the passed object, if any. More... | |
static int | RB_ENCODING_GET (VALUE obj) |
Just another name of rb_enc_get_index. More... | |
void | rb_enc_set_index (VALUE obj, int encindex) |
Destructively assigns an encoding (via its index) to an object. More... | |
static void | RB_ENCODING_SET (VALUE obj, int encindex) |
Just another name of rb_enc_set_index. More... | |
static void | RB_ENCODING_CODERANGE_SET (VALUE obj, int encindex, enum ruby_coderange_type cr) |
This is RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. More... | |
int | rb_enc_capable (VALUE obj) |
Queries if the passed object can have its encoding. More... | |
int | rb_enc_find_index (const char *name) |
Queries the index of the encoding. More... | |
int | rb_enc_alias (const char *alias, const char *orig) |
Registers an "alias" name. More... | |
int | rb_to_encoding_index (VALUE obj) |
Obtains an encoding index from a wider range of objects (than rb_enc_find_index()). More... | |
rb_encoding * | rb_to_encoding (VALUE obj) |
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL. More... | |
rb_encoding * | rb_find_encoding (VALUE obj) |
Identical to rb_to_encoding_index(), except the return type. More... | |
rb_encoding * | rb_enc_get (VALUE obj) |
Identical to rb_enc_get_index(), except the return type. More... | |
rb_encoding * | rb_enc_compatible (VALUE str1, VALUE str2) |
Look for the "common" encoding between the two. More... | |
rb_encoding * | rb_enc_check (VALUE str1, VALUE str2) |
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL. More... | |
VALUE | rb_enc_associate_index (VALUE obj, int encindex) |
Identical to rb_enc_set_index(), except it additionally does contents fix-up depending on the passed object. More... | |
VALUE | rb_enc_associate (VALUE obj, rb_encoding *enc) |
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index. More... | |
void | rb_enc_copy (VALUE dst, VALUE src) |
Destructively copies the encoding of the latter object to that of former one. More... | |
rb_encoding * | rb_enc_from_index (int idx) |
Identical to rb_find_encoding(), except it takes an encoding index instead of a Ruby object. More... | |
rb_encoding * | rb_enc_find (const char *name) |
Identical to rb_find_encoding(), except it takes a C's string instead of Ruby's. More... | |
static const char * | rb_enc_name (rb_encoding *enc) |
Queries the (canonical) name of the passed encoding. More... | |
static int | rb_enc_mbminlen (rb_encoding *enc) |
Queries the minimum number of bytes that the passed encoding needs to represent a character. More... | |
static int | rb_enc_mbmaxlen (rb_encoding *enc) |
Queries the maximum number of bytes that the passed encoding needs to represent a character. More... | |
int | rb_enc_mbclen (const char *p, const char *e, rb_encoding *enc) |
Queries the number of bytes of the character at the passed pointer. More... | |
int | rb_enc_fast_mbclen (const char *p, const char *e, rb_encoding *enc) |
Identical to rb_enc_mbclen() unless the character at p overruns e . More... | |
int | rb_enc_precise_mbclen (const char *p, const char *e, rb_encoding *enc) |
Queries the number of bytes of the character at the passed pointer. More... | |
int | rb_enc_ascget (const char *p, const char *e, int *len, rb_encoding *enc) |
Queries the code point of character pointed by the passed pointer. More... | |
unsigned int | rb_enc_codepoint_len (const char *p, const char *e, int *len, rb_encoding *enc) |
Queries the code point of character pointed by the passed pointer. More... | |
static unsigned int | rb_enc_codepoint (const char *p, const char *e, rb_encoding *enc) |
Queries the code point of character pointed by the passed pointer. More... | |
static OnigCodePoint | rb_enc_mbc_to_codepoint (const char *p, const char *e, rb_encoding *enc) |
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken. More... | |
int | rb_enc_codelen (int code, rb_encoding *enc) |
Queries the number of bytes requested to represent the passed code point using the passed encoding. More... | |
static int | rb_enc_code_to_mbclen (int c, rb_encoding *enc) |
Identical to rb_enc_codelen(), except it returns 0 for invalid code points. More... | |
static int | rb_enc_mbcput (unsigned int c, void *buf, rb_encoding *enc) |
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one. More... | |
static char * | rb_enc_prev_char (const char *s, const char *p, const char *e, rb_encoding *enc) |
Queries the previous (left) character. More... | |
static char * | rb_enc_left_char_head (const char *s, const char *p, const char *e, rb_encoding *enc) |
Queries the left boundary of a character. More... | |
static char * | rb_enc_right_char_head (const char *s, const char *p, const char *e, rb_encoding *enc) |
Queries the right boundary of a character. More... | |
static char * | rb_enc_step_back (const char *s, const char *p, const char *e, int n, rb_encoding *enc) |
Scans the string backwards for n characters. More... | |
static bool | rb_enc_asciicompat (rb_encoding *enc) |
Queries if the passed encoding is in some sense compatible with ASCII. More... | |
static bool | rb_enc_str_asciicompat_p (VALUE str) |
Queries if the passed string is in an ASCII-compatible encoding. More... | |
VALUE | rb_enc_from_encoding (rb_encoding *enc) |
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding. More... | |
int | rb_enc_unicode_p (rb_encoding *enc) |
Queries if the passed encoding is either one of UTF-8/16/32. More... | |
rb_encoding * | rb_ascii8bit_encoding (void) |
Queries the encoding that represents ASCII-8BIT a.k.a. More... | |
rb_encoding * | rb_utf8_encoding (void) |
Queries the encoding that represents UTF-8. More... | |
rb_encoding * | rb_usascii_encoding (void) |
Queries the encoding that represents US-ASCII. More... | |
rb_encoding * | rb_locale_encoding (void) |
Queries the encoding that represents the current locale. More... | |
rb_encoding * | rb_filesystem_encoding (void) |
Queries the "filesystem" encoding. More... | |
rb_encoding * | rb_default_external_encoding (void) |
Queries the "default external" encoding. More... | |
rb_encoding * | rb_default_internal_encoding (void) |
Queries the "default internal" encoding. More... | |
int | rb_ascii8bit_encindex (void) |
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself. More... | |
static bool | RB_ENCODING_IS_ASCII8BIT (VALUE obj) |
Queries if the passed object is in ascii 8bit (== binary) encoding. More... | |
int | rb_utf8_encindex (void) |
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself. More... | |
int | rb_usascii_encindex (void) |
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself. More... | |
int | rb_locale_encindex (void) |
Identical to rb_locale_encoding(), except it returns the encoding's index instead of the encoding itself. More... | |
int | rb_filesystem_encindex (void) |
Identical to rb_filesystem_encoding(), except it returns the encoding's index instead of the encoding itself. More... | |
VALUE | rb_enc_default_external (void) |
Identical to rb_default_external_encoding(), except it returns the Ruby-level counterpart instance of rb_cEncoding that corresponds to the default external encoding. More... | |
VALUE | rb_enc_default_internal (void) |
Identical to rb_default_internal_encoding(), except it returns the Ruby-level counterpart instance of rb_cEncoding that corresponds to the default internal encoding. More... | |
void | rb_enc_set_default_external (VALUE encoding) |
Destructively assigns the passed encoding as the default external encoding. More... | |
void | rb_enc_set_default_internal (VALUE encoding) |
Destructively assigns the passed encoding as the default internal encoding. More... | |
VALUE | rb_locale_charmap (VALUE klass) |
Returns a platform-dependent "charmap" of the current locale. More... | |
Variables | |
VALUE | rb_cEncoding |
Encoding class. More... | |
Defines rb_encoding.
Warning: symbols prefixed with either RBIMPL
or rbimpl
are implementation details. Don't take them as canon. They could rapidly appear then vanish. The name (path) of this header file is also an implementation detail. Do not expect it to persist at the place it is now. Developers are free to move it anywhere anytime at will.
Note: this header can be included from extension libraries written in C++, so do not expect, for instance, that __VA_ARGS__ is always available. We assume C99 for ruby itself but we don't assume languages of extension libraries. They could be written in C++98. Definition in file encoding.h.
typedef const OnigEncodingType rb_encoding |
The type of encoding.
Our design here is we take Oniguruma/Onigmo's multilingualisation schema as our base data structure.
Definition at line 117 of file encoding.h.
int rb_ascii8bit_encindex | ( | void | ) |
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition at line 1469 of file encoding.c.
Referenced by rb_char_to_option_kcode(), RB_ENCODING_IS_ASCII8BIT(), rb_external_str_new_with_enc(), rb_str_coderange_scan_restartable(), and rb_str_dump().
rb_encoding* rb_ascii8bit_encoding | ( | void | ) |
Queries the encoding that represents ASCII-8BIT, a.k.a. binary.
Definition at line 1463 of file encoding.c.
Referenced by rb_define_dummy_encoding(), rb_io_extract_modeenc(), rb_reg_new(), rb_symname_p(), ruby_brace_glob(), ruby_glob(), and ruby_init_loadpath().
int rb_char_to_option_kcode | ( | int | c, |
int * | option, | ||
int * | kcode | ||
) |
Converts a character option to its encoding.
It only supports a very limited set of Japanese encodings due to its Japanese origin. Ruby still has this in-core for backwards compatibility, but new code should not rely on the concept of a one-character encoding option. Consider this function deprecated in practice.
[in] | c | One of `['n', 'e', 's', 'u', 'i', 'x', 'm']`. |
[out] | option | Return buffer. |
[out] | kcode | Return buffer. |
1 | c understood properly. |
0 | c is not understood. |
option
is a ::OnigOptionType. kcode
is an enum ruby_preserved_encindex
. rb_encoding* rb_default_external_encoding | ( | void | ) |
Queries the "default external" encoding.
This is used to interact with outer-process things such as File. Though not recommended, you can set this using rb_enc_set_default_external().
Definition at line 1589 of file encoding.c.
Referenced by rb_enc_default_external(), rb_external_str_new(), rb_external_str_new_cstr(), rb_inspect(), rb_str_export(), and rb_str_inspect().
rb_encoding* rb_default_internal_encoding | ( | void | ) |
Queries the "default internal" encoding.
This could be a null pointer. Otherwise, outer-process info is transcoded from the default external encoding to this one during reading from an IO.
Definition at line 1676 of file encoding.c.
Referenced by rb_enc_default_internal(), rb_external_str_new_with_enc(), rb_inspect(), and rb_str_inspect().
int rb_define_dummy_encoding | ( | const char * | name | ) |
Creates a new "dummy" encoding.
Roughly speaking, an encoding is dummy when it is stateful. Notable examples of dummy encodings are those defined in ISO/IEC 2022.
[in] | name | Name of the creating encoding. |
rb_eArgError | Duplicated or malformed name . |
name
is created, whose index is the return value. Definition at line 566 of file encoding.c.
int rb_enc_alias | ( | const char * | alias, |
const char * | orig | ||
) |
Registers an "alias" name.
In the wild, an encoding can be called using multiple names. For instance an encoding known as "CP932"
is also called "SJIS"
on occasions. This API registers such relationships.
[in] | alias | New name. |
[in] | orig | Old name. |
rb_eArgError | alias is duplicated or malformed. |
-1 | Failed to load orig . |
otherwise | The index of orig and alias . |
alias
is a synonym of orig
. They refer to the identical encoding. Definition at line 670 of file encoding.c.
int rb_enc_ascget | ( | const char * | p, |
const char * | e, | ||
int * | len, | ||
rb_encoding * | enc | ||
) |
Queries the code point of character pointed by the passed pointer.
If that code point is included in ASCII that code point is returned. Otherwise -1. This can be different from just looking at the first byte. For instance it reads 2 bytes in case of UTF-16BE.
[in] | p | Pointer to the character's first byte. |
[in] | e | End of the string that has p . |
[out] | len | Return buffer. |
[in] | enc | Encoding of the string. |
-1 | The character at p is not in ASCII. |
otherwise | A code point of the character at p . |
len
(if set) is the number of bytes of p
. Definition at line 1203 of file encoding.c.
Referenced by rb_reg_quote().
|
inlinestatic |
Queries if the passed encoding is in some sense compatible with ASCII.
The concept of ASCII compatibility is nuanced, and private to our implementation. For instance SJIS is ASCII compatible to us, despite their having different characters at code point 0x5C
. This is based on the practical consideration that Japanese people tend to regard SJIS as "upper
compatible" with ASCII (which is in fact a wrong idea, but we just don't go strict here). An example of ASCII incompatible encoding is UTF-16. UTF-16 shares code points with ASCII, but employs a completely different encoding scheme.
[in] | enc | Encoding in question. |
0 | It is incompatible. |
1 | It is compatible. |
Definition at line 768 of file encoding.h.
Referenced by rb_enc_ascget(), rb_enc_str_asciicompat_p(), rb_external_str_new_with_enc(), rb_inspect(), rb_must_asciicompat(), rb_reg_regsub(), rb_str_buf_cat_ascii(), rb_str_coderange_scan_restartable(), rb_str_comparable(), rb_str_conv_enc_opts(), rb_str_dump(), rb_str_ellipsize(), rb_str_inspect(), and rb_to_encoding_index().
VALUE rb_enc_associate | ( | VALUE | obj, |
rb_encoding * | enc | ||
) |
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
[out] | obj | Object in question. |
[in] | enc | An encoding. |
rb_eFrozenError | obj is frozen. |
rb_eArgError | obj is incapable of having an encoding. |
obj
. obj
's contents might be fixed according to enc
. Definition at line 1022 of file encoding.c.
Referenced by rb_ary_join(), rb_econv_append(), rb_enc_vsprintf(), rb_reg_quote(), rb_str_conv_enc_opts(), rb_str_ellipsize(), rb_str_format(), rb_str_inspect(), rb_str_intern(), and rb_str_update().
Identical to rb_enc_set_index(), except it additionally does contents fix-up depending on the passed object.
It for instance changes the byte length of terminating U+0000
according to the passed encoding.
[out] | obj | Object in question. |
[in] | encindex | An encoding index. |
rb_eFrozenError | obj is frozen. |
rb_eArgError | obj is incapable of having an encoding. |
rb_eEncodingError | encindex is out of bounds. |
rb_eLoadError | Failed to load the encoding. |
obj
. obj
's contents might be fixed according to encindex
. Definition at line 994 of file encoding.c.
Referenced by rb_dir_getwd(), rb_enc_associate(), rb_enc_copy(), rb_find_file(), rb_find_file_ext(), rb_str_dump(), and rb_str_format().
int rb_enc_capable | ( | VALUE | obj | ) |
Queries if the passed object can have its encoding.
[in] | obj | Object in question. |
1 | It can. |
0 | It cannot. |
Definition at line 884 of file encoding.c.
rb_encoding* rb_enc_check | ( | VALUE | str1, |
VALUE | str2 | ||
) |
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL.
[in] | str1 | An object. |
[in] | str2 | Another object. |
rb_eEncCompatError | No encoding can satisfy both. |
Definition at line 1062 of file encoding.c.
Referenced by rb_str_format(), and rb_str_update().
|
inlinestatic |
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
[in] | c | Code point in question. |
[in] | enc | Encoding to convert c into a byte sequence. |
0 | c is invalid. |
enc
to encode c
. Definition at line 619 of file encoding.h.
int rb_enc_codelen | ( | int | code, |
rb_encoding * | enc | ||
) |
Queries the number of bytes requested to represent the passed code point using the passed encoding.
[in] | code | Code point in question. |
[in] | enc | Encoding to convert the code into a byte sequence. |
rb_eArgError | enc does not glean code . |
code
using enc
. Definition at line 1241 of file encoding.c.
Referenced by rb_enc_uint_chr(), rb_str_buf_cat_ascii(), rb_str_concat(), and rb_str_format().
|
inlinestatic |
Queries the code point of character pointed by the passed pointer.
Exceptions happen in case of broken input.
[in] | p | Pointer to the character's first byte. |
[in] | e | End of the string that has p . |
[in] | enc | Encoding of the string. |
rb_eArgError | p is broken. |
p
. Definition at line 571 of file encoding.h.
Referenced by rb_str_inspect().
unsigned int rb_enc_codepoint_len | ( | const char * | p, |
const char * | e, | ||
int * | len, | ||
rb_encoding * | enc | ||
) |
Queries the code point of character pointed by the passed pointer.
Exceptions happen in case of broken input.
[in] | p | Pointer to the character's first byte. |
[in] | e | End of the string that has p . |
[out] | len | Return buffer. |
[in] | enc | Encoding of the string. |
rb_eArgError | p is broken. |
p
. len
(if set) is the number of bytes of p
. Definition at line 1227 of file encoding.c.
Referenced by rb_enc_codepoint().
rb_encoding* rb_enc_compatible | ( | VALUE | str1, |
VALUE | str2 | ||
) |
Look for the "common" encoding between the two.
One character can or cannot be expressed depending on an encoding. This function finds the super-set of encodings that satisfy contents of both arguments. If that is impossible returns NULL.
[in] | str1 | An object. |
[in] | str2 | Another object. |
NULL | No encoding can satisfy both at once. |
otherwise | Common encoding between the two. |
Definition at line 1140 of file encoding.c.
Referenced by rb_enc_check().
Destructively copies the encoding of the latter object to that of former one.
It can also be seen as a routine identical to rb_enc_associate_index(), except it takes an object's encoding instead of an encoding's index.
[out] | dst | Object to modify. |
[in] | src | Object to reference. |
rb_eFrozenError | dst is frozen. |
rb_eArgError | dst is incapable of having an encoding. |
rb_eEncodingError | src is incapable of having an encoding. |
dst
's encoding is that of src
's. Definition at line 1149 of file encoding.c.
Referenced by rb_reg_quote(), and rb_str_times().
VALUE rb_enc_default_external | ( | void | ) |
Identical to rb_default_external_encoding(), except it returns the Ruby-level counterpart instance of rb_cEncoding that corresponds to the default external encoding.
Definition at line 1603 of file encoding.c.
VALUE rb_enc_default_internal | ( | void | ) |
Identical to rb_default_internal_encoding(), except it returns the Ruby-level counterpart instance of rb_cEncoding that corresponds to the default internal encoding.
Definition at line 1685 of file encoding.c.
int rb_enc_dummy_p | ( | rb_encoding * | enc | ) |
Queries if the passed encoding is dummy.
[in] | enc | Encoding in question. |
1 | It is. |
0 | It isn't. |
Definition at line 197 of file encoding.c.
Referenced by rb_enc_asciicompat().
int rb_enc_fast_mbclen | ( | const char * | p, |
const char * | e, | ||
rb_encoding * | enc | ||
) |
Identical to rb_enc_mbclen() unless the character at p
overruns e
.
That can happen for instance when you read from a socket and its partial read cuts a wide character in-between. In those situations this function "estimates" theoretical length of the character in question. Typically it tends to be possible to know how many bytes a character needs before actually reaching its end; for instance UTF-8 encodes a character's length in the first byte of it. This function returns that info.
[in] | p | Pointer to the character's first byte. |
[in] | e | End of the string that has p . |
[in] | enc | Encoding of the string. |
p
, measured or estimated. Definition at line 1173 of file encoding.c.
rb_encoding* rb_enc_find | ( | const char * | name | ) |
Identical to rb_find_encoding(), except it takes a C's string instead of Ruby's.
[in] | name | Name of the encoding to query. |
NULL | No such encoding. |
otherwise | An encoding whose name is name . |
Definition at line 859 of file encoding.c.
int rb_enc_find_index | ( | const char * | name | ) |
Queries the index of the encoding.
[in] | name | Name of the encoding to find. |
rb_eArgError | No such encoding named name . |
-1 | name exists, but unable to load. |
otherwise | Index of encoding named name . |
Definition at line 824 of file encoding.c.
Referenced by rb_enc_alias(), rb_enc_find(), and rb_to_encoding_index().
VALUE rb_enc_from_encoding | ( | rb_encoding * | enc | ) |
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
[in] | enc | An encoding |
RUBY_Qnil | enc is a null pointer. |
otherwise | An instance of rb_cEncoding. |
Definition at line 182 of file encoding.c.
Referenced by rb_enc_default_external(), rb_enc_default_internal(), rb_io_extract_encoding_option(), and rb_str_ellipsize().
rb_encoding* rb_enc_from_index | ( | int | idx | ) |
Identical to rb_find_encoding(), except it takes an encoding index instead of a Ruby object.
[in] | idx | An encoding index. |
NULL | No such encoding. |
otherwise | An encoding whose index is idx . |
Definition at line 402 of file encoding.c.
Referenced by rb_default_external_encoding(), rb_default_internal_encoding(), rb_enc_find(), rb_enc_find_index(), rb_enc_get(), rb_filesystem_encoding(), rb_find_encoding(), rb_io_extract_encoding_option(), rb_locale_encoding(), rb_must_asciicompat(), rb_str_buf_cat_ascii(), rb_str_comparable(), rb_str_dump(), rb_str_encode_ospath(), rb_str_format(), and rb_str_inspect().
rb_encoding* rb_enc_get | ( | VALUE | obj | ) |
Identical to rb_enc_get_index(), except the return type.
[in] | obj | Object in question. |
NULL | obj is incapable of having an encoding. |
otherwise | obj 's encoding. |
Definition at line 1028 of file encoding.c.
Referenced by rb_econv_append(), rb_econv_open_opts(), rb_econv_prepare_options(), rb_enc_str_asciicompat_p(), rb_inspect(), rb_io_extract_encoding_option(), rb_io_extract_modeenc(), rb_path_to_class(), rb_reg_quote(), rb_reg_regsub(), rb_str_conv_enc_opts(), rb_str_ellipsize(), rb_str_format(), rb_str_intern(), rb_str_set_len(), and rb_to_encoding_index().
int rb_enc_get_index | ( | VALUE | obj | ) |
Queries the index of the encoding of the passed object, if any.
[in] | obj | Object in question. |
-1 | obj is incapable of having an encoding. |
otherwise | obj 's encoding's index. |
Definition at line 920 of file encoding.c.
Referenced by rb_enc_compatible(), rb_enc_copy(), rb_enc_get(), rb_must_asciicompat(), rb_obj_encoding(), and rb_str_dump().
|
inlinestatic |
Queries the left boundary of a character.
This function takes a pointer that is not necessarily a head of a character, and searches for its head.
[in] | s | Start of the string. |
[in] | p | Pointer to a possibly-middle of a character. |
[in] | e | End of the string. |
[in] | enc | Encoding. |
p
. Definition at line 683 of file encoding.h.
|
inlinestatic |
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
[in] | p | Pointer to the character's first byte. |
[in] | e | End of the string that has p . |
[in] | enc | Encoding of the string. |
p
. Definition at line 591 of file encoding.h.
Referenced by rb_enc_ascget(), rb_enc_codepoint_len(), rb_str_dump(), and rb_str_inspect().
int rb_enc_mbclen | ( | const char * | p, |
const char * | e, | ||
rb_encoding * | enc | ||
) |
Queries the number of bytes of the character at the passed pointer.
[in] | p | Pointer to a character's first byte. |
[in] | e | End of the string that has p . |
[in] | enc | Encoding of the string. |
p
does not end until e
, number of bytes between p
and e
. Otherwise the number of bytes that the character at p
is encoded. Definition at line 1179 of file encoding.c.
Referenced by rb_str_format().
|
inlinestatic |
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
[in] | c | Code point. |
[out] | buf | Return buffer. |
[in] | enc | Target encoding scheme. |
<= 0 | c is invalid in enc . |
buf
. c
is encoded according to enc
, then written to buf
. Definition at line 643 of file encoding.h.
Referenced by rb_enc_uint_chr(), rb_reg_quote(), rb_str_buf_cat_ascii(), rb_str_concat(), and rb_str_format().
|
inlinestatic |
Queries the maximum number of bytes that the passed encoding needs to represent a character.
Fixed-width encodings have the same value for this one and rb_enc_mbminlen. However there are variable-width encodings. UTF-8, for instance, takes from 1 up to 6 bytes.
[in] | enc | An encoding. |
Definition at line 447 of file encoding.h.
Referenced by rb_str_buf_cat_ascii(), and rb_str_subpos().
|
inlinestatic |
Queries the minimum number of bytes that the passed encoding needs to represent a character.
For ASCII and compatible encodings this is typically 1.
[in] | enc | An encoding. |
Definition at line 432 of file encoding.h.
Referenced by rb_enc_asciicompat(), rb_enc_mbclen(), rb_enc_str_new_cstr(), rb_enc_vsprintf(), rb_memsearch(), rb_str_ellipsize(), rb_str_inspect(), rb_str_plus(), and rb_str_subpos().
|
inlinestatic |
Queries the (canonical) name of the passed encoding.
[in] | enc | An encoding. |
Definition at line 417 of file encoding.h.
Referenced by rb_econv_open_opts(), rb_econv_prepare_options(), rb_enc_codelen(), rb_enc_codepoint_len(), rb_enc_uint_chr(), rb_enc_vsprintf(), rb_must_asciicompat(), and rb_str_concat().
int rb_enc_precise_mbclen | ( | const char * | p, |
const char * | e, | ||
rb_encoding * | enc | ||
) |
Queries the number of bytes of the character at the passed pointer.
This function returns 3 different types of information:
[in] | p | Pointer to the character's first byte. |
[in] | e | End of the string that has p . |
[in] | enc | Encoding of the string. |
Definition at line 1191 of file encoding.c.
Referenced by rb_enc_ascget(), rb_enc_codepoint_len(), rb_enc_uint_chr(), rb_str_coderange_scan_restartable(), rb_str_concat(), rb_str_dump(), and rb_str_inspect().
|
inlinestatic |
Queries the previous (left) character.
[in] | s | Start of the string. |
[in] | p | Pointer to a character. |
[in] | e | End of the string. |
[in] | enc | Encoding. |
NULL | No previous character. |
otherwise | Pointer to the head of the previous character. |
Definition at line 662 of file encoding.h.
Referenced by rb_str_subpos().
|
inlinestatic |
Queries the right boundary of a character.
This function takes a pointer that is not necessarily a head of a character, and searches for its tail.
[in] | s | Start of the string. |
[in] | p | Pointer to a possibly-middle of a character. |
[in] | e | End of the string. |
[in] | enc | Encoding. |
p
. Definition at line 704 of file encoding.h.
Referenced by rb_str_format().
void rb_enc_set_default_external | ( | VALUE | encoding | ) |
Destructively assigns the passed encoding as the default external encoding.
You should not use this API. It has process-global side effects. Also it doesn't change encodings of strings that have already been read.
[in] | encoding | Ruby level encoding. |
rb_eArgError | encoding is RUBY_Qnil. |
encoding
. Definition at line 1643 of file encoding.c.
void rb_enc_set_default_internal | ( | VALUE | encoding | ) |
Destructively assigns the passed encoding as the default internal encoding.
You should not use this API. It has process-global side effects. Also it doesn't change encodings of strings that have already been read.
[in] | encoding | Ruby level encoding. |
encoding
. Definition at line 1726 of file encoding.c.
void rb_enc_set_index | ( | VALUE | obj, |
int | encindex | ||
) |
Destructively assigns an encoding (via its index) to an object.
[out] | obj | Object in question. |
[in] | encindex | An encoding index. |
rb_eFrozenError | obj is frozen. |
rb_eArgError | obj is incapable of having an encoding. |
rb_eEncodingError | encindex is out of bounds. |
rb_eLoadError | Failed to load the encoding. |
Definition at line 986 of file encoding.c.
Referenced by RB_ENCODING_SET().
|
inlinestatic |
Scans the string backwards for n characters.
[in] | s | Start of the string. |
[in] | p | Pointer to a character. |
[in] | e | End of the string. |
[in] | n | Steps. |
[in] | enc | Encoding. |
NULL | There are no n characters left. |
otherwise | Pointer to the character that is n characters before p . |
Definition at line 726 of file encoding.h.
Referenced by rb_str_ellipsize().
|
inlinestatic |
Queries if the passed string is in an ASCII-compatible encoding.
[in] | str | A Ruby's string to query. |
0 | str is not a String, or an ASCII-incompatible string. |
1 | Otherwise. |
Definition at line 789 of file encoding.h.
int rb_enc_to_index | ( | rb_encoding * | enc | ) |
Queries the index of the encoding.
An encoding's index is a Ruby-local concept. It is a (sequential) number assigned to each encoding.
[in] | enc | Encoding in question. |
Definition at line 191 of file encoding.c.
Referenced by rb_dir_getwd(), rb_enc_associate(), rb_enc_str_buf_cat(), rb_enc_str_new_static(), rb_external_str_new_with_enc(), rb_str_coderange_scan_restartable(), rb_str_format(), and rb_str_plus().
int rb_enc_unicode_p | ( | rb_encoding * | enc | ) |
Queries if the passed encoding is either one of UTF-8/16/32.
[in] | enc | Encoding in question. |
0 | It is not a Unicode variant. |
otherwise | It is. |
Definition at line 638 of file encoding.c.
Referenced by rb_str_inspect().
|
inlinestatic |
This is RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo.
The object must be capable of having inline encoding. Using this macro needs deep understanding of bit level object binary layout.
[out] | obj | Target object. |
[in] | encindex | Encoding in encindex format. |
[in] | cr | An enum ruby_coderange_type. |
obj's encoding is encindex. obj's code range is cr.
Definition at line 238 of file encoding.h.
|
inlinestatic |
Just another name of rb_enc_get_index.
Definition at line 195 of file encoding.h.
|
inlinestatic |
Queries the encoding of the passed object.
The encoding must be smaller than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the return value. This means the API is for internal use only.
[in] | obj | Target object. |
obj's encoding index.
Definition at line 99 of file encoding.h.
Referenced by RB_ENCODING_GET(), and RB_ENCODING_IS_ASCII8BIT().
|
inlinestatic |
Queries if the passed object is in ascii 8bit (== binary) encoding.
The object must be capable of having inline encoding. Using this macro needs deep understanding of bit level object binary layout.
[in] | obj | An object to check. |
1 | It is. |
0 | It isn't. |
Definition at line 927 of file encoding.h.
|
inlinestatic |
Just another name of rb_enc_set_index.
Definition at line 221 of file encoding.h.
Referenced by RB_ENCODING_CODERANGE_SET().
|
inlinestatic |
Destructively assigns the passed encoding to the passed object.
The object must be capable of having inline encoding. Using this macro needs deep understanding of bit level object binary layout.
[out] | obj | Target object to modify. |
[in] | encindex | Encoding in encindex format. |
obj's encoding is encindex.
Definition at line 81 of file encoding.h.
int rb_filesystem_encindex | ( | void | ) |
Identical to rb_filesystem_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition at line 1529 of file encoding.c.
Referenced by rb_filesystem_encoding(), and rb_str_encode_ospath().
rb_encoding* rb_filesystem_encoding | ( | void | ) |
Queries the "filesystem" encoding.
This is the encoding that Ruby expects information from the OS's file system to be in. This affects, for instance, the return value of rb_dir_getwd(). Most notably, on Windows it can be an alias of the OS codepage, while on Linux users can set it via the default external encoding.
Definition at line 1537 of file encoding.c.
Referenced by rb_dir_getwd(), rb_filesystem_str_new(), and rb_filesystem_str_new_cstr().
rb_encoding* rb_find_encoding | ( | VALUE | obj | ) |
Identical to rb_to_encoding_index(), except the return type.
[in] | obj | An rb_cEncoding, or its name in rb_cString. |
rb_eTypeError | obj is neither rb_cEncoding nor rb_cString. |
NULL | No such encoding. |
obj
. Definition at line 330 of file encoding.c.
Returns a platform-dependent "charmap" of the current locale.
This information is called a "Codeset name" in IEEE 1003.1 section 13 (<langinfo.h>). This is a very low-level API. The return value can have no corresponding encoding when passed to rb_find_encoding().
[in] | klass | Ignored for no reason (why...) |
Definition at line 91 of file localeinit.c.
int rb_locale_encindex | ( | void | ) |
Identical to rb_locale_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition at line 1501 of file encoding.c.
Referenced by rb_locale_encoding().
rb_encoding* rb_locale_encoding | ( | void | ) |
Queries the encoding that represents the current locale.
Definition at line 1523 of file encoding.c.
Referenced by rb_default_external_encoding(), rb_loaderror(), rb_loaderror_with_path(), rb_locale_str_new(), rb_locale_str_new_cstr(), and rb_str_export_locale().
rb_encoding* rb_to_encoding | ( | VALUE | obj | ) |
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL.
[in] | obj | An rb_cEncoding, or its name in rb_cString. |
rb_eTypeError | obj is neither rb_cEncoding nor rb_cString. |
rb_eArgError | obj is an unknown encoding name. |
obj
. Definition at line 323 of file encoding.c.
Referenced by rb_io_extract_encoding_option().
int rb_to_encoding_index | ( | VALUE | obj | ) |
Obtains an encoding index from a wider range of objects (than rb_enc_find_index()).
[in] | obj | An rb_cEncoding, or its name in rb_cString. |
-1 | obj is unexpected type/contents. |
otherwise | Index corresponding to obj . |
Definition at line 261 of file encoding.c.
Referenced by rb_io_extract_encoding_option().
int rb_usascii_encindex | ( | void | ) |
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition at line 1493 of file encoding.c.
Referenced by rb_external_str_new_with_enc(), rb_find_file(), and rb_find_file_ext().
rb_encoding* rb_usascii_encoding | ( | void | ) |
Queries the encoding that represents US-ASCII.
Definition at line 1487 of file encoding.c.
Referenced by rb_ary_join(), rb_intern2(), rb_iv_get(), rb_reg_quote(), rb_str_inspect(), rb_str_intern(), rb_usascii_str_new(), and rb_usascii_str_new_cstr().
int rb_utf8_encindex | ( | void | ) |
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition at line 1481 of file encoding.c.
Referenced by rb_char_to_option_kcode(), and rb_str_dump().
rb_encoding* rb_utf8_encoding | ( | void | ) |
Queries the encoding that represents UTF-8.
Definition at line 1475 of file encoding.c.
Referenced by rb_memsearch(), rb_str_encode_ospath(), rb_str_subpos(), rb_utf8_str_new(), and rb_utf8_str_new_cstr().