Ruby
3.4.0dev (2024-12-06 revision 892c46283a5ea4179500d951c9d4866c0051f27b)
|
econv stuff More...
Go to the source code of this file.
Typedefs | |
typedef struct rb_econv_t | rb_econv_t |
An opaque struct that represents a lowest level of encoding conversion. More... | |
Functions | |
VALUE | rb_str_encode (VALUE str, VALUE to, int ecflags, VALUE ecopts) |
Converts the contents of the passed string from its encoding to the passed one. More... | |
int | rb_econv_has_convpath_p (const char *from_encoding, const char *to_encoding) |
Queries if there is at least one way to convert between the passed two encodings. More... | |
int | rb_econv_prepare_options (VALUE opthash, VALUE *ecopts, int ecflags) |
Identical to rb_econv_prepare_opts(), except it additionally takes the initial value of flags. More... | |
int | rb_econv_prepare_opts (VALUE opthash, VALUE *ecopts) |
Splits a keyword arguments hash (that for instance String#encode took) into a set of enum ruby_econv_flag_type and a hash storing replacement characters etc. More... | |
rb_econv_t * | rb_econv_open (const char *source_encoding, const char *destination_encoding, int ecflags) |
Creates a new instance of struct rb_econv_t. More... | |
rb_econv_t * | rb_econv_open_opts (const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts) |
Identical to rb_econv_open(), except it additionally takes a hash of optional strings. More... | |
rb_econv_result_t | rb_econv_convert (rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags) |
Converts a string from an encoding to another. More... | |
void | rb_econv_close (rb_econv_t *ec) |
Destructs a converter. More... | |
int | rb_econv_set_replacement (rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname) |
Assigns the replacement string. More... | |
int | rb_econv_decorate_at_first (rb_econv_t *ec, const char *decorator_name) |
"Decorate"s a converter. More... | |
int | rb_econv_decorate_at_last (rb_econv_t *ec, const char *decorator_name) |
Identical to rb_econv_decorate_at_first(), except it adds to the opposite direction. More... | |
VALUE | rb_econv_open_exc (const char *senc, const char *denc, int ecflags) |
Creates a rb_eConverterNotFoundError exception object (but does not raise). More... | |
int | rb_econv_insert_output (rb_econv_t *ec, const unsigned char *str, size_t len, const char *str_encoding) |
Appends the passed string to the passed converter's output buffer. More... | |
const char * | rb_econv_encoding_to_insert_output (rb_econv_t *ec) |
Queries an encoding name which best suits for rb_econv_insert_output()'s last parameter. More... | |
void | rb_econv_check_error (rb_econv_t *ec) |
This is a rb_econv_make_exception() + rb_exc_raise() combo. More... | |
VALUE | rb_econv_make_exception (rb_econv_t *ec) |
This function makes sense right after rb_econv_convert() returns. More... | |
int | rb_econv_putbackable (rb_econv_t *ec) |
Queries if rb_econv_putback() makes sense, i.e. More... | |
void | rb_econv_putback (rb_econv_t *ec, unsigned char *p, int n) |
Puts back the bytes. More... | |
const char * | rb_econv_asciicompat_encoding (const char *encname) |
Queries the passed encoding's corresponding ASCII compatible encoding. More... | |
VALUE | rb_econv_str_convert (rb_econv_t *ec, VALUE src, int flags) |
Identical to rb_econv_convert(), except it takes Ruby's string instead of C's pointer. More... | |
VALUE | rb_econv_substr_convert (rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags) |
Identical to rb_econv_str_convert(), except it converts only a part of the passed string. More... | |
VALUE | rb_econv_str_append (rb_econv_t *ec, VALUE src, VALUE dst, int flags) |
Identical to rb_econv_str_convert(), except it appends the conversion result to the additionally passed string instead of creating a new string. More... | |
VALUE | rb_econv_substr_append (rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags) |
Identical to rb_econv_str_append(), except it appends only a part of the passed string with conversion. More... | |
VALUE | rb_econv_append (rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags) |
Converts the passed C's pointer according to the passed converter, then appends the conversion result to the passed Ruby's string. More... | |
void | rb_econv_binmode (rb_econv_t *ec) |
This badly named function does not set the destination encoding to binary, but instead just nullifies newline conversion decorators if any. More... | |
Flags for rb_econv_convert() | |
#define | ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT |
Old name of RUBY_ECONV_PARTIAL_INPUT. More... | |
#define | ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT |
Old name of RUBY_ECONV_AFTER_OUTPUT. More... | |
econv stuff
Symbols prefixed with either RBIMPL or rbimpl are implementation details. Don't take them as canon. They could rapidly appear then vanish. The name (path) of this header file is also an implementation detail. Do not expect it to persist at the place it is now. Developers are free to move it anywhere anytime at will.
Do not expect, for instance, that __VA_ARGS__ is always available. We assume C99 for ruby itself but we don't assume languages of extension libraries. They could be written in C++98.
Definition in file transcode.h.
typedef struct rb_econv_t rb_econv_t |
An opaque struct that represents a lowest level of encoding conversion.
Definition at line 1 of file transcode.h.
enum rb_econv_result_t |
return value of rb_econv_convert()
Definition at line 30 of file transcode.h.
enum ruby_econv_flag_type |
This enum is kind of omnibus.
Gathers various constants.
Definition at line 452 of file transcode.h.
VALUE rb_econv_append | ( | rb_econv_t * | ec, |
const char * | bytesrc, | ||
long | bytesize, | ||
VALUE | dst, | ||
int | flags | ||
) |
Converts the passed C's pointer according to the passed converter, then appends the conversion result to the passed Ruby's string.
This way buffer overflow is properly avoided, because the destination is resized as needed.
[in,out] | ec | Target converter. |
[in] | bytesrc | Target string. |
[in] | bytesize | Number of bytes of bytesrc . |
[in] | dst | Return buffer. |
[in] | flags | Flags (see rb_econv_convert). |
rb_eArgError | Converted string is too long. |
rb_eInvalidByteSequenceError | Invalid byte sequence. |
rb_eUndefinedConversionError | Conversion undefined. |
Definition at line 1847 of file transcode.c.
Referenced by rb_econv_substr_append().
const char* rb_econv_asciicompat_encoding | ( | const char * | encname | ) |
Queries the passed encoding's corresponding ASCII compatible encoding.
"The corresponding ASCII compatible encoding" in this context is an ASCII compatible encoding which can represent exactly the same character sets as the given ASCII incompatible encoding. For instance that of UTF-16LE is UTF-8.
[in] | encname | Name of an ASCII incompatible encoding. |
NULL | encname is already ASCII compatible. |
otherwise | The corresponding ASCII compatible encoding. |
Definition at line 1814 of file transcode.c.
void rb_econv_binmode | ( | rb_econv_t * | ec | ) |
This badly named function does not set the destination encoding to binary, but instead just nullifies newline conversion decorators if any.
Other ordinary character conversions still happen after this; something non-binary would still be generated.
[out] | ec | Target converter to modify. |
Definition at line 1996 of file transcode.c.
Referenced by rb_io_binmode().
void rb_econv_check_error | ( | rb_econv_t * | ec | ) |
This is a rb_econv_make_exception() + rb_exc_raise() combo.
[in] | ec | (Possibly failed) conversion. |
rb_eInvalidByteSequenceError | Invalid byte sequence. |
rb_eUndefinedConversionError | Conversion undefined. |
Definition at line 4278 of file transcode.c.
Referenced by rb_econv_append().
void rb_econv_close | ( | rb_econv_t * | ec | ) |
Destructs a converter.
Note that a converter can have a buffer, and that buffer can be non-empty. Calling this would then lose your data.
[out] | ec | The converter to destroy. |
ec is no longer a valid pointer.
Definition at line 1731 of file transcode.c.
Referenced by rb_econv_open(), and rb_econv_open_opts().
rb_econv_result_t rb_econv_convert | ( | rb_econv_t * | ec, |
const unsigned char ** | source_buffer_ptr, | ||
const unsigned char * | source_buffer_end, | ||
unsigned char ** | destination_buffer_ptr, | ||
unsigned char * | destination_buffer_end, | ||
int | flags | ||
) |
Converts a string from an encoding to another.
Possible flags are either RUBY_ECONV_PARTIAL_INPUT (means the source buffer is a part of much larger one), RUBY_ECONV_AFTER_OUTPUT (instructs the converter to stop after output before input), or both of them.
[in,out] | ec | Conversion specification/state etc. |
[in] | source_buffer_ptr | Target string. |
[in] | source_buffer_end | End of target string. |
[out] | destination_buffer_ptr | Return buffer. |
[out] | destination_buffer_end | End of return buffer. |
[in] | flags | Flags (see above). |
destination_buffer_ptr holds conversion results.
Definition at line 1475 of file transcode.c.
Referenced by rb_econv_append().
int rb_econv_decorate_at_first | ( | rb_econv_t * | ec, |
const char * | decorator_name | ||
) |
"Decorate"s a converter.
There is a special kind of converter that transforms the contents, for example by replacing CR with CRLF. You can add such decorators to a converter using this API. By using this function a decorator is prepended at the beginning of a conversion sequence: in case of CRLF conversion, newlines are converted before encodings are converted.
[out] | ec | Target converter to decorate. |
[in] | decorator_name | Name of decorator to prepend. |
0 | Success. |
-1 | Failure (no such decorator etc.). |
Definition at line 1962 of file transcode.c.
int rb_econv_decorate_at_last | ( | rb_econv_t * | ec, |
const char * | decorator_name | ||
) |
Identical to rb_econv_decorate_at_first(), except it adds to the opposite direction.
For instance CRLF conversion would run after encodings are converted.
[out] | ec | Target converter to decorate. |
[in] | decorator_name | Name of decorator to append. |
0 | Success. |
-1 | Failure (no such decorator etc.). |
Definition at line 1979 of file transcode.c.
Referenced by rb_econv_open().
const char* rb_econv_encoding_to_insert_output | ( | rb_econv_t * | ec | ) |
Queries an encoding name which best suits for rb_econv_insert_output()'s last parameter.
Strings in this encoding need no conversion when inserted; can be both time/space efficient.
[in] | ec | Target converter. |
Definition at line 1532 of file transcode.c.
Referenced by rb_econv_insert_output(), and rb_econv_set_replacement().
int rb_econv_has_convpath_p | ( | const char * | from_encoding, |
const char * | to_encoding | ||
) |
Queries if there is at least one way to convert between the passed two encodings.
Encoding conversions are has_and_belongs_to_many relationships. There could be no direct conversion defined for the passed pair. Ruby then tries to find an indirect way to do so. For instance ISO-8859-1 has no direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8 conversion; then there is UTF-8 to EUC-JP conversion; finally there also is EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted to ISO-2022-JP using that path, and this function returns true for that pair. Obviously not everything that can be represented using UTF-8 can also be represented using EUC-JP. Conversions in practice can fail depending on the actual input, and that results in exceptions in the case of rb_str_encode().
[in] | from_encoding | One encoding. |
[in] | to_encoding | Another encoding. |
0 | No way to convert the two. |
1 | At least one way to convert the two. |
Definition at line 3211 of file transcode.c.
int rb_econv_insert_output | ( | rb_econv_t * | ec, |
const unsigned char * | str, | ||
size_t | len, | ||
const char * | str_encoding | ||
) |
Appends the passed string to the passed converter's output buffer.
This can be handy when an encoding needs bytes out of thin air; for instance ISO-2022-JP has "shift function" which does not correspond to any characters.
[out] | ec | Target converter. |
[in] | str | String to insert. |
[in] | len | Number of bytes of str . |
[in] | str_encoding | Encoding of str . |
0 | Success. |
-1 | Failure (conversion error etc.). |
str_encoding can be anything, and str itself is converted when necessary.
Definition at line 1616 of file transcode.c.
VALUE rb_econv_make_exception | ( | rb_econv_t * | ec | ) |
This function makes sense right after rb_econv_convert() returns.
As listed in rb_econv_result_t, rb_econv_convert() can bail out for various reasons. This function checks the passed converter's internal state and converts it to an appropriate exception object.
[in] | ec | Target converter. |
RUBY_Qnil | The converter has no error. |
otherwise | Conversion error turned into an exception. |
Definition at line 4272 of file transcode.c.
rb_econv_t* rb_econv_open | ( | const char * | source_encoding, |
const char * | destination_encoding, | ||
int | ecflags | ||
) |
Creates a new instance of struct rb_econv_t.
[in] | source_encoding | Name of an encoding. |
[in] | destination_encoding | Name of another encoding. |
[in] | ecflags | A set of enum ruby_econv_flag_type. |
rb_eArgError | No such encoding. |
NULL | Failed to create a struct rb_econv_t. |
otherwise | Allocated struct rb_econv_t. |
Definition at line 1098 of file transcode.c.
Referenced by rb_econv_open_opts().
VALUE rb_econv_open_exc | ( | const char * | senc, |
const char * | denc, | ||
int | ecflags | ||
) |
Creates a rb_eConverterNotFoundError exception object (but does not raise).
[in] | senc | Name of source encoding. |
[in] | denc | Name of destination encoding. |
[in] | ecflags | A set of enum ruby_econv_flag_type. |
An instance of rb_eConverterNotFoundError.
Definition at line 2097 of file transcode.c.
rb_econv_t* rb_econv_open_opts | ( | const char * | source_encoding, |
const char * | destination_encoding, | ||
int | ecflags, | ||
VALUE | ecopts | ||
) |
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
[in] | source_encoding | Name of an encoding. |
[in] | destination_encoding | Name of another encoding. |
[in] | ecflags | A set of enum ruby_econv_flag_type. |
[in] | ecopts | Optional set of strings. |
rb_eArgError | No such encoding. |
NULL | Failed to create a struct rb_econv_t. |
otherwise | Allocated struct rb_econv_t. |
Definition at line 2651 of file transcode.c.
Identical to rb_econv_prepare_opts(), except it additionally takes the initial value of flags.
The extra bits are bitwise-ORed to the return value.
[in] | opthash | Keyword arguments. |
[out] | ecopts | Return buffer. |
[in] | ecflags | Default set of enum ruby_econv_flag_type. |
rb_eArgError | Unknown/Broken values passed. |
ecopts holds a hash object suitable for ::rb_io_t::rb_io_enc_t::ecopts.
Definition at line 2600 of file transcode.c.
Referenced by rb_econv_prepare_opts(), and rb_io_extract_modeenc().
Splits a keyword arguments hash (that for instance String#encode took) into a set of enum ruby_econv_flag_type and a hash storing replacement characters etc.
[in] | opthash | Keyword arguments. |
[out] | ecopts | Return buffer. |
rb_eArgError | Unknown/Broken values passed. |
ecopts holds a hash object suitable for ::rb_io_t::rb_io_enc_t::ecopts.
Definition at line 2645 of file transcode.c.
void rb_econv_putback | ( | rb_econv_t * | ec, |
unsigned char * | p, | ||
int | n | ||
) |
Puts back the bytes.
In case of econv_invalid_byte_sequence, some of those invalid bytes are discarded and the others are buffered to be converted later. The latter bytes can be put back using this API.
[out] | ec | Target converter (invalid byte sequence). |
[out] | p | Return buffer. |
[in] | n | Max number of bytes to put back. |
n bytes of what was put back is written to p.
Definition at line 1781 of file transcode.c.
int rb_econv_putbackable | ( | rb_econv_t * | ec | ) |
Queries if rb_econv_putback() makes sense, i.e.
there are invalid byte sequences remaining in the buffer.
[in] | ec | Target converter. |
Definition at line 1770 of file transcode.c.
int rb_econv_set_replacement | ( | rb_econv_t * | ec, |
const unsigned char * | str, | ||
size_t | len, | ||
const char * | encname | ||
) |
Assigns the replacement string.
The string passed here would appear in the converted string when it cannot represent its source counterpart. This can happen for instance when you convert an emoji to ISO-8859-1.
[out] | ec | Target converter. |
[in] | str | Replacement string. |
[in] | len | Number of bytes of str . |
[in] | encname | Name of encoding of str . |
0 | Success. |
-1 | Failure (ENOMEM etc.). |
ec's replacement string is set to str.
Definition at line 2259 of file transcode.c.
Referenced by rb_econv_open_opts().
VALUE rb_econv_str_append | ( | rb_econv_t * | ec, |
VALUE | src, | ||
VALUE | dst, | ||
int | flags | ||
) |
Identical to rb_econv_str_convert(), except it appends the conversion result to the additionally passed string instead of creating a new string.
It can also be seen as a routine identical to rb_econv_append(), except it takes a Ruby's string instead of C's pointer.
[in,out] | ec | Target converter. |
[in] | src | Source string. |
[in] | dst | Return buffer. |
[in] | flags | Flags (see rb_econv_convert). |
rb_eArgError | Converted string is too long. |
rb_eInvalidByteSequenceError | Invalid byte sequence. |
rb_eUndefinedConversionError | Conversion undefined. |
Definition at line 1919 of file transcode.c.
VALUE rb_econv_str_convert | ( | rb_econv_t * | ec, |
VALUE | src, | ||
int | flags | ||
) |
Identical to rb_econv_convert(), except it takes Ruby's string instead of C's pointer.
[in,out] | ec | Target converter. |
[in] | src | Source string. |
[in] | flags | Flags (see rb_econv_convert). |
rb_eArgError | Converted string is too long. |
rb_eInvalidByteSequenceError | Invalid byte sequence. |
rb_eUndefinedConversionError | Conversion undefined. |
Definition at line 1931 of file transcode.c.
VALUE rb_econv_substr_append | ( | rb_econv_t * | ec, |
VALUE | src, | ||
long | byteoff, | ||
long | bytesize, | ||
VALUE | dst, | ||
int | flags | ||
) |
Identical to rb_econv_str_append(), except it appends only a part of the passed string with conversion.
It can also be seen as a routine identical to rb_econv_substr_convert(), except it appends the conversion result to the additionally passed string instead of creating a new string.
[in,out] | ec | Target converter. |
[in] | src | Source string. |
[in] | byteoff | Number of bytes to seek. |
[in] | bytesize | Number of bytes to read. |
[in] | dst | Return buffer. |
[in] | flags | Flags (see rb_econv_convert). |
rb_eArgError | Converted string is too long. |
rb_eInvalidByteSequenceError | Invalid byte sequence. |
rb_eUndefinedConversionError | Conversion undefined. |
Definition at line 1910 of file transcode.c.
Referenced by rb_econv_str_append(), rb_econv_str_convert(), and rb_econv_substr_convert().
VALUE rb_econv_substr_convert | ( | rb_econv_t * | ec, |
VALUE | src, | ||
long | byteoff, | ||
long | bytesize, | ||
int | flags | ||
) |
Identical to rb_econv_str_convert(), except it converts only a part of the passed string.
Can be handy when you for instance want to do line-buffered conversion.
[in,out] | ec | Target converter. |
[in] | src | Source string. |
[in] | byteoff | Number of bytes to seek. |
[in] | bytesize | Number of bytes to read. |
[in] | flags | Flags (see rb_econv_convert). |
rb_eArgError | Converted string is too long. |
rb_eInvalidByteSequenceError | Invalid byte sequence. |
rb_eUndefinedConversionError | Conversion undefined. |
Definition at line 1925 of file transcode.c.
Converts the contents of the passed string from its encoding to the passed one.
[in] | str | Target string. |
[in] | to | Destination encoding. |
[in] | ecflags | A set of enum ruby_econv_flag_type. |
[in] | ecopts | A keyword hash, like ::rb_io_t::rb_io_enc_t::ecopts. |
rb_eArgError | Not fully converted. |
rb_eInvalidByteSequenceError | str is malformed. |
rb_eUndefinedConversionError | str has a character not representable using to . |
rb_eConverterNotFoundError | There is no known conversion from str 's encoding to to . |
to
, and whose contents is converted contents of str
. ecopts
. Definition at line 2914 of file transcode.c.
Referenced by rb_str_ellipsize().