Ruby  3.4.0dev (2024-11-22 revision 0989400a925cd201defdca9eb28eb87200b30785)
Typedefs | Enumerations | Functions
transcode.h File Reference

(0989400a925cd201defdca9eb28eb87200b30785)

econv stuff More...

#include "ruby/internal/dllexport.h"
#include "ruby/internal/value.h"
Include dependency graph for transcode.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Typedefs

typedef struct rb_econv_t rb_econv_t
 An opaque struct that represents the lowest level of encoding conversion. More...
 

Enumerations

enum  rb_econv_result_t {
  econv_invalid_byte_sequence , econv_undefined_conversion , econv_destination_buffer_full , econv_source_buffer_empty ,
  econv_finished , econv_after_output , econv_incomplete_input
}
 return value of rb_econv_convert() More...
 
enum  ruby_econv_flag_type {
  RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff , RUBY_ECONV_INVALID_MASK = 0x0000000f , RUBY_ECONV_INVALID_REPLACE = 0x00000002 , RUBY_ECONV_UNDEF_MASK = 0x000000f0 ,
  RUBY_ECONV_UNDEF_REPLACE = 0x00000020 , RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030 , RUBY_ECONV_DECORATOR_MASK = 0x0001ff00 , RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00 ,
  RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00 , RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000 , RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100 , RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000 ,
  RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000 , RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000 , RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000 , RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000 ,
  RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000 , RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000 , RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR , RUBY_ECONV_PARTIAL_INPUT = 0x00020000 ,
  RUBY_ECONV_AFTER_OUTPUT = 0x00040000 , RUBY_ECONV_FLAGS_PLACEHOLDER
}
 This enum is kind of omnibus. More...
 

Functions

VALUE rb_str_encode (VALUE str, VALUE to, int ecflags, VALUE ecopts)
 Converts the contents of the passed string from its encoding to the passed one. More...
 
int rb_econv_has_convpath_p (const char *from_encoding, const char *to_encoding)
 Queries if there is at least one way to convert between the passed two encodings. More...
 
int rb_econv_prepare_options (VALUE opthash, VALUE *ecopts, int ecflags)
 Identical to rb_econv_prepare_opts(), except it additionally takes the initial value of flags. More...
 
int rb_econv_prepare_opts (VALUE opthash, VALUE *ecopts)
 Splits a keyword arguments hash (that for instance String#encode took) into a set of enum ruby_econv_flag_type and a hash storing replacement characters etc. More...
 
rb_econv_t * rb_econv_open (const char *source_encoding, const char *destination_encoding, int ecflags)
 Creates a new instance of struct rb_econv_t. More...
 
rb_econv_t * rb_econv_open_opts (const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
 Identical to rb_econv_open(), except it additionally takes a hash of optional strings. More...
 
rb_econv_result_t rb_econv_convert (rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
 Converts a string from an encoding to another. More...
 
void rb_econv_close (rb_econv_t *ec)
 Destructs a converter. More...
 
int rb_econv_set_replacement (rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname)
 Assigns the replacement string. More...
 
int rb_econv_decorate_at_first (rb_econv_t *ec, const char *decorator_name)
 "Decorate"s a converter. More...
 
int rb_econv_decorate_at_last (rb_econv_t *ec, const char *decorator_name)
 Identical to rb_econv_decorate_at_first(), except it adds to the opposite direction. More...
 
VALUE rb_econv_open_exc (const char *senc, const char *denc, int ecflags)
 Creates a rb_eConverterNotFoundError exception object (but does not raise). More...
 
int rb_econv_insert_output (rb_econv_t *ec, const unsigned char *str, size_t len, const char *str_encoding)
 Appends the passed string to the passed converter's output buffer. More...
 
const char * rb_econv_encoding_to_insert_output (rb_econv_t *ec)
 Queries an encoding name which best suits rb_econv_insert_output()'s last parameter. More...
 
void rb_econv_check_error (rb_econv_t *ec)
 This is a rb_econv_make_exception() + rb_exc_raise() combo. More...
 
VALUE rb_econv_make_exception (rb_econv_t *ec)
 This function makes sense right after rb_econv_convert() returns. More...
 
int rb_econv_putbackable (rb_econv_t *ec)
 Queries if rb_econv_putback() makes sense, i.e. More...
 
void rb_econv_putback (rb_econv_t *ec, unsigned char *p, int n)
 Puts back the bytes. More...
 
const char * rb_econv_asciicompat_encoding (const char *encname)
 Queries the passed encoding's corresponding ASCII compatible encoding. More...
 
VALUE rb_econv_str_convert (rb_econv_t *ec, VALUE src, int flags)
 Identical to rb_econv_convert(), except it takes Ruby's string instead of C's pointer. More...
 
VALUE rb_econv_substr_convert (rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
 Identical to rb_econv_str_convert(), except it converts only a part of the passed string. More...
 
VALUE rb_econv_str_append (rb_econv_t *ec, VALUE src, VALUE dst, int flags)
 Identical to rb_econv_str_convert(), except it appends the conversion result to the additionally passed string instead of creating a new string. More...
 
VALUE rb_econv_substr_append (rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags)
 Identical to rb_econv_str_append(), except it appends only a part of the passed string with conversion. More...
 
VALUE rb_econv_append (rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags)
 Converts the passed C's pointer according to the passed converter, then appends the conversion result to the passed Ruby's string. More...
 
void rb_econv_binmode (rb_econv_t *ec)
 This badly named function does not set the destination encoding to binary, but instead just nullifies newline conversion decorators if any. More...
 

Flags for rb_econv_open()

#define ECONV_ERROR_HANDLER_MASK   RUBY_ECONV_ERROR_HANDLER_MASK
 Old name of RUBY_ECONV_ERROR_HANDLER_MASK. More...
 
#define ECONV_INVALID_MASK   RUBY_ECONV_INVALID_MASK
 Old name of RUBY_ECONV_INVALID_MASK. More...
 
#define ECONV_INVALID_REPLACE   RUBY_ECONV_INVALID_REPLACE
 Old name of RUBY_ECONV_INVALID_REPLACE. More...
 
#define ECONV_UNDEF_MASK   RUBY_ECONV_UNDEF_MASK
 Old name of RUBY_ECONV_UNDEF_MASK. More...
 
#define ECONV_UNDEF_REPLACE   RUBY_ECONV_UNDEF_REPLACE
 Old name of RUBY_ECONV_UNDEF_REPLACE. More...
 
#define ECONV_UNDEF_HEX_CHARREF   RUBY_ECONV_UNDEF_HEX_CHARREF
 Old name of RUBY_ECONV_UNDEF_HEX_CHARREF. More...
 
#define ECONV_DECORATOR_MASK   RUBY_ECONV_DECORATOR_MASK
 Old name of RUBY_ECONV_DECORATOR_MASK. More...
 
#define ECONV_NEWLINE_DECORATOR_MASK   RUBY_ECONV_NEWLINE_DECORATOR_MASK
 Old name of RUBY_ECONV_NEWLINE_DECORATOR_MASK. More...
 
#define ECONV_NEWLINE_DECORATOR_READ_MASK   RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK
 Old name of RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK. More...
 
#define ECONV_NEWLINE_DECORATOR_WRITE_MASK   RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK
 Old name of RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK. More...
 
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR   RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR
 Old name of RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR. More...
 
#define ECONV_CRLF_NEWLINE_DECORATOR   RUBY_ECONV_CRLF_NEWLINE_DECORATOR
 Old name of RUBY_ECONV_CRLF_NEWLINE_DECORATOR. More...
 
#define ECONV_CR_NEWLINE_DECORATOR   RUBY_ECONV_CR_NEWLINE_DECORATOR
 Old name of RUBY_ECONV_CR_NEWLINE_DECORATOR. More...
 
#define ECONV_LF_NEWLINE_DECORATOR   RUBY_ECONV_LF_NEWLINE_DECORATOR
 Old name of RUBY_ECONV_LF_NEWLINE_DECORATOR. More...
 
#define ECONV_XML_TEXT_DECORATOR   RUBY_ECONV_XML_TEXT_DECORATOR
 Old name of RUBY_ECONV_XML_TEXT_DECORATOR. More...
 
#define ECONV_XML_ATTR_CONTENT_DECORATOR   RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR
 Old name of RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR. More...
 
#define ECONV_STATEFUL_DECORATOR_MASK   RUBY_ECONV_STATEFUL_DECORATOR_MASK
 Old name of RUBY_ECONV_STATEFUL_DECORATOR_MASK. More...
 
#define ECONV_XML_ATTR_QUOTE_DECORATOR   RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR
 Old name of RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR. More...
 
#define ECONV_DEFAULT_NEWLINE_DECORATOR   RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR
 Old name of RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR. More...
 

Flags for rb_econv_convert()

#define ECONV_PARTIAL_INPUT   RUBY_ECONV_PARTIAL_INPUT
 Old name of RUBY_ECONV_PARTIAL_INPUT. More...
 
#define ECONV_AFTER_OUTPUT   RUBY_ECONV_AFTER_OUTPUT
 Old name of RUBY_ECONV_AFTER_OUTPUT. More...
 

Detailed Description

econv stuff

Author
Ruby developers <ruby-core@ruby-lang.org>
Warning
Symbols prefixed with either RBIMPL or rbimpl are implementation details. Don't take them as canon. They could rapidly appear then vanish. The name (path) of this header file is also an implementation detail. Do not expect it to persist at the place it is now. Developers are free to move it anywhere anytime at will.
Note
To ruby-core: remember that this header can be possibly recursively included from extension libraries written in C++. Do not expect, for instance, that __VA_ARGS__ is always available. We assume C99 for ruby itself but we don't assume languages of extension libraries. They could be written in C++98.

Definition in file transcode.h.

Typedef Documentation

◆ rb_econv_t

typedef struct rb_econv_t rb_econv_t

An opaque struct that represents the lowest level of encoding conversion.

Definition at line 1 of file transcode.h.

Enumeration Type Documentation

◆ rb_econv_result_t

return value of rb_econv_convert()

Enumerator
econv_invalid_byte_sequence 

The conversion stopped when it found an invalid sequence.

econv_undefined_conversion 

The conversion stopped when it found a character in the input which cannot be represented in the output.

econv_destination_buffer_full 

The conversion stopped because the destination buffer is full.

econv_source_buffer_empty 

The conversion stopped because there is no input.

econv_finished 

The conversion stopped after converting everything.

This is arguably the expected normal end of conversion.

econv_after_output 

The conversion stopped after writing something to somewhere, before reading everything.

econv_incomplete_input 

The conversion stopped in the middle of reading a character, possibly due to a partial read of a socket etc.

Definition at line 30 of file transcode.h.

◆ ruby_econv_flag_type

This enum is kind of omnibus.

Gathers various constants.

Enumerator
RUBY_ECONV_ERROR_HANDLER_MASK 

Mask for error handling related bits.

RUBY_ECONV_INVALID_MASK 

Mask for the bits that select special handling of invalid sequences.

RUBY_ECONV_INVALID_REPLACE 

Invalid sequences shall be replaced.

RUBY_ECONV_UNDEF_MASK 

Mask for the bits that select special handling of undefined conversions.

RUBY_ECONV_UNDEF_REPLACE 

Undefined characters shall be replaced.

RUBY_ECONV_UNDEF_HEX_CHARREF 

Undefined characters shall be escaped.

RUBY_ECONV_DECORATOR_MASK 

Decorators are there.

RUBY_ECONV_NEWLINE_DECORATOR_MASK 

Newline converters are there.

RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK 

(Unclear; seems unused).

RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK 

(Unclear; seems unused).

RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR 

Universal newline mode.

RUBY_ECONV_CRLF_NEWLINE_DECORATOR 

LF to CRLF conversion shall happen.

RUBY_ECONV_CR_NEWLINE_DECORATOR 

LF to CR conversion shall happen.

RUBY_ECONV_LF_NEWLINE_DECORATOR 

CRLF to LF conversion shall happen.

RUBY_ECONV_XML_TEXT_DECORATOR 

Texts shall be XML-escaped.

RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR 

Texts shall be AttrValue escaped.

RUBY_ECONV_STATEFUL_DECORATOR_MASK 

(Unclear; seems unused).

RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR 

Texts shall be AttrValue escaped and quoted.

RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR 

Newline decorator's default.

RUBY_ECONV_PARTIAL_INPUT 

Indicates the input is a part of much larger one.

RUBY_ECONV_AFTER_OUTPUT 

Instructs the converter to stop after output.

RUBY_ECONV_FLAGS_PLACEHOLDER 

Placeholder (not used)

Definition at line 452 of file transcode.h.

Function Documentation

◆ rb_econv_append()

VALUE rb_econv_append ( rb_econv_t *  ec,
const char *  bytesrc,
long  bytesize,
VALUE  dst,
int  flags 
)

Converts the passed C's pointer according to the passed converter, then appends the conversion result to the passed Ruby's string.

This way buffer overflow is avoided, because the destination is resized as needed.

Parameters
[in,out]ecTarget converter.
[in]bytesrcTarget string.
[in]bytesizeNumber of bytes of bytesrc.
[in]dstReturn buffer.
[in]flagsFlags (see rb_econv_convert).
Exceptions
rb_eArgErrorConverted string is too long.
rb_eInvalidByteSequenceErrorInvalid byte sequence.
rb_eUndefinedConversionErrorConversion undefined.
Returns
The conversion result.

Definition at line 1847 of file transcode.c.

Referenced by rb_econv_substr_append().

◆ rb_econv_asciicompat_encoding()

const char* rb_econv_asciicompat_encoding ( const char *  encname)

Queries the passed encoding's corresponding ASCII compatible encoding.

"The corresponding ASCII compatible encoding" in this context is an ASCII compatible encoding which can represent exactly the same character sets as the given ASCII incompatible encoding. For instance that of UTF-16LE is UTF-8.

Parameters
[in]encnameName of an ASCII incompatible encoding.
Return values
NULLencname is already ASCII compatible.
otherwiseThe corresponding ASCII compatible encoding.

Definition at line 1814 of file transcode.c.

◆ rb_econv_binmode()

void rb_econv_binmode ( rb_econv_t *  ec)

This badly named function does not set the destination encoding to binary, but instead just nullifies newline conversion decorators if any.

Other ordinary character conversions still happen after this; something non-binary would still be generated.

Parameters
[out]ecTarget converter to modify.
Postcondition
Any newline conversions, if any, would be killed.

Definition at line 1996 of file transcode.c.

Referenced by rb_io_binmode().

◆ rb_econv_check_error()

void rb_econv_check_error ( rb_econv_t *  ec)

This is a rb_econv_make_exception() + rb_exc_raise() combo.

Parameters
[in]ec(Possibly failed) conversion.
Exceptions
rb_eInvalidByteSequenceErrorInvalid byte sequence.
rb_eUndefinedConversionErrorConversion undefined.
Note
This function returns normally when there is no error.

Definition at line 4278 of file transcode.c.

Referenced by rb_econv_append().

◆ rb_econv_close()

void rb_econv_close ( rb_econv_t *  ec)

Destructs a converter.

Note that a converter can have a buffer, and that buffer can be non-empty. Calling this would then lose the buffered data.

Parameters
[out]ecThe converter to destroy.
Postcondition
ec is no longer a valid pointer.

Definition at line 1731 of file transcode.c.

Referenced by rb_econv_open(), and rb_econv_open_opts().

◆ rb_econv_convert()

rb_econv_result_t rb_econv_convert ( rb_econv_t *  ec,
const unsigned char **  source_buffer_ptr,
const unsigned char *  source_buffer_end,
unsigned char **  destination_buffer_ptr,
unsigned char *  destination_buffer_end,
int  flags 
)

Converts a string from an encoding to another.

Possible flags are either RUBY_ECONV_PARTIAL_INPUT (means the source buffer is a part of much larger one), RUBY_ECONV_AFTER_OUTPUT (instructs the converter to stop after output before input), or both of them.

Parameters
[in,out]ecConversion specification/state etc.
[in]source_buffer_ptrTarget string.
[in]source_buffer_endEnd of target string.
[out]destination_buffer_ptrReturn buffer.
[out]destination_buffer_endEnd of return buffer.
[in]flagsFlags (see above).
Returns
The status of the conversion.
Postcondition
destination_buffer_ptr holds conversion results.

Definition at line 1475 of file transcode.c.

Referenced by rb_econv_append().

◆ rb_econv_decorate_at_first()

int rb_econv_decorate_at_first ( rb_econv_t *  ec,
const char *  decorator_name 
)

"Decorate"s a converter.

There is a special kind of converter that transforms the contents, like replacing LF with CRLF. You can add such decorators to a converter using this API. By using this function a decorator is prepended at the beginning of a conversion sequence: in case of CRLF conversion, newlines are converted before encodings are converted.

Parameters
[out]ecTarget converter to decorate.
[in]decorator_nameName of decorator to prepend.
Return values
0Success.
-1Failure (no such decorator etc.).
Postcondition
Decorator works before encoding conversion happens.

Definition at line 1962 of file transcode.c.

◆ rb_econv_decorate_at_last()

int rb_econv_decorate_at_last ( rb_econv_t *  ec,
const char *  decorator_name 
)

Identical to rb_econv_decorate_at_first(), except it adds to the opposite direction.

For instance CRLF conversion would run after encodings are converted.

Parameters
[out]ecTarget converter to decorate.
[in]decorator_nameName of decorator to append.
Return values
0Success.
-1Failure (no such decorator etc.).
Postcondition
Decorator works after encoding conversion happens.

Definition at line 1979 of file transcode.c.

Referenced by rb_econv_open().

◆ rb_econv_encoding_to_insert_output()

const char* rb_econv_encoding_to_insert_output ( rb_econv_t *  ec)

Queries an encoding name which best suits rb_econv_insert_output()'s last parameter.

Strings in this encoding need no conversion when inserted; can be both time/space efficient.

Parameters
[in]ecTarget converter.
Returns
Its encoding for insertion.

Definition at line 1532 of file transcode.c.

Referenced by rb_econv_insert_output(), and rb_econv_set_replacement().

◆ rb_econv_has_convpath_p()

int rb_econv_has_convpath_p ( const char *  from_encoding,
const char *  to_encoding 
)

Queries if there is at least one way to convert between the passed two encodings.

Encoding conversions form has_and_belongs_to_many relationships. There may be no direct conversion defined for the passed pair; Ruby then tries to find an indirect way. For instance ISO-8859-1 has no direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8 conversion; then there is UTF-8 to EUC-JP conversion; finally there also is EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted to ISO-2022-JP using that path, and this function returns true for that pair. Obviously not everything that can be represented using UTF-8 can also be represented using EUC-JP. Conversions in practice can fail depending on the actual input, and that raises exceptions in case of rb_str_encode().

Parameters
[in]from_encodingOne encoding.
[in]to_encodingAnother encoding.
Return values
0No way to convert the two.
1At least one way to convert the two.

Definition at line 3211 of file transcode.c.

◆ rb_econv_insert_output()

int rb_econv_insert_output ( rb_econv_t *  ec,
const unsigned char *  str,
size_t  len,
const char *  str_encoding 
)

Appends the passed string to the passed converter's output buffer.

This can be handy when an encoding needs bytes out of thin air; for instance ISO-2022-JP has "shift function" which does not correspond to any characters.

Parameters
[out]ecTarget converter.
[in]strString to insert.
[in]lenNumber of bytes of str.
[in]str_encodingEncoding of str.
Return values
0Success.
-1Failure (conversion error etc.).
Note
str_encoding can be anything, and str itself is converted when necessary.

Definition at line 1616 of file transcode.c.

◆ rb_econv_make_exception()

VALUE rb_econv_make_exception ( rb_econv_t *  ec)

This function makes sense right after rb_econv_convert() returns.

As listed in rb_econv_result_t, rb_econv_convert() can bail out for various reasons. This function checks the passed converter's internal state and converts it to an appropriate exception object.

Parameters
[in]ecTarget converter.
Return values
RUBY_QnilThe converter has no error.
otherwiseConversion error turned into an exception.

Definition at line 4272 of file transcode.c.

◆ rb_econv_open()

rb_econv_t* rb_econv_open ( const char *  source_encoding,
const char *  destination_encoding,
int  ecflags 
)

Creates a new instance of struct rb_econv_t.

Parameters
[in]source_encodingName of an encoding.
[in]destination_encodingName of another encoding.
[in]ecflagsA set of enum ruby_econv_flag_type.
Exceptions
rb_eArgErrorNo such encoding.
Return values
NULLFailed to create a struct rb_econv_t.
otherwiseAllocated struct rb_econv_t.
Warning
Return value must be passed to rb_econv_close() exactly once.

Definition at line 1098 of file transcode.c.

Referenced by rb_econv_open_opts().

◆ rb_econv_open_exc()

VALUE rb_econv_open_exc ( const char *  senc,
const char *  denc,
int  ecflags 
)

Creates a rb_eConverterNotFoundError exception object (but does not raise).

Parameters
[in]sencName of source encoding.
[in]dencName of destination encoding.
[in]ecflagsA set of enum ruby_econv_flag_type.
Returns
An instance of rb_eConverterNotFoundError.

Definition at line 2097 of file transcode.c.

◆ rb_econv_open_opts()

rb_econv_t* rb_econv_open_opts ( const char *  source_encoding,
const char *  destination_encoding,
int  ecflags,
VALUE  ecopts 
)

Identical to rb_econv_open(), except it additionally takes a hash of optional strings.

Parameters
[in]source_encodingName of an encoding.
[in]destination_encodingName of another encoding.
[in]ecflagsA set of enum ruby_econv_flag_type.
[in]ecoptsOptional set of strings.
Exceptions
rb_eArgErrorNo such encoding.
Return values
NULLFailed to create a struct rb_econv_t.
otherwiseAllocated struct rb_econv_t.
Warning
Return value must be passed to rb_econv_close() exactly once.

Definition at line 2651 of file transcode.c.

◆ rb_econv_prepare_options()

int rb_econv_prepare_options ( VALUE  opthash,
VALUE *  ecopts,
int  ecflags 
)

Identical to rb_econv_prepare_opts(), except it additionally takes the initial value of flags.

The extra bits are bitwise-ORed to the return value.

Parameters
[in]opthashKeyword arguments.
[out]ecoptsReturn buffer.
[in]ecflagsDefault set of enum ruby_econv_flag_type.
Exceptions
rb_eArgErrorUnknown/Broken values passed.
Returns
Calculated set of enum ruby_econv_flag_type.
Postcondition
ecopts holds a hash object suitable for ::rb_io_t::rb_io_enc_t::ecopts.

Definition at line 2600 of file transcode.c.

Referenced by rb_econv_prepare_opts(), and rb_io_extract_modeenc().

◆ rb_econv_prepare_opts()

int rb_econv_prepare_opts ( VALUE  opthash,
VALUE *  ecopts 
)

Splits a keyword arguments hash (that for instance String#encode took) into a set of enum ruby_econv_flag_type and a hash storing replacement characters etc.

Parameters
[in]opthashKeyword arguments.
[out]ecoptsReturn buffer.
Exceptions
rb_eArgErrorUnknown/Broken values passed.
Returns
Calculated set of enum ruby_econv_flag_type.
Postcondition
ecopts holds a hash object suitable for ::rb_io_t::rb_io_enc_t::ecopts.

Definition at line 2645 of file transcode.c.

◆ rb_econv_putback()

void rb_econv_putback ( rb_econv_t *  ec,
unsigned char *  p,
int  n 
)

Puts back the bytes.

In case of econv_invalid_byte_sequence, some of those invalid bytes are discarded and the others are buffered to be converted later. The latter bytes can be put back using this API.

Parameters
[out]ecTarget converter (invalid byte sequence).
[out]pReturn buffer.
[in]nMax number of bytes to put back.
Postcondition
At most n bytes of what was put back are written to p.

Definition at line 1781 of file transcode.c.

◆ rb_econv_putbackable()

int rb_econv_putbackable ( rb_econv_t *  ec)

Queries if rb_econv_putback() makes sense, i.e. whether invalid byte sequences remain in the buffer.

Parameters
[in]ecTarget converter.
Returns
Number of bytes that can be pushed back.

Definition at line 1770 of file transcode.c.

◆ rb_econv_set_replacement()

int rb_econv_set_replacement ( rb_econv_t *  ec,
const unsigned char *  str,
size_t  len,
const char *  encname 
)

Assigns the replacement string.

The string passed here would appear in the converted string when it cannot represent its source counterpart. This can happen, for instance, when you convert an emoji to ISO-8859-1.

Parameters
[out]ecTarget converter.
[in]strReplacement string.
[in]lenNumber of bytes of str.
[in]encnameName of encoding of str.
Return values
0Success.
-1Failure (ENOMEM etc.).
Postcondition
ec's replacement string is set to str.

Definition at line 2259 of file transcode.c.

Referenced by rb_econv_open_opts().

◆ rb_econv_str_append()

VALUE rb_econv_str_append ( rb_econv_t *  ec,
VALUE  src,
VALUE  dst,
int  flags 
)

Identical to rb_econv_str_convert(), except it appends the conversion result to the additionally passed string instead of creating a new string.

It can also be seen as a routine identical to rb_econv_append(), except it takes a Ruby's string instead of C's pointer.

Parameters
[in,out]ecTarget converter.
[in]srcSource string.
[in]dstReturn buffer.
[in]flagsFlags (see rb_econv_convert).
Exceptions
rb_eArgErrorConverted string is too long.
rb_eInvalidByteSequenceErrorInvalid byte sequence.
rb_eUndefinedConversionErrorConversion undefined.
Returns
The conversion result.

Definition at line 1919 of file transcode.c.

◆ rb_econv_str_convert()

VALUE rb_econv_str_convert ( rb_econv_t *  ec,
VALUE  src,
int  flags 
)

Identical to rb_econv_convert(), except it takes Ruby's string instead of C's pointer.

Parameters
[in,out]ecTarget converter.
[in]srcSource string.
[in]flagsFlags (see rb_econv_convert).
Exceptions
rb_eArgErrorConverted string is too long.
rb_eInvalidByteSequenceErrorInvalid byte sequence.
rb_eUndefinedConversionErrorConversion undefined.
Returns
The conversion result.

Definition at line 1931 of file transcode.c.

◆ rb_econv_substr_append()

VALUE rb_econv_substr_append ( rb_econv_t *  ec,
VALUE  src,
long  byteoff,
long  bytesize,
VALUE  dst,
int  flags 
)

Identical to rb_econv_str_append(), except it appends only a part of the passed string with conversion.

It can also be seen as a routine identical to rb_econv_substr_convert(), except it appends the conversion result to the additionally passed string instead of creating a new string.

Parameters
[in,out]ecTarget converter.
[in]srcSource string.
[in]byteoffNumber of bytes to seek.
[in]bytesizeNumber of bytes to read.
[in]dstReturn buffer.
[in]flagsFlags (see rb_econv_convert).
Exceptions
rb_eArgErrorConverted string is too long.
rb_eInvalidByteSequenceErrorInvalid byte sequence.
rb_eUndefinedConversionErrorConversion undefined.
Returns
The conversion result.

Definition at line 1910 of file transcode.c.

Referenced by rb_econv_str_append(), rb_econv_str_convert(), and rb_econv_substr_convert().

◆ rb_econv_substr_convert()

VALUE rb_econv_substr_convert ( rb_econv_t *  ec,
VALUE  src,
long  byteoff,
long  bytesize,
int  flags 
)

Identical to rb_econv_str_convert(), except it converts only a part of the passed string.

Can be handy when you for instance want to do line-buffered conversion.

Parameters
[in,out]ecTarget converter.
[in]srcSource string.
[in]byteoffNumber of bytes to seek.
[in]bytesizeNumber of bytes to read.
[in]flagsFlags (see rb_econv_convert).
Exceptions
rb_eArgErrorConverted string is too long.
rb_eInvalidByteSequenceErrorInvalid byte sequence.
rb_eUndefinedConversionErrorConversion undefined.
Returns
The conversion result.

Definition at line 1925 of file transcode.c.

◆ rb_str_encode()

VALUE rb_str_encode ( VALUE  str,
VALUE  to,
int  ecflags,
VALUE  ecopts 
)

Converts the contents of the passed string from its encoding to the passed one.

Parameters
[in]strTarget string.
[in]toDestination encoding.
[in]ecflagsA set of enum ruby_econv_flag_type.
[in]ecoptsA keyword hash, like ::rb_io_t::rb_io_enc_t::ecopts.
Exceptions
rb_eArgErrorNot fully converted.
rb_eInvalidByteSequenceErrorstr is malformed.
rb_eUndefinedConversionErrorstr has a character not representable using to.
rb_eConverterNotFoundErrorThere is no known conversion from str's encoding to to.
Returns
A string whose encoding is to, and whose contents are the converted contents of str.
Note
Use rb_econv_prepare_options() to generate ecopts.

Definition at line 2914 of file transcode.c.

Referenced by rb_str_ellipsize().