6#ifndef PRISM_ENCODING_H 
    7#define PRISM_ENCODING_H 
   29    size_t (*char_width)(
const uint8_t *b, ptrdiff_t n);
 
   36    size_t (*alpha_char)(
const uint8_t *b, ptrdiff_t n);
 
   43    size_t (*alnum_char)(
const uint8_t *b, ptrdiff_t n);
 
   50    bool (*isupper_char)(
const uint8_t *b, ptrdiff_t n);
 
 
   /**
    * Flag occupying bit 0. Judging by its name it marks alphabetic characters;
    * the code that tests it is not part of this excerpt. The expansion is
    * parenthesized so it behaves as a single operand inside larger expressions.
    */
   #define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)

   /**
    * Flag occupying bit 1. Judging by its name it marks alphanumeric characters;
    * the code that tests it is not part of this excerpt. Parenthesized for the
    * same operator-precedence reason as the other flag bits.
    */
   #define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)

   /**
    * Flag occupying bit 2. Judging by its name it marks uppercase characters;
    * the code that tests it is not part of this excerpt. Parenthesized for the
    * same operator-precedence reason as the other flag bits.
    */
   #define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
  137    PM_ENCODING_UTF_8 = 0,
 
  138    PM_ENCODING_US_ASCII,
 
  139    PM_ENCODING_ASCII_8BIT,
 
  141    PM_ENCODING_WINDOWS_31J,
 
  145#ifndef PRISM_ENCODING_EXCLUDE_FULL 
  147    PM_ENCODING_BIG5_HKSCS,
 
  148    PM_ENCODING_BIG5_UAO,
 
  157    PM_ENCODING_EMACS_MULE,
 
  158    PM_ENCODING_EUC_JP_MS,
 
  159    PM_ENCODING_EUC_JIS_2004,
 
  182    PM_ENCODING_ISO_8859_1,
 
  183    PM_ENCODING_ISO_8859_2,
 
  184    PM_ENCODING_ISO_8859_3,
 
  185    PM_ENCODING_ISO_8859_4,
 
  186    PM_ENCODING_ISO_8859_5,
 
  187    PM_ENCODING_ISO_8859_6,
 
  188    PM_ENCODING_ISO_8859_7,
 
  189    PM_ENCODING_ISO_8859_8,
 
  190    PM_ENCODING_ISO_8859_9,
 
  191    PM_ENCODING_ISO_8859_10,
 
  192    PM_ENCODING_ISO_8859_11,
 
  193    PM_ENCODING_ISO_8859_13,
 
  194    PM_ENCODING_ISO_8859_14,
 
  195    PM_ENCODING_ISO_8859_15,
 
  196    PM_ENCODING_ISO_8859_16,
 
  199    PM_ENCODING_MAC_CENT_EURO,
 
  200    PM_ENCODING_MAC_CROATIAN,
 
  201    PM_ENCODING_MAC_CYRILLIC,
 
  202    PM_ENCODING_MAC_GREEK,
 
  203    PM_ENCODING_MAC_ICELAND,
 
  204    PM_ENCODING_MAC_JAPANESE,
 
  205    PM_ENCODING_MAC_ROMAN,
 
  206    PM_ENCODING_MAC_ROMANIA,
 
  207    PM_ENCODING_MAC_THAI,
 
  208    PM_ENCODING_MAC_TURKISH,
 
  209    PM_ENCODING_MAC_UKRAINE,
 
  210    PM_ENCODING_SHIFT_JIS,
 
  211    PM_ENCODING_SJIS_DOCOMO,
 
  212    PM_ENCODING_SJIS_KDDI,
 
  213    PM_ENCODING_SJIS_SOFTBANK,
 
  214    PM_ENCODING_STATELESS_ISO_2022_JP,
 
  215    PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
 
  217    PM_ENCODING_UTF8_MAC,
 
  218    PM_ENCODING_UTF8_DOCOMO,
 
  219    PM_ENCODING_UTF8_KDDI,
 
  220    PM_ENCODING_UTF8_SOFTBANK,
 
  221    PM_ENCODING_WINDOWS_1250,
 
  222    PM_ENCODING_WINDOWS_1251,
 
  223    PM_ENCODING_WINDOWS_1252,
 
  224    PM_ENCODING_WINDOWS_1253,
 
  225    PM_ENCODING_WINDOWS_1254,
 
  226    PM_ENCODING_WINDOWS_1255,
 
  227    PM_ENCODING_WINDOWS_1256,
 
  228    PM_ENCODING_WINDOWS_1257,
 
  229    PM_ENCODING_WINDOWS_1258,
 
  230    PM_ENCODING_WINDOWS_874,
 
 
  /** Pointer to the pm_encodings table slot indexed by PM_ENCODING_UTF_8. */
  #define PM_ENCODING_UTF_8_ENTRY (pm_encodings + PM_ENCODING_UTF_8)

  /** Pointer to the pm_encodings table slot indexed by PM_ENCODING_US_ASCII. */
  #define PM_ENCODING_US_ASCII_ENTRY (pm_encodings + PM_ENCODING_US_ASCII)

  /** Pointer to the pm_encodings table slot indexed by PM_ENCODING_ASCII_8BIT. */
  #define PM_ENCODING_ASCII_8BIT_ENTRY (pm_encodings + PM_ENCODING_ASCII_8BIT)

  /** Pointer to the pm_encodings table slot indexed by PM_ENCODING_EUC_JP. */
  #define PM_ENCODING_EUC_JP_ENTRY (pm_encodings + PM_ENCODING_EUC_JP)

  /** Pointer to the pm_encodings table slot indexed by PM_ENCODING_WINDOWS_31J. */
  #define PM_ENCODING_WINDOWS_31J_ENTRY (pm_encodings + PM_ENCODING_WINDOWS_31J)
A custom strncasecmp implementation.
Macro definitions used throughout the prism library.
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n)
Return true if the next character in the UTF-8 encoding is an uppercase character.
const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM]
This is the table of all of the encodings that prism supports.
pm_encoding_type_t
These are all of the encodings that prism supports.
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end)
Parse the given name of an encoding and return a pointer to the corresponding encoding struct if one ...
const uint8_t pm_encoding_unicode_table[256]
This lookup table is referenced in both the UTF-8 encoding file and the parser directly in order to s...
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding if it is an alphabetical character.
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding.
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding if it is an alphanumeric character.
This struct defines the functions necessary to implement the encoding interface so we can determine h...
bool multibyte
True if the encoding is a multibyte encoding.
const char * name
The name of the encoding.