6 #ifndef PRISM_ENCODING_H
7 #define PRISM_ENCODING_H
29 size_t (*char_width)(
const uint8_t *b, ptrdiff_t n);
36 size_t (*alpha_char)(
const uint8_t *b, ptrdiff_t n);
43 size_t (*alnum_char)(
const uint8_t *b, ptrdiff_t n);
50 bool (*isupper_char)(
const uint8_t *b, ptrdiff_t n);
/**
 * Bit flag (bit 0, value 1) named for the alphabetic character class.
 *
 * The replacement list is parenthesized so the macro expands as a single
 * operand regardless of the operators surrounding it at the use site.
 */
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)

/**
 * Bit flag (bit 1, value 2) named for the alphanumeric character class.
 *
 * Parenthesized for the same reason: without the parentheses an expression
 * such as `PRISM_ENCODING_ALPHANUMERIC_BIT + 1` would parse as `1 << (1 + 1)`.
 */
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)

/**
 * Bit flag (bit 2, value 4) named for the uppercase character class.
 *
 * Parenthesized so that arithmetic or unary operators applied to the macro
 * bind to the whole shifted value rather than to the shift count.
 */
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
137 PM_ENCODING_UTF_8 = 0,
138 PM_ENCODING_US_ASCII,
139 PM_ENCODING_ASCII_8BIT,
141 PM_ENCODING_WINDOWS_31J,
145 #ifndef PRISM_ENCODING_EXCLUDE_FULL
147 PM_ENCODING_BIG5_HKSCS,
148 PM_ENCODING_BIG5_UAO,
157 PM_ENCODING_EMACS_MULE,
158 PM_ENCODING_EUC_JP_MS,
159 PM_ENCODING_EUC_JIS_2004,
182 PM_ENCODING_ISO_8859_1,
183 PM_ENCODING_ISO_8859_2,
184 PM_ENCODING_ISO_8859_3,
185 PM_ENCODING_ISO_8859_4,
186 PM_ENCODING_ISO_8859_5,
187 PM_ENCODING_ISO_8859_6,
188 PM_ENCODING_ISO_8859_7,
189 PM_ENCODING_ISO_8859_8,
190 PM_ENCODING_ISO_8859_9,
191 PM_ENCODING_ISO_8859_10,
192 PM_ENCODING_ISO_8859_11,
193 PM_ENCODING_ISO_8859_13,
194 PM_ENCODING_ISO_8859_14,
195 PM_ENCODING_ISO_8859_15,
196 PM_ENCODING_ISO_8859_16,
199 PM_ENCODING_MAC_CENT_EURO,
200 PM_ENCODING_MAC_CROATIAN,
201 PM_ENCODING_MAC_CYRILLIC,
202 PM_ENCODING_MAC_GREEK,
203 PM_ENCODING_MAC_ICELAND,
204 PM_ENCODING_MAC_JAPANESE,
205 PM_ENCODING_MAC_ROMAN,
206 PM_ENCODING_MAC_ROMANIA,
207 PM_ENCODING_MAC_THAI,
208 PM_ENCODING_MAC_TURKISH,
209 PM_ENCODING_MAC_UKRAINE,
210 PM_ENCODING_SHIFT_JIS,
211 PM_ENCODING_SJIS_DOCOMO,
212 PM_ENCODING_SJIS_KDDI,
213 PM_ENCODING_SJIS_SOFTBANK,
214 PM_ENCODING_STATELESS_ISO_2022_JP,
215 PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
217 PM_ENCODING_UTF8_MAC,
218 PM_ENCODING_UTF8_DOCOMO,
219 PM_ENCODING_UTF8_KDDI,
220 PM_ENCODING_UTF8_SOFTBANK,
221 PM_ENCODING_WINDOWS_1250,
222 PM_ENCODING_WINDOWS_1251,
223 PM_ENCODING_WINDOWS_1252,
224 PM_ENCODING_WINDOWS_1253,
225 PM_ENCODING_WINDOWS_1254,
226 PM_ENCODING_WINDOWS_1255,
227 PM_ENCODING_WINDOWS_1256,
228 PM_ENCODING_WINDOWS_1257,
229 PM_ENCODING_WINDOWS_1258,
230 PM_ENCODING_WINDOWS_874,
/** Expands to the address of the pm_encodings element indexed by PM_ENCODING_UTF_8. */
245 #define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
/** Expands to the address of the pm_encodings element indexed by PM_ENCODING_US_ASCII. */
252 #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
/** Expands to the address of the pm_encodings element indexed by PM_ENCODING_ASCII_8BIT. */
259 #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
/** Expands to the address of the pm_encodings element indexed by PM_ENCODING_EUC_JP. */
265 #define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
/** Expands to the address of the pm_encodings element indexed by PM_ENCODING_WINDOWS_31J. */
271 #define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
A custom strncasecmp implementation.
Macro definitions used throughout the prism library.
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n)
Return true if the next character in the UTF-8 encoding is an uppercase character.
const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM]
This is the table of all of the encodings that prism supports.
pm_encoding_type_t
These are all of the encodings that prism supports.
const uint8_t pm_encoding_unicode_table[256]
This lookup table is referenced in both the UTF-8 encoding file and the parser directly in order to s...
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding if it is an alphabetical character.
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end)
Parse the given name of an encoding and return a pointer to the corresponding encoding struct if one ...
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding.
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding if it is an alphanumeric character.
This struct defines the functions necessary to implement the encoding interface so we can determine h...
bool multibyte
Return true if the encoding is a multibyte encoding.
const char * name
The name of the encoding.