/**
 * Record an invalid-multibyte-character diagnostic on the parser.
 *
 * Appends a PM_ERR_INVALID_MULTIBYTE_CHARACTER entry to parser->error_list
 * for the span given by start and end, passing the first byte of the span
 * (*start) as the format argument.
 *
 * NOTE(review): this listing has source line numbers fused onto the code
 * (7, 8) and does not show the return type or the closing brace, so the
 * definition below is partial.
 *
 * @param parser The parser whose error list receives the diagnostic.
 * @param start Pointer to the first byte of the offending span.
 * @param end Pointer marking the end of the offending span.
 */
7pm_strpbrk_invalid_multibyte_character(
pm_parser_t *parser,
const uint8_t *start,
const uint8_t *end) {
8 pm_diagnostic_list_append_format(&parser->
error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
/**
 * Helper invoked by the strpbrk variants when they meet a byte or character
 * that ties the text being scanned to the parser's current encoding.
 *
 * Only two statements are visible here: one that appends a
 * PM_ERR_MIXED_ENCODING diagnostic for source .. source + width, using
 * parser->encoding->name as the format argument, and one assert on a path
 * marked "unreachable".
 *
 * NOTE(review): the embedded listing numbers jump 15 -> 22 -> 25, so the
 * conditions that guard these two statements (and anything the function
 * assigns) are not shown. Confirm against the full source before relying
 * on this description.
 *
 * @param parser The parser whose encoding state is consulted.
 * @param source Start of the span reported in the diagnostic.
 * @param width Length in bytes of the span reported in the diagnostic.
 */
15pm_strpbrk_explicit_encoding_set(
pm_parser_t *parser,
const uint8_t *source,
size_t width) {
// Report the conflict, naming the parser's current encoding.
22 pm_diagnostic_list_append_format(&parser->
error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->
encoding->
name);
// Always-false assertion: reaching this statement is a programming error.
25 assert(
false &&
"unreachable");
/**
 * strpbrk variant that measures characters with
 * pm_encoding_utf_8_char_width.
 *
 * Scans source while index < maximum and returns a pointer to the first
 * byte that strchr() finds in charset. Bytes below 0x80 take one branch;
 * other bytes have their width looked up. When the width lookup keeps
 * returning 0, the run of such bytes is reported through
 * pm_strpbrk_invalid_multibyte_character.
 *
 * NOTE(review): the embedded listing numbers skip (41 -> 44, 51 -> 58,
 * 58 -> 62), so the declaration of index, the branch bodies, the statements
 * that advance index and the final return are not shown here.
 *
 * @param parser The parser that receives diagnostics.
 * @param source The bytes to search.
 * @param charset NUL-terminated set of bytes to look for.
 * @param maximum Upper bound on the number of bytes examined.
 * @param validate Selects between the branches below; the skipped branch
 *     body is not visible in this listing.
 * @return A pointer into source at the first matching byte (the no-match
 *     return is not visible in this listing).
 */
35static inline const uint8_t *
36pm_strpbrk_utf8(
pm_parser_t *parser,
const uint8_t *source,
const uint8_t *charset,
size_t maximum,
bool validate) {
39 while (index < maximum) {
// strchr() also matches the terminating NUL of charset, so a 0 byte in
// source counts as a hit.
40 if (strchr((
const char *) charset, source[index]) != NULL) {
41 return source + index;
// Bytes below 0x80 get their own branch (body not shown in this listing).
44 if (source[index] < 0x80) {
// Width of the character at index, limited to the bytes that remain.
47 size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
51 }
else if (!validate) {
// Remember where the invalid run begins so the diagnostic can span it.
58 const size_t start = index;
62 }
// Loop condition: continue while bytes remain and the width lookup at index
// still returns 0.
while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
// Report the whole run from start up to the current index.
64 pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
/**
 * strpbrk variant used for the ASCII-8BIT case (per the helper name).
 *
 * Scans source while index < maximum and returns a pointer to the first
 * byte that strchr() finds in charset. When validate is set and the current
 * byte is 0x80 or above, pm_strpbrk_explicit_encoding_set is called with a
 * width of 1.
 *
 * NOTE(review): the embedded listing numbers skip (81 -> 84), and the
 * declaration and advancement of index plus the final return are not shown
 * here.
 *
 * @param parser The parser passed on to pm_strpbrk_explicit_encoding_set.
 * @param source The bytes to search.
 * @param charset NUL-terminated set of bytes to look for.
 * @param maximum Upper bound on the number of bytes examined.
 * @param validate When true, high bytes trigger the explicit-encoding call.
 * @return A pointer into source at the first matching byte (the no-match
 *     return is not visible in this listing).
 */
75static inline const uint8_t *
76pm_strpbrk_ascii_8bit(
pm_parser_t *parser,
const uint8_t *source,
const uint8_t *charset,
size_t maximum,
bool validate) {
79 while (index < maximum) {
// strchr() also matches the terminating NUL of charset, so a 0 byte in
// source counts as a hit.
80 if (strchr((
const char *) charset, source[index]) != NULL) {
81 return source + index;
// NOTE(review): the call passes source, not source + index, so the span
// handed to the helper starts at the beginning of the searched buffer
// rather than at the high byte. Confirm this is intended.
84 if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
/**
 * strpbrk variant that measures characters with the char_width callback of
 * an encoding.
 *
 * Scans source while index < maximum and returns a pointer to the first
 * byte that strchr() finds in charset. Bytes below 0x80 take one branch;
 * other bytes have their width looked up through encoding->char_width and,
 * when validate is set, pm_strpbrk_explicit_encoding_set is called with
 * that width. When the width lookup keeps returning 0, the run of such
 * bytes is reported through pm_strpbrk_invalid_multibyte_character.
 *
 * NOTE(review): the embedded listing numbers skip (101 -> 104, 108 -> 112,
 * 112 -> 119, 119 -> 123). The declarations of index and encoding
 * (presumably parser->encoding, to be verified), the branch bodies, the
 * statements that advance index and the final return are not shown here.
 *
 * @param parser The parser that receives diagnostics.
 * @param source The bytes to search.
 * @param charset NUL-terminated set of bytes to look for.
 * @param maximum Upper bound on the number of bytes examined.
 * @param validate When true, the explicit-encoding helper is invoked for
 *     non-ASCII characters.
 * @return A pointer into source at the first matching byte (the no-match
 *     return is not visible in this listing).
 */
94static inline const uint8_t *
95pm_strpbrk_multi_byte(
pm_parser_t *parser,
const uint8_t *source,
const uint8_t *charset,
size_t maximum,
bool validate) {
99 while (index < maximum) {
// strchr() also matches the terminating NUL of charset, so a 0 byte in
// source counts as a hit.
100 if (strchr((
const char *) charset, source[index]) != NULL) {
101 return source + index;
// Bytes below 0x80 get their own branch (body not shown in this listing).
104 if (source[index] < 0x80) {
// Width of the character at index, limited to the bytes that remain.
107 size_t width = encoding->
char_width(source + index, (ptrdiff_t) (maximum - index));
// NOTE(review): the call passes source, not source + index, so the span
// handed to the helper starts at the beginning of the searched buffer.
// Confirm this is intended.
108 if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
112 }
else if (!validate) {
// Remember where the invalid run begins so the diagnostic can span it.
119 const size_t start = index;
123 }
// Loop condition: continue while bytes remain and the width lookup at index
// still returns 0.
while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
// Report the whole run from start up to the current index.
125 pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
/**
 * strpbrk variant used for the single-byte case (per the helper name).
 *
 * Scans source while index < maximum and returns a pointer to the first
 * byte that strchr() finds in charset. Bytes below 0x80, and all bytes when
 * validate is false, take one branch. Otherwise the width is looked up
 * through encoding->char_width and pm_strpbrk_explicit_encoding_set is
 * called with it. When the width lookup keeps returning 0, the run of such
 * bytes is reported through pm_strpbrk_invalid_multibyte_character.
 *
 * NOTE(review): the embedded listing numbers skip (144 -> 147, 147 -> 150,
 * 151 -> 160, 160 -> 164). The declarations of index and encoding
 * (presumably parser->encoding, to be verified), the branch bodies, the
 * statements that advance index and the final return are not shown here.
 *
 * @param parser The parser that receives diagnostics.
 * @param source The bytes to search.
 * @param charset NUL-terminated set of bytes to look for.
 * @param maximum Upper bound on the number of bytes examined.
 * @param validate When false, every byte takes the same branch as bytes
 *     below 0x80.
 * @return A pointer into source at the first matching byte (the no-match
 *     return is not visible in this listing).
 */
137static inline const uint8_t *
138pm_strpbrk_single_byte(
pm_parser_t *parser,
const uint8_t *source,
const uint8_t *charset,
size_t maximum,
bool validate) {
142 while (index < maximum) {
// strchr() also matches the terminating NUL of charset, so a 0 byte in
// source counts as a hit.
143 if (strchr((
const char *) charset, source[index]) != NULL) {
144 return source + index;
// ASCII bytes, and every byte when validation is off, share this branch
// (body not shown in this listing).
147 if (source[index] < 0x80 || !validate) {
// Width of the character at index, limited to the bytes that remain.
150 size_t width = encoding->
char_width(source + index, (ptrdiff_t) (maximum - index));
// NOTE(review): the call passes source, not source + index, so the span
// handed to the helper starts at the beginning of the searched buffer.
// Confirm this is intended.
151 pm_strpbrk_explicit_encoding_set(parser, source, width);
// Remember where the invalid run begins so the diagnostic can span it.
160 const size_t start = index;
164 }
// Loop condition: continue while bytes remain and the width lookup at index
// still returns 0.
while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
// Report the whole run from start up to the current index.
166 pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
/**
 * Entry point of the custom strpbrk implementation.
 *
 * Forwards to exactly one of pm_strpbrk_utf8, pm_strpbrk_ascii_8bit,
 * pm_strpbrk_multi_byte or pm_strpbrk_single_byte, passing parser, source,
 * charset and validate unchanged and casting length to size_t.
 *
 * NOTE(review): the embedded listing numbers jump 194 -> 198 and then step
 * by two, so the return type and the conditions that choose between the
 * four helpers are not shown here. A negative length would wrap to a very
 * large value in the size_t cast; whether an earlier guard rules that out
 * is not visible in this listing.
 *
 * @param parser The parser passed on to the selected helper.
 * @param source The bytes to search.
 * @param charset NUL-terminated set of bytes to look for.
 * @param length Number of bytes of source to examine.
 * @param validate Passed on to the selected helper.
 * @return Whatever the selected helper returns.
 */
194pm_strpbrk(
pm_parser_t *parser,
const uint8_t *source,
const uint8_t *charset, ptrdiff_t length,
bool validate) {
198 return pm_strpbrk_utf8(parser, source, charset, (
size_t) length, validate);
200 return pm_strpbrk_ascii_8bit(parser, source, charset, (
size_t) length, validate);
202 return pm_strpbrk_multi_byte(parser, source, charset, (
size_t) length, validate);
204 return pm_strpbrk_single_byte(parser, source, charset, (
size_t) length, validate);
A custom strpbrk implementation.
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
#define PM_ENCODING_ASCII_8BIT_ENTRY
This is the ASCII-8BIT encoding.
This struct defines the functions necessary to implement the encoding interface so we can determine h...
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
bool multibyte
Return true if the encoding is a multibyte encoding.
const char * name
The name of the encoding.
This struct represents the overall parser.
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
pm_list_t error_list
The list of errors that have been found while parsing.