Ruby  3.4.0dev (2024-11-05 revision e440268d51fe02b303e3817a7a733a0dac1c5091)
pm_char.c
1 #include "prism/util/pm_char.h"
2 
// Flag bits stored in pm_byte_table, one bit per character class.
#define PRISM_CHAR_BIT_WHITESPACE 0x01        // bit 0: any whitespace byte
#define PRISM_CHAR_BIT_INLINE_WHITESPACE 0x02 // bit 1: whitespace other than '\n'
#define PRISM_CHAR_BIT_REGEXP_OPTION 0x04     // bit 2: ASCII letters
6 
// Flag bits stored in pm_number_table. Each base owns a pair of bits: the
// *_DIGIT bit is set for the digits of that base, and the *_NUMBER bit is set
// for those digits and additionally for the underscore separator.
#define PRISM_NUMBER_BIT_BINARY_DIGIT 0x01       // bit 0
#define PRISM_NUMBER_BIT_BINARY_NUMBER 0x02      // bit 1
#define PRISM_NUMBER_BIT_OCTAL_DIGIT 0x04        // bit 2
#define PRISM_NUMBER_BIT_OCTAL_NUMBER 0x08       // bit 3
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT 0x10      // bit 4
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER 0x20     // bit 5
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT 0x40  // bit 6
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER 0x80 // bit 7
15 
/**
 * Lookup table mapping each byte value to its PRISM_CHAR_BIT_* flags.
 *
 *   1 = whitespace only
 *   3 = whitespace | inline whitespace
 *   4 = regexp option
 *
 * Only the non-zero entries are spelled out; every byte that is not listed is
 * zero-initialized and therefore belongs to no character class.
 */
static const uint8_t pm_byte_table[256] = {
    // Whitespace. The newline is the only whitespace byte that is not also
    // inline whitespace.
    [0x09] = 3, // \t
    [0x0A] = 1, // \n
    [0x0B] = 3, // \v
    [0x0C] = 3, // \f
    [0x0D] = 3, // \r
    [0x20] = 3, // space

    // 0x41-0x5A: 'A' through 'Z'.
    [0x41] = 4, [0x42] = 4, [0x43] = 4, [0x44] = 4, [0x45] = 4, [0x46] = 4, [0x47] = 4,
    [0x48] = 4, [0x49] = 4, [0x4A] = 4, [0x4B] = 4, [0x4C] = 4, [0x4D] = 4, [0x4E] = 4,
    [0x4F] = 4, [0x50] = 4, [0x51] = 4, [0x52] = 4, [0x53] = 4, [0x54] = 4, [0x55] = 4,
    [0x56] = 4, [0x57] = 4, [0x58] = 4, [0x59] = 4, [0x5A] = 4,

    // 0x61-0x7A: 'a' through 'z'.
    [0x61] = 4, [0x62] = 4, [0x63] = 4, [0x64] = 4, [0x65] = 4, [0x66] = 4, [0x67] = 4,
    [0x68] = 4, [0x69] = 4, [0x6A] = 4, [0x6B] = 4, [0x6C] = 4, [0x6D] = 4, [0x6E] = 4,
    [0x6F] = 4, [0x70] = 4, [0x71] = 4, [0x72] = 4, [0x73] = 4, [0x74] = 4, [0x75] = 4,
    [0x76] = 4, [0x77] = 4, [0x78] = 4, [0x79] = 4, [0x7A] = 4,
};
35 
/**
 * Lookup table mapping each byte value to its PRISM_NUMBER_BIT_* flags.
 *
 *   0xff = digit and number bits of every base (binary, octal, decimal, hex)
 *   0xfc = octal, decimal and hexadecimal digit/number bits
 *   0xf0 = decimal and hexadecimal digit/number bits
 *   0xc0 = hexadecimal digit/number bits
 *   0xaa = the *_NUMBER bit of every base, with no *_DIGIT bit
 *
 * Only the non-zero entries are spelled out; every byte that is not listed is
 * zero-initialized and therefore is not part of any numeric literal class.
 */
static const uint8_t pm_number_table[256] = {
    // 0x30-0x39: '0' through '9'.
    [0x30] = 0xff, // 0
    [0x31] = 0xff, // 1
    [0x32] = 0xfc, // 2
    [0x33] = 0xfc, // 3
    [0x34] = 0xfc, // 4
    [0x35] = 0xfc, // 5
    [0x36] = 0xfc, // 6
    [0x37] = 0xfc, // 7
    [0x38] = 0xf0, // 8
    [0x39] = 0xf0, // 9

    // 0x41-0x46: 'A' through 'F'.
    [0x41] = 0xc0, [0x42] = 0xc0, [0x43] = 0xc0, [0x44] = 0xc0, [0x45] = 0xc0, [0x46] = 0xc0,

    // 0x5F: '_' is accepted by the *_NUMBER classes but is never a digit.
    [0x5F] = 0xaa,

    // 0x61-0x66: 'a' through 'f'.
    [0x61] = 0xc0, [0x62] = 0xc0, [0x63] = 0xc0, [0x64] = 0xc0, [0x65] = 0xc0, [0x66] = 0xc0,
};
55 
60 static inline size_t
61 pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
62  if (length <= 0) return 0;
63 
64  size_t size = 0;
65  size_t maximum = (size_t) length;
66 
67  while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
68  return size;
69 }
70 
75 size_t
76 pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
77  return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
78 }
79 
85 size_t
86 pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
87  if (length <= 0) return 0;
88 
89  size_t size = 0;
90  size_t maximum = (size_t) length;
91 
92  while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
93  if (string[size] == '\n') {
94  pm_newline_list_append(newline_list, string + size);
95  }
96 
97  size++;
98  }
99 
100  return size;
101 }
102 
107 size_t
108 pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
109  return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
110 }
111 
116 size_t
117 pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
118  return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
119 }
120 
124 static inline bool
125 pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
126  return (pm_byte_table[b] & kind) != 0;
127 }
128 
132 bool
133 pm_char_is_whitespace(const uint8_t b) {
134  return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
135 }
136 
140 bool
142  return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
143 }
144 
150 static inline size_t
151 pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
152  if (length <= 0) return 0;
153 
154  size_t size = 0;
155  size_t maximum = (size_t) length;
156 
157  while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
158  return size;
159 }
160 
169 static inline size_t
170 pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
171  if (length <= 0) return 0;
172 
173  size_t size = 0;
174  size_t maximum = (size_t) length;
175 
176  bool underscore = false;
177  while (size < maximum && (pm_number_table[string[size]] & kind)) {
178  if (string[size] == '_') {
179  if (underscore) *invalid = string + size;
180  underscore = true;
181  } else {
182  underscore = false;
183  }
184 
185  size++;
186  }
187 
188  if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
189  return size;
190 }
191 
201 size_t
202 pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
203  return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
204 }
205 
215 size_t
216 pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
217  return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
218 }
219 
224 size_t
225 pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
226  return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
227 }
228 
238 size_t
239 pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
240  return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
241 }
242 
248 size_t
249 pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
250  return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
251 }
252 
262 size_t
263 pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
264  return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
265 }
266 
270 static inline bool
271 pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
272  return (pm_number_table[b] & kind) != 0;
273 }
274 
278 bool
279 pm_char_is_binary_digit(const uint8_t b) {
280  return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
281 }
282 
286 bool
287 pm_char_is_octal_digit(const uint8_t b) {
288  return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
289 }
290 
294 bool
295 pm_char_is_decimal_digit(const uint8_t b) {
296  return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
297 }
298 
302 bool
304  return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
305 }
306 
// The flag macros are private to this file, so drop them again once the last
// user has been compiled. Listed in the same order in which they were defined.
#undef PRISM_CHAR_BIT_WHITESPACE
#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
#undef PRISM_CHAR_BIT_REGEXP_OPTION

#undef PRISM_NUMBER_BIT_BINARY_DIGIT
#undef PRISM_NUMBER_BIT_BINARY_NUMBER
#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
Functions for working with characters and strings.
size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are hexadecimal digits.
Definition: pm_char.c:249
bool pm_char_is_decimal_digit(const uint8_t b)
Returns true if the given character is a decimal digit.
Definition: pm_char.c:295
size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are whitespace.
Definition: pm_char.c:76
size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are hexadecimal digits or underscores.
Definition: pm_char.c:263
size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are decimal digits or underscores.
Definition: pm_char.c:239
size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are decimal digits.
Definition: pm_char.c:225
size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are binary digits or underscores.
Definition: pm_char.c:202
size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are octal digits or underscores.
Definition: pm_char.c:216
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list)
Returns the number of characters at the start of the string that are whitespace while also tracking the location of each newline.
Definition: pm_char.c:86
bool pm_char_is_hexadecimal_digit(const uint8_t b)
Returns true if the given character is a hexadecimal digit.
Definition: pm_char.c:303
bool pm_char_is_octal_digit(const uint8_t b)
Returns true if the given character is an octal digit.
Definition: pm_char.c:287
bool pm_char_is_binary_digit(const uint8_t b)
Returns true if the given character is a binary digit.
Definition: pm_char.c:279
bool pm_char_is_inline_whitespace(const uint8_t b)
Returns true if the given character is an inline whitespace character.
Definition: pm_char.c:141
size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are inline whitespace.
Definition: pm_char.c:108
bool pm_char_is_whitespace(const uint8_t b)
Returns true if the given character is a whitespace character.
Definition: pm_char.c:133
size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are regexp options.
Definition: pm_char.c:117
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor)
Append a new offset to the newline list.
A list of offsets of newlines in a string.