Ruby 3.5.0dev (2025-02-22 revision d6f44535c6482e895483c0c28c9a35bcf5e4fd88)
pm_char.c
2
/*
 * Bit masks tested against the entries of pm_byte_table. Each mask occupies
 * its own bit so that one table entry can carry several classifications.
 */
#define PRISM_CHAR_BIT_WHITESPACE 0x01
#define PRISM_CHAR_BIT_INLINE_WHITESPACE 0x02
#define PRISM_CHAR_BIT_REGEXP_OPTION 0x04

/*
 * Bit masks tested against the entries of pm_number_table. Every base has a
 * DIGIT mask and a NUMBER mask, each on its own bit.
 */
#define PRISM_NUMBER_BIT_BINARY_DIGIT 0x01
#define PRISM_NUMBER_BIT_BINARY_NUMBER 0x02
#define PRISM_NUMBER_BIT_OCTAL_DIGIT 0x04
#define PRISM_NUMBER_BIT_OCTAL_NUMBER 0x08
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT 0x10
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER 0x20
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT 0x40
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER 0x80
15
/*
 * Classification flags for every possible byte value, indexed by the byte.
 * Only the non-zero entries are spelled out; all other entries are
 * zero-initialized. The values are combinations of bit 0 (value 1), bit 1
 * (value 2) and bit 2 (value 4).
 */
static const uint8_t pm_byte_table[256] = {
    // 0x09-0x0D: every entry has bit 0 set; 0x0A alone lacks bit 1.
    [0x09] = 3, [0x0A] = 1, [0x0B] = 3, [0x0C] = 3, [0x0D] = 3,

    // 0x20 (space): bits 0 and 1.
    [0x20] = 3,

    // 0x41-0x5A (ASCII 'A'-'Z'): bit 2.
    [0x41] = 4, [0x42] = 4, [0x43] = 4, [0x44] = 4, [0x45] = 4, [0x46] = 4,
    [0x47] = 4, [0x48] = 4, [0x49] = 4, [0x4A] = 4, [0x4B] = 4, [0x4C] = 4,
    [0x4D] = 4, [0x4E] = 4, [0x4F] = 4, [0x50] = 4, [0x51] = 4, [0x52] = 4,
    [0x53] = 4, [0x54] = 4, [0x55] = 4, [0x56] = 4, [0x57] = 4, [0x58] = 4,
    [0x59] = 4, [0x5A] = 4,

    // 0x61-0x7A (ASCII 'a'-'z'): bit 2.
    [0x61] = 4, [0x62] = 4, [0x63] = 4, [0x64] = 4, [0x65] = 4, [0x66] = 4,
    [0x67] = 4, [0x68] = 4, [0x69] = 4, [0x6A] = 4, [0x6B] = 4, [0x6C] = 4,
    [0x6D] = 4, [0x6E] = 4, [0x6F] = 4, [0x70] = 4, [0x71] = 4, [0x72] = 4,
    [0x73] = 4, [0x74] = 4, [0x75] = 4, [0x76] = 4, [0x77] = 4, [0x78] = 4,
    [0x79] = 4, [0x7A] = 4,
};
35
/*
 * Numeric classification flags for every possible byte value, indexed by the
 * byte. Only the non-zero entries are spelled out; all other entries are
 * zero-initialized. Each entry is a set of the eight number bits: bits 0/1
 * for binary digit/number, 2/3 for octal, 4/5 for decimal and 6/7 for
 * hexadecimal.
 */
static const uint8_t pm_number_table[256] = {
    // 0x30-0x31 (ASCII '0'-'1'): all eight bits.
    [0x30] = 0xff, [0x31] = 0xff,

    // 0x32-0x37 (ASCII '2'-'7'): bits 2-7.
    [0x32] = 0xfc, [0x33] = 0xfc, [0x34] = 0xfc,
    [0x35] = 0xfc, [0x36] = 0xfc, [0x37] = 0xfc,

    // 0x38-0x39 (ASCII '8'-'9'): bits 4-7.
    [0x38] = 0xf0, [0x39] = 0xf0,

    // 0x41-0x46 (ASCII 'A'-'F'): bits 6-7.
    [0x41] = 0xc0, [0x42] = 0xc0, [0x43] = 0xc0,
    [0x44] = 0xc0, [0x45] = 0xc0, [0x46] = 0xc0,

    // 0x5F (ASCII '_'): the four odd bits (1, 3, 5, 7) only.
    [0x5f] = 0xaa,

    // 0x61-0x66 (ASCII 'a'-'f'): bits 6-7.
    [0x61] = 0xc0, [0x62] = 0xc0, [0x63] = 0xc0,
    [0x64] = 0xc0, [0x65] = 0xc0, [0x66] = 0xc0,
};
55
60static inline size_t
61pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
62 if (length <= 0) return 0;
63
64 size_t size = 0;
65 size_t maximum = (size_t) length;
66
67 while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
68 return size;
69}
70
75size_t
76pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
77 return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
78}
79
85size_t
86pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
87 if (length <= 0) return 0;
88
89 size_t size = 0;
90 size_t maximum = (size_t) length;
91
92 while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
93 if (string[size] == '\n') {
94 pm_newline_list_append(newline_list, string + size);
95 }
96
97 size++;
98 }
99
100 return size;
101}
102
107size_t
108pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
109 return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
110}
111
116size_t
117pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
118 return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
119}
120
124static inline bool
125pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
126 return (pm_byte_table[b] & kind) != 0;
127}
128
132bool
133pm_char_is_whitespace(const uint8_t b) {
134 return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
135}
136
140bool
141pm_char_is_inline_whitespace(const uint8_t b) {
142 return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
143}
144
150static inline size_t
151pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
152 if (length <= 0) return 0;
153
154 size_t size = 0;
155 size_t maximum = (size_t) length;
156
157 while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
158 return size;
159}
160
169static inline size_t
170pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
171 if (length <= 0) return 0;
172
173 size_t size = 0;
174 size_t maximum = (size_t) length;
175
176 bool underscore = false;
177 while (size < maximum && (pm_number_table[string[size]] & kind)) {
178 if (string[size] == '_') {
179 if (underscore) *invalid = string + size;
180 underscore = true;
181 } else {
182 underscore = false;
183 }
184
185 size++;
186 }
187
188 if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
189 return size;
190}
191
201size_t
202pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
203 return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
204}
205
215size_t
216pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
217 return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
218}
219
224size_t
225pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
226 return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
227}
228
238size_t
239pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
240 return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
241}
242
248size_t
249pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
250 return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
251}
252
262size_t
263pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
264 return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
265}
266
270static inline bool
271pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
272 return (pm_number_table[b] & kind) != 0;
273}
274
278bool
279pm_char_is_binary_digit(const uint8_t b) {
280 return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
281}
282
286bool
287pm_char_is_octal_digit(const uint8_t b) {
288 return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
289}
290
294bool
295pm_char_is_decimal_digit(const uint8_t b) {
296 return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
297}
298
302bool
303pm_char_is_hexadecimal_digit(const uint8_t b) {
304 return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
305}
306
/* Remove the character bit masks now that nothing below needs them. */
#undef PRISM_CHAR_BIT_WHITESPACE
#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
#undef PRISM_CHAR_BIT_REGEXP_OPTION

/* Remove the number bit masks, listed as DIGIT/NUMBER pairs per base. */
#undef PRISM_NUMBER_BIT_BINARY_DIGIT
#undef PRISM_NUMBER_BIT_BINARY_NUMBER
#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
Functions for working with characters and strings.
A list of offsets of newlines in a string.