Ruby 4.1.0dev (2026-03-20 revision eb04ab9117336f2b3613244cfe8a528c52faf6d6)
pm_char.c
2
/**
 * Bit flag in pm_byte_table marking bytes that are scanned as regular
 * expression option characters. In the table in this file it is set for the
 * ASCII letters 'A'..'Z' and 'a'..'z'.
 */
#define PRISM_CHAR_BIT_REGEXP_OPTION 0x04

/*
 * Bit flags stored in pm_number_table, two per numeric base.
 *
 * A *_DIGIT flag is set only on the bytes that are digits of that base. The
 * matching *_NUMBER flag is set on those same bytes and additionally on '_',
 * so scanning with a *_NUMBER flag also walks over underscore separators.
 */
#define PRISM_NUMBER_BIT_BINARY_DIGIT 0x01
#define PRISM_NUMBER_BIT_BINARY_NUMBER 0x02
#define PRISM_NUMBER_BIT_OCTAL_DIGIT 0x04
#define PRISM_NUMBER_BIT_OCTAL_NUMBER 0x08
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT 0x10
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER 0x20
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT 0x40
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER 0x80
13
/**
 * Per-byte classification flags, indexed by the byte value.
 *
 * Only the non-zero entries are spelled out; every element that is not listed
 * is implicitly zero-initialized. The value 4 is PRISM_CHAR_BIT_REGEXP_OPTION.
 * The values 1 and 3 use the two low bits, which are presumably
 * PRISM_CHAR_BIT_WHITESPACE and PRISM_CHAR_BIT_INLINE_WHITESPACE from
 * pm_char.h (their values are not visible in this file -- confirm there).
 */
const uint8_t pm_byte_table[256] = {
    [0x09] = 3, // '\t'
    [0x0A] = 1, // '\n' (the only whitespace byte with just the lowest bit set)
    [0x0B] = 3, // '\v'
    [0x0C] = 3, // '\f'
    [0x0D] = 3, // '\r'
    [0x20] = 3, // ' '

//           0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    [0x40] = 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x: 'A'..'O'
    [0x50] = 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x: 'P'..'Z'
    [0x60] = 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x: 'a'..'o'
    [0x70] = 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x: 'p'..'z'
};
33
/**
 * Per-byte numeric classification flags (PRISM_NUMBER_BIT_*), indexed by the
 * byte value.
 *
 * Only the non-zero runs are listed; every other element is implicitly
 * zero-initialized.
 */
static const uint8_t pm_number_table[256] = {
    // '0' and '1': digit and number bits of all four bases.
    [0x30] = 0xff, 0xff,
    // '2'..'7': everything except the two binary bits.
    [0x32] = 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    // '8' and '9': decimal and hexadecimal bits only.
    [0x38] = 0xf0, 0xf0,
    // 'A'..'F': hexadecimal bits only.
    [0x41] = 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    // '_': the *_NUMBER bit of every base, none of the *_DIGIT bits.
    [0x5F] = 0xaa,
    // 'a'..'f': hexadecimal bits only.
    [0x61] = 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
};
53
58static inline size_t
59pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
60 if (length <= 0) return 0;
61
62 size_t size = 0;
63 size_t maximum = (size_t) length;
64
65 while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
66 return size;
67}
68
73size_t
74pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
75 return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
76}
77
83size_t
84pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset) {
85 if (length <= 0) return 0;
86
87 uint32_t size = 0;
88 uint32_t maximum = (uint32_t) length;
89
90 while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
91 if (string[size] == '\n') {
92 pm_line_offset_list_append(arena, line_offsets, start_offset + size + 1);
93 }
94
95 size++;
96 }
97
98 return size;
99}
100
105size_t
106pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
107 return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
108}
109
110
116static inline size_t
117pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
118 if (length <= 0) return 0;
119
120 size_t size = 0;
121 size_t maximum = (size_t) length;
122
123 while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
124 return size;
125}
126
135static inline size_t
136pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
137 if (length <= 0) return 0;
138
139 size_t size = 0;
140 size_t maximum = (size_t) length;
141
142 bool underscore = false;
143 while (size < maximum && (pm_number_table[string[size]] & kind)) {
144 if (string[size] == '_') {
145 if (underscore) *invalid = string + size;
146 underscore = true;
147 } else {
148 underscore = false;
149 }
150
151 size++;
152 }
153
154 if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
155 return size;
156}
157
167size_t
168pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
169 return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
170}
171
181size_t
182pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
183 return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
184}
185
190size_t
191pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
192 return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
193}
194
204size_t
205pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
206 return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
207}
208
214size_t
215pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
216 return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
217}
218
228size_t
229pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
230 return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
231}
232
236static inline bool
237pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
238 return (pm_number_table[b] & kind) != 0;
239}
240
244bool
245pm_char_is_binary_digit(const uint8_t b) {
246 return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
247}
248
252bool
253pm_char_is_octal_digit(const uint8_t b) {
254 return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
255}
256
260bool
261pm_char_is_decimal_digit(const uint8_t b) {
262 return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
263}
264
268bool
269pm_char_is_hexadecimal_digit(const uint8_t b) {
270 return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
271}
272
/*
 * Drop the bit-flag macros so they are not visible past this point.
 *
 * NOTE(review): PRISM_CHAR_BIT_WHITESPACE and PRISM_CHAR_BIT_INLINE_WHITESPACE
 * are not defined in the visible part of this file (presumably they come from
 * pm_char.h); undefining them here only affects the rest of this translation
 * unit.
 */
#undef PRISM_CHAR_BIT_REGEXP_OPTION
#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
#undef PRISM_CHAR_BIT_WHITESPACE

/* Numeric flags, one DIGIT/NUMBER pair per base. */
#undef PRISM_NUMBER_BIT_BINARY_DIGIT
#undef PRISM_NUMBER_BIT_BINARY_NUMBER
#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
Functions for working with characters and strings.
#define PRISM_CHAR_BIT_WHITESPACE
Bit flag for whitespace characters in pm_byte_table.
Definition pm_char.h:16
static PRISM_FORCE_INLINE void pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor)
Append a new offset to the list.
A bump allocator.
Definition pm_arena.h:39
A list of offsets of the start of lines in a string.