Ruby 3.5.0dev (2025-02-22 revision b17f984e4e903d3ece3013c1488279d1947dfc39)
pack.c
1#include "prism/pack.h"
2
3// We optionally support parsing String#pack templates. For systems that don't
4// want or need this functionality, it can be turned off with the
5// PRISM_EXCLUDE_PACK define.
6#ifdef PRISM_EXCLUDE_PACK
7
// Empty placeholder definition compiled instead of the real template parser
// when PRISM_EXCLUDE_PACK is defined. It takes no arguments and does nothing.
void
pm_pack_parse(void) {}
9
10#else
11
12#include <stdbool.h>
13#include <errno.h>
14
/**
 * Parse a run of ASCII decimal digits starting at *format and return its value.
 *
 * *format is advanced past every digit consumed, including digits that follow
 * an overflow. Scanning stops at the first byte that is not '0'..'9'; this
 * function receives no end pointer, so the buffer must contain such a byte.
 *
 * If the value does not fit in a uintmax_t, errno is set to ERANGE and the
 * result saturates at UINTMAX_MAX. errno is never cleared here, so callers
 * should reset it to 0 before calling if they intend to test it afterwards.
 *
 * @param format In/out cursor into the text being parsed.
 * @return The parsed value, or UINTMAX_MAX on overflow.
 */
static uintmax_t
strtoumaxc(const char **format) {
    uintmax_t value = 0;

    while (**format >= '0' && **format <= '9') {
        uintmax_t digit = (uintmax_t) (**format - '0');

        // value * 10 + digit fits exactly when value <= (MAX - digit) / 10.
        // Checking only value > MAX / 10 would miss the final-digit overflow
        // (e.g. MAX / 10 followed by a digit larger than MAX % 10).
        if (value > (UINTMAX_MAX - digit) / 10) {
            errno = ERANGE;
            value = UINTMAX_MAX;
        } else {
            value = value * 10 + digit;
        }

        (*format)++;
    }

    return value;
}
27
29pm_pack_parse(
30 pm_pack_variant variant,
31 const char **format,
32 const char *format_end,
34 pm_pack_signed *signed_type,
35 pm_pack_endian *endian,
36 pm_pack_size *size,
37 pm_pack_length_type *length_type,
38 uint64_t *length,
39 pm_pack_encoding *encoding
40) {
41 if (*encoding == PM_PACK_ENCODING_START) {
42 *encoding = PM_PACK_ENCODING_US_ASCII;
43 }
44
45 if (*format == format_end) {
46 *type = PM_PACK_END;
47 *signed_type = PM_PACK_SIGNED_NA;
48 *endian = PM_PACK_ENDIAN_NA;
49 *size = PM_PACK_SIZE_NA;
50 *length_type = PM_PACK_LENGTH_NA;
51 return PM_PACK_OK;
52 }
53
54 *length_type = PM_PACK_LENGTH_FIXED;
55 *length = 1;
56 bool length_changed_allowed = true;
57
58 char directive = **format;
59 (*format)++;
60 switch (directive) {
61 case ' ':
62 case '\t':
63 case '\n':
64 case '\v':
65 case '\f':
66 case '\r':
67 *type = PM_PACK_SPACE;
68 *signed_type = PM_PACK_SIGNED_NA;
69 *endian = PM_PACK_ENDIAN_NA;
70 *size = PM_PACK_SIZE_NA;
71 *length_type = PM_PACK_LENGTH_NA;
72 *length = 0;
73 return PM_PACK_OK;
74 case '#':
75 while ((*format < format_end) && (**format != '\n')) {
76 (*format)++;
77 }
78 *type = PM_PACK_COMMENT;
79 *signed_type = PM_PACK_SIGNED_NA;
80 *endian = PM_PACK_ENDIAN_NA;
81 *size = PM_PACK_SIZE_NA;
82 *length_type = PM_PACK_LENGTH_NA;
83 *length = 0;
84 return PM_PACK_OK;
85 case 'C':
86 *type = PM_PACK_INTEGER;
87 *signed_type = PM_PACK_UNSIGNED;
88 *endian = PM_PACK_AGNOSTIC_ENDIAN;
89 *size = PM_PACK_SIZE_8;
90 break;
91 case 'S':
92 *type = PM_PACK_INTEGER;
93 *signed_type = PM_PACK_UNSIGNED;
94 *endian = PM_PACK_NATIVE_ENDIAN;
95 *size = PM_PACK_SIZE_16;
96 break;
97 case 'L':
98 *type = PM_PACK_INTEGER;
99 *signed_type = PM_PACK_UNSIGNED;
100 *endian = PM_PACK_NATIVE_ENDIAN;
101 *size = PM_PACK_SIZE_32;
102 break;
103 case 'Q':
104 *type = PM_PACK_INTEGER;
105 *signed_type = PM_PACK_UNSIGNED;
106 *endian = PM_PACK_NATIVE_ENDIAN;
107 *size = PM_PACK_SIZE_64;
108 break;
109 case 'J':
110 *type = PM_PACK_INTEGER;
111 *signed_type = PM_PACK_UNSIGNED;
112 *endian = PM_PACK_NATIVE_ENDIAN;
113 *size = PM_PACK_SIZE_P;
114 break;
115 case 'c':
116 *type = PM_PACK_INTEGER;
117 *signed_type = PM_PACK_SIGNED;
118 *endian = PM_PACK_AGNOSTIC_ENDIAN;
119 *size = PM_PACK_SIZE_8;
120 break;
121 case 's':
122 *type = PM_PACK_INTEGER;
123 *signed_type = PM_PACK_SIGNED;
124 *endian = PM_PACK_NATIVE_ENDIAN;
125 *size = PM_PACK_SIZE_16;
126 break;
127 case 'l':
128 *type = PM_PACK_INTEGER;
129 *signed_type = PM_PACK_SIGNED;
130 *endian = PM_PACK_NATIVE_ENDIAN;
131 *size = PM_PACK_SIZE_32;
132 break;
133 case 'q':
134 *type = PM_PACK_INTEGER;
135 *signed_type = PM_PACK_SIGNED;
136 *endian = PM_PACK_NATIVE_ENDIAN;
137 *size = PM_PACK_SIZE_64;
138 break;
139 case 'j':
140 *type = PM_PACK_INTEGER;
141 *signed_type = PM_PACK_SIGNED;
142 *endian = PM_PACK_NATIVE_ENDIAN;
143 *size = PM_PACK_SIZE_P;
144 break;
145 case 'I':
146 *type = PM_PACK_INTEGER;
147 *signed_type = PM_PACK_UNSIGNED;
148 *endian = PM_PACK_NATIVE_ENDIAN;
149 *size = PM_PACK_SIZE_INT;
150 break;
151 case 'i':
152 *type = PM_PACK_INTEGER;
153 *signed_type = PM_PACK_SIGNED;
154 *endian = PM_PACK_NATIVE_ENDIAN;
155 *size = PM_PACK_SIZE_INT;
156 break;
157 case 'n':
158 *type = PM_PACK_INTEGER;
159 *signed_type = PM_PACK_UNSIGNED;
160 *endian = PM_PACK_BIG_ENDIAN;
161 *size = PM_PACK_SIZE_16;
162 length_changed_allowed = false;
163 break;
164 case 'N':
165 *type = PM_PACK_INTEGER;
166 *signed_type = PM_PACK_UNSIGNED;
167 *endian = PM_PACK_BIG_ENDIAN;
168 *size = PM_PACK_SIZE_32;
169 length_changed_allowed = false;
170 break;
171 case 'v':
172 *type = PM_PACK_INTEGER;
173 *signed_type = PM_PACK_UNSIGNED;
174 *endian = PM_PACK_LITTLE_ENDIAN;
175 *size = PM_PACK_SIZE_16;
176 length_changed_allowed = false;
177 break;
178 case 'V':
179 *type = PM_PACK_INTEGER;
180 *signed_type = PM_PACK_UNSIGNED;
181 *endian = PM_PACK_LITTLE_ENDIAN;
182 *size = PM_PACK_SIZE_32;
183 length_changed_allowed = false;
184 break;
185 case 'U':
186 *type = PM_PACK_UTF8;
187 *signed_type = PM_PACK_SIGNED_NA;
188 *endian = PM_PACK_ENDIAN_NA;
189 *size = PM_PACK_SIZE_NA;
190 break;
191 case 'w':
192 *type = PM_PACK_BER;
193 *signed_type = PM_PACK_SIGNED_NA;
194 *endian = PM_PACK_ENDIAN_NA;
195 *size = PM_PACK_SIZE_NA;
196 break;
197 case 'D':
198 case 'd':
199 *type = PM_PACK_FLOAT;
200 *signed_type = PM_PACK_SIGNED_NA;
201 *endian = PM_PACK_NATIVE_ENDIAN;
202 *size = PM_PACK_SIZE_64;
203 break;
204 case 'F':
205 case 'f':
206 *type = PM_PACK_FLOAT;
207 *signed_type = PM_PACK_SIGNED_NA;
208 *endian = PM_PACK_NATIVE_ENDIAN;
209 *size = PM_PACK_SIZE_32;
210 break;
211 case 'E':
212 *type = PM_PACK_FLOAT;
213 *signed_type = PM_PACK_SIGNED_NA;
214 *endian = PM_PACK_LITTLE_ENDIAN;
215 *size = PM_PACK_SIZE_64;
216 break;
217 case 'e':
218 *type = PM_PACK_FLOAT;
219 *signed_type = PM_PACK_SIGNED_NA;
220 *endian = PM_PACK_LITTLE_ENDIAN;
221 *size = PM_PACK_SIZE_32;
222 break;
223 case 'G':
224 *type = PM_PACK_FLOAT;
225 *signed_type = PM_PACK_SIGNED_NA;
226 *endian = PM_PACK_BIG_ENDIAN;
227 *size = PM_PACK_SIZE_64;
228 break;
229 case 'g':
230 *type = PM_PACK_FLOAT;
231 *signed_type = PM_PACK_SIGNED_NA;
232 *endian = PM_PACK_BIG_ENDIAN;
233 *size = PM_PACK_SIZE_32;
234 break;
235 case 'A':
236 *type = PM_PACK_STRING_SPACE_PADDED;
237 *signed_type = PM_PACK_SIGNED_NA;
238 *endian = PM_PACK_ENDIAN_NA;
239 *size = PM_PACK_SIZE_NA;
240 break;
241 case 'a':
242 *type = PM_PACK_STRING_NULL_PADDED;
243 *signed_type = PM_PACK_SIGNED_NA;
244 *endian = PM_PACK_ENDIAN_NA;
245 *size = PM_PACK_SIZE_NA;
246 break;
247 case 'Z':
248 *type = PM_PACK_STRING_NULL_TERMINATED;
249 *signed_type = PM_PACK_SIGNED_NA;
250 *endian = PM_PACK_ENDIAN_NA;
251 *size = PM_PACK_SIZE_NA;
252 break;
253 case 'B':
254 *type = PM_PACK_STRING_MSB;
255 *signed_type = PM_PACK_SIGNED_NA;
256 *endian = PM_PACK_ENDIAN_NA;
257 *size = PM_PACK_SIZE_NA;
258 break;
259 case 'b':
260 *type = PM_PACK_STRING_LSB;
261 *signed_type = PM_PACK_SIGNED_NA;
262 *endian = PM_PACK_ENDIAN_NA;
263 *size = PM_PACK_SIZE_NA;
264 break;
265 case 'H':
266 *type = PM_PACK_STRING_HEX_HIGH;
267 *signed_type = PM_PACK_SIGNED_NA;
268 *endian = PM_PACK_ENDIAN_NA;
269 *size = PM_PACK_SIZE_NA;
270 break;
271 case 'h':
272 *type = PM_PACK_STRING_HEX_LOW;
273 *signed_type = PM_PACK_SIGNED_NA;
274 *endian = PM_PACK_ENDIAN_NA;
275 *size = PM_PACK_SIZE_NA;
276 break;
277 case 'u':
278 *type = PM_PACK_STRING_UU;
279 *signed_type = PM_PACK_SIGNED_NA;
280 *endian = PM_PACK_ENDIAN_NA;
281 *size = PM_PACK_SIZE_NA;
282 break;
283 case 'M':
284 *type = PM_PACK_STRING_MIME;
285 *signed_type = PM_PACK_SIGNED_NA;
286 *endian = PM_PACK_ENDIAN_NA;
287 *size = PM_PACK_SIZE_NA;
288 break;
289 case 'm':
290 *type = PM_PACK_STRING_BASE64;
291 *signed_type = PM_PACK_SIGNED_NA;
292 *endian = PM_PACK_ENDIAN_NA;
293 *size = PM_PACK_SIZE_NA;
294 break;
295 case 'P':
296 *type = PM_PACK_STRING_FIXED;
297 *signed_type = PM_PACK_SIGNED_NA;
298 *endian = PM_PACK_ENDIAN_NA;
299 *size = PM_PACK_SIZE_NA;
300 break;
301 case 'p':
302 *type = PM_PACK_STRING_POINTER;
303 *signed_type = PM_PACK_SIGNED_NA;
304 *endian = PM_PACK_ENDIAN_NA;
305 *size = PM_PACK_SIZE_NA;
306 break;
307 case '@':
308 *type = PM_PACK_MOVE;
309 *signed_type = PM_PACK_SIGNED_NA;
310 *endian = PM_PACK_ENDIAN_NA;
311 *size = PM_PACK_SIZE_NA;
312 break;
313 case 'X':
314 *type = PM_PACK_BACK;
315 *signed_type = PM_PACK_SIGNED_NA;
316 *endian = PM_PACK_ENDIAN_NA;
317 *size = PM_PACK_SIZE_NA;
318 break;
319 case 'x':
320 *type = PM_PACK_NULL;
321 *signed_type = PM_PACK_SIGNED_NA;
322 *endian = PM_PACK_ENDIAN_NA;
323 *size = PM_PACK_SIZE_NA;
324 break;
325 case '%':
326 return PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
327 default:
328 return PM_PACK_ERROR_UNKNOWN_DIRECTIVE;
329 }
330
331 bool explicit_endian = false;
332
333 while (*format < format_end) {
334 switch (**format) {
335 case '_':
336 case '!':
337 (*format)++;
338 if (*type != PM_PACK_INTEGER || !length_changed_allowed) {
339 return PM_PACK_ERROR_BANG_NOT_ALLOWED;
340 }
341 switch (*size) {
342 case PM_PACK_SIZE_SHORT:
343 case PM_PACK_SIZE_INT:
344 case PM_PACK_SIZE_LONG:
345 case PM_PACK_SIZE_LONG_LONG:
346 break;
347 case PM_PACK_SIZE_16:
348 *size = PM_PACK_SIZE_SHORT;
349 break;
350 case PM_PACK_SIZE_32:
351 *size = PM_PACK_SIZE_LONG;
352 break;
353 case PM_PACK_SIZE_64:
354 *size = PM_PACK_SIZE_LONG_LONG;
355 break;
356 case PM_PACK_SIZE_P:
357 break;
358 default:
359 return PM_PACK_ERROR_BANG_NOT_ALLOWED;
360 }
361 break;
362 case '<':
363 (*format)++;
364 if (explicit_endian) {
365 return PM_PACK_ERROR_DOUBLE_ENDIAN;
366 }
367 *endian = PM_PACK_LITTLE_ENDIAN;
368 explicit_endian = true;
369 break;
370 case '>':
371 (*format)++;
372 if (explicit_endian) {
373 return PM_PACK_ERROR_DOUBLE_ENDIAN;
374 }
375 *endian = PM_PACK_BIG_ENDIAN;
376 explicit_endian = true;
377 break;
378 default:
379 goto exit_modifier_loop;
380 }
381 }
382
383exit_modifier_loop:
384
385 if (variant == PM_PACK_VARIANT_UNPACK && *type == PM_PACK_MOVE) {
386 *length = 0;
387 }
388
389 if (*format < format_end) {
390 if (**format == '*') {
391 switch (*type) {
392 case PM_PACK_NULL:
393 case PM_PACK_BACK:
394 switch (variant) {
395 case PM_PACK_VARIANT_PACK:
396 *length_type = PM_PACK_LENGTH_FIXED;
397 break;
398 case PM_PACK_VARIANT_UNPACK:
399 *length_type = PM_PACK_LENGTH_MAX;
400 break;
401 }
402 *length = 0;
403 break;
404
405 case PM_PACK_MOVE:
406 switch (variant) {
407 case PM_PACK_VARIANT_PACK:
408 *length_type = PM_PACK_LENGTH_FIXED;
409 break;
410 case PM_PACK_VARIANT_UNPACK:
411 *length_type = PM_PACK_LENGTH_RELATIVE;
412 break;
413 }
414 *length = 0;
415 break;
416
417 case PM_PACK_STRING_UU:
418 *length_type = PM_PACK_LENGTH_FIXED;
419 *length = 0;
420 break;
421
422 case PM_PACK_STRING_FIXED:
423 switch (variant) {
424 case PM_PACK_VARIANT_PACK:
425 *length_type = PM_PACK_LENGTH_FIXED;
426 *length = 1;
427 break;
428 case PM_PACK_VARIANT_UNPACK:
429 *length_type = PM_PACK_LENGTH_MAX;
430 *length = 0;
431 break;
432 }
433 break;
434
435 case PM_PACK_STRING_MIME:
436 case PM_PACK_STRING_BASE64:
437 *length_type = PM_PACK_LENGTH_FIXED;
438 *length = 1;
439 break;
440
441 default:
442 *length_type = PM_PACK_LENGTH_MAX;
443 *length = 0;
444 break;
445 }
446
447 (*format)++;
448 } else if (**format >= '0' && **format <= '9') {
449 errno = 0;
450 *length_type = PM_PACK_LENGTH_FIXED;
451 #if UINTMAX_MAX < UINT64_MAX
452 #error "prism's design assumes uintmax_t is at least as large as uint64_t"
453 #endif
454 uintmax_t length_max = strtoumaxc(format);
455 if (errno || length_max > UINT64_MAX) {
456 return PM_PACK_ERROR_LENGTH_TOO_BIG;
457 }
458 *length = (uint64_t) length_max;
459 }
460 }
461
462 switch (*type) {
463 case PM_PACK_UTF8:
464 /* if encoding is US-ASCII, upgrade to UTF-8 */
465 if (*encoding == PM_PACK_ENCODING_US_ASCII) {
466 *encoding = PM_PACK_ENCODING_UTF_8;
467 }
468 break;
469 case PM_PACK_STRING_MIME:
470 case PM_PACK_STRING_BASE64:
471 case PM_PACK_STRING_UU:
472 /* keep US-ASCII (do nothing) */
473 break;
474 default:
475 /* fall back to BINARY */
476 *encoding = PM_PACK_ENCODING_ASCII_8BIT;
477 break;
478 }
479
480 return PM_PACK_OK;
481}
482
484pm_size_to_native(pm_pack_size size) {
485 switch (size) {
486 case PM_PACK_SIZE_SHORT:
487 return sizeof(short);
488 case PM_PACK_SIZE_INT:
489 return sizeof(int);
490 case PM_PACK_SIZE_LONG:
491 return sizeof(long);
492 case PM_PACK_SIZE_LONG_LONG:
493 return sizeof(long long);
494 case PM_PACK_SIZE_8:
495 return 1;
496 case PM_PACK_SIZE_16:
497 return 2;
498 case PM_PACK_SIZE_32:
499 return 4;
500 case PM_PACK_SIZE_64:
501 return 8;
502 case PM_PACK_SIZE_P:
503 return sizeof(void *);
504 default:
505 return 0;
506 }
507}
508
509#endif
VALUE type(ANYARGS)
ANYARGS-ed function type.
A pack template string parser.
pm_pack_encoding
The type of encoding for a pack template string.
Definition pack.h:99
pm_pack_result
The result of parsing a pack template.
Definition pack.h:107
pm_pack_variant
The type of pack template we are parsing.
Definition pack.h:29
pm_pack_endian
The endianness of a pack directive.
Definition pack.h:68
pm_pack_signed
The signedness of a pack directive.
Definition pack.h:61
pm_pack_size
The size of an integer pack directive.
Definition pack.h:77
pm_pack_length_type
The type of length of a pack directive.
Definition pack.h:91
pm_pack_type
A directive within the pack template.
Definition pack.h:35
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define errno
Ractor-aware version of errno.
Definition ruby.h:388
C99 shim for <stdbool.h>