Ruby  3.4.0dev (2024-12-06 revision 892c46283a5ea4179500d951c9d4866c0051f27b)
pack.c
1 #include "prism/pack.h"
2 
3 // We optionally support parsing String#pack templates. For systems that don't
4 // want or need this functionality, it can be turned off with the
5 // PRISM_EXCLUDE_PACK define.
6 #ifdef PRISM_EXCLUDE_PACK
7 
/**
 * Placeholder definition compiled in when PRISM_EXCLUDE_PACK is defined: it
 * accepts no arguments and performs no work.
 */
void
pm_pack_parse(void) {
}
9 
10 #else
11 
12 #include <stdbool.h>
13 #include <errno.h>
14 
/**
 * Parse an unsigned decimal integer from the front of *format.
 *
 * Every leading character in the range '0'..'9' is consumed and *format is
 * advanced past it; parsing stops at the first non-digit. No end pointer is
 * taken, so the text must contain a non-digit (for example a NUL terminator)
 * after the digits.
 *
 * @param format In/out cursor into the text being parsed.
 * @return The parsed value, or 0 when there are no leading digits. When the
 *     value does not fit in uintmax_t, errno is set to ERANGE, UINTMAX_MAX is
 *     returned, and the remaining digits are still consumed. errno is never
 *     cleared here, so callers should zero it before calling.
 */
static uintmax_t
strtoumaxc(const char **format) {
    uintmax_t value = 0;

    while (**format >= '0' && **format <= '9') {
        uintmax_t digit = (uintmax_t) (**format - '0');

        // Exact pre-check for value * 10 + digit > UINTMAX_MAX. Comparing
        // against UINTMAX_MAX / 10 alone misses the case where value equals
        // that quotient and the digit exceeds UINTMAX_MAX % 10.
        if (value > (UINTMAX_MAX - digit) / 10) {
            errno = ERANGE;
            // Saturate so that every following digit keeps taking this branch
            // instead of resuming arithmetic on a wrapped value.
            value = UINTMAX_MAX;
        } else {
            value = value * 10 + digit;
        }

        (*format)++;
    }

    return value;
}
27 
30  pm_pack_variant variant,
31  const char **format,
32  const char *format_end,
34  pm_pack_signed *signed_type,
35  pm_pack_endian *endian,
36  pm_pack_size *size,
37  pm_pack_length_type *length_type,
38  uint64_t *length,
39  pm_pack_encoding *encoding
40 ) {
41  if (*encoding == PM_PACK_ENCODING_START) {
42  *encoding = PM_PACK_ENCODING_US_ASCII;
43  }
44 
45  if (*format == format_end) {
46  *type = PM_PACK_END;
47  *signed_type = PM_PACK_SIGNED_NA;
48  *endian = PM_PACK_ENDIAN_NA;
49  *size = PM_PACK_SIZE_NA;
50  *length_type = PM_PACK_LENGTH_NA;
51  return PM_PACK_OK;
52  }
53 
54  *length_type = PM_PACK_LENGTH_FIXED;
55  *length = 1;
56  bool length_changed_allowed = true;
57 
58  char directive = **format;
59  (*format)++;
60  switch (directive) {
61  case ' ':
62  case '\t':
63  case '\n':
64  case '\v':
65  case '\f':
66  case '\r':
67  *type = PM_PACK_SPACE;
68  *signed_type = PM_PACK_SIGNED_NA;
69  *endian = PM_PACK_ENDIAN_NA;
70  *size = PM_PACK_SIZE_NA;
71  *length_type = PM_PACK_LENGTH_NA;
72  *length = 0;
73  return PM_PACK_OK;
74  case '#':
75  while ((*format < format_end) && (**format != '\n')) {
76  (*format)++;
77  }
78  *type = PM_PACK_COMMENT;
79  *signed_type = PM_PACK_SIGNED_NA;
80  *endian = PM_PACK_ENDIAN_NA;
81  *size = PM_PACK_SIZE_NA;
82  *length_type = PM_PACK_LENGTH_NA;
83  *length = 0;
84  return PM_PACK_OK;
85  case 'C':
86  *type = PM_PACK_INTEGER;
87  *signed_type = PM_PACK_UNSIGNED;
88  *endian = PM_PACK_AGNOSTIC_ENDIAN;
89  *size = PM_PACK_SIZE_8;
90  break;
91  case 'S':
92  *type = PM_PACK_INTEGER;
93  *signed_type = PM_PACK_UNSIGNED;
94  *endian = PM_PACK_NATIVE_ENDIAN;
95  *size = PM_PACK_SIZE_16;
96  break;
97  case 'L':
98  *type = PM_PACK_INTEGER;
99  *signed_type = PM_PACK_UNSIGNED;
100  *endian = PM_PACK_NATIVE_ENDIAN;
101  *size = PM_PACK_SIZE_32;
102  break;
103  case 'Q':
104  *type = PM_PACK_INTEGER;
105  *signed_type = PM_PACK_UNSIGNED;
106  *endian = PM_PACK_NATIVE_ENDIAN;
107  *size = PM_PACK_SIZE_64;
108  break;
109  case 'J':
110  *type = PM_PACK_INTEGER;
111  *signed_type = PM_PACK_UNSIGNED;
112  *endian = PM_PACK_NATIVE_ENDIAN;
113  *size = PM_PACK_SIZE_P;
114  break;
115  case 'c':
116  *type = PM_PACK_INTEGER;
117  *signed_type = PM_PACK_SIGNED;
118  *endian = PM_PACK_AGNOSTIC_ENDIAN;
119  *size = PM_PACK_SIZE_8;
120  break;
121  case 's':
122  *type = PM_PACK_INTEGER;
123  *signed_type = PM_PACK_SIGNED;
124  *endian = PM_PACK_NATIVE_ENDIAN;
125  *size = PM_PACK_SIZE_16;
126  break;
127  case 'l':
128  *type = PM_PACK_INTEGER;
129  *signed_type = PM_PACK_SIGNED;
130  *endian = PM_PACK_NATIVE_ENDIAN;
131  *size = PM_PACK_SIZE_32;
132  break;
133  case 'q':
134  *type = PM_PACK_INTEGER;
135  *signed_type = PM_PACK_SIGNED;
136  *endian = PM_PACK_NATIVE_ENDIAN;
137  *size = PM_PACK_SIZE_64;
138  break;
139  case 'j':
140  *type = PM_PACK_INTEGER;
141  *signed_type = PM_PACK_SIGNED;
142  *endian = PM_PACK_NATIVE_ENDIAN;
143  *size = PM_PACK_SIZE_P;
144  break;
145  case 'I':
146  *type = PM_PACK_INTEGER;
147  *signed_type = PM_PACK_UNSIGNED;
148  *endian = PM_PACK_NATIVE_ENDIAN;
149  *size = PM_PACK_SIZE_INT;
150  break;
151  case 'i':
152  *type = PM_PACK_INTEGER;
153  *signed_type = PM_PACK_SIGNED;
154  *endian = PM_PACK_NATIVE_ENDIAN;
155  *size = PM_PACK_SIZE_INT;
156  break;
157  case 'n':
158  *type = PM_PACK_INTEGER;
159  *signed_type = PM_PACK_UNSIGNED;
160  *endian = PM_PACK_BIG_ENDIAN;
161  *size = PM_PACK_SIZE_16;
162  length_changed_allowed = false;
163  break;
164  case 'N':
165  *type = PM_PACK_INTEGER;
166  *signed_type = PM_PACK_UNSIGNED;
167  *endian = PM_PACK_BIG_ENDIAN;
168  *size = PM_PACK_SIZE_32;
169  length_changed_allowed = false;
170  break;
171  case 'v':
172  *type = PM_PACK_INTEGER;
173  *signed_type = PM_PACK_UNSIGNED;
174  *endian = PM_PACK_LITTLE_ENDIAN;
175  *size = PM_PACK_SIZE_16;
176  length_changed_allowed = false;
177  break;
178  case 'V':
179  *type = PM_PACK_INTEGER;
180  *signed_type = PM_PACK_UNSIGNED;
181  *endian = PM_PACK_LITTLE_ENDIAN;
182  *size = PM_PACK_SIZE_32;
183  length_changed_allowed = false;
184  break;
185  case 'U':
186  *type = PM_PACK_UTF8;
187  *signed_type = PM_PACK_SIGNED_NA;
188  *endian = PM_PACK_ENDIAN_NA;
189  *size = PM_PACK_SIZE_NA;
190  break;
191  case 'w':
192  *type = PM_PACK_BER;
193  *signed_type = PM_PACK_SIGNED_NA;
194  *endian = PM_PACK_ENDIAN_NA;
195  *size = PM_PACK_SIZE_NA;
196  break;
197  case 'D':
198  case 'd':
199  *type = PM_PACK_FLOAT;
200  *signed_type = PM_PACK_SIGNED_NA;
201  *endian = PM_PACK_NATIVE_ENDIAN;
202  *size = PM_PACK_SIZE_64;
203  break;
204  case 'F':
205  case 'f':
206  *type = PM_PACK_FLOAT;
207  *signed_type = PM_PACK_SIGNED_NA;
208  *endian = PM_PACK_NATIVE_ENDIAN;
209  *size = PM_PACK_SIZE_32;
210  break;
211  case 'E':
212  *type = PM_PACK_FLOAT;
213  *signed_type = PM_PACK_SIGNED_NA;
214  *endian = PM_PACK_LITTLE_ENDIAN;
215  *size = PM_PACK_SIZE_64;
216  break;
217  case 'e':
218  *type = PM_PACK_FLOAT;
219  *signed_type = PM_PACK_SIGNED_NA;
220  *endian = PM_PACK_LITTLE_ENDIAN;
221  *size = PM_PACK_SIZE_32;
222  break;
223  case 'G':
224  *type = PM_PACK_FLOAT;
225  *signed_type = PM_PACK_SIGNED_NA;
226  *endian = PM_PACK_BIG_ENDIAN;
227  *size = PM_PACK_SIZE_64;
228  break;
229  case 'g':
230  *type = PM_PACK_FLOAT;
231  *signed_type = PM_PACK_SIGNED_NA;
232  *endian = PM_PACK_BIG_ENDIAN;
233  *size = PM_PACK_SIZE_32;
234  break;
235  case 'A':
236  *type = PM_PACK_STRING_SPACE_PADDED;
237  *signed_type = PM_PACK_SIGNED_NA;
238  *endian = PM_PACK_ENDIAN_NA;
239  *size = PM_PACK_SIZE_NA;
240  break;
241  case 'a':
242  *type = PM_PACK_STRING_NULL_PADDED;
243  *signed_type = PM_PACK_SIGNED_NA;
244  *endian = PM_PACK_ENDIAN_NA;
245  *size = PM_PACK_SIZE_NA;
246  break;
247  case 'Z':
248  *type = PM_PACK_STRING_NULL_TERMINATED;
249  *signed_type = PM_PACK_SIGNED_NA;
250  *endian = PM_PACK_ENDIAN_NA;
251  *size = PM_PACK_SIZE_NA;
252  break;
253  case 'B':
254  *type = PM_PACK_STRING_MSB;
255  *signed_type = PM_PACK_SIGNED_NA;
256  *endian = PM_PACK_ENDIAN_NA;
257  *size = PM_PACK_SIZE_NA;
258  break;
259  case 'b':
260  *type = PM_PACK_STRING_LSB;
261  *signed_type = PM_PACK_SIGNED_NA;
262  *endian = PM_PACK_ENDIAN_NA;
263  *size = PM_PACK_SIZE_NA;
264  break;
265  case 'H':
266  *type = PM_PACK_STRING_HEX_HIGH;
267  *signed_type = PM_PACK_SIGNED_NA;
268  *endian = PM_PACK_ENDIAN_NA;
269  *size = PM_PACK_SIZE_NA;
270  break;
271  case 'h':
272  *type = PM_PACK_STRING_HEX_LOW;
273  *signed_type = PM_PACK_SIGNED_NA;
274  *endian = PM_PACK_ENDIAN_NA;
275  *size = PM_PACK_SIZE_NA;
276  break;
277  case 'u':
278  *type = PM_PACK_STRING_UU;
279  *signed_type = PM_PACK_SIGNED_NA;
280  *endian = PM_PACK_ENDIAN_NA;
281  *size = PM_PACK_SIZE_NA;
282  break;
283  case 'M':
284  *type = PM_PACK_STRING_MIME;
285  *signed_type = PM_PACK_SIGNED_NA;
286  *endian = PM_PACK_ENDIAN_NA;
287  *size = PM_PACK_SIZE_NA;
288  break;
289  case 'm':
290  *type = PM_PACK_STRING_BASE64;
291  *signed_type = PM_PACK_SIGNED_NA;
292  *endian = PM_PACK_ENDIAN_NA;
293  *size = PM_PACK_SIZE_NA;
294  break;
295  case 'P':
296  *type = PM_PACK_STRING_FIXED;
297  *signed_type = PM_PACK_SIGNED_NA;
298  *endian = PM_PACK_ENDIAN_NA;
299  *size = PM_PACK_SIZE_NA;
300  break;
301  case 'p':
302  *type = PM_PACK_STRING_POINTER;
303  *signed_type = PM_PACK_SIGNED_NA;
304  *endian = PM_PACK_ENDIAN_NA;
305  *size = PM_PACK_SIZE_NA;
306  break;
307  case '@':
308  *type = PM_PACK_MOVE;
309  *signed_type = PM_PACK_SIGNED_NA;
310  *endian = PM_PACK_ENDIAN_NA;
311  *size = PM_PACK_SIZE_NA;
312  break;
313  case 'X':
314  *type = PM_PACK_BACK;
315  *signed_type = PM_PACK_SIGNED_NA;
316  *endian = PM_PACK_ENDIAN_NA;
317  *size = PM_PACK_SIZE_NA;
318  break;
319  case 'x':
320  *type = PM_PACK_NULL;
321  *signed_type = PM_PACK_SIGNED_NA;
322  *endian = PM_PACK_ENDIAN_NA;
323  *size = PM_PACK_SIZE_NA;
324  break;
325  case '%':
326  return PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
327  default:
328  return PM_PACK_ERROR_UNKNOWN_DIRECTIVE;
329  }
330 
331  bool explicit_endian = false;
332 
333  while (*format < format_end) {
334  switch (**format) {
335  case '_':
336  case '!':
337  (*format)++;
338  if (*type != PM_PACK_INTEGER || !length_changed_allowed) {
339  return PM_PACK_ERROR_BANG_NOT_ALLOWED;
340  }
341  switch (*size) {
342  case PM_PACK_SIZE_SHORT:
343  case PM_PACK_SIZE_INT:
344  case PM_PACK_SIZE_LONG:
345  case PM_PACK_SIZE_LONG_LONG:
346  break;
347  case PM_PACK_SIZE_16:
348  *size = PM_PACK_SIZE_SHORT;
349  break;
350  case PM_PACK_SIZE_32:
351  *size = PM_PACK_SIZE_LONG;
352  break;
353  case PM_PACK_SIZE_64:
354  *size = PM_PACK_SIZE_LONG_LONG;
355  break;
356  case PM_PACK_SIZE_P:
357  break;
358  default:
359  return PM_PACK_ERROR_BANG_NOT_ALLOWED;
360  }
361  break;
362  case '<':
363  (*format)++;
364  if (explicit_endian) {
365  return PM_PACK_ERROR_DOUBLE_ENDIAN;
366  }
367  *endian = PM_PACK_LITTLE_ENDIAN;
368  explicit_endian = true;
369  break;
370  case '>':
371  (*format)++;
372  if (explicit_endian) {
373  return PM_PACK_ERROR_DOUBLE_ENDIAN;
374  }
375  *endian = PM_PACK_BIG_ENDIAN;
376  explicit_endian = true;
377  break;
378  default:
379  goto exit_modifier_loop;
380  }
381  }
382 
383 exit_modifier_loop:
384 
385  if (variant == PM_PACK_VARIANT_UNPACK && *type == PM_PACK_MOVE) {
386  *length = 0;
387  }
388 
389  if (*format < format_end) {
390  if (**format == '*') {
391  switch (*type) {
392  case PM_PACK_NULL:
393  case PM_PACK_BACK:
394  switch (variant) {
395  case PM_PACK_VARIANT_PACK:
396  *length_type = PM_PACK_LENGTH_FIXED;
397  break;
398  case PM_PACK_VARIANT_UNPACK:
399  *length_type = PM_PACK_LENGTH_MAX;
400  break;
401  }
402  *length = 0;
403  break;
404 
405  case PM_PACK_MOVE:
406  switch (variant) {
407  case PM_PACK_VARIANT_PACK:
408  *length_type = PM_PACK_LENGTH_FIXED;
409  break;
410  case PM_PACK_VARIANT_UNPACK:
411  *length_type = PM_PACK_LENGTH_RELATIVE;
412  break;
413  }
414  *length = 0;
415  break;
416 
417  case PM_PACK_STRING_UU:
418  *length_type = PM_PACK_LENGTH_FIXED;
419  *length = 0;
420  break;
421 
422  case PM_PACK_STRING_FIXED:
423  switch (variant) {
424  case PM_PACK_VARIANT_PACK:
425  *length_type = PM_PACK_LENGTH_FIXED;
426  *length = 1;
427  break;
428  case PM_PACK_VARIANT_UNPACK:
429  *length_type = PM_PACK_LENGTH_MAX;
430  *length = 0;
431  break;
432  }
433  break;
434 
435  case PM_PACK_STRING_MIME:
436  case PM_PACK_STRING_BASE64:
437  *length_type = PM_PACK_LENGTH_FIXED;
438  *length = 1;
439  break;
440 
441  default:
442  *length_type = PM_PACK_LENGTH_MAX;
443  *length = 0;
444  break;
445  }
446 
447  (*format)++;
448  } else if (**format >= '0' && **format <= '9') {
449  errno = 0;
450  *length_type = PM_PACK_LENGTH_FIXED;
451  #if UINTMAX_MAX < UINT64_MAX
452  #error "prism's design assumes uintmax_t is at least as large as uint64_t"
453  #endif
454  uintmax_t length_max = strtoumaxc(format);
455  if (errno || length_max > UINT64_MAX) {
456  return PM_PACK_ERROR_LENGTH_TOO_BIG;
457  }
458  *length = (uint64_t) length_max;
459  }
460  }
461 
462  switch (*type) {
463  case PM_PACK_UTF8:
464  /* if encoding is US-ASCII, upgrade to UTF-8 */
465  if (*encoding == PM_PACK_ENCODING_US_ASCII) {
466  *encoding = PM_PACK_ENCODING_UTF_8;
467  }
468  break;
469  case PM_PACK_STRING_MIME:
470  case PM_PACK_STRING_BASE64:
471  case PM_PACK_STRING_UU:
472  /* keep US-ASCII (do nothing) */
473  break;
474  default:
475  /* fall back to BINARY */
476  *encoding = PM_PACK_ENCODING_ASCII_8BIT;
477  break;
478  }
479 
480  return PM_PACK_OK;
481 }
482 
485  switch (size) {
486  case PM_PACK_SIZE_SHORT:
487  return sizeof(short);
488  case PM_PACK_SIZE_INT:
489  return sizeof(int);
490  case PM_PACK_SIZE_LONG:
491  return sizeof(long);
492  case PM_PACK_SIZE_LONG_LONG:
493  return sizeof(long long);
494  case PM_PACK_SIZE_8:
495  return 1;
496  case PM_PACK_SIZE_16:
497  return 2;
498  case PM_PACK_SIZE_32:
499  return 4;
500  case PM_PACK_SIZE_64:
501  return 8;
502  case PM_PACK_SIZE_P:
503  return sizeof(void *);
504  default:
505  return 0;
506  }
507 }
508 
509 #endif
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
A pack template string parser.
pm_pack_encoding
The type of encoding for a pack template string.
Definition: pack.h:99
pm_pack_result
The result of parsing a pack template.
Definition: pack.h:107
pm_pack_variant
The type of pack template we are parsing.
Definition: pack.h:29
pm_pack_endian
The endianness of a pack directive.
Definition: pack.h:68
pm_pack_signed
The signedness of a pack directive.
Definition: pack.h:61
pm_pack_size
The size of an integer pack directive.
Definition: pack.h:77
PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size)
Prism abstracts sizes away from the native system - this converts an abstract size to a native size.
Definition: pack.c:484
pm_pack_length_type
The type of length of a pack directive.
Definition: pack.h:91
PRISM_EXPORTED_FUNCTION pm_pack_result pm_pack_parse(pm_pack_variant variant, const char **format, const char *format_end, pm_pack_type *type, pm_pack_signed *signed_type, pm_pack_endian *endian, pm_pack_size *size, pm_pack_length_type *length_type, uint64_t *length, pm_pack_encoding *encoding)
Parse a single directive from a pack or unpack format string.
Definition: pack.c:29
pm_pack_type
A directive within the pack template.
Definition: pack.h:35
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition: defines.h:50
#define errno
Ractor-aware version of errno.
Definition: ruby.h:388
C99 shim for <stdbool.h>