Ruby  3.4.0dev (2024-11-05 revision 348a53415339076afc4a02fcd09f3ae36e9c4c61)
pack.c (348a53415339076afc4a02fcd09f3ae36e9c4c61)
1 /**********************************************************************
2 
3  pack.c -
4 
5  $Author$
6  created at: Thu Feb 10 15:17:05 JST 1994
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/internal/config.h"
13 
14 #include <ctype.h>
15 #include <errno.h>
16 #include <float.h>
17 #include <sys/types.h>
18 
19 #include "internal.h"
20 #include "internal/array.h"
21 #include "internal/bits.h"
22 #include "internal/string.h"
23 #include "internal/symbol.h"
24 #include "internal/variable.h"
25 #include "ruby/util.h"
26 
27 #include "builtin.h"
28 
29 /*
30  * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
31  * instead of HAVE_LONG_LONG or LONG_LONG.
32  * This means q! and Q! always mean the standard long long type and
33  * cause ArgumentError on platforms which have no long long type,
34  * even if the platform has an implementation-specific 64-bit type.
35  * This behavior is consistent with the documentation of pack/unpack.
36  */
37 #ifdef HAVE_TRUE_LONG_LONG
38 static const char natstr[] = "sSiIlLqQjJ";
39 # define endstr natstr
40 #else
41 static const char natstr[] = "sSiIlLjJ";
42 static const char endstr[] = "sSiIlLqQjJ";
43 #endif
44 
45 #ifdef HAVE_TRUE_LONG_LONG
46 /* It is intentional to use long long instead of LONG_LONG. */
47 # define NATINT_LEN_Q NATINT_LEN(long long, 8)
48 #else
49 # define NATINT_LEN_Q 8
50 #endif
51 
52 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
53 # define NATINT_PACK
54 #endif
55 
56 #ifdef DYNAMIC_ENDIAN
57 /* for universal binary of NEXTSTEP and MacOS X */
58 /* useless since autoconf 2.63? */
/*
 * Detects the byte order of the running machine at run time.
 *
 * Returns 1 on a big-endian machine and 0 otherwise.  The probe runs on
 * the first call only; later calls return the value cached in a
 * function-local static.
 */
static int
is_bigendian(void)
{
    static int probed = 0;  /* 0 until the first call has run */
    static int cached;      /* result of the probe: 1 = big-endian */

    if (!probed) {
        const char *first_byte;

        /* Setting the flag to 1 also turns it into the probe value: its
         * first byte is non-zero only when the low-order byte comes first. */
        probed = 1;
        first_byte = (const char *)&probed;
        cached = (first_byte[0] != 0) ? 0 : 1;
    }
    return cached;
}
71 # define BIGENDIAN_P() (is_bigendian())
72 #elif defined(WORDS_BIGENDIAN)
73 # define BIGENDIAN_P() 1
74 #else
75 # define BIGENDIAN_P() 0
76 #endif
77 
78 #ifdef NATINT_PACK
79 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
80 #else
81 # define NATINT_LEN(type,len) ((int)sizeof(type))
82 #endif
83 
/*
 * Overlay that lets one 4-byte object be accessed as a float (.f), as a
 * 32-bit unsigned integer (.u) or as raw bytes (.buf).  The byte-order
 * macros in this file swap the value through .u, and the packed bytes
 * are read from / written to .buf.
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;
/*
 * Overlay that lets one 8-byte object be accessed as a double (.d), as a
 * 64-bit unsigned integer (.u) or as raw bytes (.buf).  The byte-order
 * macros in this file swap the value through .u, and the packed bytes
 * are read from / written to .buf.
 */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
94 #define swapf(x) swap32(x)
95 #define swapd(x) swap64(x)
96 
97 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
98 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
99 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
100 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
101 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
102 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
103 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
104 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
105 
106 #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
107 #define HTONF(x) ((x).u = rb_htonf((x).u))
108 #define HTOVF(x) ((x).u = rb_htovf((x).u))
109 #define NTOHF(x) ((x).u = rb_ntohf((x).u))
110 #define VTOHF(x) ((x).u = rb_vtohf((x).u))
111 
112 #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
113 #define HTOND(x) ((x).u = rb_htond((x).u))
114 #define HTOVD(x) ((x).u = rb_htovd((x).u))
115 #define NTOHD(x) ((x).u = rb_ntohd((x).u))
116 #define VTOHD(x) ((x).u = rb_vtohd((x).u))
117 
118 #define MAX_INTEGER_PACK_SIZE 8
119 
120 static const char toofew[] = "too few arguments";
121 
122 static void encodes(VALUE,const char*,long,int,int);
123 static void qpencode(VALUE,VALUE,long);
124 
125 static unsigned long utf8_to_uv(const char*,long*);
126 
127 static ID id_associated;
128 
129 static void
130 str_associate(VALUE str, VALUE add)
131 {
132  /* assert(NIL_P(rb_attr_get(str, id_associated))); */
133  rb_ivar_set(str, id_associated, add);
134 }
135 
136 static VALUE
137 str_associated(VALUE str)
138 {
139  VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
140  if (!associates)
141  rb_raise(rb_eArgError, "no associated pointer");
142  return associates;
143 }
144 
145 static VALUE
146 associated_pointer(VALUE associates, const char *t)
147 {
148  const VALUE *p = RARRAY_CONST_PTR(associates);
149  const VALUE *pend = p + RARRAY_LEN(associates);
150  for (; p < pend; p++) {
151  VALUE tmp = *p;
152  if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
153  }
154  rb_raise(rb_eArgError, "non associated pointer");
156 }
157 
159 static void
160 unknown_directive(const char *mode, char type, VALUE fmt)
161 {
162  char unknown[5];
163 
164  if (ISPRINT(type)) {
165  unknown[0] = type;
166  unknown[1] = '\0';
167  }
168  else {
169  snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
170  }
171  fmt = rb_str_quote_unprintable(fmt);
172  rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'",
173  mode, unknown, fmt);
174 }
175 
176 static float
177 VALUE_to_float(VALUE obj)
178 {
179  VALUE v = rb_to_float(obj);
180  double d = RFLOAT_VALUE(v);
181 
182  if (isnan(d)) {
183  return NAN;
184  }
185  else if (d < -FLT_MAX) {
186  return -INFINITY;
187  }
188  else if (d <= FLT_MAX) {
189  return d;
190  }
191  else {
192  return INFINITY;
193  }
194 }
195 
196 static void
197 str_expand_fill(VALUE res, int c, long len)
198 {
199  long olen = RSTRING_LEN(res);
200  memset(RSTRING_PTR(res) + olen, c, len);
201  rb_str_set_len(res, olen + len);
202 }
203 
/*
 * Returns the position just after the first '\n' in [p, pend), or pend
 * itself when the range holds no newline.
 */
static char *
skip_to_eol(const char *p, const char *pend)
{
    const char *newline = memchr(p, '\n', pend - p);

    if (newline == NULL) {
        return (char *)pend;
    }
    return (char *)(newline + 1);
}
210 
211 #define skip_blank(p, type) \
212  (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1)))
213 
214 #ifndef NATINT_PACK
215 # define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e)
216 #endif
217 static char *
218 pack_modifiers(const char *p, char type, int *natint, int *explicit_endian)
219 {
220  while (1) {
221  switch (*p) {
222  case '_':
223  case '!':
224  if (strchr(natstr, type)) {
225 #ifdef NATINT_PACK
226  *natint = 1;
227 #endif
228  p++;
229  }
230  else {
231  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
232  }
233  break;
234 
235  case '<':
236  case '>':
237  if (!strchr(endstr, type)) {
238  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
239  }
240  if (*explicit_endian) {
241  rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
242  }
243  *explicit_endian = *p++;
244  break;
245  default:
246  return (char *)p;
247  }
248  }
249 }
250 
251 static VALUE
252 pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
253 {
254  const char *p, *pend;
255  VALUE res, from, associates = 0;
256  long len, idx, plen;
257  const char *ptr;
258  int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
259  int integer_size, bigendian_p;
260 
261  StringValue(fmt);
262  rb_must_asciicompat(fmt);
263  p = RSTRING_PTR(fmt);
264  pend = p + RSTRING_LEN(fmt);
265 
266  if (NIL_P(buffer)) {
267  res = rb_str_buf_new(0);
268  }
269  else {
270  if (!RB_TYPE_P(buffer, T_STRING))
271  rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
272  rb_str_modify(buffer);
273  res = buffer;
274  }
275 
276  idx = 0;
277 
278 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
279 #define MORE_ITEM (idx < RARRAY_LEN(ary))
280 #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
281 #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
282 
283  while (p < pend) {
284  int explicit_endian = 0;
285  if (RSTRING_END(fmt) != pend) {
286  rb_raise(rb_eRuntimeError, "format string modified");
287  }
288  const char type = *p++; /* get data type */
289 #ifdef NATINT_PACK
290  int natint = 0; /* native integer */
291 #endif
292 
293  if (skip_blank(p, type)) continue;
294  p = pack_modifiers(p, type, &natint, &explicit_endian);
295 
296  if (*p == '*') { /* set data length */
297  len = strchr("@Xxu", type) ? 0
298  : strchr("PMm", type) ? 1
299  : RARRAY_LEN(ary) - idx;
300  p++;
301  }
302  else if (ISDIGIT(*p)) {
303  errno = 0;
304  len = STRTOUL(p, (char**)&p, 10);
305  if (errno) {
306  rb_raise(rb_eRangeError, "pack length too big");
307  }
308  }
309  else {
310  len = 1;
311  }
312 
313  switch (type) {
314  case 'U':
315  /* if encoding is US-ASCII, upgrade to UTF-8 */
316  if (enc_info == 1) enc_info = 2;
317  break;
318  case 'm': case 'M': case 'u':
319  /* keep US-ASCII (do nothing) */
320  break;
321  default:
322  /* fall back to BINARY */
323  enc_info = 0;
324  break;
325  }
326  switch (type) {
327  case 'A': case 'a': case 'Z':
328  case 'B': case 'b':
329  case 'H': case 'h':
330  from = NEXTFROM;
331  if (NIL_P(from)) {
332  ptr = "";
333  plen = 0;
334  }
335  else {
336  StringValue(from);
337  ptr = RSTRING_PTR(from);
338  plen = RSTRING_LEN(from);
339  }
340 
341  if (p[-1] == '*')
342  len = plen;
343 
344  switch (type) {
345  case 'a': /* arbitrary binary string (null padded) */
346  case 'A': /* arbitrary binary string (ASCII space padded) */
347  case 'Z': /* null terminated string */
348  if (plen >= len) {
349  rb_str_buf_cat(res, ptr, len);
350  if (p[-1] == '*' && type == 'Z')
351  rb_str_buf_cat(res, "", 1);
352  }
353  else {
355  rb_str_buf_cat(res, ptr, plen);
356  str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen);
357  }
358  break;
359 
360 #define castchar(from) (char)((from) & 0xff)
361 
362  case 'b': /* bit string (ascending) */
363  {
364  int byte = 0;
365  long i, j = 0;
366 
367  if (len > plen) {
368  j = (len - plen + 1)/2;
369  len = plen;
370  }
371  for (i=0; i++ < len; ptr++) {
372  if (*ptr & 1)
373  byte |= 128;
374  if (i & 7)
375  byte >>= 1;
376  else {
377  char c = castchar(byte);
378  rb_str_buf_cat(res, &c, 1);
379  byte = 0;
380  }
381  }
382  if (len & 7) {
383  char c;
384  byte >>= 7 - (len & 7);
385  c = castchar(byte);
386  rb_str_buf_cat(res, &c, 1);
387  }
388  len = j;
389  goto grow;
390  }
391  break;
392 
393  case 'B': /* bit string (descending) */
394  {
395  int byte = 0;
396  long i, j = 0;
397 
398  if (len > plen) {
399  j = (len - plen + 1)/2;
400  len = plen;
401  }
402  for (i=0; i++ < len; ptr++) {
403  byte |= *ptr & 1;
404  if (i & 7)
405  byte <<= 1;
406  else {
407  char c = castchar(byte);
408  rb_str_buf_cat(res, &c, 1);
409  byte = 0;
410  }
411  }
412  if (len & 7) {
413  char c;
414  byte <<= 7 - (len & 7);
415  c = castchar(byte);
416  rb_str_buf_cat(res, &c, 1);
417  }
418  len = j;
419  goto grow;
420  }
421  break;
422 
423  case 'h': /* hex string (low nibble first) */
424  {
425  int byte = 0;
426  long i, j = 0;
427 
428  if (len > plen) {
429  j = (len + 1) / 2 - (plen + 1) / 2;
430  len = plen;
431  }
432  for (i=0; i++ < len; ptr++) {
433  if (ISALPHA(*ptr))
434  byte |= (((*ptr & 15) + 9) & 15) << 4;
435  else
436  byte |= (*ptr & 15) << 4;
437  if (i & 1)
438  byte >>= 4;
439  else {
440  char c = castchar(byte);
441  rb_str_buf_cat(res, &c, 1);
442  byte = 0;
443  }
444  }
445  if (len & 1) {
446  char c = castchar(byte);
447  rb_str_buf_cat(res, &c, 1);
448  }
449  len = j;
450  goto grow;
451  }
452  break;
453 
454  case 'H': /* hex string (high nibble first) */
455  {
456  int byte = 0;
457  long i, j = 0;
458 
459  if (len > plen) {
460  j = (len + 1) / 2 - (plen + 1) / 2;
461  len = plen;
462  }
463  for (i=0; i++ < len; ptr++) {
464  if (ISALPHA(*ptr))
465  byte |= ((*ptr & 15) + 9) & 15;
466  else
467  byte |= *ptr & 15;
468  if (i & 1)
469  byte <<= 4;
470  else {
471  char c = castchar(byte);
472  rb_str_buf_cat(res, &c, 1);
473  byte = 0;
474  }
475  }
476  if (len & 1) {
477  char c = castchar(byte);
478  rb_str_buf_cat(res, &c, 1);
479  }
480  len = j;
481  goto grow;
482  }
483  break;
484  }
485  break;
486 
487  case 'c': /* signed char */
488  case 'C': /* unsigned char */
489  integer_size = 1;
490  bigendian_p = BIGENDIAN_P(); /* not effective */
491  goto pack_integer;
492 
493  case 's': /* s for int16_t, s! for signed short */
494  case 'S': /* S for uint16_t, S! for unsigned short */
495  integer_size = NATINT_LEN(short, 2);
496  bigendian_p = BIGENDIAN_P();
497  goto pack_integer;
498 
499  case 'i': /* i and i! for signed int */
500  case 'I': /* I and I! for unsigned int */
501  integer_size = (int)sizeof(int);
502  bigendian_p = BIGENDIAN_P();
503  goto pack_integer;
504 
505  case 'l': /* l for int32_t, l! for signed long */
506  case 'L': /* L for uint32_t, L! for unsigned long */
507  integer_size = NATINT_LEN(long, 4);
508  bigendian_p = BIGENDIAN_P();
509  goto pack_integer;
510 
511  case 'q': /* q for int64_t, q! for signed long long */
512  case 'Q': /* Q for uint64_t, Q! for unsigned long long */
513  integer_size = NATINT_LEN_Q;
514  bigendian_p = BIGENDIAN_P();
515  goto pack_integer;
516 
517  case 'j': /* j for intptr_t */
518  integer_size = sizeof(intptr_t);
519  bigendian_p = BIGENDIAN_P();
520  goto pack_integer;
521 
522  case 'J': /* J for uintptr_t */
523  integer_size = sizeof(uintptr_t);
524  bigendian_p = BIGENDIAN_P();
525  goto pack_integer;
526 
527  case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
528  integer_size = 2;
529  bigendian_p = 1;
530  goto pack_integer;
531 
532  case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
533  integer_size = 4;
534  bigendian_p = 1;
535  goto pack_integer;
536 
537  case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
538  integer_size = 2;
539  bigendian_p = 0;
540  goto pack_integer;
541 
542  case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
543  integer_size = 4;
544  bigendian_p = 0;
545  goto pack_integer;
546 
547  pack_integer:
548  if (explicit_endian) {
549  bigendian_p = explicit_endian == '>';
550  }
551  if (integer_size > MAX_INTEGER_PACK_SIZE)
552  rb_bug("unexpected integer size for pack: %d", integer_size);
553  while (len-- > 0) {
554  char intbuf[MAX_INTEGER_PACK_SIZE];
555 
556  from = NEXTFROM;
557  rb_integer_pack(from, intbuf, integer_size, 1, 0,
560  rb_str_buf_cat(res, intbuf, integer_size);
561  }
562  break;
563 
564  case 'f': /* single precision float in native format */
565  case 'F': /* ditto */
566  while (len-- > 0) {
567  float f;
568 
569  from = NEXTFROM;
570  f = VALUE_to_float(from);
571  rb_str_buf_cat(res, (char*)&f, sizeof(float));
572  }
573  break;
574 
575  case 'e': /* single precision float in VAX byte-order */
576  while (len-- > 0) {
577  FLOAT_CONVWITH(tmp);
578 
579  from = NEXTFROM;
580  tmp.f = VALUE_to_float(from);
581  HTOVF(tmp);
582  rb_str_buf_cat(res, tmp.buf, sizeof(float));
583  }
584  break;
585 
586  case 'E': /* double precision float in VAX byte-order */
587  while (len-- > 0) {
588  DOUBLE_CONVWITH(tmp);
589  from = NEXTFROM;
590  tmp.d = RFLOAT_VALUE(rb_to_float(from));
591  HTOVD(tmp);
592  rb_str_buf_cat(res, tmp.buf, sizeof(double));
593  }
594  break;
595 
596  case 'd': /* double precision float in native format */
597  case 'D': /* ditto */
598  while (len-- > 0) {
599  double d;
600 
601  from = NEXTFROM;
602  d = RFLOAT_VALUE(rb_to_float(from));
603  rb_str_buf_cat(res, (char*)&d, sizeof(double));
604  }
605  break;
606 
607  case 'g': /* single precision float in network byte-order */
608  while (len-- > 0) {
609  FLOAT_CONVWITH(tmp);
610  from = NEXTFROM;
611  tmp.f = VALUE_to_float(from);
612  HTONF(tmp);
613  rb_str_buf_cat(res, tmp.buf, sizeof(float));
614  }
615  break;
616 
617  case 'G': /* double precision float in network byte-order */
618  while (len-- > 0) {
619  DOUBLE_CONVWITH(tmp);
620 
621  from = NEXTFROM;
622  tmp.d = RFLOAT_VALUE(rb_to_float(from));
623  HTOND(tmp);
624  rb_str_buf_cat(res, tmp.buf, sizeof(double));
625  }
626  break;
627 
628  case 'x': /* null byte */
629  grow:
631  str_expand_fill(res, '\0', len);
632  break;
633 
634  case 'X': /* back up byte */
635  shrink:
636  plen = RSTRING_LEN(res);
637  if (plen < len)
638  rb_raise(rb_eArgError, "X outside of string");
639  rb_str_set_len(res, plen - len);
640  break;
641 
642  case '@': /* null fill to absolute position */
643  len -= RSTRING_LEN(res);
644  if (len > 0) goto grow;
645  len = -len;
646  if (len > 0) goto shrink;
647  break;
648 
649  case '%':
650  rb_raise(rb_eArgError, "%% is not supported");
651  break;
652 
653  case 'U': /* Unicode character */
654  while (len-- > 0) {
655  SIGNED_VALUE l;
656  char buf[8];
657  int le;
658 
659  from = NEXTFROM;
660  from = rb_to_int(from);
661  l = NUM2LONG(from);
662  if (l < 0) {
663  rb_raise(rb_eRangeError, "pack(U): value out of range");
664  }
665  le = rb_uv_to_utf8(buf, l);
666  rb_str_buf_cat(res, (char*)buf, le);
667  }
668  break;
669 
670  case 'u': /* uuencoded string */
671  case 'm': /* base64 encoded string */
672  from = NEXTFROM;
673  StringValue(from);
674  ptr = RSTRING_PTR(from);
675  plen = RSTRING_LEN(from);
676 
677  if (len == 0 && type == 'm') {
678  encodes(res, ptr, plen, type, 0);
679  ptr += plen;
680  break;
681  }
682  if (len <= 2)
683  len = 45;
684  else if (len > 63 && type == 'u')
685  len = 63;
686  else
687  len = len / 3 * 3;
688  while (plen > 0) {
689  long todo;
690 
691  if (plen > len)
692  todo = len;
693  else
694  todo = plen;
695  encodes(res, ptr, todo, type, 1);
696  plen -= todo;
697  ptr += todo;
698  }
699  break;
700 
701  case 'M': /* quoted-printable encoded string */
702  from = rb_obj_as_string(NEXTFROM);
703  if (len <= 1)
704  len = 72;
705  qpencode(res, from, len);
706  break;
707 
708  case 'P': /* pointer to packed byte string */
709  from = THISFROM;
710  if (!NIL_P(from)) {
711  StringValue(from);
712  if (RSTRING_LEN(from) < len) {
713  rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
714  RSTRING_LEN(from), len);
715  }
716  }
717  len = 1;
718  /* FALL THROUGH */
719  case 'p': /* pointer to string */
720  while (len-- > 0) {
721  char *t;
722  from = NEXTFROM;
723  if (NIL_P(from)) {
724  t = 0;
725  }
726  else {
727  t = StringValuePtr(from);
728  }
729  if (!associates) {
730  associates = rb_ary_new();
731  }
732  rb_ary_push(associates, from);
733  rb_str_buf_cat(res, (char*)&t, sizeof(char*));
734  }
735  break;
736 
737  case 'w': /* BER compressed integer */
738  while (len-- > 0) {
739  VALUE buf = rb_str_new(0, 0);
740  size_t numbytes;
741  int sign;
742  char *cp;
743 
744  from = NEXTFROM;
745  from = rb_to_int(from);
746  numbytes = rb_absint_numwords(from, 7, NULL);
747  if (numbytes == 0)
748  numbytes = 1;
749  buf = rb_str_new(NULL, numbytes);
750 
751  sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
752 
753  if (sign < 0)
754  rb_raise(rb_eArgError, "can't compress negative numbers");
755  if (sign == 2)
756  rb_bug("buffer size problem?");
757 
758  cp = RSTRING_PTR(buf);
759  while (1 < numbytes) {
760  *cp |= 0x80;
761  cp++;
762  numbytes--;
763  }
764 
765  rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
766  }
767  break;
768 
769  default: {
770  unknown_directive("pack", type, fmt);
771  break;
772  }
773  }
774  }
775 
776  if (associates) {
777  str_associate(res, associates);
778  }
779  switch (enc_info) {
780  case 1:
782  break;
783  case 2:
785  break;
786  default:
787  /* do nothing, keep ASCII-8BIT */
788  break;
789  }
790  return res;
791 }
792 
793 VALUE
794 rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
795 {
796  return pack_pack(ec, ary, fmt, buffer);
797 }
798 
799 static const char uu_table[] =
800 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
801 static const char b64_table[] =
802 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
803 
804 static void
805 encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
806 {
807  enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
808  char buff[buff_size + 1]; /* +1 for tail_lf */
809  long i = 0;
810  const char *const trans = type == 'u' ? uu_table : b64_table;
811  char padding;
812  const unsigned char *s = (const unsigned char *)s0;
813 
814  if (type == 'u') {
815  buff[i++] = (char)len + ' ';
816  padding = '`';
817  }
818  else {
819  padding = '=';
820  }
821  while (len >= input_unit) {
822  while (len >= input_unit && buff_size-i >= encoded_unit) {
823  buff[i++] = trans[077 & (*s >> 2)];
824  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
825  buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
826  buff[i++] = trans[077 & s[2]];
827  s += input_unit;
828  len -= input_unit;
829  }
830  if (buff_size-i < encoded_unit) {
831  rb_str_buf_cat(str, buff, i);
832  i = 0;
833  }
834  }
835 
836  if (len == 2) {
837  buff[i++] = trans[077 & (*s >> 2)];
838  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
839  buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
840  buff[i++] = padding;
841  }
842  else if (len == 1) {
843  buff[i++] = trans[077 & (*s >> 2)];
844  buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
845  buff[i++] = padding;
846  buff[i++] = padding;
847  }
848  if (tail_lf) buff[i++] = '\n';
849  rb_str_buf_cat(str, buff, i);
850  if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
851 }
852 
853 static const char hex_table[] = "0123456789ABCDEF";
854 
855 static void
856 qpencode(VALUE str, VALUE from, long len)
857 {
858  char buff[1024];
859  long i = 0, n = 0, prev = EOF;
860  unsigned char *s = (unsigned char*)RSTRING_PTR(from);
861  unsigned char *send = s + RSTRING_LEN(from);
862 
863  while (s < send) {
864  if ((*s > 126) ||
865  (*s < 32 && *s != '\n' && *s != '\t') ||
866  (*s == '=')) {
867  buff[i++] = '=';
868  buff[i++] = hex_table[*s >> 4];
869  buff[i++] = hex_table[*s & 0x0f];
870  n += 3;
871  prev = EOF;
872  }
873  else if (*s == '\n') {
874  if (prev == ' ' || prev == '\t') {
875  buff[i++] = '=';
876  buff[i++] = *s;
877  }
878  buff[i++] = *s;
879  n = 0;
880  prev = *s;
881  }
882  else {
883  buff[i++] = *s;
884  n++;
885  prev = *s;
886  }
887  if (n > len) {
888  buff[i++] = '=';
889  buff[i++] = '\n';
890  n = 0;
891  prev = '\n';
892  }
893  if (i > 1024 - 5) {
894  rb_str_buf_cat(str, buff, i);
895  i = 0;
896  }
897  s++;
898  }
899  if (n > 0) {
900  buff[i++] = '=';
901  buff[i++] = '\n';
902  }
903  if (i > 0) {
904  rb_str_buf_cat(str, buff, i);
905  }
906 }
907 
908 static inline int
909 hex2num(char c)
910 {
911  int n;
912  n = ruby_digit36_to_number_table[(unsigned char)c];
913  if (16 <= n)
914  n = -1;
915  return n;
916 }
917 
918 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
919  tmp_len = 0; \
920  if (len > (long)((send-s)/(sz))) { \
921  if (!star) { \
922  tmp_len = len-(send-s)/(sz); \
923  } \
924  len = (send-s)/(sz); \
925  } \
926 } while (0)
927 
928 #define PACK_ITEM_ADJUST() do { \
929  if (tmp_len > 0 && mode == UNPACK_ARRAY) \
930  rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
931 } while (0)
932 
933 /* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
934  * 12.4/12.5/12.6 C compiler optimization bug
935  * with "-xO4" optimization option.
936  */
937 #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
938 # define AVOID_CC_BUG volatile
939 #else
940 # define AVOID_CC_BUG
941 #endif
942 
/* Selects what UNPACK_PUSH does with each item decoded by pack_unpack_internal. */
enum unpack_mode {
    UNPACK_ARRAY = 0,   /* push the item onto the result array */
    UNPACK_BLOCK = 1,   /* yield the item to the block */
    UNPACK_1 = 2        /* return the item immediately */
};
948 
949 static VALUE
950 pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
951 {
952 #define hexdigits ruby_hexdigits
953  char *s, *send;
954  char *p, *pend;
955  VALUE ary, associates = Qfalse;
956  long len;
957  AVOID_CC_BUG long tmp_len;
958  int signed_p, integer_size, bigendian_p;
959 #define UNPACK_PUSH(item) do {\
960  VALUE item_val = (item);\
961  if ((mode) == UNPACK_BLOCK) {\
962  rb_yield(item_val);\
963  }\
964  else if ((mode) == UNPACK_ARRAY) {\
965  rb_ary_push(ary, item_val);\
966  }\
967  else /* if ((mode) == UNPACK_1) { */ {\
968  return item_val; \
969  }\
970  } while (0)
971 
972  StringValue(str);
973  StringValue(fmt);
974  rb_must_asciicompat(fmt);
975 
976  if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
977  len = RSTRING_LEN(str);
978  if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
979 
980  s = RSTRING_PTR(str);
981  send = s + len;
982  s += offset;
983 
984  p = RSTRING_PTR(fmt);
985  pend = p + RSTRING_LEN(fmt);
986 
987 #define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
988 
989  ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
990  while (p < pend) {
991  int explicit_endian = 0;
992  const char type = *p++;
993 #ifdef NATINT_PACK
994  int natint = 0; /* native integer */
995 #endif
996  int star = 0;
997 
998  if (skip_blank(p, type)) continue;
999  p = pack_modifiers(p, type, &natint, &explicit_endian);
1000 
1001  if (p >= pend)
1002  len = 1;
1003  else if (*p == '*') {
1004  star = 1;
1005  len = send - s;
1006  p++;
1007  }
1008  else if (ISDIGIT(*p)) {
1009  errno = 0;
1010  len = STRTOUL(p, (char**)&p, 10);
1011  if (len < 0 || errno) {
1012  rb_raise(rb_eRangeError, "pack length too big");
1013  }
1014  }
1015  else {
1016  len = (type != '@');
1017  }
1018 
1019  switch (type) {
1020  case '%':
1021  rb_raise(rb_eArgError, "%% is not supported");
1022  break;
1023 
1024  case 'A':
1025  if (len > send - s) len = send - s;
1026  {
1027  long end = len;
1028  char *t = s + len - 1;
1029 
1030  while (t >= s) {
1031  if (*t != ' ' && *t != '\0') break;
1032  t--; len--;
1033  }
1034  UNPACK_PUSH(rb_str_new(s, len));
1035  s += end;
1036  }
1037  break;
1038 
1039  case 'Z':
1040  {
1041  char *t = s;
1042 
1043  if (len > send-s) len = send-s;
1044  while (t < s+len && *t) t++;
1045  UNPACK_PUSH(rb_str_new(s, t-s));
1046  if (t < send) t++;
1047  s = star ? t : s+len;
1048  }
1049  break;
1050 
1051  case 'a':
1052  if (len > send - s) len = send - s;
1053  UNPACK_PUSH(rb_str_new(s, len));
1054  s += len;
1055  break;
1056 
1057  case 'b':
1058  {
1059  VALUE bitstr;
1060  char *t;
1061  int bits;
1062  long i;
1063 
1064  if (p[-1] == '*' || len > (send - s) * 8)
1065  len = (send - s) * 8;
1066  bits = 0;
1067  bitstr = rb_usascii_str_new(0, len);
1068  t = RSTRING_PTR(bitstr);
1069  for (i=0; i<len; i++) {
1070  if (i & 7) bits >>= 1;
1071  else bits = (unsigned char)*s++;
1072  *t++ = (bits & 1) ? '1' : '0';
1073  }
1074  UNPACK_PUSH(bitstr);
1075  }
1076  break;
1077 
1078  case 'B':
1079  {
1080  VALUE bitstr;
1081  char *t;
1082  int bits;
1083  long i;
1084 
1085  if (p[-1] == '*' || len > (send - s) * 8)
1086  len = (send - s) * 8;
1087  bits = 0;
1088  bitstr = rb_usascii_str_new(0, len);
1089  t = RSTRING_PTR(bitstr);
1090  for (i=0; i<len; i++) {
1091  if (i & 7) bits <<= 1;
1092  else bits = (unsigned char)*s++;
1093  *t++ = (bits & 128) ? '1' : '0';
1094  }
1095  UNPACK_PUSH(bitstr);
1096  }
1097  break;
1098 
1099  case 'h':
1100  {
1101  VALUE bitstr;
1102  char *t;
1103  int bits;
1104  long i;
1105 
1106  if (p[-1] == '*' || len > (send - s) * 2)
1107  len = (send - s) * 2;
1108  bits = 0;
1109  bitstr = rb_usascii_str_new(0, len);
1110  t = RSTRING_PTR(bitstr);
1111  for (i=0; i<len; i++) {
1112  if (i & 1)
1113  bits >>= 4;
1114  else
1115  bits = (unsigned char)*s++;
1116  *t++ = hexdigits[bits & 15];
1117  }
1118  UNPACK_PUSH(bitstr);
1119  }
1120  break;
1121 
1122  case 'H':
1123  {
1124  VALUE bitstr;
1125  char *t;
1126  int bits;
1127  long i;
1128 
1129  if (p[-1] == '*' || len > (send - s) * 2)
1130  len = (send - s) * 2;
1131  bits = 0;
1132  bitstr = rb_usascii_str_new(0, len);
1133  t = RSTRING_PTR(bitstr);
1134  for (i=0; i<len; i++) {
1135  if (i & 1)
1136  bits <<= 4;
1137  else
1138  bits = (unsigned char)*s++;
1139  *t++ = hexdigits[(bits >> 4) & 15];
1140  }
1141  UNPACK_PUSH(bitstr);
1142  }
1143  break;
1144 
1145  case 'c':
1146  signed_p = 1;
1147  integer_size = 1;
1148  bigendian_p = BIGENDIAN_P(); /* not effective */
1149  goto unpack_integer;
1150 
1151  case 'C':
1152  signed_p = 0;
1153  integer_size = 1;
1154  bigendian_p = BIGENDIAN_P(); /* not effective */
1155  goto unpack_integer;
1156 
1157  case 's':
1158  signed_p = 1;
1159  integer_size = NATINT_LEN(short, 2);
1160  bigendian_p = BIGENDIAN_P();
1161  goto unpack_integer;
1162 
1163  case 'S':
1164  signed_p = 0;
1165  integer_size = NATINT_LEN(short, 2);
1166  bigendian_p = BIGENDIAN_P();
1167  goto unpack_integer;
1168 
1169  case 'i':
1170  signed_p = 1;
1171  integer_size = (int)sizeof(int);
1172  bigendian_p = BIGENDIAN_P();
1173  goto unpack_integer;
1174 
1175  case 'I':
1176  signed_p = 0;
1177  integer_size = (int)sizeof(int);
1178  bigendian_p = BIGENDIAN_P();
1179  goto unpack_integer;
1180 
1181  case 'l':
1182  signed_p = 1;
1183  integer_size = NATINT_LEN(long, 4);
1184  bigendian_p = BIGENDIAN_P();
1185  goto unpack_integer;
1186 
1187  case 'L':
1188  signed_p = 0;
1189  integer_size = NATINT_LEN(long, 4);
1190  bigendian_p = BIGENDIAN_P();
1191  goto unpack_integer;
1192 
1193  case 'q':
1194  signed_p = 1;
1195  integer_size = NATINT_LEN_Q;
1196  bigendian_p = BIGENDIAN_P();
1197  goto unpack_integer;
1198 
1199  case 'Q':
1200  signed_p = 0;
1201  integer_size = NATINT_LEN_Q;
1202  bigendian_p = BIGENDIAN_P();
1203  goto unpack_integer;
1204 
1205  case 'j':
1206  signed_p = 1;
1207  integer_size = sizeof(intptr_t);
1208  bigendian_p = BIGENDIAN_P();
1209  goto unpack_integer;
1210 
1211  case 'J':
1212  signed_p = 0;
1213  integer_size = sizeof(uintptr_t);
1214  bigendian_p = BIGENDIAN_P();
1215  goto unpack_integer;
1216 
1217  case 'n':
1218  signed_p = 0;
1219  integer_size = 2;
1220  bigendian_p = 1;
1221  goto unpack_integer;
1222 
1223  case 'N':
1224  signed_p = 0;
1225  integer_size = 4;
1226  bigendian_p = 1;
1227  goto unpack_integer;
1228 
1229  case 'v':
1230  signed_p = 0;
1231  integer_size = 2;
1232  bigendian_p = 0;
1233  goto unpack_integer;
1234 
1235  case 'V':
1236  signed_p = 0;
1237  integer_size = 4;
1238  bigendian_p = 0;
1239  goto unpack_integer;
1240 
1241  unpack_integer:
1242  if (explicit_endian) {
1243  bigendian_p = explicit_endian == '>';
1244  }
1245  PACK_LENGTH_ADJUST_SIZE(integer_size);
1246  while (len-- > 0) {
1247  int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1248  VALUE val;
1249  if (signed_p)
1250  flags |= INTEGER_PACK_2COMP;
1251  val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1252  UNPACK_PUSH(val);
1253  s += integer_size;
1254  }
1255  PACK_ITEM_ADJUST();
1256  break;
1257 
1258  case 'f':
1259  case 'F':
1260  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1261  while (len-- > 0) {
1262  float tmp;
1263  UNPACK_FETCH(&tmp, float);
1264  UNPACK_PUSH(DBL2NUM((double)tmp));
1265  }
1266  PACK_ITEM_ADJUST();
1267  break;
1268 
1269  case 'e':
1270  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1271  while (len-- > 0) {
1272  FLOAT_CONVWITH(tmp);
1273  UNPACK_FETCH(tmp.buf, float);
1274  VTOHF(tmp);
1275  UNPACK_PUSH(DBL2NUM(tmp.f));
1276  }
1277  PACK_ITEM_ADJUST();
1278  break;
1279 
1280  case 'E':
1281  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1282  while (len-- > 0) {
1283  DOUBLE_CONVWITH(tmp);
1284  UNPACK_FETCH(tmp.buf, double);
1285  VTOHD(tmp);
1286  UNPACK_PUSH(DBL2NUM(tmp.d));
1287  }
1288  PACK_ITEM_ADJUST();
1289  break;
1290 
1291  case 'D':
1292  case 'd':
1293  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1294  while (len-- > 0) {
1295  double tmp;
1296  UNPACK_FETCH(&tmp, double);
1297  UNPACK_PUSH(DBL2NUM(tmp));
1298  }
1299  PACK_ITEM_ADJUST();
1300  break;
1301 
1302  case 'g':
1303  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1304  while (len-- > 0) {
1305  FLOAT_CONVWITH(tmp);
1306  UNPACK_FETCH(tmp.buf, float);
1307  NTOHF(tmp);
1308  UNPACK_PUSH(DBL2NUM(tmp.f));
1309  }
1310  PACK_ITEM_ADJUST();
1311  break;
1312 
1313  case 'G':
1314  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1315  while (len-- > 0) {
1316  DOUBLE_CONVWITH(tmp);
1317  UNPACK_FETCH(tmp.buf, double);
1318  NTOHD(tmp);
1319  UNPACK_PUSH(DBL2NUM(tmp.d));
1320  }
1321  PACK_ITEM_ADJUST();
1322  break;
1323 
1324  case 'U':
1325  if (len > send - s) len = send - s;
1326  while (len > 0 && s < send) {
1327  long alen = send - s;
1328  unsigned long l;
1329 
1330  l = utf8_to_uv(s, &alen);
1331  s += alen; len--;
1332  UNPACK_PUSH(ULONG2NUM(l));
1333  }
1334  break;
1335 
1336  case 'u':
1337  {
1338  VALUE buf = rb_str_new(0, (send - s)*3/4);
1339  char *ptr = RSTRING_PTR(buf);
1340  long total = 0;
1341 
1342  while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1343  long a,b,c,d;
1344  char hunk[3];
1345 
1346  len = ((unsigned char)*s++ - ' ') & 077;
1347 
1348  total += len;
1349  if (total > RSTRING_LEN(buf)) {
1350  len -= total - RSTRING_LEN(buf);
1351  total = RSTRING_LEN(buf);
1352  }
1353 
1354  while (len > 0) {
1355  long mlen = len > 3 ? 3 : len;
1356 
1357  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1358  a = ((unsigned char)*s++ - ' ') & 077;
1359  else
1360  a = 0;
1361  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1362  b = ((unsigned char)*s++ - ' ') & 077;
1363  else
1364  b = 0;
1365  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1366  c = ((unsigned char)*s++ - ' ') & 077;
1367  else
1368  c = 0;
1369  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1370  d = ((unsigned char)*s++ - ' ') & 077;
1371  else
1372  d = 0;
1373  hunk[0] = (char)(a << 2 | b >> 4);
1374  hunk[1] = (char)(b << 4 | c >> 2);
1375  hunk[2] = (char)(c << 6 | d);
1376  memcpy(ptr, hunk, mlen);
1377  ptr += mlen;
1378  len -= mlen;
1379  }
1380  if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1381  s++; /* possible checksum byte */
1382  if (s < send && *s == '\r') s++;
1383  if (s < send && *s == '\n') s++;
1384  }
1385 
1386  rb_str_set_len(buf, total);
1387  UNPACK_PUSH(buf);
1388  }
1389  break;
1390 
1391  case 'm':
1392  {
1393  VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1394  char *ptr = RSTRING_PTR(buf);
1395  int a = -1,b = -1,c = 0,d = 0;
1396  static signed char b64_xtable[256];
1397 
1398  if (b64_xtable['/'] <= 0) {
1399  int i;
1400 
1401  for (i = 0; i < 256; i++) {
1402  b64_xtable[i] = -1;
1403  }
1404  for (i = 0; i < 64; i++) {
1405  b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1406  }
1407  }
1408  if (len == 0) {
1409  while (s < send) {
1410  a = b = c = d = -1;
1411  a = b64_xtable[(unsigned char)*s++];
1412  if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1413  b = b64_xtable[(unsigned char)*s++];
1414  if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1415  if (*s == '=') {
1416  if (s + 2 == send && *(s + 1) == '=') break;
1417  rb_raise(rb_eArgError, "invalid base64");
1418  }
1419  c = b64_xtable[(unsigned char)*s++];
1420  if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1421  if (s + 1 == send && *s == '=') break;
1422  d = b64_xtable[(unsigned char)*s++];
1423  if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1424  *ptr++ = castchar(a << 2 | b >> 4);
1425  *ptr++ = castchar(b << 4 | c >> 2);
1426  *ptr++ = castchar(c << 6 | d);
1427  }
1428  if (c == -1) {
1429  *ptr++ = castchar(a << 2 | b >> 4);
1430  if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1431  }
1432  else if (d == -1) {
1433  *ptr++ = castchar(a << 2 | b >> 4);
1434  *ptr++ = castchar(b << 4 | c >> 2);
1435  if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1436  }
1437  }
1438  else {
1439  while (s < send) {
1440  a = b = c = d = -1;
1441  while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1442  if (s >= send) break;
1443  s++;
1444  while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1445  if (s >= send) break;
1446  s++;
1447  while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1448  if (*s == '=' || s >= send) break;
1449  s++;
1450  while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1451  if (*s == '=' || s >= send) break;
1452  s++;
1453  *ptr++ = castchar(a << 2 | b >> 4);
1454  *ptr++ = castchar(b << 4 | c >> 2);
1455  *ptr++ = castchar(c << 6 | d);
1456  a = -1;
1457  }
1458  if (a != -1 && b != -1) {
1459  if (c == -1)
1460  *ptr++ = castchar(a << 2 | b >> 4);
1461  else {
1462  *ptr++ = castchar(a << 2 | b >> 4);
1463  *ptr++ = castchar(b << 4 | c >> 2);
1464  }
1465  }
1466  }
1467  rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1468  UNPACK_PUSH(buf);
1469  }
1470  break;
1471 
1472  case 'M':
1473  {
1474  VALUE buf = rb_str_new(0, send - s);
1475  char *ptr = RSTRING_PTR(buf), *ss = s;
1476  int csum = 0;
1477  int c1, c2;
1478 
1479  while (s < send) {
1480  if (*s == '=') {
1481  if (++s == send) break;
1482  if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1483  s++;
1484  if (*s != '\n') {
1485  if ((c1 = hex2num(*s)) == -1) break;
1486  if (++s == send) break;
1487  if ((c2 = hex2num(*s)) == -1) break;
1488  csum |= *ptr++ = castchar(c1 << 4 | c2);
1489  }
1490  }
1491  else {
1492  csum |= *ptr++ = *s;
1493  }
1494  s++;
1495  ss = s;
1496  }
1497  rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1498  rb_str_buf_cat(buf, ss, send-ss);
1501  UNPACK_PUSH(buf);
1502  }
1503  break;
1504 
1505  case '@':
1506  if (len > RSTRING_LEN(str))
1507  rb_raise(rb_eArgError, "@ outside of string");
1508  s = RSTRING_PTR(str) + len;
1509  break;
1510 
1511  case 'X':
1512  if (len > s - RSTRING_PTR(str))
1513  rb_raise(rb_eArgError, "X outside of string");
1514  s -= len;
1515  break;
1516 
1517  case 'x':
1518  if (len > send - s)
1519  rb_raise(rb_eArgError, "x outside of string");
1520  s += len;
1521  break;
1522 
1523  case 'P':
1524  if (sizeof(char *) <= (size_t)(send - s)) {
1525  VALUE tmp = Qnil;
1526  char *t;
1527 
1528  UNPACK_FETCH(&t, char *);
1529  if (t) {
1530  if (!associates) associates = str_associated(str);
1531  tmp = associated_pointer(associates, t);
1532  if (len < RSTRING_LEN(tmp)) {
1533  tmp = rb_str_new(t, len);
1534  str_associate(tmp, associates);
1535  }
1536  }
1537  UNPACK_PUSH(tmp);
1538  }
1539  break;
1540 
1541  case 'p':
1542  if (len > (long)((send - s) / sizeof(char *)))
1543  len = (send - s) / sizeof(char *);
1544  while (len-- > 0) {
1545  if ((size_t)(send - s) < sizeof(char *))
1546  break;
1547  else {
1548  VALUE tmp = Qnil;
1549  char *t;
1550 
1551  UNPACK_FETCH(&t, char *);
1552  if (t) {
1553  if (!associates) associates = str_associated(str);
1554  tmp = associated_pointer(associates, t);
1555  }
1556  UNPACK_PUSH(tmp);
1557  }
1558  }
1559  break;
1560 
1561  case 'w':
1562  {
1563  char *s0 = s;
1564  while (len > 0 && s < send) {
1565  if (*s & 0x80) {
1566  s++;
1567  }
1568  else {
1569  s++;
1570  UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1571  len--;
1572  s0 = s;
1573  }
1574  }
1575  }
1576  break;
1577 
1578  default:
1579  unknown_directive("unpack", type, fmt);
1580  break;
1581  }
1582  }
1583 
1584  return ary;
1585 }
1586 
1587 static VALUE
1588 pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1589 {
1590  enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1591  return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1592 }
1593 
1594 static VALUE
1595 pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1596 {
1597  return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1598 }
1599 
1600 int
1601 rb_uv_to_utf8(char buf[6], unsigned long uv)
1602 {
1603  if (uv <= 0x7f) {
1604  buf[0] = (char)uv;
1605  return 1;
1606  }
1607  if (uv <= 0x7ff) {
1608  buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1609  buf[1] = castchar((uv&0x3f)|0x80);
1610  return 2;
1611  }
1612  if (uv <= 0xffff) {
1613  buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1614  buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1615  buf[2] = castchar((uv&0x3f)|0x80);
1616  return 3;
1617  }
1618  if (uv <= 0x1fffff) {
1619  buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1620  buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1621  buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1622  buf[3] = castchar((uv&0x3f)|0x80);
1623  return 4;
1624  }
1625  if (uv <= 0x3ffffff) {
1626  buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1627  buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1628  buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1629  buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1630  buf[4] = castchar((uv&0x3f)|0x80);
1631  return 5;
1632  }
1633  if (uv <= 0x7fffffff) {
1634  buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1635  buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1636  buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1637  buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1638  buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1639  buf[5] = castchar((uv&0x3f)|0x80);
1640  return 6;
1641  }
1642  rb_raise(rb_eRangeError, "pack(U): value out of range");
1643 
1645 }
1646 
/*
 * utf8_limits[k] is the smallest code point that may legitimately be
 * encoded as a (k + 1)-byte sequence.  utf8_to_uv() looks up the entry for
 * the decoded sequence length and rejects any smaller value as a
 * redundant (overlong) encoding.  utf8_to_uv() accepts at most six bytes,
 * so it never indexes the last entry.
 */
static const unsigned long utf8_limits[] = {
    0x0,        /* 1 byte  */
    0x80,       /* 2 bytes */
    0x800,      /* 3 bytes */
    0x10000,    /* 4 bytes */
    0x200000,   /* 5 bytes */
    0x4000000,  /* 6 bytes */
    0x80000000, /* 7 bytes */
};
1656 
1657 static unsigned long
1658 utf8_to_uv(const char *p, long *lenp)
1659 {
1660  int c = *p++ & 0xff;
1661  unsigned long uv = c;
1662  long n;
1663 
1664  if (!(uv & 0x80)) {
1665  *lenp = 1;
1666  return uv;
1667  }
1668  if (!(uv & 0x40)) {
1669  *lenp = 1;
1670  rb_raise(rb_eArgError, "malformed UTF-8 character");
1671  }
1672 
1673  if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1674  else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1675  else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1676  else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1677  else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1678  else {
1679  *lenp = 1;
1680  rb_raise(rb_eArgError, "malformed UTF-8 character");
1681  }
1682  if (n > *lenp) {
1683  rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1684  n, *lenp);
1685  }
1686  *lenp = n--;
1687  if (n != 0) {
1688  while (n--) {
1689  c = *p++ & 0xff;
1690  if ((c & 0xc0) != 0x80) {
1691  *lenp -= n + 1;
1692  rb_raise(rb_eArgError, "malformed UTF-8 character");
1693  }
1694  else {
1695  c &= 0x3f;
1696  uv = uv << 6 | c;
1697  }
1698  }
1699  }
1700  n = *lenp - 1;
1701  if (uv < utf8_limits[n]) {
1702  rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1703  }
1704  return uv;
1705 }
1706 
1707 #include "pack.rbinc"
1708 
1709 void
1710 Init_pack(void)
1711 {
1712  id_associated = rb_make_internal_id();
1713 }
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition: eval.c:916
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition: coderange.h:180
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
Definition: coderange.h:181
#define RFLOAT_VALUE
Old name of rb_float_value.
Definition: double.h:28
#define T_STRING
Old name of RUBY_T_STRING.
Definition: value_type.h:78
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition: long.h:60
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
Definition: assume.h:29
#define STRTOUL
Old name of ruby_strtoul.
Definition: ctype.h:104
#define ISDIGIT
Old name of rb_isdigit.
Definition: ctype.h:93
#define ISALPHA
Old name of rb_isalpha.
Definition: ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition: ctype.h:85
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define NIL_P
Old name of RB_NIL_P.
#define DBL2NUM
Old name of rb_float_new.
Definition: double.h:29
#define ISPRINT
Old name of rb_isprint.
Definition: ctype.h:86
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition: long.h:51
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
Definition: coderange.h:189
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition: error.c:3627
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition: error.c:1088
VALUE rb_eRangeError
RangeError exception.
Definition: error.c:1407
VALUE rb_eTypeError
TypeError exception.
Definition: error.c:1403
VALUE rb_eRuntimeError
RuntimeError exception.
Definition: error.c:1401
VALUE rb_eArgError
ArgumentError exception.
Definition: error.c:1404
VALUE rb_to_float(VALUE val)
Identical to rb_check_to_float(), except it raises on error.
Definition: object.c:3622
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
Definition: object.c:3186
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1466
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1454
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
Definition: encoding.c:971
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1478
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new(void)
Allocates a new, empty array.
Definition: array.c:747
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
Definition: array.c:1384
int rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Exports an integer into a buffer.
Definition: bignum.c:3588
size_t rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
Calculates the number of words needed represent the absolute value of the passed integer.
Definition: bignum.c:3424
VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Import an integer from a buffer.
Definition: bignum.c:3674
#define INTEGER_PACK_LITTLE_ENDIAN
Little endian combination.
Definition: bignum.h:567
#define INTEGER_PACK_BIG_ENDIAN
Big endian combination.
Definition: bignum.h:572
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
Definition: pack.c:1601
#define INTEGER_PACK_2COMP
Uses 2's complement representation.
Definition: bignum.h:549
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
Definition: string.c:2642
VALUE rb_usascii_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "US ASCII" encoding.
Definition: string.c:1026
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition: string.c:3254
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
Definition: string.c:2694
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
Definition: string.c:1020
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition: string.c:2650
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition: string.c:1627
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
Definition: string.c:1770
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
Definition: variable.c:1859
RBIMPL_ATTR_NORETURN() void rb_eof_error(void)
Utility function to raise rb_eEOFError.
char * ptr
Pointer to the underlying memory region, of at least capa bytes.
Definition: io.h:2
int len
Length of the buffer.
Definition: io.h:8
const signed char ruby_digit36_to_number_table[]
Character to number mapping like 'a' -> 10, 'b' -> 11, etc.
Definition: util.c:81
#define RB_NUM2LONG
Just another name of rb_num2long_inline.
Definition: long.h:57
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
#define RARRAY_LEN
Just another name of rb_array_len.
Definition: rarray.h:51
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
Definition: rarray.h:52
#define StringValue(v)
Ensures that the parameter object is a String.
Definition: rstring.h:66
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition: rstring.h:76
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
Definition: rstring.h:442
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
Definition: rstring.h:416
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
Definition: rstring.h:367
const char * rb_obj_classname(VALUE obj)
Queries the name of the class of the passed object.
Definition: variable.c:427
#define errno
Ractor-aware version of errno.
Definition: ruby.h:388
intptr_t SIGNED_VALUE
A signed integer type that has the same width with VALUE.
Definition: value.h:63
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition: value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition: value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition: value_type.h:376