Ruby 4.1.0dev (2026-04-17 revision 11e3c78b61da705c783dd12fb7f158c0d256ede0)
pack.c (11e3c78b61da705c783dd12fb7f158c0d256ede0)
1/**********************************************************************
2
3 pack.c -
4
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
7
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "ruby/internal/config.h"
13
14#include <ctype.h>
15#include <errno.h>
16#include <float.h>
17#include <sys/types.h>
18
19#include "internal.h"
20#include "internal/array.h"
21#include "internal/bits.h"
22#include "internal/numeric.h"
23#include "internal/string.h"
24#include "internal/symbol.h"
25#include "internal/variable.h"
26#include "ruby/util.h"
27
28#include "builtin.h"
29
/*
 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
 * instead of HAVE_LONG_LONG or LONG_LONG.
 * This means q! and Q! always mean the standard long long type and
 * cause ArgumentError on platforms which have no long long type,
 * even if the platform has an implementation specific 64bit type.
 * This behavior is consistent with the document of pack/unpack.
 */
#ifdef HAVE_TRUE_LONG_LONG
/* directives that accept the native-size modifiers '_' and '!' */
static const char natstr[] = "sSiIlLqQjJ";
/* directives that accept the endian modifiers '<' and '>' */
# define endstr natstr
#else
static const char natstr[] = "sSiIlLjJ";
static const char endstr[] = "sSiIlLqQjJ";
#endif

#ifdef HAVE_TRUE_LONG_LONG
/* It is intentional to use long long instead of LONG_LONG. */
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
#else
# define NATINT_LEN_Q 8
#endif

/*
 * NATINT_PACK is defined when at least one native integer type differs in
 * size from the fixed size used by the plain directive, i.e. when the
 * '_'/'!' modifier can actually change the packed width.
 */
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
# define NATINT_PACK
#endif
56
#ifdef DYNAMIC_ENDIAN
/* for universal binary of NEXTSTEP and MacOS X */
/* useless since autoconf 2.63? */
/*
 * Detects the byte order at run time and caches the answer.
 * Returns 1 on a big-endian host and 0 on a little-endian one.
 */
static int
is_bigendian(void)
{
    static int init = 0;
    static int endian_value;
    const char *p;

    if (init) return endian_value;
    init = 1;
    /* `init` now holds 1: its first byte is non-zero only on little-endian */
    p = (char*)&init;
    return endian_value = p[0]?0:1;
}
# define BIGENDIAN_P() (is_bigendian())
#elif defined(WORDS_BIGENDIAN)
# define BIGENDIAN_P() 1
#else
# define BIGENDIAN_P() 0
#endif
78
/*
 * NATINT_LEN(type, len): byte width to use for an integer directive.
 * With NATINT_PACK it is sizeof(type) when the local `natint` flag is set
 * and the fixed `len` otherwise; without NATINT_PACK it is sizeof(type).
 */
#ifdef NATINT_PACK
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
#else
# define NATINT_LEN(type,len) ((int)sizeof(type))
#endif
84
/* Views a float as a 32-bit integer or raw bytes for byte-order swapping. */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/* Views a double as a 64-bit integer or raw bytes for byte-order swapping. */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
#define swapf(x) swap32(x)
#define swapd(x) swap64(x)

/*
 * Conversions between host order and network (big-endian) or
 * VAX (little-endian) order; each swaps only when the host order differs.
 */
#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))

/* In-place conversion of a FLOAT_SWAPPER through its integer member. */
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
#define HTONF(x) ((x).u = rb_htonf((x).u))
#define HTOVF(x) ((x).u = rb_htovf((x).u))
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
#define VTOHF(x) ((x).u = rb_vtohf((x).u))

/* In-place conversion of a DOUBLE_SWAPPER through its integer member. */
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
#define HTOND(x) ((x).u = rb_htond((x).u))
#define HTOVD(x) ((x).u = rb_htovd((x).u))
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
#define VTOHD(x) ((x).u = rb_vtohd((x).u))

/* Largest integer_size (in bytes) the fixed-width integer directives use. */
#define MAX_INTEGER_PACK_SIZE 8
120
121static const char toofew[] = "too few arguments";
122static const char intoitself[] = "cannot pack buffer object into itself";
123
124static void encodes(VALUE,const char*,long,int,int);
125static void qpencode(VALUE,VALUE,long);
126
127static unsigned long utf8_to_uv(const char*,long*);
128
129static ID id_associated;
130
131static void
132str_associate(VALUE str, VALUE add)
133{
134 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
135 rb_ivar_set(str, id_associated, add);
136}
137
138static VALUE
139str_associated(VALUE str)
140{
141 VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
142 if (!associates)
143 rb_raise(rb_eArgError, "no associated pointer");
144 return associates;
145}
146
147static VALUE
148associated_pointer(VALUE associates, const char *t)
149{
150 const VALUE *p = RARRAY_CONST_PTR(associates);
151 const VALUE *pend = p + RARRAY_LEN(associates);
152 for (; p < pend; p++) {
153 VALUE tmp = *p;
154 if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
155 }
156 rb_raise(rb_eArgError, "non associated pointer");
158}
159
161static void
162unknown_directive(const char *mode, char type, VALUE fmt)
163{
164 char unknown[5];
165
166 if (ISPRINT(type)) {
167 unknown[0] = type;
168 unknown[1] = '\0';
169 }
170 else {
171 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
172 }
173 fmt = rb_str_quote_unprintable(fmt);
174 rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'",
175 mode, unknown, fmt);
176}
177
178static float
179VALUE_to_float(VALUE obj)
180{
181 VALUE v = rb_to_float(obj);
182 double d = RFLOAT_VALUE(v);
183
184 if (isnan(d)) {
185 return NAN;
186 }
187 else if (d < -FLT_MAX) {
188 return -INFINITY;
189 }
190 else if (d <= FLT_MAX) {
191 return d;
192 }
193 else {
194 return INFINITY;
195 }
196}
197
198static void
199str_expand_fill(VALUE res, int c, long len)
200{
201 long olen = RSTRING_LEN(res);
202 memset(RSTRING_PTR(res) + olen, c, len);
203 rb_str_set_len(res, olen + len);
204}
205
/*
 * Returns a pointer just past the first '\n' in [p, pend),
 * or `pend` when the range contains no newline.
 */
static char *
skip_to_eol(const char *p, const char *pend)
{
    p = memchr(p, '\n', pend - p);
    return (char *)(p ? p + 1 : pend);
}

/*
 * True when `type` is whitespace or starts a '#' comment; in the comment
 * case `p` is advanced to the next line. Uses `pend` from the caller's scope.
 */
#define skip_blank(p, type) \
    (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1)))
215
216#ifndef NATINT_PACK
217# define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e)
218#endif
219static char *
220pack_modifiers(const char *p, char type, int *natint, int *explicit_endian)
221{
222 while (1) {
223 switch (*p) {
224 case '_':
225 case '!':
226 if (strchr(natstr, type)) {
227#ifdef NATINT_PACK
228 *natint = 1;
229#endif
230 p++;
231 }
232 else {
233 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
234 }
235 break;
236
237 case '<':
238 case '>':
239 if (!strchr(endstr, type)) {
240 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
241 }
242 if (*explicit_endian) {
243 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
244 }
245 *explicit_endian = *p++;
246 break;
247 default:
248 return (char *)p;
249 }
250 }
251}
252
253static VALUE
254pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
255{
256 const char *p, *pend;
257 VALUE res, from, associates = 0;
258 long len, idx, plen;
259 const char *ptr;
260 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
261 int integer_size, bigendian_p;
262
263 StringValue(fmt);
265 p = RSTRING_PTR(fmt);
266 pend = p + RSTRING_LEN(fmt);
267
268 if (NIL_P(buffer)) {
269 res = rb_str_buf_new(0);
270 }
271 else {
272 if (!RB_TYPE_P(buffer, T_STRING))
273 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
274 rb_str_modify(buffer);
275 res = buffer;
276 }
277
278 idx = 0;
279
280#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
281#define MORE_ITEM (idx < RARRAY_LEN(ary))
282#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
283#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
284#define NOT_BUFFER(val) (((val) == res) ? rb_raise(rb_eArgError, intoitself) : (void)0)
285#define STR_FROM(val) NOT_BUFFER(StringValue(val))
286
287 while (p < pend) {
288 int explicit_endian = 0;
289 if (RSTRING_END(fmt) != pend) {
290 rb_raise(rb_eRuntimeError, "format string modified");
291 }
292 const char type = *p++; /* get data type */
293#ifdef NATINT_PACK
294 int natint = 0; /* native integer */
295#endif
296
297 if (skip_blank(p, type)) continue;
298 p = pack_modifiers(p, type, &natint, &explicit_endian);
299
300 if (*p == '*') { /* set data length */
301 len = strchr("@Xxu", type) ? 0
302 : strchr("PMm", type) ? 1
303 : RARRAY_LEN(ary) - idx;
304 p++;
305 }
306 else if (ISDIGIT(*p)) {
307 errno = 0;
308 len = STRTOUL(p, (char**)&p, 10);
309 if (len < 0 || errno) {
310 rb_raise(rb_eRangeError, "pack length too big");
311 }
312 }
313 else {
314 len = 1;
315 }
316
317 switch (type) {
318 case 'U':
319 /* if encoding is US-ASCII, upgrade to UTF-8 */
320 if (enc_info == 1) enc_info = 2;
321 break;
322 case 'm': case 'M': case 'u':
323 /* keep US-ASCII (do nothing) */
324 break;
325 default:
326 /* fall back to BINARY */
327 enc_info = 0;
328 break;
329 }
330 switch (type) {
331 case 'A': case 'a': case 'Z':
332 case 'B': case 'b':
333 case 'H': case 'h':
334 from = NEXTFROM;
335 if (NIL_P(from)) {
336 ptr = "";
337 plen = 0;
338 }
339 else {
340 STR_FROM(from);
341 ptr = RSTRING_PTR(from);
342 plen = RSTRING_LEN(from);
343 }
344
345 if (p[-1] == '*')
346 len = plen;
347
348 switch (type) {
349 case 'a': /* arbitrary binary string (null padded) */
350 case 'A': /* arbitrary binary string (ASCII space padded) */
351 case 'Z': /* null terminated string */
352 if (plen >= len) {
353 rb_str_buf_cat(res, ptr, len);
354 if (p[-1] == '*' && type == 'Z')
355 rb_str_buf_cat(res, "", 1);
356 }
357 else {
359 rb_str_buf_cat(res, ptr, plen);
360 str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen);
361 }
362 break;
363
364#define castchar(from) (char)((from) & 0xff)
365
366 case 'b': /* bit string (ascending) */
367 {
368 int byte = 0;
369 long i, j = 0;
370
371 if (len > plen) {
372 j = (len - plen + 1)/2;
373 len = plen;
374 }
375 for (i=0; i++ < len; ptr++) {
376 if (*ptr & 1)
377 byte |= 128;
378 if (i & 7)
379 byte >>= 1;
380 else {
381 char c = castchar(byte);
382 rb_str_buf_cat(res, &c, 1);
383 byte = 0;
384 }
385 }
386 if (len & 7) {
387 char c;
388 byte >>= 7 - (len & 7);
389 c = castchar(byte);
390 rb_str_buf_cat(res, &c, 1);
391 }
392 len = j;
393 goto grow;
394 }
395 break;
396
397 case 'B': /* bit string (descending) */
398 {
399 int byte = 0;
400 long i, j = 0;
401
402 if (len > plen) {
403 j = (len - plen + 1)/2;
404 len = plen;
405 }
406 for (i=0; i++ < len; ptr++) {
407 byte |= *ptr & 1;
408 if (i & 7)
409 byte <<= 1;
410 else {
411 char c = castchar(byte);
412 rb_str_buf_cat(res, &c, 1);
413 byte = 0;
414 }
415 }
416 if (len & 7) {
417 char c;
418 byte <<= 7 - (len & 7);
419 c = castchar(byte);
420 rb_str_buf_cat(res, &c, 1);
421 }
422 len = j;
423 goto grow;
424 }
425 break;
426
427 case 'h': /* hex string (low nibble first) */
428 {
429 int byte = 0;
430 long i, j = 0;
431
432 if (len > plen) {
433 j = (len + 1) / 2 - (plen + 1) / 2;
434 len = plen;
435 }
436 for (i=0; i++ < len; ptr++) {
437 if (ISALPHA(*ptr))
438 byte |= (((*ptr & 15) + 9) & 15) << 4;
439 else
440 byte |= (*ptr & 15) << 4;
441 if (i & 1)
442 byte >>= 4;
443 else {
444 char c = castchar(byte);
445 rb_str_buf_cat(res, &c, 1);
446 byte = 0;
447 }
448 }
449 if (len & 1) {
450 char c = castchar(byte);
451 rb_str_buf_cat(res, &c, 1);
452 }
453 len = j;
454 goto grow;
455 }
456 break;
457
458 case 'H': /* hex string (high nibble first) */
459 {
460 int byte = 0;
461 long i, j = 0;
462
463 if (len > plen) {
464 j = (len + 1) / 2 - (plen + 1) / 2;
465 len = plen;
466 }
467 for (i=0; i++ < len; ptr++) {
468 if (ISALPHA(*ptr))
469 byte |= ((*ptr & 15) + 9) & 15;
470 else
471 byte |= *ptr & 15;
472 if (i & 1)
473 byte <<= 4;
474 else {
475 char c = castchar(byte);
476 rb_str_buf_cat(res, &c, 1);
477 byte = 0;
478 }
479 }
480 if (len & 1) {
481 char c = castchar(byte);
482 rb_str_buf_cat(res, &c, 1);
483 }
484 len = j;
485 goto grow;
486 }
487 break;
488 }
489 break;
490
491 case 'c': /* signed char */
492 case 'C': /* unsigned char */
493 integer_size = 1;
494 bigendian_p = BIGENDIAN_P(); /* not effective */
495 goto pack_integer;
496
497 case 's': /* s for int16_t, s! for signed short */
498 case 'S': /* S for uint16_t, S! for unsigned short */
499 integer_size = NATINT_LEN(short, 2);
500 bigendian_p = BIGENDIAN_P();
501 goto pack_integer;
502
503 case 'i': /* i and i! for signed int */
504 case 'I': /* I and I! for unsigned int */
505 integer_size = (int)sizeof(int);
506 bigendian_p = BIGENDIAN_P();
507 goto pack_integer;
508
509 case 'l': /* l for int32_t, l! for signed long */
510 case 'L': /* L for uint32_t, L! for unsigned long */
511 integer_size = NATINT_LEN(long, 4);
512 bigendian_p = BIGENDIAN_P();
513 goto pack_integer;
514
515 case 'q': /* q for int64_t, q! for signed long long */
516 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
517 integer_size = NATINT_LEN_Q;
518 bigendian_p = BIGENDIAN_P();
519 goto pack_integer;
520
521 case 'j': /* j for intptr_t */
522 integer_size = sizeof(intptr_t);
523 bigendian_p = BIGENDIAN_P();
524 goto pack_integer;
525
526 case 'J': /* J for uintptr_t */
527 integer_size = sizeof(uintptr_t);
528 bigendian_p = BIGENDIAN_P();
529 goto pack_integer;
530
531 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
532 integer_size = 2;
533 bigendian_p = 1;
534 goto pack_integer;
535
536 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
537 integer_size = 4;
538 bigendian_p = 1;
539 goto pack_integer;
540
541 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
542 integer_size = 2;
543 bigendian_p = 0;
544 goto pack_integer;
545
546 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
547 integer_size = 4;
548 bigendian_p = 0;
549 goto pack_integer;
550
551 pack_integer:
552 if (explicit_endian) {
553 bigendian_p = explicit_endian == '>';
554 }
555 if (integer_size > MAX_INTEGER_PACK_SIZE)
556 rb_bug("unexpected integer size for pack: %d", integer_size);
557 while (len-- > 0) {
558 char intbuf[MAX_INTEGER_PACK_SIZE];
559
560 from = NEXTFROM;
561 rb_integer_pack(from, intbuf, integer_size, 1, 0,
564 rb_str_buf_cat(res, intbuf, integer_size);
565 }
566 break;
567
568 case 'f': /* single precision float in native format */
569 case 'F': /* ditto */
570 while (len-- > 0) {
571 float f;
572
573 from = NEXTFROM;
574 f = VALUE_to_float(from);
575 rb_str_buf_cat(res, (char*)&f, sizeof(float));
576 }
577 break;
578
579 case 'e': /* single precision float in VAX byte-order */
580 while (len-- > 0) {
581 FLOAT_CONVWITH(tmp);
582
583 from = NEXTFROM;
584 tmp.f = VALUE_to_float(from);
585 HTOVF(tmp);
586 rb_str_buf_cat(res, tmp.buf, sizeof(float));
587 }
588 break;
589
590 case 'E': /* double precision float in VAX byte-order */
591 while (len-- > 0) {
592 DOUBLE_CONVWITH(tmp);
593 from = NEXTFROM;
594 tmp.d = RFLOAT_VALUE(rb_to_float(from));
595 HTOVD(tmp);
596 rb_str_buf_cat(res, tmp.buf, sizeof(double));
597 }
598 break;
599
600 case 'd': /* double precision float in native format */
601 case 'D': /* ditto */
602 while (len-- > 0) {
603 double d;
604
605 from = NEXTFROM;
606 d = RFLOAT_VALUE(rb_to_float(from));
607 rb_str_buf_cat(res, (char*)&d, sizeof(double));
608 }
609 break;
610
611 case 'g': /* single precision float in network byte-order */
612 while (len-- > 0) {
613 FLOAT_CONVWITH(tmp);
614 from = NEXTFROM;
615 tmp.f = VALUE_to_float(from);
616 HTONF(tmp);
617 rb_str_buf_cat(res, tmp.buf, sizeof(float));
618 }
619 break;
620
621 case 'G': /* double precision float in network byte-order */
622 while (len-- > 0) {
623 DOUBLE_CONVWITH(tmp);
624
625 from = NEXTFROM;
626 tmp.d = RFLOAT_VALUE(rb_to_float(from));
627 HTOND(tmp);
628 rb_str_buf_cat(res, tmp.buf, sizeof(double));
629 }
630 break;
631
632 case 'x': /* null byte */
633 grow:
635 str_expand_fill(res, '\0', len);
636 break;
637
638 case 'X': /* back up byte */
639 shrink:
640 plen = RSTRING_LEN(res);
641 if (plen < len)
642 rb_raise(rb_eArgError, "X outside of string");
643 rb_str_set_len(res, plen - len);
644 break;
645
646 case '@': /* null fill to absolute position */
647 len -= RSTRING_LEN(res);
648 if (len > 0) goto grow;
649 len = -len;
650 if (len > 0) goto shrink;
651 break;
652
653 case '%':
654 rb_raise(rb_eArgError, "%% is not supported");
655 break;
656
657 case 'U': /* Unicode character */
658 while (len-- > 0) {
659 SIGNED_VALUE l;
660 char buf[8];
661 int le;
662
663 from = NEXTFROM;
664 from = rb_to_int(from);
665 l = NUM2LONG(from);
666 if (l < 0) {
667 rb_raise(rb_eRangeError, "pack(U): value out of range");
668 }
669 le = rb_uv_to_utf8(buf, l);
670 rb_str_buf_cat(res, (char*)buf, le);
671 }
672 break;
673
674 case 'r': /* r for SLEB128 encoding (signed) */
675 case 'R': /* R for ULEB128 encoding (unsigned) */
676 {
677 int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
678
679 if (type == 'r') {
680 pack_flags |= INTEGER_PACK_2COMP;
681 }
682
683 while (len-- > 0) {
684 size_t numbytes, nlz_bits;
685 int sign, extra = 0;
686 char *cp;
687 const long start = RSTRING_LEN(res);
688
689 from = NEXTFROM;
690 from = rb_to_int(from);
691 if (type == 'R' && rb_int_negative_p(from)) {
692 rb_raise(rb_eArgError, "can't encode negative numbers in ULEB128");
693 }
694
695 numbytes = rb_absint_numwords(from, 7, &nlz_bits);
696 if (numbytes == 0) {
697 numbytes = 1;
698 }
699 else if (nlz_bits == 0 && type == 'r') {
700 /* No leading zero bits, we need an extra byte for sign extension */
701 extra = 1;
702 }
703 rb_str_modify_expand(res, numbytes + extra);
704
705 cp = RSTRING_PTR(res) + start;
706 sign = rb_integer_pack(from, cp, numbytes, 1, 1, pack_flags);
707
708 if (extra) {
709 /* Need an extra byte */
710 cp[numbytes++] = sign < 0 ? 0x7f : 0x00;
711 }
712 rb_str_set_len(res, start + numbytes);
713
714 while (1 < numbytes) {
715 *cp |= 0x80;
716 cp++;
717 numbytes--;
718 }
719 }
720 }
721 break;
722 case 'u': /* uuencoded string */
723 case 'm': /* base64 encoded string */
724 from = NEXTFROM;
725 STR_FROM(from);
726 ptr = RSTRING_PTR(from);
727 plen = RSTRING_LEN(from);
728
729 if (len == 0 && type == 'm') {
730 encodes(res, ptr, plen, type, 0);
731 ptr += plen;
732 break;
733 }
734 if (len <= 2)
735 len = 45;
736 else if (len > 63 && type == 'u')
737 len = 63;
738 else
739 len = len / 3 * 3;
740 while (plen > 0) {
741 long todo;
742
743 if (plen > len)
744 todo = len;
745 else
746 todo = plen;
747 encodes(res, ptr, todo, type, 1);
748 plen -= todo;
749 ptr += todo;
750 }
751 break;
752
753 case 'M': /* quoted-printable encoded string */
754 from = rb_obj_as_string(NEXTFROM);
755 NOT_BUFFER(from);
756 if (len <= 1)
757 len = 72;
758 qpencode(res, from, len);
759 break;
760
761 case 'P': /* pointer to packed byte string */
762 from = THISFROM;
763 if (!NIL_P(from)) {
764 STR_FROM(from);
765 if (RSTRING_LEN(from) < len) {
766 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
767 RSTRING_LEN(from), len);
768 }
769 }
770 len = 1;
771 /* FALL THROUGH */
772 case 'p': /* pointer to string */
773 while (len-- > 0) {
774 const char *t = 0;
775 from = NEXTFROM;
776 if (!NIL_P(from)) {
777 STR_FROM(from);
778 t = RSTRING_PTR(from);
779 }
780 if (!associates) {
781 associates = rb_ary_new();
782 }
783 rb_ary_push(associates, from);
784 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
785 }
786 break;
787
788 case 'w': /* BER compressed integer */
789 while (len-- > 0) {
790 VALUE buf;
791 size_t numbytes;
792 int sign;
793 char *cp;
794
795 from = NEXTFROM;
796 from = rb_to_int(from);
797 numbytes = rb_absint_numwords(from, 7, NULL);
798 if (numbytes == 0)
799 numbytes = 1;
800 buf = rb_str_new(NULL, numbytes);
801
802 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
803
804 if (sign < 0)
805 rb_raise(rb_eArgError, "can't compress negative numbers");
806 if (sign == 2)
807 rb_bug("buffer size problem?");
808
809 cp = RSTRING_PTR(buf);
810 while (1 < numbytes) {
811 *cp |= 0x80;
812 cp++;
813 numbytes--;
814 }
815
816 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
817 }
818 break;
819
820 default: {
821 unknown_directive("pack", type, fmt);
822 break;
823 }
824 }
825 }
826
827 if (associates) {
828 str_associate(res, associates);
829 }
830 switch (enc_info) {
831 case 1:
832 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
833 break;
834 case 2:
835 rb_enc_set_index(res, rb_utf8_encindex());
836 break;
837 default:
838 /* do nothing, keep ASCII-8BIT */
839 break;
840 }
841 return res;
842}
843
844VALUE
845rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
846{
847 return pack_pack(ec, ary, fmt, buffer);
848}
849
850static const char uu_table[] =
851"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
852static const char b64_table[] =
853"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
854
855static void
856encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
857{
858 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
859 char buff[buff_size + 1]; /* +1 for tail_lf */
860 long i = 0;
861 const char *const trans = type == 'u' ? uu_table : b64_table;
862 char padding;
863 const unsigned char *s = (const unsigned char *)s0;
864
865 if (type == 'u') {
866 buff[i++] = (char)len + ' ';
867 padding = '`';
868 }
869 else {
870 padding = '=';
871 }
872 while (len >= input_unit) {
873 while (len >= input_unit && buff_size-i >= encoded_unit) {
874 buff[i++] = trans[077 & (*s >> 2)];
875 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
876 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
877 buff[i++] = trans[077 & s[2]];
878 s += input_unit;
879 len -= input_unit;
880 }
881 if (buff_size-i < encoded_unit) {
882 rb_str_buf_cat(str, buff, i);
883 i = 0;
884 }
885 }
886
887 if (len == 2) {
888 buff[i++] = trans[077 & (*s >> 2)];
889 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
890 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
891 buff[i++] = padding;
892 }
893 else if (len == 1) {
894 buff[i++] = trans[077 & (*s >> 2)];
895 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
896 buff[i++] = padding;
897 buff[i++] = padding;
898 }
899 if (tail_lf) buff[i++] = '\n';
900 rb_str_buf_cat(str, buff, i);
901 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
902}
903
904static const char hex_table[] = "0123456789ABCDEF";
905
906static void
907qpencode(VALUE str, VALUE from, long len)
908{
909 char buff[1024];
910 long i = 0, n = 0, prev = EOF;
911 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
912 unsigned char *send = s + RSTRING_LEN(from);
913
914 while (s < send) {
915 if ((*s > 126) ||
916 (*s < 32 && *s != '\n' && *s != '\t') ||
917 (*s == '=')) {
918 buff[i++] = '=';
919 buff[i++] = hex_table[*s >> 4];
920 buff[i++] = hex_table[*s & 0x0f];
921 n += 3;
922 prev = EOF;
923 }
924 else if (*s == '\n') {
925 if (prev == ' ' || prev == '\t') {
926 buff[i++] = '=';
927 buff[i++] = *s;
928 }
929 buff[i++] = *s;
930 n = 0;
931 prev = *s;
932 }
933 else {
934 buff[i++] = *s;
935 n++;
936 prev = *s;
937 }
938 if (n > len) {
939 buff[i++] = '=';
940 buff[i++] = '\n';
941 n = 0;
942 prev = '\n';
943 }
944 if (i > 1024 - 5) {
945 rb_str_buf_cat(str, buff, i);
946 i = 0;
947 }
948 s++;
949 }
950 if (n > 0) {
951 buff[i++] = '=';
952 buff[i++] = '\n';
953 }
954 if (i > 0) {
955 rb_str_buf_cat(str, buff, i);
956 }
957}
958
959static inline int
960hex2num(char c)
961{
962 int n;
963 n = ruby_digit36_to_number_table[(unsigned char)c];
964 if (16 <= n)
965 n = -1;
966 return n;
967}
968
/*
 * Clamps `len` to the number of `sz`-byte items left in [s, send).
 * Unless `star` is set, `tmp_len` receives the number of requested items
 * that could not be satisfied; otherwise it is 0.
 * Uses len, tmp_len, star, s and send from the caller's scope.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {	\
    tmp_len = 0;				\
    if (len > (long)((send-s)/(sz))) {		\
        if (!star) {				\
            tmp_len = len-(send-s)/(sz);	\
        }					\
        len = (send-s)/(sz);			\
    }						\
} while (0)

/*
 * In UNPACK_ARRAY mode, extends `ary` by `tmp_len` elements by storing nil
 * at the new last index.
 */
#define PACK_ITEM_ADJUST() do { \
    if (tmp_len > 0 && mode == UNPACK_ARRAY) \
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
983
/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
 * 12.4/12.5/12.6 C compiler optimization bug
 * with "-xO4" optimization option.
 */
#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
# define AVOID_CC_BUG volatile
#else
# define AVOID_CC_BUG
#endif
993
/* How unpacked items are delivered (see UNPACK_PUSH). */
enum unpack_mode {
    UNPACK_ARRAY,   /* pushed onto the result array */
    UNPACK_BLOCK,   /* yielded to the block */
    UNPACK_1        /* the first item is returned immediately */
};
999
1000static VALUE
1001pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
1002{
1003#define hexdigits ruby_hexdigits
1004 const char *s, *send;
1005 const char *p, *pend;
1006 VALUE ary, associates = Qfalse;
1007 long len;
1008 AVOID_CC_BUG long tmp_len;
1009 int signed_p, integer_size, bigendian_p;
1010#define UNPACK_PUSH(item) do {\
1011 VALUE item_val = (item);\
1012 if ((mode) == UNPACK_BLOCK) {\
1013 rb_yield(item_val);\
1014 }\
1015 else if ((mode) == UNPACK_ARRAY) {\
1016 rb_ary_push(ary, item_val);\
1017 }\
1018 else /* if ((mode) == UNPACK_1) { */ {\
1019 return item_val; \
1020 }\
1021 } while (0)
1022
1023 StringValue(str);
1024 StringValue(fmt);
1026
1027 if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
1028 len = RSTRING_LEN(str);
1029 if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
1030
1031 s = RSTRING_PTR(str);
1032 send = s + len;
1033 s += offset;
1034
1035 p = RSTRING_PTR(fmt);
1036 pend = p + RSTRING_LEN(fmt);
1037
1038#define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
1039
1040 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
1041 while (p < pend) {
1042 int explicit_endian = 0;
1043 const char type = *p++;
1044#ifdef NATINT_PACK
1045 int natint = 0; /* native integer */
1046#endif
1047 int star = 0;
1048
1049 if (skip_blank(p, type)) continue;
1050 p = pack_modifiers(p, type, &natint, &explicit_endian);
1051
1052 if (p >= pend)
1053 len = 1;
1054 else if (*p == '*') {
1055 star = 1;
1056 len = send - s;
1057 p++;
1058 }
1059 else if (ISDIGIT(*p)) {
1060 errno = 0;
1061 len = STRTOUL(p, (char**)&p, 10);
1062 if (len < 0 || errno) {
1063 rb_raise(rb_eRangeError, "pack length too big");
1064 }
1065 }
1066 else {
1067 len = (type != '@');
1068 }
1069
1070 switch (type) {
1071 case '%':
1072 rb_raise(rb_eArgError, "%% is not supported");
1073 break;
1074
1075 case 'A':
1076 if (len > send - s) len = send - s;
1077 {
1078 long end = len;
1079 const char *t = s + len - 1;
1080
1081 while (t >= s) {
1082 if (*t != ' ' && *t != '\0') break;
1083 t--; len--;
1084 }
1085 UNPACK_PUSH(rb_str_new(s, len));
1086 s += end;
1087 }
1088 break;
1089
1090 case 'Z':
1091 {
1092 const char *t = s;
1093
1094 if (len > send-s) len = send-s;
1095 while (t < s+len && *t) t++;
1096 UNPACK_PUSH(rb_str_new(s, t-s));
1097 if (t < send) t++;
1098 s = star ? t : s+len;
1099 }
1100 break;
1101
1102 case 'a':
1103 if (len > send - s) len = send - s;
1104 UNPACK_PUSH(rb_str_new(s, len));
1105 s += len;
1106 break;
1107
1108 case 'b':
1109 {
1110 VALUE bitstr;
1111 char *t;
1112 int bits;
1113 long i;
1114
1115 if (p[-1] == '*' || len > (send - s) * 8)
1116 len = (send - s) * 8;
1117 bits = 0;
1118 bitstr = rb_usascii_str_new(0, len);
1119 t = RSTRING_PTR(bitstr);
1120 for (i=0; i<len; i++) {
1121 if (i & 7) bits >>= 1;
1122 else bits = (unsigned char)*s++;
1123 *t++ = (bits & 1) ? '1' : '0';
1124 }
1125 UNPACK_PUSH(bitstr);
1126 }
1127 break;
1128
1129 case 'B':
1130 {
1131 VALUE bitstr;
1132 char *t;
1133 int bits;
1134 long i;
1135
1136 if (p[-1] == '*' || len > (send - s) * 8)
1137 len = (send - s) * 8;
1138 bits = 0;
1139 bitstr = rb_usascii_str_new(0, len);
1140 t = RSTRING_PTR(bitstr);
1141 for (i=0; i<len; i++) {
1142 if (i & 7) bits <<= 1;
1143 else bits = (unsigned char)*s++;
1144 *t++ = (bits & 128) ? '1' : '0';
1145 }
1146 UNPACK_PUSH(bitstr);
1147 }
1148 break;
1149
1150 case 'h':
1151 {
1152 VALUE bitstr;
1153 char *t;
1154 int bits;
1155 long i;
1156
1157 if (p[-1] == '*' || len > (send - s) * 2)
1158 len = (send - s) * 2;
1159 bits = 0;
1160 bitstr = rb_usascii_str_new(0, len);
1161 t = RSTRING_PTR(bitstr);
1162 for (i=0; i<len; i++) {
1163 if (i & 1)
1164 bits >>= 4;
1165 else
1166 bits = (unsigned char)*s++;
1167 *t++ = hexdigits[bits & 15];
1168 }
1169 UNPACK_PUSH(bitstr);
1170 }
1171 break;
1172
1173 case 'H':
1174 {
1175 VALUE bitstr;
1176 char *t;
1177 int bits;
1178 long i;
1179
1180 if (p[-1] == '*' || len > (send - s) * 2)
1181 len = (send - s) * 2;
1182 bits = 0;
1183 bitstr = rb_usascii_str_new(0, len);
1184 t = RSTRING_PTR(bitstr);
1185 for (i=0; i<len; i++) {
1186 if (i & 1)
1187 bits <<= 4;
1188 else
1189 bits = (unsigned char)*s++;
1190 *t++ = hexdigits[(bits >> 4) & 15];
1191 }
1192 UNPACK_PUSH(bitstr);
1193 }
1194 break;
1195
1196 case 'c':
1197 signed_p = 1;
1198 integer_size = 1;
1199 bigendian_p = BIGENDIAN_P(); /* not effective */
1200 goto unpack_integer;
1201
1202 case 'C':
1203 signed_p = 0;
1204 integer_size = 1;
1205 bigendian_p = BIGENDIAN_P(); /* not effective */
1206 goto unpack_integer;
1207
1208 case 's':
1209 signed_p = 1;
1210 integer_size = NATINT_LEN(short, 2);
1211 bigendian_p = BIGENDIAN_P();
1212 goto unpack_integer;
1213
1214 case 'S':
1215 signed_p = 0;
1216 integer_size = NATINT_LEN(short, 2);
1217 bigendian_p = BIGENDIAN_P();
1218 goto unpack_integer;
1219
1220 case 'i':
1221 signed_p = 1;
1222 integer_size = (int)sizeof(int);
1223 bigendian_p = BIGENDIAN_P();
1224 goto unpack_integer;
1225
1226 case 'I':
1227 signed_p = 0;
1228 integer_size = (int)sizeof(int);
1229 bigendian_p = BIGENDIAN_P();
1230 goto unpack_integer;
1231
1232 case 'l':
1233 signed_p = 1;
1234 integer_size = NATINT_LEN(long, 4);
1235 bigendian_p = BIGENDIAN_P();
1236 goto unpack_integer;
1237
1238 case 'L':
1239 signed_p = 0;
1240 integer_size = NATINT_LEN(long, 4);
1241 bigendian_p = BIGENDIAN_P();
1242 goto unpack_integer;
1243
1244 case 'q':
1245 signed_p = 1;
1246 integer_size = NATINT_LEN_Q;
1247 bigendian_p = BIGENDIAN_P();
1248 goto unpack_integer;
1249
1250 case 'Q':
1251 signed_p = 0;
1252 integer_size = NATINT_LEN_Q;
1253 bigendian_p = BIGENDIAN_P();
1254 goto unpack_integer;
1255
1256 case 'j':
1257 signed_p = 1;
1258 integer_size = sizeof(intptr_t);
1259 bigendian_p = BIGENDIAN_P();
1260 goto unpack_integer;
1261
1262 case 'J':
1263 signed_p = 0;
1264 integer_size = sizeof(uintptr_t);
1265 bigendian_p = BIGENDIAN_P();
1266 goto unpack_integer;
1267
1268 case 'n':
1269 signed_p = 0;
1270 integer_size = 2;
1271 bigendian_p = 1;
1272 goto unpack_integer;
1273
1274 case 'N':
1275 signed_p = 0;
1276 integer_size = 4;
1277 bigendian_p = 1;
1278 goto unpack_integer;
1279
1280 case 'v':
1281 signed_p = 0;
1282 integer_size = 2;
1283 bigendian_p = 0;
1284 goto unpack_integer;
1285
1286 case 'V':
1287 signed_p = 0;
1288 integer_size = 4;
1289 bigendian_p = 0;
1290 goto unpack_integer;
1291
1292 unpack_integer:
1293 if (explicit_endian) {
1294 bigendian_p = explicit_endian == '>';
1295 }
1296 PACK_LENGTH_ADJUST_SIZE(integer_size);
1297 while (len-- > 0) {
1298 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1299 VALUE val;
1300 if (signed_p)
1301 flags |= INTEGER_PACK_2COMP;
1302 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1303 UNPACK_PUSH(val);
1304 s += integer_size;
1305 }
1306 PACK_ITEM_ADJUST();
1307 break;
1308
1309 case 'f':
1310 case 'F':
1311 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1312 while (len-- > 0) {
1313 float tmp;
1314 UNPACK_FETCH(&tmp, float);
1315 UNPACK_PUSH(DBL2NUM((double)tmp));
1316 }
1317 PACK_ITEM_ADJUST();
1318 break;
1319
1320 case 'e':
1321 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1322 while (len-- > 0) {
1323 FLOAT_CONVWITH(tmp);
1324 UNPACK_FETCH(tmp.buf, float);
1325 VTOHF(tmp);
1326 UNPACK_PUSH(DBL2NUM(tmp.f));
1327 }
1328 PACK_ITEM_ADJUST();
1329 break;
1330
1331 case 'E':
1332 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1333 while (len-- > 0) {
1334 DOUBLE_CONVWITH(tmp);
1335 UNPACK_FETCH(tmp.buf, double);
1336 VTOHD(tmp);
1337 UNPACK_PUSH(DBL2NUM(tmp.d));
1338 }
1339 PACK_ITEM_ADJUST();
1340 break;
1341
1342 case 'D':
1343 case 'd':
1344 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1345 while (len-- > 0) {
1346 double tmp;
1347 UNPACK_FETCH(&tmp, double);
1348 UNPACK_PUSH(DBL2NUM(tmp));
1349 }
1350 PACK_ITEM_ADJUST();
1351 break;
1352
1353 case 'g':
1354 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1355 while (len-- > 0) {
1356 FLOAT_CONVWITH(tmp);
1357 UNPACK_FETCH(tmp.buf, float);
1358 NTOHF(tmp);
1359 UNPACK_PUSH(DBL2NUM(tmp.f));
1360 }
1361 PACK_ITEM_ADJUST();
1362 break;
1363
1364 case 'G':
1365 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1366 while (len-- > 0) {
1367 DOUBLE_CONVWITH(tmp);
1368 UNPACK_FETCH(tmp.buf, double);
1369 NTOHD(tmp);
1370 UNPACK_PUSH(DBL2NUM(tmp.d));
1371 }
1372 PACK_ITEM_ADJUST();
1373 break;
1374
1375 case 'U':
1376 if (len > send - s) len = send - s;
1377 while (len > 0 && s < send) {
1378 long alen = send - s;
1379 unsigned long l;
1380
1381 l = utf8_to_uv(s, &alen);
1382 s += alen; len--;
1383 UNPACK_PUSH(ULONG2NUM(l));
1384 }
1385 break;
1386
1387 case 'u':
1388 {
1389 VALUE buf = rb_str_new(0, (send - s)*3/4);
1390 char *ptr = RSTRING_PTR(buf);
1391 long total = 0;
1392
1393 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1394 long a,b,c,d;
1395 char hunk[3];
1396
1397 len = ((unsigned char)*s++ - ' ') & 077;
1398
1399 total += len;
1400 if (total > RSTRING_LEN(buf)) {
1401 len -= total - RSTRING_LEN(buf);
1402 total = RSTRING_LEN(buf);
1403 }
1404
1405 while (len > 0) {
1406 long mlen = len > 3 ? 3 : len;
1407
1408 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1409 a = ((unsigned char)*s++ - ' ') & 077;
1410 else
1411 a = 0;
1412 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1413 b = ((unsigned char)*s++ - ' ') & 077;
1414 else
1415 b = 0;
1416 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1417 c = ((unsigned char)*s++ - ' ') & 077;
1418 else
1419 c = 0;
1420 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1421 d = ((unsigned char)*s++ - ' ') & 077;
1422 else
1423 d = 0;
1424 hunk[0] = (char)(a << 2 | b >> 4);
1425 hunk[1] = (char)(b << 4 | c >> 2);
1426 hunk[2] = (char)(c << 6 | d);
1427 memcpy(ptr, hunk, mlen);
1428 ptr += mlen;
1429 len -= mlen;
1430 }
1431 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1432 s++; /* possible checksum byte */
1433 if (s < send && *s == '\r') s++;
1434 if (s < send && *s == '\n') s++;
1435 }
1436
1437 rb_str_set_len(buf, total);
1438 UNPACK_PUSH(buf);
1439 }
1440 break;
1441
1442 case 'm':
1443 {
1444 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1445 char *ptr = RSTRING_PTR(buf);
1446 int a = -1,b = -1,c = 0,d = 0;
1447 static signed char b64_xtable[256];
1448
1449 if (b64_xtable['/'] <= 0) {
1450 int i;
1451
1452 for (i = 0; i < 256; i++) {
1453 b64_xtable[i] = -1;
1454 }
1455 for (i = 0; i < 64; i++) {
1456 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1457 }
1458 }
1459 if (len == 0) {
1460 while (s < send) {
1461 a = b = c = d = -1;
1462 a = b64_xtable[(unsigned char)*s++];
1463 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1464 b = b64_xtable[(unsigned char)*s++];
1465 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1466 if (*s == '=') {
1467 if (s + 2 == send && *(s + 1) == '=') break;
1468 rb_raise(rb_eArgError, "invalid base64");
1469 }
1470 c = b64_xtable[(unsigned char)*s++];
1471 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1472 if (s + 1 == send && *s == '=') break;
1473 d = b64_xtable[(unsigned char)*s++];
1474 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1475 *ptr++ = castchar(a << 2 | b >> 4);
1476 *ptr++ = castchar(b << 4 | c >> 2);
1477 *ptr++ = castchar(c << 6 | d);
1478 }
1479 if (c == -1) {
1480 *ptr++ = castchar(a << 2 | b >> 4);
1481 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1482 }
1483 else if (d == -1) {
1484 *ptr++ = castchar(a << 2 | b >> 4);
1485 *ptr++ = castchar(b << 4 | c >> 2);
1486 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1487 }
1488 }
1489 else {
1490 while (s < send) {
1491 a = b = c = d = -1;
1492 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1493 if (s >= send) break;
1494 s++;
1495 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1496 if (s >= send) break;
1497 s++;
1498 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1499 if (*s == '=' || s >= send) break;
1500 s++;
1501 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1502 if (*s == '=' || s >= send) break;
1503 s++;
1504 *ptr++ = castchar(a << 2 | b >> 4);
1505 *ptr++ = castchar(b << 4 | c >> 2);
1506 *ptr++ = castchar(c << 6 | d);
1507 a = -1;
1508 }
1509 if (a != -1 && b != -1) {
1510 if (c == -1)
1511 *ptr++ = castchar(a << 2 | b >> 4);
1512 else {
1513 *ptr++ = castchar(a << 2 | b >> 4);
1514 *ptr++ = castchar(b << 4 | c >> 2);
1515 }
1516 }
1517 }
1518 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1519 UNPACK_PUSH(buf);
1520 }
1521 break;
1522
1523 case 'M':
1524 {
1525 VALUE buf = rb_str_new(0, send - s);
1526 char *ptr = RSTRING_PTR(buf);
1527 const char *ss = s;
1528 int csum = 0;
1529 int c1, c2;
1530
1531 while (s < send) {
1532 if (*s == '=') {
1533 if (++s == send) break;
1534 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1535 s++;
1536 if (*s != '\n') {
1537 if ((c1 = hex2num(*s)) == -1) break;
1538 if (++s == send) break;
1539 if ((c2 = hex2num(*s)) == -1) break;
1540 csum |= *ptr++ = castchar(c1 << 4 | c2);
1541 }
1542 }
1543 else {
1544 csum |= *ptr++ = *s;
1545 }
1546 s++;
1547 ss = s;
1548 }
1549 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1550 rb_str_buf_cat(buf, ss, send-ss);
1552 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
1553 UNPACK_PUSH(buf);
1554 }
1555 break;
1556
1557 case '@':
1558 if (len > RSTRING_LEN(str))
1559 rb_raise(rb_eArgError, "@ outside of string");
1560 s = RSTRING_PTR(str) + len;
1561 break;
1562
1563 case 'X':
1564 if (len > s - RSTRING_PTR(str))
1565 rb_raise(rb_eArgError, "X outside of string");
1566 s -= len;
1567 break;
1568
1569 case 'x':
1570 if (len > send - s)
1571 rb_raise(rb_eArgError, "x outside of string");
1572 s += len;
1573 break;
1574
1575 case '^':
1576 UNPACK_PUSH(SSIZET2NUM(s - RSTRING_PTR(str)));
1577 break;
1578
1579 case 'P':
1580 if (sizeof(char *) <= (size_t)(send - s)) {
1581 VALUE tmp = Qnil;
1582 const char *t;
1583
1584 UNPACK_FETCH(&t, char *);
1585 if (t) {
1586 if (!associates) associates = str_associated(str);
1587 tmp = associated_pointer(associates, t);
1588 if (len < RSTRING_LEN(tmp)) {
1589 tmp = rb_str_new(t, len);
1590 str_associate(tmp, associates);
1591 }
1592 }
1593 UNPACK_PUSH(tmp);
1594 }
1595 break;
1596
1597 case 'p':
1598 if (len > (long)((send - s) / sizeof(char *)))
1599 len = (send - s) / sizeof(char *);
1600 while (len-- > 0) {
1601 if ((size_t)(send - s) < sizeof(char *))
1602 break;
1603 else {
1604 VALUE tmp = Qnil;
1605 const char *t;
1606
1607 UNPACK_FETCH(&t, char *);
1608 if (t) {
1609 if (!associates) associates = str_associated(str);
1610 tmp = associated_pointer(associates, t);
1611 }
1612 UNPACK_PUSH(tmp);
1613 }
1614 }
1615 break;
1616
1617 case 'r':
1618 case 'R':
1619 {
1620 int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
1621
1622 if (type == 'r') {
1623 pack_flags |= INTEGER_PACK_2COMP;
1624 }
1625 const char *s0 = s;
1626 while (len > 0 && s < send) {
1627 if (*s & 0x80) {
1628 s++;
1629 }
1630 else {
1631 s++;
1632 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, pack_flags));
1633 len--;
1634 s0 = s;
1635 }
1636 }
1637 /* Handle incomplete value and remaining expected values with nil (only if not using *) */
1638 if (!star) {
1639 if (s0 != s && len > 0) {
1640 UNPACK_PUSH(Qnil);
1641 len--;
1642 }
1643 while (len-- > 0) {
1644 UNPACK_PUSH(Qnil);
1645 }
1646 }
1647 }
1648 break;
1649
1650 case 'w':
1651 {
1652 const char *s0 = s;
1653 while (len > 0 && s < send) {
1654 if (*s & 0x80) {
1655 s++;
1656 }
1657 else {
1658 s++;
1659 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1660 len--;
1661 s0 = s;
1662 }
1663 }
1664 }
1665 break;
1666
1667 default:
1668 unknown_directive("unpack", type, fmt);
1669 break;
1670 }
1671 }
1672
1673 return ary;
1674}
1675
1676static VALUE
1677pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1678{
1679 enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1680 return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1681}
1682
1683static VALUE
1684pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1685{
1686 return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1687}
1688
1689int
1690rb_uv_to_utf8(char buf[6], unsigned long uv)
1691{
1692 if (uv <= 0x7f) {
1693 buf[0] = (char)uv;
1694 return 1;
1695 }
1696 if (uv <= 0x7ff) {
1697 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1698 buf[1] = castchar((uv&0x3f)|0x80);
1699 return 2;
1700 }
1701 if (uv <= 0xffff) {
1702 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1703 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1704 buf[2] = castchar((uv&0x3f)|0x80);
1705 return 3;
1706 }
1707 if (uv <= 0x1fffff) {
1708 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1709 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1710 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1711 buf[3] = castchar((uv&0x3f)|0x80);
1712 return 4;
1713 }
1714 if (uv <= 0x3ffffff) {
1715 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1716 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1717 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1718 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1719 buf[4] = castchar((uv&0x3f)|0x80);
1720 return 5;
1721 }
1722 if (uv <= 0x7fffffff) {
1723 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1724 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1725 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1726 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1727 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1728 buf[5] = castchar((uv&0x3f)|0x80);
1729 return 6;
1730 }
1731 rb_raise(rb_eRangeError, "pack(U): value out of range");
1732
1734}
1735
/*
 * Lower bounds per sequence length: entry [k] is the smallest value that
 * needs a sequence of k+1 bytes.  utf8_to_uv() indexes this table with
 * (sequence length - 1) and rejects any decoded value below the bound as a
 * redundant encoding.
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte  */
    1UL << 7,   /* 2 bytes: 0x80 */
    1UL << 11,  /* 3 bytes: 0x800 */
    1UL << 16,  /* 4 bytes: 0x10000 */
    1UL << 21,  /* 5 bytes: 0x200000 */
    1UL << 26,  /* 6 bytes: 0x4000000 */
    1UL << 31,  /* 7 bytes: 0x80000000 */
};
1745
1746static unsigned long
1747utf8_to_uv(const char *p, long *lenp)
1748{
1749 int c = *p++ & 0xff;
1750 unsigned long uv = c;
1751 long n;
1752
1753 if (!(uv & 0x80)) {
1754 *lenp = 1;
1755 return uv;
1756 }
1757 if (!(uv & 0x40)) {
1758 *lenp = 1;
1759 rb_raise(rb_eArgError, "malformed UTF-8 character");
1760 }
1761
1762 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1763 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1764 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1765 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1766 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1767 else {
1768 *lenp = 1;
1769 rb_raise(rb_eArgError, "malformed UTF-8 character");
1770 }
1771 if (n > *lenp) {
1772 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1773 n, *lenp);
1774 }
1775 *lenp = n--;
1776 if (n != 0) {
1777 while (n--) {
1778 c = *p++ & 0xff;
1779 if ((c & 0xc0) != 0x80) {
1780 *lenp -= n + 1;
1781 rb_raise(rb_eArgError, "malformed UTF-8 character");
1782 }
1783 else {
1784 c &= 0x3f;
1785 uv = uv << 6 | c;
1786 }
1787 }
1788 }
1789 n = *lenp - 1;
1790 if (uv < utf8_limits[n]) {
1791 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1792 }
1793 return uv;
1794}
1795
1796#include "pack.rbinc"
1797
1798void
1799Init_pack(void)
1800{
1801 id_associated = rb_make_internal_id();
1802}
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition eval.c:1018
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
Definition coderange.h:181
#define RFLOAT_VALUE
Old name of rb_float_value.
Definition double.h:28
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
Definition assume.h:29
#define SSIZET2NUM
Old name of RB_SSIZE2NUM.
Definition size_t.h:64
#define STRTOUL
Old name of ruby_strtoul.
Definition ctype.h:104
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define NIL_P
Old name of RB_NIL_P.
#define DBL2NUM
Old name of rb_float_new.
Definition double.h:29
#define ISPRINT
Old name of rb_isprint.
Definition ctype.h:86
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition long.h:51
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
Definition coderange.h:189
VALUE rb_eRangeError
RangeError exception.
Definition error.c:1431
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1427
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1425
VALUE rb_to_float(VALUE val)
Identical to rb_check_to_float(), except it raises on error.
Definition object.c:3782
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
Definition object.c:3334
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
#define INTEGER_PACK_LITTLE_ENDIAN
Little endian combination.
Definition bignum.h:567
#define INTEGER_PACK_BIG_ENDIAN
Big endian combination.
Definition bignum.h:572
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
Definition pack.c:1690
#define INTEGER_PACK_2COMP
Uses 2's complement representation.
Definition bignum.h:549
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1499
#define rb_str_buf_cat
Just another name of rb_str_cat.
Definition string.h:1682
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
Definition string.h:1533
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3421
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
Definition string.c:2790
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition string.c:2744
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition string.c:1716
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
Definition string.c:1848
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
Definition variable.c:2047
int len
Length of the buffer.
Definition io.h:8
const signed char ruby_digit36_to_number_table[]
Character to number mapping like 'a' -> 10, 'b' -> 11, etc.
Definition util.c:60
#define RB_NUM2LONG
Just another name of rb_num2long_inline.
Definition long.h:57
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RBIMPL_ATTR_NORETURN()
Wraps (or simulates) [[noreturn]]
Definition noreturn.h:38
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
Definition rarray.h:52
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:66
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
Definition rstring.h:409
const char * rb_obj_classname(VALUE obj)
Queries the name of the class of the passed object.
Definition variable.c:515
#define errno
Ractor-aware version of errno.
Definition ruby.h:388
intptr_t SIGNED_VALUE
A signed integer type that has the same width as VALUE.
Definition value.h:63
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376