Ruby 4.1.0dev (2026-03-01 revision d68e4be1873e364c5ee24ed112bce4bc86e3a406)
pack.c (d68e4be1873e364c5ee24ed112bce4bc86e3a406)
1/**********************************************************************
2
3 pack.c -
4
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
7
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "ruby/internal/config.h"
13
14#include <ctype.h>
15#include <errno.h>
16#include <float.h>
17#include <sys/types.h>
18
19#include "internal.h"
20#include "internal/array.h"
21#include "internal/bits.h"
22#include "internal/string.h"
23#include "internal/symbol.h"
24#include "internal/variable.h"
25#include "ruby/util.h"
26
27#include "builtin.h"
28
/*
 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
 * instead of HAVE_LONG_LONG or LONG_LONG.
 * This means that q! and Q! always refer to the standard long long type
 * and cause ArgumentError on platforms which have no long long type,
 * even if the platform has an implementation-specific 64-bit type.
 * This behavior is consistent with the documentation of pack/unpack.
 */
37#ifdef HAVE_TRUE_LONG_LONG
38static const char natstr[] = "sSiIlLqQjJ";
39# define endstr natstr
40#else
41static const char natstr[] = "sSiIlLjJ";
42static const char endstr[] = "sSiIlLqQjJ";
43#endif
44
45#ifdef HAVE_TRUE_LONG_LONG
46/* It is intentional to use long long instead of LONG_LONG. */
47# define NATINT_LEN_Q NATINT_LEN(long long, 8)
48#else
49# define NATINT_LEN_Q 8
50#endif
51
52#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
53# define NATINT_PACK
54#endif
55
56#ifdef DYNAMIC_ENDIAN
57/* for universal binary of NEXTSTEP and MacOS X */
58/* useless since autoconf 2.63? */
/*
 * Detects the byte order at run time and caches the answer.
 * Returns 1 on a big-endian host and 0 on a little-endian one.
 */
static int
is_bigendian(void)
{
    static int probed = 0;      /* doubles as the probe word once set to 1 */
    static int cached_result;

    if (!probed) {
        const char *first_byte;

        probed = 1;
        /* the lowest-addressed byte of the int 1 is non-zero only on
         * little-endian hosts */
        first_byte = (const char *)&probed;
        cached_result = first_byte[0] ? 0 : 1;
    }
    return cached_result;
}
71# define BIGENDIAN_P() (is_bigendian())
72#elif defined(WORDS_BIGENDIAN)
73# define BIGENDIAN_P() 1
74#else
75# define BIGENDIAN_P() 0
76#endif
77
78#ifdef NATINT_PACK
79# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
80#else
81# define NATINT_LEN(type,len) ((int)sizeof(type))
82#endif
83
/*
 * Overlays a float with a 32-bit unsigned integer and a byte buffer so that
 * the HTONF/HTOVF/NTOHF/VTOHF macros can byte-swap the value through `u`
 * and the pack/unpack code can copy it through `buf`.
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/*
 * Same overlay for a double: `u` is used by the HTOND/HTOVD/NTOHD/VTOHD
 * macros, `buf` by the pack/unpack code.
 */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
94#define swapf(x) swap32(x)
95#define swapd(x) swap64(x)
96
97#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
98#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
99#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
100#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
101#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
102#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
103#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
104#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
105
106#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
107#define HTONF(x) ((x).u = rb_htonf((x).u))
108#define HTOVF(x) ((x).u = rb_htovf((x).u))
109#define NTOHF(x) ((x).u = rb_ntohf((x).u))
110#define VTOHF(x) ((x).u = rb_vtohf((x).u))
111
112#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
113#define HTOND(x) ((x).u = rb_htond((x).u))
114#define HTOVD(x) ((x).u = rb_htovd((x).u))
115#define NTOHD(x) ((x).u = rb_ntohd((x).u))
116#define VTOHD(x) ((x).u = rb_vtohd((x).u))
117
118#define MAX_INTEGER_PACK_SIZE 8
119
120static const char toofew[] = "too few arguments";
121
122static void encodes(VALUE,const char*,long,int,int);
123static void qpencode(VALUE,VALUE,long);
124
125static unsigned long utf8_to_uv(const char*,long*);
126
127static ID id_associated;
128
129static void
130str_associate(VALUE str, VALUE add)
131{
132 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
133 rb_ivar_set(str, id_associated, add);
134}
135
136static VALUE
137str_associated(VALUE str)
138{
139 VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
140 if (!associates)
141 rb_raise(rb_eArgError, "no associated pointer");
142 return associates;
143}
144
145static VALUE
146associated_pointer(VALUE associates, const char *t)
147{
148 const VALUE *p = RARRAY_CONST_PTR(associates);
149 const VALUE *pend = p + RARRAY_LEN(associates);
150 for (; p < pend; p++) {
151 VALUE tmp = *p;
152 if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
153 }
154 rb_raise(rb_eArgError, "non associated pointer");
156}
157
159static void
160unknown_directive(const char *mode, char type, VALUE fmt)
161{
162 char unknown[5];
163
164 if (ISPRINT(type)) {
165 unknown[0] = type;
166 unknown[1] = '\0';
167 }
168 else {
169 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
170 }
171 fmt = rb_str_quote_unprintable(fmt);
172 rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'",
173 mode, unknown, fmt);
174}
175
176static float
177VALUE_to_float(VALUE obj)
178{
179 VALUE v = rb_to_float(obj);
180 double d = RFLOAT_VALUE(v);
181
182 if (isnan(d)) {
183 return NAN;
184 }
185 else if (d < -FLT_MAX) {
186 return -INFINITY;
187 }
188 else if (d <= FLT_MAX) {
189 return d;
190 }
191 else {
192 return INFINITY;
193 }
194}
195
196static void
197str_expand_fill(VALUE res, int c, long len)
198{
199 long olen = RSTRING_LEN(res);
200 memset(RSTRING_PTR(res) + olen, c, len);
201 rb_str_set_len(res, olen + len);
202}
203
/*
 * Returns a pointer just past the first '\n' found in [p, pend), or +pend+
 * itself when the range contains no newline.
 */
static char *
skip_to_eol(const char *p, const char *pend)
{
    const char *newline = memchr(p, '\n', pend - p);

    if (newline == NULL) {
        return (char *)pend;
    }
    return (char *)(newline + 1);
}
210
211#define skip_blank(p, type) \
212 (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1)))
213
214#ifndef NATINT_PACK
215# define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e)
216#endif
217static char *
218pack_modifiers(const char *p, char type, int *natint, int *explicit_endian)
219{
220 while (1) {
221 switch (*p) {
222 case '_':
223 case '!':
224 if (strchr(natstr, type)) {
225#ifdef NATINT_PACK
226 *natint = 1;
227#endif
228 p++;
229 }
230 else {
231 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
232 }
233 break;
234
235 case '<':
236 case '>':
237 if (!strchr(endstr, type)) {
238 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
239 }
240 if (*explicit_endian) {
241 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
242 }
243 *explicit_endian = *p++;
244 break;
245 default:
246 return (char *)p;
247 }
248 }
249}
250
251static VALUE
252pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
253{
254 const char *p, *pend;
255 VALUE res, from, associates = 0;
256 long len, idx, plen;
257 const char *ptr;
258 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
259 int integer_size, bigendian_p;
260
261 StringValue(fmt);
263 p = RSTRING_PTR(fmt);
264 pend = p + RSTRING_LEN(fmt);
265
266 if (NIL_P(buffer)) {
267 res = rb_str_buf_new(0);
268 }
269 else {
270 if (!RB_TYPE_P(buffer, T_STRING))
271 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
272 rb_str_modify(buffer);
273 res = buffer;
274 }
275
276 idx = 0;
277
278#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
279#define MORE_ITEM (idx < RARRAY_LEN(ary))
280#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
281#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
282
283 while (p < pend) {
284 int explicit_endian = 0;
285 if (RSTRING_END(fmt) != pend) {
286 rb_raise(rb_eRuntimeError, "format string modified");
287 }
288 const char type = *p++; /* get data type */
289#ifdef NATINT_PACK
290 int natint = 0; /* native integer */
291#endif
292
293 if (skip_blank(p, type)) continue;
294 p = pack_modifiers(p, type, &natint, &explicit_endian);
295
296 if (*p == '*') { /* set data length */
297 len = strchr("@Xxu", type) ? 0
298 : strchr("PMm", type) ? 1
299 : RARRAY_LEN(ary) - idx;
300 p++;
301 }
302 else if (ISDIGIT(*p)) {
303 errno = 0;
304 len = STRTOUL(p, (char**)&p, 10);
305 if (len < 0 || errno) {
306 rb_raise(rb_eRangeError, "pack length too big");
307 }
308 }
309 else {
310 len = 1;
311 }
312
313 switch (type) {
314 case 'U':
315 /* if encoding is US-ASCII, upgrade to UTF-8 */
316 if (enc_info == 1) enc_info = 2;
317 break;
318 case 'm': case 'M': case 'u':
319 /* keep US-ASCII (do nothing) */
320 break;
321 default:
322 /* fall back to BINARY */
323 enc_info = 0;
324 break;
325 }
326 switch (type) {
327 case 'A': case 'a': case 'Z':
328 case 'B': case 'b':
329 case 'H': case 'h':
330 from = NEXTFROM;
331 if (NIL_P(from)) {
332 ptr = "";
333 plen = 0;
334 }
335 else {
336 StringValue(from);
337 ptr = RSTRING_PTR(from);
338 plen = RSTRING_LEN(from);
339 }
340
341 if (p[-1] == '*')
342 len = plen;
343
344 switch (type) {
345 case 'a': /* arbitrary binary string (null padded) */
346 case 'A': /* arbitrary binary string (ASCII space padded) */
347 case 'Z': /* null terminated string */
348 if (plen >= len) {
349 rb_str_buf_cat(res, ptr, len);
350 if (p[-1] == '*' && type == 'Z')
351 rb_str_buf_cat(res, "", 1);
352 }
353 else {
355 rb_str_buf_cat(res, ptr, plen);
356 str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen);
357 }
358 break;
359
360#define castchar(from) (char)((from) & 0xff)
361
362 case 'b': /* bit string (ascending) */
363 {
364 int byte = 0;
365 long i, j = 0;
366
367 if (len > plen) {
368 j = (len - plen + 1)/2;
369 len = plen;
370 }
371 for (i=0; i++ < len; ptr++) {
372 if (*ptr & 1)
373 byte |= 128;
374 if (i & 7)
375 byte >>= 1;
376 else {
377 char c = castchar(byte);
378 rb_str_buf_cat(res, &c, 1);
379 byte = 0;
380 }
381 }
382 if (len & 7) {
383 char c;
384 byte >>= 7 - (len & 7);
385 c = castchar(byte);
386 rb_str_buf_cat(res, &c, 1);
387 }
388 len = j;
389 goto grow;
390 }
391 break;
392
393 case 'B': /* bit string (descending) */
394 {
395 int byte = 0;
396 long i, j = 0;
397
398 if (len > plen) {
399 j = (len - plen + 1)/2;
400 len = plen;
401 }
402 for (i=0; i++ < len; ptr++) {
403 byte |= *ptr & 1;
404 if (i & 7)
405 byte <<= 1;
406 else {
407 char c = castchar(byte);
408 rb_str_buf_cat(res, &c, 1);
409 byte = 0;
410 }
411 }
412 if (len & 7) {
413 char c;
414 byte <<= 7 - (len & 7);
415 c = castchar(byte);
416 rb_str_buf_cat(res, &c, 1);
417 }
418 len = j;
419 goto grow;
420 }
421 break;
422
423 case 'h': /* hex string (low nibble first) */
424 {
425 int byte = 0;
426 long i, j = 0;
427
428 if (len > plen) {
429 j = (len + 1) / 2 - (plen + 1) / 2;
430 len = plen;
431 }
432 for (i=0; i++ < len; ptr++) {
433 if (ISALPHA(*ptr))
434 byte |= (((*ptr & 15) + 9) & 15) << 4;
435 else
436 byte |= (*ptr & 15) << 4;
437 if (i & 1)
438 byte >>= 4;
439 else {
440 char c = castchar(byte);
441 rb_str_buf_cat(res, &c, 1);
442 byte = 0;
443 }
444 }
445 if (len & 1) {
446 char c = castchar(byte);
447 rb_str_buf_cat(res, &c, 1);
448 }
449 len = j;
450 goto grow;
451 }
452 break;
453
454 case 'H': /* hex string (high nibble first) */
455 {
456 int byte = 0;
457 long i, j = 0;
458
459 if (len > plen) {
460 j = (len + 1) / 2 - (plen + 1) / 2;
461 len = plen;
462 }
463 for (i=0; i++ < len; ptr++) {
464 if (ISALPHA(*ptr))
465 byte |= ((*ptr & 15) + 9) & 15;
466 else
467 byte |= *ptr & 15;
468 if (i & 1)
469 byte <<= 4;
470 else {
471 char c = castchar(byte);
472 rb_str_buf_cat(res, &c, 1);
473 byte = 0;
474 }
475 }
476 if (len & 1) {
477 char c = castchar(byte);
478 rb_str_buf_cat(res, &c, 1);
479 }
480 len = j;
481 goto grow;
482 }
483 break;
484 }
485 break;
486
487 case 'c': /* signed char */
488 case 'C': /* unsigned char */
489 integer_size = 1;
490 bigendian_p = BIGENDIAN_P(); /* not effective */
491 goto pack_integer;
492
493 case 's': /* s for int16_t, s! for signed short */
494 case 'S': /* S for uint16_t, S! for unsigned short */
495 integer_size = NATINT_LEN(short, 2);
496 bigendian_p = BIGENDIAN_P();
497 goto pack_integer;
498
499 case 'i': /* i and i! for signed int */
500 case 'I': /* I and I! for unsigned int */
501 integer_size = (int)sizeof(int);
502 bigendian_p = BIGENDIAN_P();
503 goto pack_integer;
504
505 case 'l': /* l for int32_t, l! for signed long */
506 case 'L': /* L for uint32_t, L! for unsigned long */
507 integer_size = NATINT_LEN(long, 4);
508 bigendian_p = BIGENDIAN_P();
509 goto pack_integer;
510
511 case 'q': /* q for int64_t, q! for signed long long */
512 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
513 integer_size = NATINT_LEN_Q;
514 bigendian_p = BIGENDIAN_P();
515 goto pack_integer;
516
517 case 'j': /* j for intptr_t */
518 integer_size = sizeof(intptr_t);
519 bigendian_p = BIGENDIAN_P();
520 goto pack_integer;
521
522 case 'J': /* J for uintptr_t */
523 integer_size = sizeof(uintptr_t);
524 bigendian_p = BIGENDIAN_P();
525 goto pack_integer;
526
527 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
528 integer_size = 2;
529 bigendian_p = 1;
530 goto pack_integer;
531
532 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
533 integer_size = 4;
534 bigendian_p = 1;
535 goto pack_integer;
536
537 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
538 integer_size = 2;
539 bigendian_p = 0;
540 goto pack_integer;
541
542 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
543 integer_size = 4;
544 bigendian_p = 0;
545 goto pack_integer;
546
547 pack_integer:
548 if (explicit_endian) {
549 bigendian_p = explicit_endian == '>';
550 }
551 if (integer_size > MAX_INTEGER_PACK_SIZE)
552 rb_bug("unexpected integer size for pack: %d", integer_size);
553 while (len-- > 0) {
554 char intbuf[MAX_INTEGER_PACK_SIZE];
555
556 from = NEXTFROM;
557 rb_integer_pack(from, intbuf, integer_size, 1, 0,
560 rb_str_buf_cat(res, intbuf, integer_size);
561 }
562 break;
563
564 case 'f': /* single precision float in native format */
565 case 'F': /* ditto */
566 while (len-- > 0) {
567 float f;
568
569 from = NEXTFROM;
570 f = VALUE_to_float(from);
571 rb_str_buf_cat(res, (char*)&f, sizeof(float));
572 }
573 break;
574
575 case 'e': /* single precision float in VAX byte-order */
576 while (len-- > 0) {
577 FLOAT_CONVWITH(tmp);
578
579 from = NEXTFROM;
580 tmp.f = VALUE_to_float(from);
581 HTOVF(tmp);
582 rb_str_buf_cat(res, tmp.buf, sizeof(float));
583 }
584 break;
585
586 case 'E': /* double precision float in VAX byte-order */
587 while (len-- > 0) {
588 DOUBLE_CONVWITH(tmp);
589 from = NEXTFROM;
590 tmp.d = RFLOAT_VALUE(rb_to_float(from));
591 HTOVD(tmp);
592 rb_str_buf_cat(res, tmp.buf, sizeof(double));
593 }
594 break;
595
596 case 'd': /* double precision float in native format */
597 case 'D': /* ditto */
598 while (len-- > 0) {
599 double d;
600
601 from = NEXTFROM;
602 d = RFLOAT_VALUE(rb_to_float(from));
603 rb_str_buf_cat(res, (char*)&d, sizeof(double));
604 }
605 break;
606
607 case 'g': /* single precision float in network byte-order */
608 while (len-- > 0) {
609 FLOAT_CONVWITH(tmp);
610 from = NEXTFROM;
611 tmp.f = VALUE_to_float(from);
612 HTONF(tmp);
613 rb_str_buf_cat(res, tmp.buf, sizeof(float));
614 }
615 break;
616
617 case 'G': /* double precision float in network byte-order */
618 while (len-- > 0) {
619 DOUBLE_CONVWITH(tmp);
620
621 from = NEXTFROM;
622 tmp.d = RFLOAT_VALUE(rb_to_float(from));
623 HTOND(tmp);
624 rb_str_buf_cat(res, tmp.buf, sizeof(double));
625 }
626 break;
627
628 case 'x': /* null byte */
629 grow:
631 str_expand_fill(res, '\0', len);
632 break;
633
634 case 'X': /* back up byte */
635 shrink:
636 plen = RSTRING_LEN(res);
637 if (plen < len)
638 rb_raise(rb_eArgError, "X outside of string");
639 rb_str_set_len(res, plen - len);
640 break;
641
642 case '@': /* null fill to absolute position */
643 len -= RSTRING_LEN(res);
644 if (len > 0) goto grow;
645 len = -len;
646 if (len > 0) goto shrink;
647 break;
648
649 case '%':
650 rb_raise(rb_eArgError, "%% is not supported");
651 break;
652
653 case 'U': /* Unicode character */
654 while (len-- > 0) {
655 SIGNED_VALUE l;
656 char buf[8];
657 int le;
658
659 from = NEXTFROM;
660 from = rb_to_int(from);
661 l = NUM2LONG(from);
662 if (l < 0) {
663 rb_raise(rb_eRangeError, "pack(U): value out of range");
664 }
665 le = rb_uv_to_utf8(buf, l);
666 rb_str_buf_cat(res, (char*)buf, le);
667 }
668 break;
669
670 case 'r': /* r for SLEB128 encoding (signed) */
671 case 'R': /* R for ULEB128 encoding (unsigned) */
672 {
673 int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
674
675 if (type == 'r') {
676 pack_flags |= INTEGER_PACK_2COMP;
677 }
678
679 while (len-- > 0) {
680 size_t numbytes;
681 int sign;
682 char *cp;
683
684 from = NEXTFROM;
685 from = rb_to_int(from);
686 numbytes = rb_absint_numwords(from, 7, NULL);
687 if (numbytes == 0)
688 numbytes = 1;
689 VALUE buf = rb_str_new(NULL, numbytes);
690
691 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, pack_flags);
692
693 if (sign < 0 && type == 'R') {
694 rb_raise(rb_eArgError, "can't encode negative numbers in ULEB128");
695 }
696
697 if (type == 'r') {
698 /* Check if we need an extra byte for sign extension */
699 unsigned char last_byte = (unsigned char)RSTRING_PTR(buf)[numbytes - 1];
700 if ((sign >= 0 && (last_byte & 0x40)) || /* positive but sign bit set */
701 (sign < 0 && !(last_byte & 0x40))) { /* negative but sign bit clear */
702 /* Need an extra byte */
703 rb_str_resize(buf, numbytes + 1);
704 RSTRING_PTR(buf)[numbytes] = sign < 0 ? 0x7f : 0x00;
705 numbytes++;
706 }
707 }
708
709 cp = RSTRING_PTR(buf);
710 while (1 < numbytes) {
711 *cp |= 0x80;
712 cp++;
713 numbytes--;
714 }
715
716 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
717 }
718 }
719 break;
720 case 'u': /* uuencoded string */
721 case 'm': /* base64 encoded string */
722 from = NEXTFROM;
723 StringValue(from);
724 ptr = RSTRING_PTR(from);
725 plen = RSTRING_LEN(from);
726
727 if (len == 0 && type == 'm') {
728 encodes(res, ptr, plen, type, 0);
729 ptr += plen;
730 break;
731 }
732 if (len <= 2)
733 len = 45;
734 else if (len > 63 && type == 'u')
735 len = 63;
736 else
737 len = len / 3 * 3;
738 while (plen > 0) {
739 long todo;
740
741 if (plen > len)
742 todo = len;
743 else
744 todo = plen;
745 encodes(res, ptr, todo, type, 1);
746 plen -= todo;
747 ptr += todo;
748 }
749 break;
750
751 case 'M': /* quoted-printable encoded string */
752 from = rb_obj_as_string(NEXTFROM);
753 if (len <= 1)
754 len = 72;
755 qpencode(res, from, len);
756 break;
757
758 case 'P': /* pointer to packed byte string */
759 from = THISFROM;
760 if (!NIL_P(from)) {
761 StringValue(from);
762 if (RSTRING_LEN(from) < len) {
763 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
764 RSTRING_LEN(from), len);
765 }
766 }
767 len = 1;
768 /* FALL THROUGH */
769 case 'p': /* pointer to string */
770 while (len-- > 0) {
771 char *t;
772 from = NEXTFROM;
773 if (NIL_P(from)) {
774 t = 0;
775 }
776 else {
777 t = StringValuePtr(from);
778 }
779 if (!associates) {
780 associates = rb_ary_new();
781 }
782 rb_ary_push(associates, from);
783 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
784 }
785 break;
786
787 case 'w': /* BER compressed integer */
788 while (len-- > 0) {
789 VALUE buf;
790 size_t numbytes;
791 int sign;
792 char *cp;
793
794 from = NEXTFROM;
795 from = rb_to_int(from);
796 numbytes = rb_absint_numwords(from, 7, NULL);
797 if (numbytes == 0)
798 numbytes = 1;
799 buf = rb_str_new(NULL, numbytes);
800
801 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
802
803 if (sign < 0)
804 rb_raise(rb_eArgError, "can't compress negative numbers");
805 if (sign == 2)
806 rb_bug("buffer size problem?");
807
808 cp = RSTRING_PTR(buf);
809 while (1 < numbytes) {
810 *cp |= 0x80;
811 cp++;
812 numbytes--;
813 }
814
815 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
816 }
817 break;
818
819 default: {
820 unknown_directive("pack", type, fmt);
821 break;
822 }
823 }
824 }
825
826 if (associates) {
827 str_associate(res, associates);
828 }
829 switch (enc_info) {
830 case 1:
831 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
832 break;
833 case 2:
834 rb_enc_set_index(res, rb_utf8_encindex());
835 break;
836 default:
837 /* do nothing, keep ASCII-8BIT */
838 break;
839 }
840 return res;
841}
842
843VALUE
844rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
845{
846 return pack_pack(ec, ary, fmt, buffer);
847}
848
849static const char uu_table[] =
850"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
851static const char b64_table[] =
852"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
853
854static void
855encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
856{
857 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
858 char buff[buff_size + 1]; /* +1 for tail_lf */
859 long i = 0;
860 const char *const trans = type == 'u' ? uu_table : b64_table;
861 char padding;
862 const unsigned char *s = (const unsigned char *)s0;
863
864 if (type == 'u') {
865 buff[i++] = (char)len + ' ';
866 padding = '`';
867 }
868 else {
869 padding = '=';
870 }
871 while (len >= input_unit) {
872 while (len >= input_unit && buff_size-i >= encoded_unit) {
873 buff[i++] = trans[077 & (*s >> 2)];
874 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
875 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
876 buff[i++] = trans[077 & s[2]];
877 s += input_unit;
878 len -= input_unit;
879 }
880 if (buff_size-i < encoded_unit) {
881 rb_str_buf_cat(str, buff, i);
882 i = 0;
883 }
884 }
885
886 if (len == 2) {
887 buff[i++] = trans[077 & (*s >> 2)];
888 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
889 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
890 buff[i++] = padding;
891 }
892 else if (len == 1) {
893 buff[i++] = trans[077 & (*s >> 2)];
894 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
895 buff[i++] = padding;
896 buff[i++] = padding;
897 }
898 if (tail_lf) buff[i++] = '\n';
899 rb_str_buf_cat(str, buff, i);
900 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
901}
902
903static const char hex_table[] = "0123456789ABCDEF";
904
905static void
906qpencode(VALUE str, VALUE from, long len)
907{
908 char buff[1024];
909 long i = 0, n = 0, prev = EOF;
910 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
911 unsigned char *send = s + RSTRING_LEN(from);
912
913 while (s < send) {
914 if ((*s > 126) ||
915 (*s < 32 && *s != '\n' && *s != '\t') ||
916 (*s == '=')) {
917 buff[i++] = '=';
918 buff[i++] = hex_table[*s >> 4];
919 buff[i++] = hex_table[*s & 0x0f];
920 n += 3;
921 prev = EOF;
922 }
923 else if (*s == '\n') {
924 if (prev == ' ' || prev == '\t') {
925 buff[i++] = '=';
926 buff[i++] = *s;
927 }
928 buff[i++] = *s;
929 n = 0;
930 prev = *s;
931 }
932 else {
933 buff[i++] = *s;
934 n++;
935 prev = *s;
936 }
937 if (n > len) {
938 buff[i++] = '=';
939 buff[i++] = '\n';
940 n = 0;
941 prev = '\n';
942 }
943 if (i > 1024 - 5) {
944 rb_str_buf_cat(str, buff, i);
945 i = 0;
946 }
947 s++;
948 }
949 if (n > 0) {
950 buff[i++] = '=';
951 buff[i++] = '\n';
952 }
953 if (i > 0) {
954 rb_str_buf_cat(str, buff, i);
955 }
956}
957
958static inline int
959hex2num(char c)
960{
961 int n;
962 n = ruby_digit36_to_number_table[(unsigned char)c];
963 if (16 <= n)
964 n = -1;
965 return n;
966}
967
968#define PACK_LENGTH_ADJUST_SIZE(sz) do { \
969 tmp_len = 0; \
970 if (len > (long)((send-s)/(sz))) { \
971 if (!star) { \
972 tmp_len = len-(send-s)/(sz); \
973 } \
974 len = (send-s)/(sz); \
975 } \
976} while (0)
977
978#define PACK_ITEM_ADJUST() do { \
979 if (tmp_len > 0 && mode == UNPACK_ARRAY) \
980 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
981} while (0)
982
983/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
984 * 12.4/12.5/12.6 C compiler optimization bug
985 * with "-xO4" optimization option.
986 */
987#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
988# define AVOID_CC_BUG volatile
989#else
990# define AVOID_CC_BUG
991#endif
992
/* How unpacked items are delivered (see UNPACK_PUSH). */
enum unpack_mode {
    UNPACK_ARRAY = 0,   /* push each item onto the result array */
    UNPACK_BLOCK = 1,   /* yield each item to the block */
    UNPACK_1     = 2    /* return the first item immediately */
};
998
999static VALUE
1000pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
1001{
1002#define hexdigits ruby_hexdigits
1003 char *s, *send;
1004 char *p, *pend;
1005 VALUE ary, associates = Qfalse;
1006 long len;
1007 AVOID_CC_BUG long tmp_len;
1008 int signed_p, integer_size, bigendian_p;
1009#define UNPACK_PUSH(item) do {\
1010 VALUE item_val = (item);\
1011 if ((mode) == UNPACK_BLOCK) {\
1012 rb_yield(item_val);\
1013 }\
1014 else if ((mode) == UNPACK_ARRAY) {\
1015 rb_ary_push(ary, item_val);\
1016 }\
1017 else /* if ((mode) == UNPACK_1) { */ {\
1018 return item_val; \
1019 }\
1020 } while (0)
1021
1022 StringValue(str);
1023 StringValue(fmt);
1025
1026 if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
1027 len = RSTRING_LEN(str);
1028 if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
1029
1030 s = RSTRING_PTR(str);
1031 send = s + len;
1032 s += offset;
1033
1034 p = RSTRING_PTR(fmt);
1035 pend = p + RSTRING_LEN(fmt);
1036
1037#define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
1038
1039 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
1040 while (p < pend) {
1041 int explicit_endian = 0;
1042 const char type = *p++;
1043#ifdef NATINT_PACK
1044 int natint = 0; /* native integer */
1045#endif
1046 int star = 0;
1047
1048 if (skip_blank(p, type)) continue;
1049 p = pack_modifiers(p, type, &natint, &explicit_endian);
1050
1051 if (p >= pend)
1052 len = 1;
1053 else if (*p == '*') {
1054 star = 1;
1055 len = send - s;
1056 p++;
1057 }
1058 else if (ISDIGIT(*p)) {
1059 errno = 0;
1060 len = STRTOUL(p, (char**)&p, 10);
1061 if (len < 0 || errno) {
1062 rb_raise(rb_eRangeError, "pack length too big");
1063 }
1064 }
1065 else {
1066 len = (type != '@');
1067 }
1068
1069 switch (type) {
1070 case '%':
1071 rb_raise(rb_eArgError, "%% is not supported");
1072 break;
1073
1074 case 'A':
1075 if (len > send - s) len = send - s;
1076 {
1077 long end = len;
1078 char *t = s + len - 1;
1079
1080 while (t >= s) {
1081 if (*t != ' ' && *t != '\0') break;
1082 t--; len--;
1083 }
1084 UNPACK_PUSH(rb_str_new(s, len));
1085 s += end;
1086 }
1087 break;
1088
1089 case 'Z':
1090 {
1091 char *t = s;
1092
1093 if (len > send-s) len = send-s;
1094 while (t < s+len && *t) t++;
1095 UNPACK_PUSH(rb_str_new(s, t-s));
1096 if (t < send) t++;
1097 s = star ? t : s+len;
1098 }
1099 break;
1100
1101 case 'a':
1102 if (len > send - s) len = send - s;
1103 UNPACK_PUSH(rb_str_new(s, len));
1104 s += len;
1105 break;
1106
1107 case 'b':
1108 {
1109 VALUE bitstr;
1110 char *t;
1111 int bits;
1112 long i;
1113
1114 if (p[-1] == '*' || len > (send - s) * 8)
1115 len = (send - s) * 8;
1116 bits = 0;
1117 bitstr = rb_usascii_str_new(0, len);
1118 t = RSTRING_PTR(bitstr);
1119 for (i=0; i<len; i++) {
1120 if (i & 7) bits >>= 1;
1121 else bits = (unsigned char)*s++;
1122 *t++ = (bits & 1) ? '1' : '0';
1123 }
1124 UNPACK_PUSH(bitstr);
1125 }
1126 break;
1127
1128 case 'B':
1129 {
1130 VALUE bitstr;
1131 char *t;
1132 int bits;
1133 long i;
1134
1135 if (p[-1] == '*' || len > (send - s) * 8)
1136 len = (send - s) * 8;
1137 bits = 0;
1138 bitstr = rb_usascii_str_new(0, len);
1139 t = RSTRING_PTR(bitstr);
1140 for (i=0; i<len; i++) {
1141 if (i & 7) bits <<= 1;
1142 else bits = (unsigned char)*s++;
1143 *t++ = (bits & 128) ? '1' : '0';
1144 }
1145 UNPACK_PUSH(bitstr);
1146 }
1147 break;
1148
1149 case 'h':
1150 {
1151 VALUE bitstr;
1152 char *t;
1153 int bits;
1154 long i;
1155
1156 if (p[-1] == '*' || len > (send - s) * 2)
1157 len = (send - s) * 2;
1158 bits = 0;
1159 bitstr = rb_usascii_str_new(0, len);
1160 t = RSTRING_PTR(bitstr);
1161 for (i=0; i<len; i++) {
1162 if (i & 1)
1163 bits >>= 4;
1164 else
1165 bits = (unsigned char)*s++;
1166 *t++ = hexdigits[bits & 15];
1167 }
1168 UNPACK_PUSH(bitstr);
1169 }
1170 break;
1171
1172 case 'H':
1173 {
1174 VALUE bitstr;
1175 char *t;
1176 int bits;
1177 long i;
1178
1179 if (p[-1] == '*' || len > (send - s) * 2)
1180 len = (send - s) * 2;
1181 bits = 0;
1182 bitstr = rb_usascii_str_new(0, len);
1183 t = RSTRING_PTR(bitstr);
1184 for (i=0; i<len; i++) {
1185 if (i & 1)
1186 bits <<= 4;
1187 else
1188 bits = (unsigned char)*s++;
1189 *t++ = hexdigits[(bits >> 4) & 15];
1190 }
1191 UNPACK_PUSH(bitstr);
1192 }
1193 break;
1194
1195 case 'c':
1196 signed_p = 1;
1197 integer_size = 1;
1198 bigendian_p = BIGENDIAN_P(); /* not effective */
1199 goto unpack_integer;
1200
1201 case 'C':
1202 signed_p = 0;
1203 integer_size = 1;
1204 bigendian_p = BIGENDIAN_P(); /* not effective */
1205 goto unpack_integer;
1206
1207 case 's':
1208 signed_p = 1;
1209 integer_size = NATINT_LEN(short, 2);
1210 bigendian_p = BIGENDIAN_P();
1211 goto unpack_integer;
1212
1213 case 'S':
1214 signed_p = 0;
1215 integer_size = NATINT_LEN(short, 2);
1216 bigendian_p = BIGENDIAN_P();
1217 goto unpack_integer;
1218
1219 case 'i':
1220 signed_p = 1;
1221 integer_size = (int)sizeof(int);
1222 bigendian_p = BIGENDIAN_P();
1223 goto unpack_integer;
1224
1225 case 'I':
1226 signed_p = 0;
1227 integer_size = (int)sizeof(int);
1228 bigendian_p = BIGENDIAN_P();
1229 goto unpack_integer;
1230
1231 case 'l':
1232 signed_p = 1;
1233 integer_size = NATINT_LEN(long, 4);
1234 bigendian_p = BIGENDIAN_P();
1235 goto unpack_integer;
1236
1237 case 'L':
1238 signed_p = 0;
1239 integer_size = NATINT_LEN(long, 4);
1240 bigendian_p = BIGENDIAN_P();
1241 goto unpack_integer;
1242
1243 case 'q':
1244 signed_p = 1;
1245 integer_size = NATINT_LEN_Q;
1246 bigendian_p = BIGENDIAN_P();
1247 goto unpack_integer;
1248
1249 case 'Q':
1250 signed_p = 0;
1251 integer_size = NATINT_LEN_Q;
1252 bigendian_p = BIGENDIAN_P();
1253 goto unpack_integer;
1254
1255 case 'j':
1256 signed_p = 1;
1257 integer_size = sizeof(intptr_t);
1258 bigendian_p = BIGENDIAN_P();
1259 goto unpack_integer;
1260
1261 case 'J':
1262 signed_p = 0;
1263 integer_size = sizeof(uintptr_t);
1264 bigendian_p = BIGENDIAN_P();
1265 goto unpack_integer;
1266
1267 case 'n':
1268 signed_p = 0;
1269 integer_size = 2;
1270 bigendian_p = 1;
1271 goto unpack_integer;
1272
1273 case 'N':
1274 signed_p = 0;
1275 integer_size = 4;
1276 bigendian_p = 1;
1277 goto unpack_integer;
1278
1279 case 'v':
1280 signed_p = 0;
1281 integer_size = 2;
1282 bigendian_p = 0;
1283 goto unpack_integer;
1284
1285 case 'V':
1286 signed_p = 0;
1287 integer_size = 4;
1288 bigendian_p = 0;
1289 goto unpack_integer;
1290
1291 unpack_integer:
1292 if (explicit_endian) {
1293 bigendian_p = explicit_endian == '>';
1294 }
1295 PACK_LENGTH_ADJUST_SIZE(integer_size);
1296 while (len-- > 0) {
1297 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1298 VALUE val;
1299 if (signed_p)
1300 flags |= INTEGER_PACK_2COMP;
1301 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1302 UNPACK_PUSH(val);
1303 s += integer_size;
1304 }
1305 PACK_ITEM_ADJUST();
1306 break;
1307
1308 case 'f':
1309 case 'F':
1310 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1311 while (len-- > 0) {
1312 float tmp;
1313 UNPACK_FETCH(&tmp, float);
1314 UNPACK_PUSH(DBL2NUM((double)tmp));
1315 }
1316 PACK_ITEM_ADJUST();
1317 break;
1318
1319 case 'e':
1320 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1321 while (len-- > 0) {
1322 FLOAT_CONVWITH(tmp);
1323 UNPACK_FETCH(tmp.buf, float);
1324 VTOHF(tmp);
1325 UNPACK_PUSH(DBL2NUM(tmp.f));
1326 }
1327 PACK_ITEM_ADJUST();
1328 break;
1329
1330 case 'E':
1331 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1332 while (len-- > 0) {
1333 DOUBLE_CONVWITH(tmp);
1334 UNPACK_FETCH(tmp.buf, double);
1335 VTOHD(tmp);
1336 UNPACK_PUSH(DBL2NUM(tmp.d));
1337 }
1338 PACK_ITEM_ADJUST();
1339 break;
1340
1341 case 'D':
1342 case 'd':
1343 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1344 while (len-- > 0) {
1345 double tmp;
1346 UNPACK_FETCH(&tmp, double);
1347 UNPACK_PUSH(DBL2NUM(tmp));
1348 }
1349 PACK_ITEM_ADJUST();
1350 break;
1351
1352 case 'g':
1353 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1354 while (len-- > 0) {
1355 FLOAT_CONVWITH(tmp);
1356 UNPACK_FETCH(tmp.buf, float);
1357 NTOHF(tmp);
1358 UNPACK_PUSH(DBL2NUM(tmp.f));
1359 }
1360 PACK_ITEM_ADJUST();
1361 break;
1362
1363 case 'G':
1364 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1365 while (len-- > 0) {
1366 DOUBLE_CONVWITH(tmp);
1367 UNPACK_FETCH(tmp.buf, double);
1368 NTOHD(tmp);
1369 UNPACK_PUSH(DBL2NUM(tmp.d));
1370 }
1371 PACK_ITEM_ADJUST();
1372 break;
1373
1374 case 'U':
1375 if (len > send - s) len = send - s;
1376 while (len > 0 && s < send) {
1377 long alen = send - s;
1378 unsigned long l;
1379
1380 l = utf8_to_uv(s, &alen);
1381 s += alen; len--;
1382 UNPACK_PUSH(ULONG2NUM(l));
1383 }
1384 break;
1385
1386 case 'u':
1387 {
1388 VALUE buf = rb_str_new(0, (send - s)*3/4);
1389 char *ptr = RSTRING_PTR(buf);
1390 long total = 0;
1391
1392 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1393 long a,b,c,d;
1394 char hunk[3];
1395
1396 len = ((unsigned char)*s++ - ' ') & 077;
1397
1398 total += len;
1399 if (total > RSTRING_LEN(buf)) {
1400 len -= total - RSTRING_LEN(buf);
1401 total = RSTRING_LEN(buf);
1402 }
1403
1404 while (len > 0) {
1405 long mlen = len > 3 ? 3 : len;
1406
1407 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1408 a = ((unsigned char)*s++ - ' ') & 077;
1409 else
1410 a = 0;
1411 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1412 b = ((unsigned char)*s++ - ' ') & 077;
1413 else
1414 b = 0;
1415 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1416 c = ((unsigned char)*s++ - ' ') & 077;
1417 else
1418 c = 0;
1419 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1420 d = ((unsigned char)*s++ - ' ') & 077;
1421 else
1422 d = 0;
1423 hunk[0] = (char)(a << 2 | b >> 4);
1424 hunk[1] = (char)(b << 4 | c >> 2);
1425 hunk[2] = (char)(c << 6 | d);
1426 memcpy(ptr, hunk, mlen);
1427 ptr += mlen;
1428 len -= mlen;
1429 }
1430 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1431 s++; /* possible checksum byte */
1432 if (s < send && *s == '\r') s++;
1433 if (s < send && *s == '\n') s++;
1434 }
1435
1436 rb_str_set_len(buf, total);
1437 UNPACK_PUSH(buf);
1438 }
1439 break;
1440
1441 case 'm':
1442 {
1443 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1444 char *ptr = RSTRING_PTR(buf);
1445 int a = -1,b = -1,c = 0,d = 0;
1446 static signed char b64_xtable[256];
1447
1448 if (b64_xtable['/'] <= 0) {
1449 int i;
1450
1451 for (i = 0; i < 256; i++) {
1452 b64_xtable[i] = -1;
1453 }
1454 for (i = 0; i < 64; i++) {
1455 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1456 }
1457 }
1458 if (len == 0) {
1459 while (s < send) {
1460 a = b = c = d = -1;
1461 a = b64_xtable[(unsigned char)*s++];
1462 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1463 b = b64_xtable[(unsigned char)*s++];
1464 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1465 if (*s == '=') {
1466 if (s + 2 == send && *(s + 1) == '=') break;
1467 rb_raise(rb_eArgError, "invalid base64");
1468 }
1469 c = b64_xtable[(unsigned char)*s++];
1470 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1471 if (s + 1 == send && *s == '=') break;
1472 d = b64_xtable[(unsigned char)*s++];
1473 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1474 *ptr++ = castchar(a << 2 | b >> 4);
1475 *ptr++ = castchar(b << 4 | c >> 2);
1476 *ptr++ = castchar(c << 6 | d);
1477 }
1478 if (c == -1) {
1479 *ptr++ = castchar(a << 2 | b >> 4);
1480 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1481 }
1482 else if (d == -1) {
1483 *ptr++ = castchar(a << 2 | b >> 4);
1484 *ptr++ = castchar(b << 4 | c >> 2);
1485 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1486 }
1487 }
1488 else {
1489 while (s < send) {
1490 a = b = c = d = -1;
1491 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1492 if (s >= send) break;
1493 s++;
1494 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1495 if (s >= send) break;
1496 s++;
1497 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1498 if (*s == '=' || s >= send) break;
1499 s++;
1500 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1501 if (*s == '=' || s >= send) break;
1502 s++;
1503 *ptr++ = castchar(a << 2 | b >> 4);
1504 *ptr++ = castchar(b << 4 | c >> 2);
1505 *ptr++ = castchar(c << 6 | d);
1506 a = -1;
1507 }
1508 if (a != -1 && b != -1) {
1509 if (c == -1)
1510 *ptr++ = castchar(a << 2 | b >> 4);
1511 else {
1512 *ptr++ = castchar(a << 2 | b >> 4);
1513 *ptr++ = castchar(b << 4 | c >> 2);
1514 }
1515 }
1516 }
1517 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1518 UNPACK_PUSH(buf);
1519 }
1520 break;
1521
1522 case 'M':
1523 {
1524 VALUE buf = rb_str_new(0, send - s);
1525 char *ptr = RSTRING_PTR(buf), *ss = s;
1526 int csum = 0;
1527 int c1, c2;
1528
1529 while (s < send) {
1530 if (*s == '=') {
1531 if (++s == send) break;
1532 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1533 s++;
1534 if (*s != '\n') {
1535 if ((c1 = hex2num(*s)) == -1) break;
1536 if (++s == send) break;
1537 if ((c2 = hex2num(*s)) == -1) break;
1538 csum |= *ptr++ = castchar(c1 << 4 | c2);
1539 }
1540 }
1541 else {
1542 csum |= *ptr++ = *s;
1543 }
1544 s++;
1545 ss = s;
1546 }
1547 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1548 rb_str_buf_cat(buf, ss, send-ss);
1550 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
1551 UNPACK_PUSH(buf);
1552 }
1553 break;
1554
1555 case '@':
1556 if (len > RSTRING_LEN(str))
1557 rb_raise(rb_eArgError, "@ outside of string");
1558 s = RSTRING_PTR(str) + len;
1559 break;
1560
1561 case 'X':
1562 if (len > s - RSTRING_PTR(str))
1563 rb_raise(rb_eArgError, "X outside of string");
1564 s -= len;
1565 break;
1566
1567 case 'x':
1568 if (len > send - s)
1569 rb_raise(rb_eArgError, "x outside of string");
1570 s += len;
1571 break;
1572
1573 case '^':
1574 UNPACK_PUSH(SSIZET2NUM(s - RSTRING_PTR(str)));
1575 break;
1576
1577 case 'P':
1578 if (sizeof(char *) <= (size_t)(send - s)) {
1579 VALUE tmp = Qnil;
1580 char *t;
1581
1582 UNPACK_FETCH(&t, char *);
1583 if (t) {
1584 if (!associates) associates = str_associated(str);
1585 tmp = associated_pointer(associates, t);
1586 if (len < RSTRING_LEN(tmp)) {
1587 tmp = rb_str_new(t, len);
1588 str_associate(tmp, associates);
1589 }
1590 }
1591 UNPACK_PUSH(tmp);
1592 }
1593 break;
1594
1595 case 'p':
1596 if (len > (long)((send - s) / sizeof(char *)))
1597 len = (send - s) / sizeof(char *);
1598 while (len-- > 0) {
1599 if ((size_t)(send - s) < sizeof(char *))
1600 break;
1601 else {
1602 VALUE tmp = Qnil;
1603 char *t;
1604
1605 UNPACK_FETCH(&t, char *);
1606 if (t) {
1607 if (!associates) associates = str_associated(str);
1608 tmp = associated_pointer(associates, t);
1609 }
1610 UNPACK_PUSH(tmp);
1611 }
1612 }
1613 break;
1614
1615 case 'r':
1616 case 'R':
1617 {
1618 int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
1619
1620 if (type == 'r') {
1621 pack_flags |= INTEGER_PACK_2COMP;
1622 }
1623 char *s0 = s;
1624 while (len > 0 && s < send) {
1625 if (*s & 0x80) {
1626 s++;
1627 }
1628 else {
1629 s++;
1630 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, pack_flags));
1631 len--;
1632 s0 = s;
1633 }
1634 }
1635 /* Handle incomplete value and remaining expected values with nil (only if not using *) */
1636 if (!star) {
1637 if (s0 != s && len > 0) {
1638 UNPACK_PUSH(Qnil);
1639 len--;
1640 }
1641 while (len-- > 0) {
1642 UNPACK_PUSH(Qnil);
1643 }
1644 }
1645 }
1646 break;
1647
1648 case 'w':
1649 {
1650 char *s0 = s;
1651 while (len > 0 && s < send) {
1652 if (*s & 0x80) {
1653 s++;
1654 }
1655 else {
1656 s++;
1657 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1658 len--;
1659 s0 = s;
1660 }
1661 }
1662 }
1663 break;
1664
1665 default:
1666 unknown_directive("unpack", type, fmt);
1667 break;
1668 }
1669 }
1670
1671 return ary;
1672}
1673
1674static VALUE
1675pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1676{
1677 enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1678 return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1679}
1680
1681static VALUE
1682pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1683{
1684 return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1685}
1686
1687int
1688rb_uv_to_utf8(char buf[6], unsigned long uv)
1689{
1690 if (uv <= 0x7f) {
1691 buf[0] = (char)uv;
1692 return 1;
1693 }
1694 if (uv <= 0x7ff) {
1695 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1696 buf[1] = castchar((uv&0x3f)|0x80);
1697 return 2;
1698 }
1699 if (uv <= 0xffff) {
1700 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1701 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1702 buf[2] = castchar((uv&0x3f)|0x80);
1703 return 3;
1704 }
1705 if (uv <= 0x1fffff) {
1706 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1707 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1708 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1709 buf[3] = castchar((uv&0x3f)|0x80);
1710 return 4;
1711 }
1712 if (uv <= 0x3ffffff) {
1713 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1714 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1715 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1716 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1717 buf[4] = castchar((uv&0x3f)|0x80);
1718 return 5;
1719 }
1720 if (uv <= 0x7fffffff) {
1721 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1722 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1723 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1724 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1725 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1726 buf[5] = castchar((uv&0x3f)|0x80);
1727 return 6;
1728 }
1729 rb_raise(rb_eRangeError, "pack(U): value out of range");
1730
1732}
1733
/*
 * Lower bounds per sequence length: entry [len - 1] is the smallest
 * value that genuinely needs a sequence of `len` bytes.  utf8_to_uv()
 * reports a decoded value below the entry for its length as a
 * redundant sequence.
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte  */
    1UL << 7,   /* 2 bytes: 0x80 */
    1UL << 11,  /* 3 bytes: 0x800 */
    1UL << 16,  /* 4 bytes: 0x10000 */
    1UL << 21,  /* 5 bytes: 0x200000 */
    1UL << 26,  /* 6 bytes: 0x4000000 */
    1UL << 31,  /* 7 bytes: 0x80000000 */
};
1743
1744static unsigned long
1745utf8_to_uv(const char *p, long *lenp)
1746{
1747 int c = *p++ & 0xff;
1748 unsigned long uv = c;
1749 long n;
1750
1751 if (!(uv & 0x80)) {
1752 *lenp = 1;
1753 return uv;
1754 }
1755 if (!(uv & 0x40)) {
1756 *lenp = 1;
1757 rb_raise(rb_eArgError, "malformed UTF-8 character");
1758 }
1759
1760 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1761 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1762 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1763 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1764 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1765 else {
1766 *lenp = 1;
1767 rb_raise(rb_eArgError, "malformed UTF-8 character");
1768 }
1769 if (n > *lenp) {
1770 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1771 n, *lenp);
1772 }
1773 *lenp = n--;
1774 if (n != 0) {
1775 while (n--) {
1776 c = *p++ & 0xff;
1777 if ((c & 0xc0) != 0x80) {
1778 *lenp -= n + 1;
1779 rb_raise(rb_eArgError, "malformed UTF-8 character");
1780 }
1781 else {
1782 c &= 0x3f;
1783 uv = uv << 6 | c;
1784 }
1785 }
1786 }
1787 n = *lenp - 1;
1788 if (uv < utf8_limits[n]) {
1789 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1790 }
1791 return uv;
1792}
1793
1794#include "pack.rbinc"
1795
1796void
1797Init_pack(void)
1798{
1799 id_associated = rb_make_internal_id();
1800}
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition eval.c:1017
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
Definition coderange.h:181
#define RFLOAT_VALUE
Old name of rb_float_value.
Definition double.h:28
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
Definition assume.h:29
#define SSIZET2NUM
Old name of RB_SSIZE2NUM.
Definition size_t.h:64
#define STRTOUL
Old name of ruby_strtoul.
Definition ctype.h:104
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define NIL_P
Old name of RB_NIL_P.
#define DBL2NUM
Old name of rb_float_new.
Definition double.h:29
#define ISPRINT
Old name of rb_isprint.
Definition ctype.h:86
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition long.h:51
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
Definition coderange.h:189
VALUE rb_eRangeError
RangeError exception.
Definition error.c:1422
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1418
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1416
VALUE rb_to_float(VALUE val)
Identical to rb_check_to_float(), except it raises on error.
Definition object.c:3813
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
Definition object.c:3365
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
#define INTEGER_PACK_LITTLE_ENDIAN
Little endian combination.
Definition bignum.h:567
#define INTEGER_PACK_BIG_ENDIAN
Big endian combination.
Definition bignum.h:572
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
Definition pack.c:1688
#define INTEGER_PACK_2COMP
Uses 2's complement representation.
Definition bignum.h:549
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1499
#define rb_str_buf_cat
Just another name of rb_str_cat.
Definition string.h:1682
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
Definition string.h:1533
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3403
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
Definition string.c:2773
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition string.c:2727
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition string.c:1701
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
Definition string.c:1833
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
Definition variable.c:2024
int len
Length of the buffer.
Definition io.h:8
const signed char ruby_digit36_to_number_table[]
Character to number mapping like 'a' -> 10, 'b' -> 11, etc.
Definition util.c:60
#define RB_NUM2LONG
Just another name of rb_num2long_inline.
Definition long.h:57
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RBIMPL_ATTR_NORETURN()
Wraps (or simulates) [[noreturn]]
Definition noreturn.h:38
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
Definition rarray.h:52
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:66
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition rstring.h:76
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
Definition rstring.h:409
const char * rb_obj_classname(VALUE obj)
Queries the name of the class of the passed object.
Definition variable.c:515
#define errno
Ractor-aware version of errno.
Definition ruby.h:388
intptr_t SIGNED_VALUE
A signed integer type that has the same width as VALUE.
Definition value.h:63
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376