Ruby 4.1.0dev (2026-03-06 revision 9aca729140424bbf465c11ab8ab53e5cc6602c01)
pack.c (9aca729140424bbf465c11ab8ab53e5cc6602c01)
1/**********************************************************************
2
3 pack.c -
4
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
7
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "ruby/internal/config.h"
13
14#include <ctype.h>
15#include <errno.h>
16#include <float.h>
17#include <sys/types.h>
18
19#include "internal.h"
20#include "internal/array.h"
21#include "internal/bits.h"
22#include "internal/numeric.h"
23#include "internal/string.h"
24#include "internal/symbol.h"
25#include "internal/variable.h"
26#include "ruby/util.h"
27
28#include "builtin.h"
29
30/*
31 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
32 * instead of HAVE_LONG_LONG or LONG_LONG.
33 * This means q! and Q! means always the standard long long type and
34 * causes ArgumentError for platforms which has no long long type,
35 * even if the platform has an implementation specific 64bit type.
36 * This behavior is consistent with the document of pack/unpack.
37 */
38#ifdef HAVE_TRUE_LONG_LONG
39static const char natstr[] = "sSiIlLqQjJ";
40# define endstr natstr
41#else
42static const char natstr[] = "sSiIlLjJ";
43static const char endstr[] = "sSiIlLqQjJ";
44#endif
45
46#ifdef HAVE_TRUE_LONG_LONG
47/* It is intentional to use long long instead of LONG_LONG. */
48# define NATINT_LEN_Q NATINT_LEN(long long, 8)
49#else
50# define NATINT_LEN_Q 8
51#endif
52
53#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
54# define NATINT_PACK
55#endif
56
57#ifdef DYNAMIC_ENDIAN
58/* for universal binary of NEXTSTEP and MacOS X */
59/* useless since autoconf 2.63? */
/*
 * Detects the byte order at run time and caches the answer.
 *
 * Returns 1 when the first byte of an int holding the value 1 is zero
 * (big-endian) and 0 otherwise.  The probe runs only on the first call;
 * later calls return the cached value.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached;

    if (!probed) {
        const char *first_byte;

        /* The flag doubles as the probe: once set to 1, inspect its first byte. */
        probed = 1;
        first_byte = (const char *)&probed;
        cached = (first_byte[0] == 0);
    }
    return cached;
}
72# define BIGENDIAN_P() (is_bigendian())
73#elif defined(WORDS_BIGENDIAN)
74# define BIGENDIAN_P() 1
75#else
76# define BIGENDIAN_P() 0
77#endif
78
79#ifdef NATINT_PACK
80# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
81#else
82# define NATINT_LEN(type,len) ((int)sizeof(type))
83#endif
84
/*
 * Overlay of a single-precision float with its 32-bit pattern and its raw
 * bytes.  The HTONF/HTOVF/NTOHF/VTOHF macros rewrite the value through .u,
 * and callers copy the result to or from .buf.
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/*
 * Same overlay for a double-precision float and its 64-bit pattern; used by
 * the HTOND/HTOVD/NTOHD/VTOHD macros.
 */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
/*
 * Swap the bit pattern of a float / double.  swap32 and swap64 are defined
 * outside this file section -- presumably byte-order reversal helpers.
 */
#define swapf(x) swap32(x)
#define swapd(x) swap64(x)

/*
 * Conversions between host order and network ("n", big-endian) or VAX
 * ("v", little-endian) order.  Each one returns x unchanged when the host
 * already has the target order, according to BIGENDIAN_P(), and the swapped
 * pattern otherwise.
 */
#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))

/*
 * FLOAT_CONVWITH(x) declares a FLOAT_SWAPPER named x (the macro supplies the
 * trailing semicolon).  The four macros after it convert x in place by
 * rewriting its integer view (x).u.
 */
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
#define HTONF(x) ((x).u = rb_htonf((x).u))
#define HTOVF(x) ((x).u = rb_htovf((x).u))
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
#define VTOHF(x) ((x).u = rb_vtohf((x).u))

/* Double-precision counterparts of the macros above, using DOUBLE_SWAPPER. */
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
#define HTOND(x) ((x).u = rb_htond((x).u))
#define HTOVD(x) ((x).u = rb_htovd((x).u))
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
#define VTOHD(x) ((x).u = rb_vtohd((x).u))
118
119#define MAX_INTEGER_PACK_SIZE 8
120
121static const char toofew[] = "too few arguments";
122
123static void encodes(VALUE,const char*,long,int,int);
124static void qpencode(VALUE,VALUE,long);
125
126static unsigned long utf8_to_uv(const char*,long*);
127
128static ID id_associated;
129
130static void
131str_associate(VALUE str, VALUE add)
132{
133 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
134 rb_ivar_set(str, id_associated, add);
135}
136
137static VALUE
138str_associated(VALUE str)
139{
140 VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
141 if (!associates)
142 rb_raise(rb_eArgError, "no associated pointer");
143 return associates;
144}
145
146static VALUE
147associated_pointer(VALUE associates, const char *t)
148{
149 const VALUE *p = RARRAY_CONST_PTR(associates);
150 const VALUE *pend = p + RARRAY_LEN(associates);
151 for (; p < pend; p++) {
152 VALUE tmp = *p;
153 if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
154 }
155 rb_raise(rb_eArgError, "non associated pointer");
157}
158
160static void
161unknown_directive(const char *mode, char type, VALUE fmt)
162{
163 char unknown[5];
164
165 if (ISPRINT(type)) {
166 unknown[0] = type;
167 unknown[1] = '\0';
168 }
169 else {
170 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
171 }
172 fmt = rb_str_quote_unprintable(fmt);
173 rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'",
174 mode, unknown, fmt);
175}
176
177static float
178VALUE_to_float(VALUE obj)
179{
180 VALUE v = rb_to_float(obj);
181 double d = RFLOAT_VALUE(v);
182
183 if (isnan(d)) {
184 return NAN;
185 }
186 else if (d < -FLT_MAX) {
187 return -INFINITY;
188 }
189 else if (d <= FLT_MAX) {
190 return d;
191 }
192 else {
193 return INFINITY;
194 }
195}
196
197static void
198str_expand_fill(VALUE res, int c, long len)
199{
200 long olen = RSTRING_LEN(res);
201 memset(RSTRING_PTR(res) + olen, c, len);
202 rb_str_set_len(res, olen + len);
203}
204
/*
 * Returns a pointer just past the first '\n' in [p, pend), or pend itself
 * when that range contains no newline.
 */
static char *
skip_to_eol(const char *p, const char *pend)
{
    const char *newline = memchr(p, '\n', pend - p);

    if (newline == NULL) {
        return (char *)pend;
    }
    return (char *)(newline + 1);
}
211
/*
 * True when +type+ is something to skip in a format string: either
 * whitespace, or '#', in which case p is also advanced past the rest of the
 * line.  Relies on a variable named pend being in scope at the point of use.
 */
#define skip_blank(p, type) \
 (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1)))
214
215#ifndef NATINT_PACK
216# define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e)
217#endif
218static char *
219pack_modifiers(const char *p, char type, int *natint, int *explicit_endian)
220{
221 while (1) {
222 switch (*p) {
223 case '_':
224 case '!':
225 if (strchr(natstr, type)) {
226#ifdef NATINT_PACK
227 *natint = 1;
228#endif
229 p++;
230 }
231 else {
232 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
233 }
234 break;
235
236 case '<':
237 case '>':
238 if (!strchr(endstr, type)) {
239 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
240 }
241 if (*explicit_endian) {
242 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
243 }
244 *explicit_endian = *p++;
245 break;
246 default:
247 return (char *)p;
248 }
249 }
250}
251
252static VALUE
253pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
254{
255 const char *p, *pend;
256 VALUE res, from, associates = 0;
257 long len, idx, plen;
258 const char *ptr;
259 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
260 int integer_size, bigendian_p;
261
262 StringValue(fmt);
264 p = RSTRING_PTR(fmt);
265 pend = p + RSTRING_LEN(fmt);
266
267 if (NIL_P(buffer)) {
268 res = rb_str_buf_new(0);
269 }
270 else {
271 if (!RB_TYPE_P(buffer, T_STRING))
272 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
273 rb_str_modify(buffer);
274 res = buffer;
275 }
276
277 idx = 0;
278
279#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
280#define MORE_ITEM (idx < RARRAY_LEN(ary))
281#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
282#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
283
284 while (p < pend) {
285 int explicit_endian = 0;
286 if (RSTRING_END(fmt) != pend) {
287 rb_raise(rb_eRuntimeError, "format string modified");
288 }
289 const char type = *p++; /* get data type */
290#ifdef NATINT_PACK
291 int natint = 0; /* native integer */
292#endif
293
294 if (skip_blank(p, type)) continue;
295 p = pack_modifiers(p, type, &natint, &explicit_endian);
296
297 if (*p == '*') { /* set data length */
298 len = strchr("@Xxu", type) ? 0
299 : strchr("PMm", type) ? 1
300 : RARRAY_LEN(ary) - idx;
301 p++;
302 }
303 else if (ISDIGIT(*p)) {
304 errno = 0;
305 len = STRTOUL(p, (char**)&p, 10);
306 if (len < 0 || errno) {
307 rb_raise(rb_eRangeError, "pack length too big");
308 }
309 }
310 else {
311 len = 1;
312 }
313
314 switch (type) {
315 case 'U':
316 /* if encoding is US-ASCII, upgrade to UTF-8 */
317 if (enc_info == 1) enc_info = 2;
318 break;
319 case 'm': case 'M': case 'u':
320 /* keep US-ASCII (do nothing) */
321 break;
322 default:
323 /* fall back to BINARY */
324 enc_info = 0;
325 break;
326 }
327 switch (type) {
328 case 'A': case 'a': case 'Z':
329 case 'B': case 'b':
330 case 'H': case 'h':
331 from = NEXTFROM;
332 if (NIL_P(from)) {
333 ptr = "";
334 plen = 0;
335 }
336 else {
337 StringValue(from);
338 ptr = RSTRING_PTR(from);
339 plen = RSTRING_LEN(from);
340 }
341
342 if (p[-1] == '*')
343 len = plen;
344
345 switch (type) {
346 case 'a': /* arbitrary binary string (null padded) */
347 case 'A': /* arbitrary binary string (ASCII space padded) */
348 case 'Z': /* null terminated string */
349 if (plen >= len) {
350 rb_str_buf_cat(res, ptr, len);
351 if (p[-1] == '*' && type == 'Z')
352 rb_str_buf_cat(res, "", 1);
353 }
354 else {
356 rb_str_buf_cat(res, ptr, plen);
357 str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen);
358 }
359 break;
360
361#define castchar(from) (char)((from) & 0xff)
362
363 case 'b': /* bit string (ascending) */
364 {
365 int byte = 0;
366 long i, j = 0;
367
368 if (len > plen) {
369 j = (len - plen + 1)/2;
370 len = plen;
371 }
372 for (i=0; i++ < len; ptr++) {
373 if (*ptr & 1)
374 byte |= 128;
375 if (i & 7)
376 byte >>= 1;
377 else {
378 char c = castchar(byte);
379 rb_str_buf_cat(res, &c, 1);
380 byte = 0;
381 }
382 }
383 if (len & 7) {
384 char c;
385 byte >>= 7 - (len & 7);
386 c = castchar(byte);
387 rb_str_buf_cat(res, &c, 1);
388 }
389 len = j;
390 goto grow;
391 }
392 break;
393
394 case 'B': /* bit string (descending) */
395 {
396 int byte = 0;
397 long i, j = 0;
398
399 if (len > plen) {
400 j = (len - plen + 1)/2;
401 len = plen;
402 }
403 for (i=0; i++ < len; ptr++) {
404 byte |= *ptr & 1;
405 if (i & 7)
406 byte <<= 1;
407 else {
408 char c = castchar(byte);
409 rb_str_buf_cat(res, &c, 1);
410 byte = 0;
411 }
412 }
413 if (len & 7) {
414 char c;
415 byte <<= 7 - (len & 7);
416 c = castchar(byte);
417 rb_str_buf_cat(res, &c, 1);
418 }
419 len = j;
420 goto grow;
421 }
422 break;
423
424 case 'h': /* hex string (low nibble first) */
425 {
426 int byte = 0;
427 long i, j = 0;
428
429 if (len > plen) {
430 j = (len + 1) / 2 - (plen + 1) / 2;
431 len = plen;
432 }
433 for (i=0; i++ < len; ptr++) {
434 if (ISALPHA(*ptr))
435 byte |= (((*ptr & 15) + 9) & 15) << 4;
436 else
437 byte |= (*ptr & 15) << 4;
438 if (i & 1)
439 byte >>= 4;
440 else {
441 char c = castchar(byte);
442 rb_str_buf_cat(res, &c, 1);
443 byte = 0;
444 }
445 }
446 if (len & 1) {
447 char c = castchar(byte);
448 rb_str_buf_cat(res, &c, 1);
449 }
450 len = j;
451 goto grow;
452 }
453 break;
454
455 case 'H': /* hex string (high nibble first) */
456 {
457 int byte = 0;
458 long i, j = 0;
459
460 if (len > plen) {
461 j = (len + 1) / 2 - (plen + 1) / 2;
462 len = plen;
463 }
464 for (i=0; i++ < len; ptr++) {
465 if (ISALPHA(*ptr))
466 byte |= ((*ptr & 15) + 9) & 15;
467 else
468 byte |= *ptr & 15;
469 if (i & 1)
470 byte <<= 4;
471 else {
472 char c = castchar(byte);
473 rb_str_buf_cat(res, &c, 1);
474 byte = 0;
475 }
476 }
477 if (len & 1) {
478 char c = castchar(byte);
479 rb_str_buf_cat(res, &c, 1);
480 }
481 len = j;
482 goto grow;
483 }
484 break;
485 }
486 break;
487
488 case 'c': /* signed char */
489 case 'C': /* unsigned char */
490 integer_size = 1;
491 bigendian_p = BIGENDIAN_P(); /* not effective */
492 goto pack_integer;
493
494 case 's': /* s for int16_t, s! for signed short */
495 case 'S': /* S for uint16_t, S! for unsigned short */
496 integer_size = NATINT_LEN(short, 2);
497 bigendian_p = BIGENDIAN_P();
498 goto pack_integer;
499
500 case 'i': /* i and i! for signed int */
501 case 'I': /* I and I! for unsigned int */
502 integer_size = (int)sizeof(int);
503 bigendian_p = BIGENDIAN_P();
504 goto pack_integer;
505
506 case 'l': /* l for int32_t, l! for signed long */
507 case 'L': /* L for uint32_t, L! for unsigned long */
508 integer_size = NATINT_LEN(long, 4);
509 bigendian_p = BIGENDIAN_P();
510 goto pack_integer;
511
512 case 'q': /* q for int64_t, q! for signed long long */
513 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
514 integer_size = NATINT_LEN_Q;
515 bigendian_p = BIGENDIAN_P();
516 goto pack_integer;
517
518 case 'j': /* j for intptr_t */
519 integer_size = sizeof(intptr_t);
520 bigendian_p = BIGENDIAN_P();
521 goto pack_integer;
522
523 case 'J': /* J for uintptr_t */
524 integer_size = sizeof(uintptr_t);
525 bigendian_p = BIGENDIAN_P();
526 goto pack_integer;
527
528 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
529 integer_size = 2;
530 bigendian_p = 1;
531 goto pack_integer;
532
533 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
534 integer_size = 4;
535 bigendian_p = 1;
536 goto pack_integer;
537
538 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
539 integer_size = 2;
540 bigendian_p = 0;
541 goto pack_integer;
542
543 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
544 integer_size = 4;
545 bigendian_p = 0;
546 goto pack_integer;
547
548 pack_integer:
549 if (explicit_endian) {
550 bigendian_p = explicit_endian == '>';
551 }
552 if (integer_size > MAX_INTEGER_PACK_SIZE)
553 rb_bug("unexpected integer size for pack: %d", integer_size);
554 while (len-- > 0) {
555 char intbuf[MAX_INTEGER_PACK_SIZE];
556
557 from = NEXTFROM;
558 rb_integer_pack(from, intbuf, integer_size, 1, 0,
561 rb_str_buf_cat(res, intbuf, integer_size);
562 }
563 break;
564
565 case 'f': /* single precision float in native format */
566 case 'F': /* ditto */
567 while (len-- > 0) {
568 float f;
569
570 from = NEXTFROM;
571 f = VALUE_to_float(from);
572 rb_str_buf_cat(res, (char*)&f, sizeof(float));
573 }
574 break;
575
576 case 'e': /* single precision float in VAX byte-order */
577 while (len-- > 0) {
578 FLOAT_CONVWITH(tmp);
579
580 from = NEXTFROM;
581 tmp.f = VALUE_to_float(from);
582 HTOVF(tmp);
583 rb_str_buf_cat(res, tmp.buf, sizeof(float));
584 }
585 break;
586
587 case 'E': /* double precision float in VAX byte-order */
588 while (len-- > 0) {
589 DOUBLE_CONVWITH(tmp);
590 from = NEXTFROM;
591 tmp.d = RFLOAT_VALUE(rb_to_float(from));
592 HTOVD(tmp);
593 rb_str_buf_cat(res, tmp.buf, sizeof(double));
594 }
595 break;
596
597 case 'd': /* double precision float in native format */
598 case 'D': /* ditto */
599 while (len-- > 0) {
600 double d;
601
602 from = NEXTFROM;
603 d = RFLOAT_VALUE(rb_to_float(from));
604 rb_str_buf_cat(res, (char*)&d, sizeof(double));
605 }
606 break;
607
608 case 'g': /* single precision float in network byte-order */
609 while (len-- > 0) {
610 FLOAT_CONVWITH(tmp);
611 from = NEXTFROM;
612 tmp.f = VALUE_to_float(from);
613 HTONF(tmp);
614 rb_str_buf_cat(res, tmp.buf, sizeof(float));
615 }
616 break;
617
618 case 'G': /* double precision float in network byte-order */
619 while (len-- > 0) {
620 DOUBLE_CONVWITH(tmp);
621
622 from = NEXTFROM;
623 tmp.d = RFLOAT_VALUE(rb_to_float(from));
624 HTOND(tmp);
625 rb_str_buf_cat(res, tmp.buf, sizeof(double));
626 }
627 break;
628
629 case 'x': /* null byte */
630 grow:
632 str_expand_fill(res, '\0', len);
633 break;
634
635 case 'X': /* back up byte */
636 shrink:
637 plen = RSTRING_LEN(res);
638 if (plen < len)
639 rb_raise(rb_eArgError, "X outside of string");
640 rb_str_set_len(res, plen - len);
641 break;
642
643 case '@': /* null fill to absolute position */
644 len -= RSTRING_LEN(res);
645 if (len > 0) goto grow;
646 len = -len;
647 if (len > 0) goto shrink;
648 break;
649
650 case '%':
651 rb_raise(rb_eArgError, "%% is not supported");
652 break;
653
654 case 'U': /* Unicode character */
655 while (len-- > 0) {
656 SIGNED_VALUE l;
657 char buf[8];
658 int le;
659
660 from = NEXTFROM;
661 from = rb_to_int(from);
662 l = NUM2LONG(from);
663 if (l < 0) {
664 rb_raise(rb_eRangeError, "pack(U): value out of range");
665 }
666 le = rb_uv_to_utf8(buf, l);
667 rb_str_buf_cat(res, (char*)buf, le);
668 }
669 break;
670
671 case 'r': /* r for SLEB128 encoding (signed) */
672 case 'R': /* R for ULEB128 encoding (unsigned) */
673 {
674 int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
675
676 if (type == 'r') {
677 pack_flags |= INTEGER_PACK_2COMP;
678 }
679
680 while (len-- > 0) {
681 size_t numbytes, nlz_bits;
682 int sign, extra = 0;
683 char *cp;
684 const long start = RSTRING_LEN(res);
685
686 from = NEXTFROM;
687 from = rb_to_int(from);
688 if (type == 'R' && rb_int_negative_p(from)) {
689 rb_raise(rb_eArgError, "can't encode negative numbers in ULEB128");
690 }
691
692 numbytes = rb_absint_numwords(from, 7, &nlz_bits);
693 if (numbytes == 0) {
694 numbytes = 1;
695 }
696 else if (nlz_bits == 0 && type == 'r') {
697 /* No leading zero bits, we need an extra byte for sign extension */
698 extra = 1;
699 }
700 rb_str_modify_expand(res, numbytes + extra);
701
702 cp = RSTRING_PTR(res) + start;
703 sign = rb_integer_pack(from, cp, numbytes, 1, 1, pack_flags);
704
705 if (extra) {
706 /* Need an extra byte */
707 cp[numbytes++] = sign < 0 ? 0x7f : 0x00;
708 }
709 rb_str_set_len(res, start + numbytes);
710
711 while (1 < numbytes) {
712 *cp |= 0x80;
713 cp++;
714 numbytes--;
715 }
716 }
717 }
718 break;
719 case 'u': /* uuencoded string */
720 case 'm': /* base64 encoded string */
721 from = NEXTFROM;
722 StringValue(from);
723 ptr = RSTRING_PTR(from);
724 plen = RSTRING_LEN(from);
725
726 if (len == 0 && type == 'm') {
727 encodes(res, ptr, plen, type, 0);
728 ptr += plen;
729 break;
730 }
731 if (len <= 2)
732 len = 45;
733 else if (len > 63 && type == 'u')
734 len = 63;
735 else
736 len = len / 3 * 3;
737 while (plen > 0) {
738 long todo;
739
740 if (plen > len)
741 todo = len;
742 else
743 todo = plen;
744 encodes(res, ptr, todo, type, 1);
745 plen -= todo;
746 ptr += todo;
747 }
748 break;
749
750 case 'M': /* quoted-printable encoded string */
751 from = rb_obj_as_string(NEXTFROM);
752 if (len <= 1)
753 len = 72;
754 qpencode(res, from, len);
755 break;
756
757 case 'P': /* pointer to packed byte string */
758 from = THISFROM;
759 if (!NIL_P(from)) {
760 StringValue(from);
761 if (RSTRING_LEN(from) < len) {
762 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
763 RSTRING_LEN(from), len);
764 }
765 }
766 len = 1;
767 /* FALL THROUGH */
768 case 'p': /* pointer to string */
769 while (len-- > 0) {
770 char *t;
771 from = NEXTFROM;
772 if (NIL_P(from)) {
773 t = 0;
774 }
775 else {
776 t = StringValuePtr(from);
777 }
778 if (!associates) {
779 associates = rb_ary_new();
780 }
781 rb_ary_push(associates, from);
782 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
783 }
784 break;
785
786 case 'w': /* BER compressed integer */
787 while (len-- > 0) {
788 VALUE buf;
789 size_t numbytes;
790 int sign;
791 char *cp;
792
793 from = NEXTFROM;
794 from = rb_to_int(from);
795 numbytes = rb_absint_numwords(from, 7, NULL);
796 if (numbytes == 0)
797 numbytes = 1;
798 buf = rb_str_new(NULL, numbytes);
799
800 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
801
802 if (sign < 0)
803 rb_raise(rb_eArgError, "can't compress negative numbers");
804 if (sign == 2)
805 rb_bug("buffer size problem?");
806
807 cp = RSTRING_PTR(buf);
808 while (1 < numbytes) {
809 *cp |= 0x80;
810 cp++;
811 numbytes--;
812 }
813
814 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
815 }
816 break;
817
818 default: {
819 unknown_directive("pack", type, fmt);
820 break;
821 }
822 }
823 }
824
825 if (associates) {
826 str_associate(res, associates);
827 }
828 switch (enc_info) {
829 case 1:
830 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
831 break;
832 case 2:
833 rb_enc_set_index(res, rb_utf8_encindex());
834 break;
835 default:
836 /* do nothing, keep ASCII-8BIT */
837 break;
838 }
839 return res;
840}
841
842VALUE
843rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
844{
845 return pack_pack(ec, ary, fmt, buffer);
846}
847
848static const char uu_table[] =
849"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
850static const char b64_table[] =
851"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
852
853static void
854encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
855{
856 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
857 char buff[buff_size + 1]; /* +1 for tail_lf */
858 long i = 0;
859 const char *const trans = type == 'u' ? uu_table : b64_table;
860 char padding;
861 const unsigned char *s = (const unsigned char *)s0;
862
863 if (type == 'u') {
864 buff[i++] = (char)len + ' ';
865 padding = '`';
866 }
867 else {
868 padding = '=';
869 }
870 while (len >= input_unit) {
871 while (len >= input_unit && buff_size-i >= encoded_unit) {
872 buff[i++] = trans[077 & (*s >> 2)];
873 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
874 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
875 buff[i++] = trans[077 & s[2]];
876 s += input_unit;
877 len -= input_unit;
878 }
879 if (buff_size-i < encoded_unit) {
880 rb_str_buf_cat(str, buff, i);
881 i = 0;
882 }
883 }
884
885 if (len == 2) {
886 buff[i++] = trans[077 & (*s >> 2)];
887 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
888 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
889 buff[i++] = padding;
890 }
891 else if (len == 1) {
892 buff[i++] = trans[077 & (*s >> 2)];
893 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
894 buff[i++] = padding;
895 buff[i++] = padding;
896 }
897 if (tail_lf) buff[i++] = '\n';
898 rb_str_buf_cat(str, buff, i);
899 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
900}
901
902static const char hex_table[] = "0123456789ABCDEF";
903
904static void
905qpencode(VALUE str, VALUE from, long len)
906{
907 char buff[1024];
908 long i = 0, n = 0, prev = EOF;
909 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
910 unsigned char *send = s + RSTRING_LEN(from);
911
912 while (s < send) {
913 if ((*s > 126) ||
914 (*s < 32 && *s != '\n' && *s != '\t') ||
915 (*s == '=')) {
916 buff[i++] = '=';
917 buff[i++] = hex_table[*s >> 4];
918 buff[i++] = hex_table[*s & 0x0f];
919 n += 3;
920 prev = EOF;
921 }
922 else if (*s == '\n') {
923 if (prev == ' ' || prev == '\t') {
924 buff[i++] = '=';
925 buff[i++] = *s;
926 }
927 buff[i++] = *s;
928 n = 0;
929 prev = *s;
930 }
931 else {
932 buff[i++] = *s;
933 n++;
934 prev = *s;
935 }
936 if (n > len) {
937 buff[i++] = '=';
938 buff[i++] = '\n';
939 n = 0;
940 prev = '\n';
941 }
942 if (i > 1024 - 5) {
943 rb_str_buf_cat(str, buff, i);
944 i = 0;
945 }
946 s++;
947 }
948 if (n > 0) {
949 buff[i++] = '=';
950 buff[i++] = '\n';
951 }
952 if (i > 0) {
953 rb_str_buf_cat(str, buff, i);
954 }
955}
956
957static inline int
958hex2num(char c)
959{
960 int n;
961 n = ruby_digit36_to_number_table[(unsigned char)c];
962 if (16 <= n)
963 n = -1;
964 return n;
965}
966
/*
 * Limits len to the number of whole sz-byte items remaining between s and
 * send.  When len had to be reduced and the count was not '*' (star is 0),
 * tmp_len records how many requested items are missing; otherwise tmp_len
 * is 0.  Uses the variables len, tmp_len, s, send and star of the
 * enclosing function.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do { \
 tmp_len = 0; \
 if (len > (long)((send-s)/(sz))) { \
 if (!star) { \
 tmp_len = len-(send-s)/(sz); \
 } \
 len = (send-s)/(sz); \
 } \
} while (0)

/*
 * In UNPACK_ARRAY mode, when tmp_len items were missing, stores nil at the
 * index tmp_len-1 places beyond the current end of ary -- presumably so the
 * result array ends up with one nil per missing item; confirm against
 * rb_ary_store's padding behaviour.
 */
#define PACK_ITEM_ADJUST() do { \
 if (tmp_len > 0 && mode == UNPACK_ARRAY) \
 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
981
982/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
983 * 12.4/12.5/12.6 C compiler optimization bug
984 * with "-xO4" optimization option.
985 */
986#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
987# define AVOID_CC_BUG volatile
988#else
989# define AVOID_CC_BUG
990#endif
991
/* Selects what UNPACK_PUSH does with each unpacked item. */
enum unpack_mode {
 UNPACK_ARRAY, /* push the item onto the result array */
 UNPACK_BLOCK, /* yield the item to the block */
 UNPACK_1 /* return the first item immediately */
};
997
998static VALUE
999pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
1000{
1001#define hexdigits ruby_hexdigits
1002 char *s, *send;
1003 char *p, *pend;
1004 VALUE ary, associates = Qfalse;
1005 long len;
1006 AVOID_CC_BUG long tmp_len;
1007 int signed_p, integer_size, bigendian_p;
1008#define UNPACK_PUSH(item) do {\
1009 VALUE item_val = (item);\
1010 if ((mode) == UNPACK_BLOCK) {\
1011 rb_yield(item_val);\
1012 }\
1013 else if ((mode) == UNPACK_ARRAY) {\
1014 rb_ary_push(ary, item_val);\
1015 }\
1016 else /* if ((mode) == UNPACK_1) { */ {\
1017 return item_val; \
1018 }\
1019 } while (0)
1020
1021 StringValue(str);
1022 StringValue(fmt);
1024
1025 if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
1026 len = RSTRING_LEN(str);
1027 if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
1028
1029 s = RSTRING_PTR(str);
1030 send = s + len;
1031 s += offset;
1032
1033 p = RSTRING_PTR(fmt);
1034 pend = p + RSTRING_LEN(fmt);
1035
1036#define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
1037
1038 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
1039 while (p < pend) {
1040 int explicit_endian = 0;
1041 const char type = *p++;
1042#ifdef NATINT_PACK
1043 int natint = 0; /* native integer */
1044#endif
1045 int star = 0;
1046
1047 if (skip_blank(p, type)) continue;
1048 p = pack_modifiers(p, type, &natint, &explicit_endian);
1049
1050 if (p >= pend)
1051 len = 1;
1052 else if (*p == '*') {
1053 star = 1;
1054 len = send - s;
1055 p++;
1056 }
1057 else if (ISDIGIT(*p)) {
1058 errno = 0;
1059 len = STRTOUL(p, (char**)&p, 10);
1060 if (len < 0 || errno) {
1061 rb_raise(rb_eRangeError, "pack length too big");
1062 }
1063 }
1064 else {
1065 len = (type != '@');
1066 }
1067
1068 switch (type) {
1069 case '%':
1070 rb_raise(rb_eArgError, "%% is not supported");
1071 break;
1072
1073 case 'A':
1074 if (len > send - s) len = send - s;
1075 {
1076 long end = len;
1077 char *t = s + len - 1;
1078
1079 while (t >= s) {
1080 if (*t != ' ' && *t != '\0') break;
1081 t--; len--;
1082 }
1083 UNPACK_PUSH(rb_str_new(s, len));
1084 s += end;
1085 }
1086 break;
1087
1088 case 'Z':
1089 {
1090 char *t = s;
1091
1092 if (len > send-s) len = send-s;
1093 while (t < s+len && *t) t++;
1094 UNPACK_PUSH(rb_str_new(s, t-s));
1095 if (t < send) t++;
1096 s = star ? t : s+len;
1097 }
1098 break;
1099
1100 case 'a':
1101 if (len > send - s) len = send - s;
1102 UNPACK_PUSH(rb_str_new(s, len));
1103 s += len;
1104 break;
1105
1106 case 'b':
1107 {
1108 VALUE bitstr;
1109 char *t;
1110 int bits;
1111 long i;
1112
1113 if (p[-1] == '*' || len > (send - s) * 8)
1114 len = (send - s) * 8;
1115 bits = 0;
1116 bitstr = rb_usascii_str_new(0, len);
1117 t = RSTRING_PTR(bitstr);
1118 for (i=0; i<len; i++) {
1119 if (i & 7) bits >>= 1;
1120 else bits = (unsigned char)*s++;
1121 *t++ = (bits & 1) ? '1' : '0';
1122 }
1123 UNPACK_PUSH(bitstr);
1124 }
1125 break;
1126
1127 case 'B':
1128 {
1129 VALUE bitstr;
1130 char *t;
1131 int bits;
1132 long i;
1133
1134 if (p[-1] == '*' || len > (send - s) * 8)
1135 len = (send - s) * 8;
1136 bits = 0;
1137 bitstr = rb_usascii_str_new(0, len);
1138 t = RSTRING_PTR(bitstr);
1139 for (i=0; i<len; i++) {
1140 if (i & 7) bits <<= 1;
1141 else bits = (unsigned char)*s++;
1142 *t++ = (bits & 128) ? '1' : '0';
1143 }
1144 UNPACK_PUSH(bitstr);
1145 }
1146 break;
1147
1148 case 'h':
1149 {
1150 VALUE bitstr;
1151 char *t;
1152 int bits;
1153 long i;
1154
1155 if (p[-1] == '*' || len > (send - s) * 2)
1156 len = (send - s) * 2;
1157 bits = 0;
1158 bitstr = rb_usascii_str_new(0, len);
1159 t = RSTRING_PTR(bitstr);
1160 for (i=0; i<len; i++) {
1161 if (i & 1)
1162 bits >>= 4;
1163 else
1164 bits = (unsigned char)*s++;
1165 *t++ = hexdigits[bits & 15];
1166 }
1167 UNPACK_PUSH(bitstr);
1168 }
1169 break;
1170
1171 case 'H':
1172 {
1173 VALUE bitstr;
1174 char *t;
1175 int bits;
1176 long i;
1177
1178 if (p[-1] == '*' || len > (send - s) * 2)
1179 len = (send - s) * 2;
1180 bits = 0;
1181 bitstr = rb_usascii_str_new(0, len);
1182 t = RSTRING_PTR(bitstr);
1183 for (i=0; i<len; i++) {
1184 if (i & 1)
1185 bits <<= 4;
1186 else
1187 bits = (unsigned char)*s++;
1188 *t++ = hexdigits[(bits >> 4) & 15];
1189 }
1190 UNPACK_PUSH(bitstr);
1191 }
1192 break;
1193
1194 case 'c':
1195 signed_p = 1;
1196 integer_size = 1;
1197 bigendian_p = BIGENDIAN_P(); /* not effective */
1198 goto unpack_integer;
1199
1200 case 'C':
1201 signed_p = 0;
1202 integer_size = 1;
1203 bigendian_p = BIGENDIAN_P(); /* not effective */
1204 goto unpack_integer;
1205
1206 case 's':
1207 signed_p = 1;
1208 integer_size = NATINT_LEN(short, 2);
1209 bigendian_p = BIGENDIAN_P();
1210 goto unpack_integer;
1211
1212 case 'S':
1213 signed_p = 0;
1214 integer_size = NATINT_LEN(short, 2);
1215 bigendian_p = BIGENDIAN_P();
1216 goto unpack_integer;
1217
1218 case 'i':
1219 signed_p = 1;
1220 integer_size = (int)sizeof(int);
1221 bigendian_p = BIGENDIAN_P();
1222 goto unpack_integer;
1223
1224 case 'I':
1225 signed_p = 0;
1226 integer_size = (int)sizeof(int);
1227 bigendian_p = BIGENDIAN_P();
1228 goto unpack_integer;
1229
1230 case 'l':
1231 signed_p = 1;
1232 integer_size = NATINT_LEN(long, 4);
1233 bigendian_p = BIGENDIAN_P();
1234 goto unpack_integer;
1235
1236 case 'L':
1237 signed_p = 0;
1238 integer_size = NATINT_LEN(long, 4);
1239 bigendian_p = BIGENDIAN_P();
1240 goto unpack_integer;
1241
1242 case 'q':
1243 signed_p = 1;
1244 integer_size = NATINT_LEN_Q;
1245 bigendian_p = BIGENDIAN_P();
1246 goto unpack_integer;
1247
1248 case 'Q':
1249 signed_p = 0;
1250 integer_size = NATINT_LEN_Q;
1251 bigendian_p = BIGENDIAN_P();
1252 goto unpack_integer;
1253
1254 case 'j':
1255 signed_p = 1;
1256 integer_size = sizeof(intptr_t);
1257 bigendian_p = BIGENDIAN_P();
1258 goto unpack_integer;
1259
1260 case 'J':
1261 signed_p = 0;
1262 integer_size = sizeof(uintptr_t);
1263 bigendian_p = BIGENDIAN_P();
1264 goto unpack_integer;
1265
1266 case 'n':
1267 signed_p = 0;
1268 integer_size = 2;
1269 bigendian_p = 1;
1270 goto unpack_integer;
1271
1272 case 'N':
1273 signed_p = 0;
1274 integer_size = 4;
1275 bigendian_p = 1;
1276 goto unpack_integer;
1277
1278 case 'v':
1279 signed_p = 0;
1280 integer_size = 2;
1281 bigendian_p = 0;
1282 goto unpack_integer;
1283
1284 case 'V':
1285 signed_p = 0;
1286 integer_size = 4;
1287 bigendian_p = 0;
1288 goto unpack_integer;
1289
1290 unpack_integer:
1291 if (explicit_endian) {
1292 bigendian_p = explicit_endian == '>';
1293 }
1294 PACK_LENGTH_ADJUST_SIZE(integer_size);
1295 while (len-- > 0) {
1296 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1297 VALUE val;
1298 if (signed_p)
1299 flags |= INTEGER_PACK_2COMP;
1300 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1301 UNPACK_PUSH(val);
1302 s += integer_size;
1303 }
1304 PACK_ITEM_ADJUST();
1305 break;
1306
1307 case 'f':
1308 case 'F':
1309 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1310 while (len-- > 0) {
1311 float tmp;
1312 UNPACK_FETCH(&tmp, float);
1313 UNPACK_PUSH(DBL2NUM((double)tmp));
1314 }
1315 PACK_ITEM_ADJUST();
1316 break;
1317
1318 case 'e':
1319 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1320 while (len-- > 0) {
1321 FLOAT_CONVWITH(tmp);
1322 UNPACK_FETCH(tmp.buf, float);
1323 VTOHF(tmp);
1324 UNPACK_PUSH(DBL2NUM(tmp.f));
1325 }
1326 PACK_ITEM_ADJUST();
1327 break;
1328
1329 case 'E':
1330 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1331 while (len-- > 0) {
1332 DOUBLE_CONVWITH(tmp);
1333 UNPACK_FETCH(tmp.buf, double);
1334 VTOHD(tmp);
1335 UNPACK_PUSH(DBL2NUM(tmp.d));
1336 }
1337 PACK_ITEM_ADJUST();
1338 break;
1339
1340 case 'D':
1341 case 'd':
1342 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1343 while (len-- > 0) {
1344 double tmp;
1345 UNPACK_FETCH(&tmp, double);
1346 UNPACK_PUSH(DBL2NUM(tmp));
1347 }
1348 PACK_ITEM_ADJUST();
1349 break;
1350
1351 case 'g':
1352 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1353 while (len-- > 0) {
1354 FLOAT_CONVWITH(tmp);
1355 UNPACK_FETCH(tmp.buf, float);
1356 NTOHF(tmp);
1357 UNPACK_PUSH(DBL2NUM(tmp.f));
1358 }
1359 PACK_ITEM_ADJUST();
1360 break;
1361
1362 case 'G':
1363 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1364 while (len-- > 0) {
1365 DOUBLE_CONVWITH(tmp);
1366 UNPACK_FETCH(tmp.buf, double);
1367 NTOHD(tmp);
1368 UNPACK_PUSH(DBL2NUM(tmp.d));
1369 }
1370 PACK_ITEM_ADJUST();
1371 break;
1372
1373 case 'U':
1374 if (len > send - s) len = send - s;
1375 while (len > 0 && s < send) {
1376 long alen = send - s;
1377 unsigned long l;
1378
1379 l = utf8_to_uv(s, &alen);
1380 s += alen; len--;
1381 UNPACK_PUSH(ULONG2NUM(l));
1382 }
1383 break;
1384
1385 case 'u':
1386 {
1387 VALUE buf = rb_str_new(0, (send - s)*3/4);
1388 char *ptr = RSTRING_PTR(buf);
1389 long total = 0;
1390
1391 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1392 long a,b,c,d;
1393 char hunk[3];
1394
1395 len = ((unsigned char)*s++ - ' ') & 077;
1396
1397 total += len;
1398 if (total > RSTRING_LEN(buf)) {
1399 len -= total - RSTRING_LEN(buf);
1400 total = RSTRING_LEN(buf);
1401 }
1402
1403 while (len > 0) {
1404 long mlen = len > 3 ? 3 : len;
1405
1406 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1407 a = ((unsigned char)*s++ - ' ') & 077;
1408 else
1409 a = 0;
1410 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1411 b = ((unsigned char)*s++ - ' ') & 077;
1412 else
1413 b = 0;
1414 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1415 c = ((unsigned char)*s++ - ' ') & 077;
1416 else
1417 c = 0;
1418 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1419 d = ((unsigned char)*s++ - ' ') & 077;
1420 else
1421 d = 0;
1422 hunk[0] = (char)(a << 2 | b >> 4);
1423 hunk[1] = (char)(b << 4 | c >> 2);
1424 hunk[2] = (char)(c << 6 | d);
1425 memcpy(ptr, hunk, mlen);
1426 ptr += mlen;
1427 len -= mlen;
1428 }
1429 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1430 s++; /* possible checksum byte */
1431 if (s < send && *s == '\r') s++;
1432 if (s < send && *s == '\n') s++;
1433 }
1434
1435 rb_str_set_len(buf, total);
1436 UNPACK_PUSH(buf);
1437 }
1438 break;
1439
1440 case 'm':
1441 {
1442 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1443 char *ptr = RSTRING_PTR(buf);
1444 int a = -1,b = -1,c = 0,d = 0;
1445 static signed char b64_xtable[256];
1446
1447 if (b64_xtable['/'] <= 0) {
1448 int i;
1449
1450 for (i = 0; i < 256; i++) {
1451 b64_xtable[i] = -1;
1452 }
1453 for (i = 0; i < 64; i++) {
1454 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1455 }
1456 }
1457 if (len == 0) {
1458 while (s < send) {
1459 a = b = c = d = -1;
1460 a = b64_xtable[(unsigned char)*s++];
1461 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1462 b = b64_xtable[(unsigned char)*s++];
1463 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1464 if (*s == '=') {
1465 if (s + 2 == send && *(s + 1) == '=') break;
1466 rb_raise(rb_eArgError, "invalid base64");
1467 }
1468 c = b64_xtable[(unsigned char)*s++];
1469 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1470 if (s + 1 == send && *s == '=') break;
1471 d = b64_xtable[(unsigned char)*s++];
1472 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1473 *ptr++ = castchar(a << 2 | b >> 4);
1474 *ptr++ = castchar(b << 4 | c >> 2);
1475 *ptr++ = castchar(c << 6 | d);
1476 }
1477 if (c == -1) {
1478 *ptr++ = castchar(a << 2 | b >> 4);
1479 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1480 }
1481 else if (d == -1) {
1482 *ptr++ = castchar(a << 2 | b >> 4);
1483 *ptr++ = castchar(b << 4 | c >> 2);
1484 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1485 }
1486 }
1487 else {
1488 while (s < send) {
1489 a = b = c = d = -1;
1490 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1491 if (s >= send) break;
1492 s++;
1493 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1494 if (s >= send) break;
1495 s++;
1496 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1497 if (*s == '=' || s >= send) break;
1498 s++;
1499 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1500 if (*s == '=' || s >= send) break;
1501 s++;
1502 *ptr++ = castchar(a << 2 | b >> 4);
1503 *ptr++ = castchar(b << 4 | c >> 2);
1504 *ptr++ = castchar(c << 6 | d);
1505 a = -1;
1506 }
1507 if (a != -1 && b != -1) {
1508 if (c == -1)
1509 *ptr++ = castchar(a << 2 | b >> 4);
1510 else {
1511 *ptr++ = castchar(a << 2 | b >> 4);
1512 *ptr++ = castchar(b << 4 | c >> 2);
1513 }
1514 }
1515 }
1516 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1517 UNPACK_PUSH(buf);
1518 }
1519 break;
1520
1521 case 'M':
1522 {
1523 VALUE buf = rb_str_new(0, send - s);
1524 char *ptr = RSTRING_PTR(buf), *ss = s;
1525 int csum = 0;
1526 int c1, c2;
1527
1528 while (s < send) {
1529 if (*s == '=') {
1530 if (++s == send) break;
1531 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1532 s++;
1533 if (*s != '\n') {
1534 if ((c1 = hex2num(*s)) == -1) break;
1535 if (++s == send) break;
1536 if ((c2 = hex2num(*s)) == -1) break;
1537 csum |= *ptr++ = castchar(c1 << 4 | c2);
1538 }
1539 }
1540 else {
1541 csum |= *ptr++ = *s;
1542 }
1543 s++;
1544 ss = s;
1545 }
1546 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1547 rb_str_buf_cat(buf, ss, send-ss);
1549 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
1550 UNPACK_PUSH(buf);
1551 }
1552 break;
1553
1554 case '@':
1555 if (len > RSTRING_LEN(str))
1556 rb_raise(rb_eArgError, "@ outside of string");
1557 s = RSTRING_PTR(str) + len;
1558 break;
1559
1560 case 'X':
1561 if (len > s - RSTRING_PTR(str))
1562 rb_raise(rb_eArgError, "X outside of string");
1563 s -= len;
1564 break;
1565
1566 case 'x':
1567 if (len > send - s)
1568 rb_raise(rb_eArgError, "x outside of string");
1569 s += len;
1570 break;
1571
1572 case '^':
1573 UNPACK_PUSH(SSIZET2NUM(s - RSTRING_PTR(str)));
1574 break;
1575
1576 case 'P':
1577 if (sizeof(char *) <= (size_t)(send - s)) {
1578 VALUE tmp = Qnil;
1579 char *t;
1580
1581 UNPACK_FETCH(&t, char *);
1582 if (t) {
1583 if (!associates) associates = str_associated(str);
1584 tmp = associated_pointer(associates, t);
1585 if (len < RSTRING_LEN(tmp)) {
1586 tmp = rb_str_new(t, len);
1587 str_associate(tmp, associates);
1588 }
1589 }
1590 UNPACK_PUSH(tmp);
1591 }
1592 break;
1593
1594 case 'p':
1595 if (len > (long)((send - s) / sizeof(char *)))
1596 len = (send - s) / sizeof(char *);
1597 while (len-- > 0) {
1598 if ((size_t)(send - s) < sizeof(char *))
1599 break;
1600 else {
1601 VALUE tmp = Qnil;
1602 char *t;
1603
1604 UNPACK_FETCH(&t, char *);
1605 if (t) {
1606 if (!associates) associates = str_associated(str);
1607 tmp = associated_pointer(associates, t);
1608 }
1609 UNPACK_PUSH(tmp);
1610 }
1611 }
1612 break;
1613
1614 case 'r':
1615 case 'R':
1616 {
1617 int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
1618
1619 if (type == 'r') {
1620 pack_flags |= INTEGER_PACK_2COMP;
1621 }
1622 char *s0 = s;
1623 while (len > 0 && s < send) {
1624 if (*s & 0x80) {
1625 s++;
1626 }
1627 else {
1628 s++;
1629 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, pack_flags));
1630 len--;
1631 s0 = s;
1632 }
1633 }
1634 /* Handle incomplete value and remaining expected values with nil (only if not using *) */
1635 if (!star) {
1636 if (s0 != s && len > 0) {
1637 UNPACK_PUSH(Qnil);
1638 len--;
1639 }
1640 while (len-- > 0) {
1641 UNPACK_PUSH(Qnil);
1642 }
1643 }
1644 }
1645 break;
1646
1647 case 'w':
1648 {
1649 char *s0 = s;
1650 while (len > 0 && s < send) {
1651 if (*s & 0x80) {
1652 s++;
1653 }
1654 else {
1655 s++;
1656 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1657 len--;
1658 s0 = s;
1659 }
1660 }
1661 }
1662 break;
1663
1664 default:
1665 unknown_directive("unpack", type, fmt);
1666 break;
1667 }
1668 }
1669
1670 return ary;
1671}
1672
1673static VALUE
1674pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1675{
1676 enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1677 return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1678}
1679
1680static VALUE
1681pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1682{
1683 return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1684}
1685
1686int
1687rb_uv_to_utf8(char buf[6], unsigned long uv)
1688{
1689 if (uv <= 0x7f) {
1690 buf[0] = (char)uv;
1691 return 1;
1692 }
1693 if (uv <= 0x7ff) {
1694 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1695 buf[1] = castchar((uv&0x3f)|0x80);
1696 return 2;
1697 }
1698 if (uv <= 0xffff) {
1699 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1700 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1701 buf[2] = castchar((uv&0x3f)|0x80);
1702 return 3;
1703 }
1704 if (uv <= 0x1fffff) {
1705 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1706 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1707 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1708 buf[3] = castchar((uv&0x3f)|0x80);
1709 return 4;
1710 }
1711 if (uv <= 0x3ffffff) {
1712 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1713 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1714 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1715 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1716 buf[4] = castchar((uv&0x3f)|0x80);
1717 return 5;
1718 }
1719 if (uv <= 0x7fffffff) {
1720 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1721 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1722 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1723 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1724 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1725 buf[5] = castchar((uv&0x3f)|0x80);
1726 return 6;
1727 }
1728 rb_raise(rb_eRangeError, "pack(U): value out of range");
1729
1731}
1732
/*
 * Lower bounds consulted by utf8_to_uv(): entry [k] is compared against the
 * value decoded from a (k + 1)-byte sequence, and a decoded value below it
 * is rejected as a redundant sequence.
 */
static const unsigned long utf8_limits[] = {
    0x00000000UL,       /* 1 byte  */
    0x00000080UL,       /* 2 bytes */
    0x00000800UL,       /* 3 bytes */
    0x00010000UL,       /* 4 bytes */
    0x00200000UL,       /* 5 bytes */
    0x04000000UL,       /* 6 bytes */
    0x80000000UL,       /* 7 bytes */
};
1742
1743static unsigned long
1744utf8_to_uv(const char *p, long *lenp)
1745{
1746 int c = *p++ & 0xff;
1747 unsigned long uv = c;
1748 long n;
1749
1750 if (!(uv & 0x80)) {
1751 *lenp = 1;
1752 return uv;
1753 }
1754 if (!(uv & 0x40)) {
1755 *lenp = 1;
1756 rb_raise(rb_eArgError, "malformed UTF-8 character");
1757 }
1758
1759 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1760 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1761 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1762 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1763 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1764 else {
1765 *lenp = 1;
1766 rb_raise(rb_eArgError, "malformed UTF-8 character");
1767 }
1768 if (n > *lenp) {
1769 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1770 n, *lenp);
1771 }
1772 *lenp = n--;
1773 if (n != 0) {
1774 while (n--) {
1775 c = *p++ & 0xff;
1776 if ((c & 0xc0) != 0x80) {
1777 *lenp -= n + 1;
1778 rb_raise(rb_eArgError, "malformed UTF-8 character");
1779 }
1780 else {
1781 c &= 0x3f;
1782 uv = uv << 6 | c;
1783 }
1784 }
1785 }
1786 n = *lenp - 1;
1787 if (uv < utf8_limits[n]) {
1788 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1789 }
1790 return uv;
1791}
1792
1793#include "pack.rbinc"
1794
1795void
1796Init_pack(void)
1797{
1798 id_associated = rb_make_internal_id();
1799}
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition eval.c:1017
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
Definition coderange.h:181
#define RFLOAT_VALUE
Old name of rb_float_value.
Definition double.h:28
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
Definition assume.h:29
#define SSIZET2NUM
Old name of RB_SSIZE2NUM.
Definition size_t.h:64
#define STRTOUL
Old name of ruby_strtoul.
Definition ctype.h:104
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define NIL_P
Old name of RB_NIL_P.
#define DBL2NUM
Old name of rb_float_new.
Definition double.h:29
#define ISPRINT
Old name of rb_isprint.
Definition ctype.h:86
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition long.h:51
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
Definition coderange.h:189
VALUE rb_eRangeError
RangeError exception.
Definition error.c:1422
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1418
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1416
VALUE rb_to_float(VALUE val)
Identical to rb_check_to_float(), except it raises on error.
Definition object.c:3813
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
Definition object.c:3365
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
#define INTEGER_PACK_LITTLE_ENDIAN
Little endian combination.
Definition bignum.h:567
#define INTEGER_PACK_BIG_ENDIAN
Big endian combination.
Definition bignum.h:572
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
Definition pack.c:1687
#define INTEGER_PACK_2COMP
Uses 2's complement representation.
Definition bignum.h:549
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1499
#define rb_str_buf_cat
Just another name of rb_str_cat.
Definition string.h:1682
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
Definition string.h:1533
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3405
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
Definition string.c:2775
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition string.c:2729
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition string.c:1701
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
Definition string.c:1833
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
Definition variable.c:2024
int len
Length of the buffer.
Definition io.h:8
const signed char ruby_digit36_to_number_table[]
Character to number mapping like 'a' -> 10, 'b' -> 11, etc.
Definition util.c:60
#define RB_NUM2LONG
Just another name of rb_num2long_inline.
Definition long.h:57
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RBIMPL_ATTR_NORETURN()
Wraps (or simulates) [[noreturn]]
Definition noreturn.h:38
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
Definition rarray.h:52
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:66
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition rstring.h:76
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
Definition rstring.h:409
const char * rb_obj_classname(VALUE obj)
Queries the name of the class of the passed object.
Definition variable.c:515
#define errno
Ractor-aware version of errno.
Definition ruby.h:388
intptr_t SIGNED_VALUE
A signed integer type that has the same width as VALUE.
Definition value.h:63
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376