Ruby  3.4.0dev (2024-12-06 revision 892c46283a5ea4179500d951c9d4866c0051f27b)
marshal.c (892c46283a5ea4179500d951c9d4866c0051f27b)
1 /**********************************************************************
2 
3  marshal.c -
4 
5  $Author$
6  created at: Thu Apr 27 16:30:01 JST 1995
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/internal/config.h"
13 
14 #include <math.h>
15 #ifdef HAVE_FLOAT_H
16 #include <float.h>
17 #endif
18 #ifdef HAVE_IEEEFP_H
19 #include <ieeefp.h>
20 #endif
21 
22 #include "encindex.h"
23 #include "id_table.h"
24 #include "internal.h"
25 #include "internal/array.h"
26 #include "internal/bignum.h"
27 #include "internal/class.h"
28 #include "internal/encoding.h"
29 #include "internal/error.h"
30 #include "internal/hash.h"
31 #include "internal/numeric.h"
32 #include "internal/object.h"
33 #include "internal/struct.h"
34 #include "internal/symbol.h"
35 #include "internal/util.h"
36 #include "internal/vm.h"
37 #include "ruby/io.h"
38 #include "ruby/ruby.h"
39 #include "ruby/st.h"
40 #include "ruby/util.h"
41 #include "builtin.h"
42 #include "shape.h"
43 
/* Helpers for slicing an integer into units of two chars each: the unit
 * width in bits, a mask selecting one unit, and a shift dropping one unit.
 * They are used with w_short() when bignum digits are written. */
44 #define BITSPERSHORT (2*CHAR_BIT)
45 #define SHORTMASK ((1<<BITSPERSHORT)-1)
46 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
47 
#if SIZEOF_SHORT == SIZEOF_BDIGIT
#define SHORTLEN(x) (x)
#else
/*
 * Returns how many BITSPERSHORT-wide units are needed to hold the bignum
 * whose digits are ds[0..len-1]: every digit below the top one contributes
 * SIZEOF_BDIGIT/2 units, and the top digit contributes only as many units
 * as it takes to shift it down to zero.
 */
static size_t
shortlen(size_t len, BDIGIT *ds)
{
    int top_units = 0;
    BDIGIT top;

    for (top = ds[len-1]; top != 0; top = SHORTDN(top)) {
        top_units++;
    }
    return (len - 1)*SIZEOF_BDIGIT/2 + top_units;
}
/* NOTE: expands to a use of a variable named `d` at the call site. */
#define SHORTLEN(x) shortlen((x),d)
#endif
66 
/* Format version; rb_marshal_dump_limited() writes these two bytes first. */
67 #define MARSHAL_MAJOR 4
68 #define MARSHAL_MINOR 8
69 
/* One-byte tags written in front of each serialized value. */
/* Immediate values. */
70 #define TYPE_NIL '0'
71 #define TYPE_TRUE 'T'
72 #define TYPE_FALSE 'F'
73 #define TYPE_FIXNUM 'i'
74 
/* Object kinds and class decorations. */
75 #define TYPE_EXTENDED 'e'
76 #define TYPE_UCLASS 'C'
77 #define TYPE_OBJECT 'o'
78 #define TYPE_DATA 'd'
79 #define TYPE_USERDEF 'u'
80 #define TYPE_USRMARSHAL 'U'
81 #define TYPE_FLOAT 'f'
82 #define TYPE_BIGNUM 'l'
83 #define TYPE_STRING '"'
84 #define TYPE_REGEXP '/'
85 #define TYPE_ARRAY '['
86 #define TYPE_HASH '{'
87 #define TYPE_HASH_DEF '}'
88 #define TYPE_STRUCT 'S'
89 #define TYPE_MODULE_OLD 'M'
90 #define TYPE_CLASS 'c'
91 #define TYPE_MODULE 'm'
92 
/* A symbol written in full, and a back-reference to an earlier symbol. */
93 #define TYPE_SYMBOL ':'
94 #define TYPE_SYMLINK ';'
95 
/* Prefix announcing a trailing instance-variable section, and a
 * back-reference to an earlier object. */
96 #define TYPE_IVAR 'I'
97 #define TYPE_LINK '@'
98 
/* Cached IDs for the methods and pseudo instance variables used by this
 * file. NOTE(review): they are initialized outside this chunk — presumably
 * from the matching name_s_* strings below; confirm. */
99 static ID s_dump, s_load, s_mdump, s_mload;
100 static ID s_dump_data, s_load_data, s_alloc, s_call;
101 static ID s_getbyte, s_read, s_write, s_binmode;
102 static ID s_encoding_short, s_ruby2_keywords_flag;
103 
/* Textual name for each ID above. The dump_funcall/load_funcall macros
 * paste `name_##sym` to obtain the name reported in error messages. */
104 #define name_s_dump "_dump"
105 #define name_s_load "_load"
106 #define name_s_mdump "marshal_dump"
107 #define name_s_mload "marshal_load"
108 #define name_s_dump_data "_dump_data"
109 #define name_s_load_data "_load_data"
110 #define name_s_alloc "_alloc"
111 #define name_s_call "call"
112 #define name_s_getbyte "getbyte"
113 #define name_s_read "read"
114 #define name_s_write "write"
115 #define name_s_binmode "binmode"
116 #define name_s_encoding_short "E"
117 #define name_s_ruby2_keywords_flag "K"
118 
/* One compatibility registration made by rb_marshal_define_compat(). */
119 typedef struct {
/* the two classes passed to rb_marshal_define_compat() */
120  VALUE newclass;
121  VALUE oldclass;
/* called by w_object() with the real object; its result is dumped instead */
122  VALUE (*dumper)(VALUE);
/* NOTE(review): not used in this chunk — presumably the load-side
 * counterpart of dumper; confirm against the loader code. */
123  VALUE (*loader)(VALUE, VALUE);
124 } marshal_compat_t;
125 
/* Table of marshal_compat_t entries keyed by allocator function (see
 * rb_marshal_define_compat), and the object used as the write-barrier
 * parent for the VALUEs stored in those entries. */
126 static st_table *compat_allocator_tbl;
127 static VALUE compat_allocator_tbl_wrapper;
128 static VALUE rb_marshal_dump_limited(VALUE obj, VALUE port, int limit);
129 static VALUE rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze);
130 
/* Defined outside this chunk; rb_marshal_define_compat() calls it before
 * touching the table. */
131 static st_table *compat_allocator_table(void);
132 
133 void
134 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
135 {
136  marshal_compat_t *compat;
137  rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
138 
139  if (!allocator) {
140  rb_raise(rb_eTypeError, "no allocator");
141  }
142 
143  compat_allocator_table();
144  compat = ALLOC(marshal_compat_t);
145  RB_OBJ_WRITE(compat_allocator_tbl_wrapper, &compat->newclass, newclass);
146  RB_OBJ_WRITE(compat_allocator_tbl_wrapper, &compat->oldclass, oldclass);
147  compat->dumper = dumper;
148  compat->loader = loader;
149 
150  st_insert(compat_allocator_table(), (st_data_t)allocator, (st_data_t)compat);
151 }
152 
/* State of one Marshal.dump run. */
153 struct dump_arg {
/* str: output buffer appended to by w_nbyte();
 * dest: output port, or 0 when the result is returned as a string */
154  VALUE str, dest;
/* symbol -> index, used to emit TYPE_SYMLINK; also serves as the
 * "still active" flag (cleared by clear_dump_arg) */
155  st_table *symbols;
/* object -> index, used to emit TYPE_LINK */
156  st_table *data;
/* substitute object returned by a compat dumper -> real object */
157  st_table *compat_tbl;
/* encoding name -> String holding that name, created lazily */
158  st_table *encodings;
/* index the next remembered object will receive */
159  st_index_t num_entries;
160 };
161 
/* Per-object context handed to iteration callbacks and ivar writers. */
162 struct dump_call_arg {
/* object currently being dumped */
163  VALUE obj;
/* the enclosing dump run */
164  struct dump_arg *arg;
/* depth limit to use for this object's children */
165  int limit;
166 };
167 
168 static VALUE
169 check_dump_arg(VALUE ret, struct dump_arg *arg, const char *name)
170 {
171  if (!arg->symbols) {
172  rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
173  name);
174  }
175  return ret;
176 }
177 
178 static VALUE
179 check_userdump_arg(VALUE obj, ID sym, int argc, const VALUE *argv,
180  struct dump_arg *arg, const char *name)
181 {
182  VALUE ret = rb_funcallv(obj, sym, argc, argv);
183  VALUE klass = CLASS_OF(obj);
184  if (CLASS_OF(ret) == klass) {
185  rb_raise(rb_eRuntimeError, "%"PRIsVALUE"#%s returned same class instance",
186  klass, name);
187  }
188  return check_dump_arg(ret, arg, name);
189 }
190 
/* Call method `sym` on obj through check_userdump_arg(); the method's
 * textual name is obtained by pasting name_##sym. */
191 #define dump_funcall(arg, obj, sym, argc, argv) \
192  check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
/* Same idea using rb_check_funcall(); only the dump state is checked
 * afterwards, not the class of the result. */
193 #define dump_check_funcall(arg, obj, sym, argc, argv) \
194  check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
195 
196 static void clear_dump_arg(struct dump_arg *arg);
197 
198 static void
199 mark_dump_arg(void *ptr)
200 {
201  struct dump_arg *p = ptr;
202  if (!p->symbols)
203  return;
204  rb_mark_set(p->symbols);
205  rb_mark_set(p->data);
206  rb_mark_hash(p->compat_tbl);
207  rb_gc_mark(p->str);
208 }
209 
/* GC free callback for the dump state: releases its tables. */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *dump = ptr;

    clear_dump_arg(dump);
}
215 
216 static size_t
217 memsize_dump_arg(const void *ptr)
218 {
219  const struct dump_arg *p = (struct dump_arg *)ptr;
220  size_t memsize = 0;
221  if (p->symbols) memsize += rb_st_memsize(p->symbols);
222  if (p->data) memsize += rb_st_memsize(p->data);
223  if (p->compat_tbl) memsize += rb_st_memsize(p->compat_tbl);
224  if (p->encodings) memsize += rb_st_memsize(p->encodings);
225  return memsize;
226 }
227 
/* Typed-data descriptor for the wrapper object that owns a struct dump_arg
 * (created in rb_marshal_dump_limited). */
228 static const rb_data_type_t dump_arg_data = {
229  "dump_arg",
230  {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
231  0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE
232 };
233 
234 static VALUE
235 must_not_be_anonymous(const char *type, VALUE path)
236 {
237  char *n = RSTRING_PTR(path);
238 
239  if (!rb_enc_asciicompat(rb_enc_get(path))) {
240  /* cannot occur? */
241  rb_raise(rb_eTypeError, "can't dump non-ascii %s name % "PRIsVALUE,
242  type, path);
243  }
244  if (n[0] == '#') {
245  rb_raise(rb_eTypeError, "can't dump anonymous %s % "PRIsVALUE,
246  type, path);
247  }
248  return path;
249 }
250 
251 static VALUE
252 class2path(VALUE klass)
253 {
254  VALUE path = rb_class_path(klass);
255 
256  must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path);
257  if (rb_path_to_class(path) != rb_class_real(klass)) {
258  rb_raise(rb_eTypeError, "% "PRIsVALUE" can't be referred to", path);
259  }
260  return path;
261 }
262 
/* Forward declarations for helpers defined further down in this file. */
263 int ruby_marshal_write_long(long x, char *buf);
264 static void w_long(long, struct dump_arg*);
265 static int w_encoding(VALUE encname, struct dump_call_arg *arg);
266 static VALUE encoding_name(VALUE obj, struct dump_arg *arg);
267 
268 static void
269 w_nbyte(const char *s, long n, struct dump_arg *arg)
270 {
271  VALUE buf = arg->str;
272  rb_str_buf_cat(buf, s, n);
273  if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
274  rb_io_write(arg->dest, buf);
275  rb_str_resize(buf, 0);
276  }
277 }
278 
/* Writes the single byte +c+. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char one[1] = {c};

    w_nbyte(one, 1, arg);
}
284 
/* Writes a length-prefixed byte sequence: the length +n+ as a marshal
 * long, followed by the +n+ bytes of +s+. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    const long length = n;

    w_long(length, arg);
    w_nbyte(s, length, arg);
}

/* Length-prefixed write of a NUL-terminated C string (terminator excluded). */
#define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
293 
/* Writes the low 16 bits of +x+ as two bytes, least significant first. */
static void
w_short(int x, struct dump_arg *arg)
{
    const char low = (char)((x >> 0) & 0xff);
    const char high = (char)((x >> 8) & 0xff);

    w_byte(low, arg);
    w_byte(high, arg);
}
300 
301 static void
302 w_long(long x, struct dump_arg *arg)
303 {
304  char buf[sizeof(long)+1];
305  int i = ruby_marshal_write_long(x, buf);
306  if (i < 0) {
307  rb_raise(rb_eTypeError, "long too big to dump");
308  }
309  w_nbyte(buf, i, arg);
310 }
311 
/*
 * Encodes +x+ into +buf+ (which must hold sizeof(long)+1 bytes) and returns
 * the number of bytes produced, or -1 when long is wider than 4 bytes and
 * +x+ does not fit in 32 signed bits.
 *
 *   0            -> single byte 0
 *   1..122       -> single byte x+5
 *   -123..-1     -> single byte (x-5)&0xff
 *   otherwise    -> a count byte followed by that many little-endian
 *                   payload bytes; the count is negated for values whose
 *                   remaining high part is all ones (negative numbers)
 */
int
ruby_marshal_write_long(long x, char *buf)
{
    int len;

#if SIZEOF_LONG > 4
    {
        const long high = RSHIFT(x, 31);
        if (high != 0 && high != -1) {
            /* big long does not fit in 4 bytes */
            return -1;
        }
    }
#endif

    if (x == 0) {
        buf[0] = 0;
        return 1;
    }
    if (x > 0 && x < 123) {
        buf[0] = (char)(x + 5);
        return 1;
    }
    if (x < 0 && x > -124) {
        buf[0] = (char)((x - 5)&0xff);
        return 1;
    }

    len = 1;
    while (len < (int)sizeof(long)+1) {
        buf[len] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0) {
            /* positive value fully consumed */
            buf[0] = len;
            break;
        }
        if (x == -1) {
            /* only sign bits remain */
            buf[0] = -len;
            break;
        }
        len++;
    }
    return len+1;
}
350 
351 #ifdef DBL_MANT_DIG
352 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
353 
/* Number of mantissa bits consumed per chunk by load_mantissa(), chosen
 * from the width of the platform's double mantissa. */
354 #if DBL_MANT_DIG > 32
355 #define MANT_BITS 32
356 #elif DBL_MANT_DIG > 24
357 #define MANT_BITS 24
358 #elif DBL_MANT_DIG > 16
359 #define MANT_BITS 16
360 #else
361 #define MANT_BITS 8
362 #endif
363 
/*
 * Refines d with extra mantissa bytes found in buf[0..len-1].
 *
 * d is returned unchanged when len is 0, when only one byte is present,
 * or when the first byte is not the zero "binary mantissa" marker.
 * Otherwise the bytes after the marker are read in big-endian chunks of
 * at most MANT_BITS bits and added below the DECIMAL_MANT most significant
 * bits of |d|; the original exponent and sign are then restored.
 */
364 static double
365 load_mantissa(double d, const char *buf, long len)
366 {
367  if (!len) return d;
368  if (--len > 0 && !*buf++) { /* binary mantissa mark */
369  int e, s = d < 0, dig = 0;
370  unsigned long m;
371 
/* keep only the integer part of |d|'s mantissa scaled to DECIMAL_MANT bits */
372  modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
373  do {
374  m = 0;
/* gather up to MANT_BITS/8 bytes into m; a shorter final chunk enters
 * at the case matching the number of bytes left */
375  switch (len) {
376  default: m = *buf++ & 0xff; /* fall through */
377 #if MANT_BITS > 24
378  case 3: m = (m << 8) | (*buf++ & 0xff); /* fall through */
379 #endif
380 #if MANT_BITS > 16
381  case 2: m = (m << 8) | (*buf++ & 0xff); /* fall through */
382 #endif
383 #if MANT_BITS > 8
384  case 1: m = (m << 8) | (*buf++ & 0xff);
385 #endif
386  }
/* dig is the (negative) binary exponent at which this chunk is added */
387  dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
388  d += ldexp((double)m, dig);
389  } while ((len -= MANT_BITS / 8) > 0);
390  d = ldexp(d, e - DECIMAL_MANT);
391  if (s) d = -d;
392  }
393  return d;
394 }
395 #else
/* Without DBL_MANT_DIG the extra mantissa bytes are ignored. */
396 #define load_mantissa(d, buf, len) (d)
397 #endif
398 
/* Digit budget used to size the text buffer in w_float(). */
399 #ifdef DBL_DIG
400 #define FLOAT_DIG (DBL_DIG+2)
401 #else
402 #define FLOAT_DIG 17
403 #endif
404 
/*
 * Writes d as a length-prefixed text string.
 *
 * Infinities, NaN and zeros are written as the fixed strings "inf",
 * "-inf", "nan", "0" and "-0".  Any other value is formatted from the
 * digits returned by ruby_dtoa(): exponent notation ("d.ddde<n>") when the
 * decimal point position is below -3 or beyond the digit count, plain
 * positional notation otherwise.
 */
405 static void
406 w_float(double d, struct dump_arg *arg)
407 {
408  char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
409 
410  if (isinf(d)) {
411  if (d < 0) w_cstr("-inf", arg);
412  else w_cstr("inf", arg);
413  }
414  else if (isnan(d)) {
415  w_cstr("nan", arg);
416  }
417  else if (d == 0.0) {
418  if (signbit(d)) w_cstr("-0", arg);
419  else w_cstr("0", arg);
420  }
421  else {
422  int decpt, sign, digs, len = 0;
/* p..e holds the digit string; decpt is the decimal point position */
423  char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
424  if (sign) buf[len++] = '-';
425  digs = (int)(e - p);
426  if (decpt < -3 || decpt > digs) {
/* exponent form: first digit, optional fraction, then "e<decpt-1>" */
427  buf[len++] = p[0];
428  if (--digs > 0) buf[len++] = '.';
429  memcpy(buf + len, p + 1, digs);
430  len += digs;
431  len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
432  }
433  else if (decpt > 0) {
/* integer part, then the remaining digits (if any) as the fraction */
434  memcpy(buf + len, p, decpt);
435  len += decpt;
436  if ((digs -= decpt) > 0) {
437  buf[len++] = '.';
438  memcpy(buf + len, p + decpt, digs);
439  len += digs;
440  }
441  }
442  else {
/* "0." followed by -decpt zeros and then the digits */
443  buf[len++] = '0';
444  buf[len++] = '.';
445  if (decpt) {
446  memset(buf + len, '0', -decpt);
447  len -= decpt;
448  }
449  memcpy(buf + len, p, digs);
450  len += digs;
451  }
452  free(p);
453  w_bytes(buf, len, arg);
454  }
455 }
456 
457 static void
458 w_symbol(VALUE sym, struct dump_arg *arg)
459 {
460  st_data_t num;
461  VALUE encname;
462 
463  if (st_lookup(arg->symbols, sym, &num)) {
464  w_byte(TYPE_SYMLINK, arg);
465  w_long((long)num, arg);
466  }
467  else {
468  const VALUE orig_sym = sym;
469  sym = rb_sym2str(sym);
470  if (!sym) {
471  rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
472  }
473  encname = encoding_name(sym, arg);
474  if (NIL_P(encname) ||
475  is_ascii_string(sym)) {
476  encname = Qnil;
477  }
478  else {
479  w_byte(TYPE_IVAR, arg);
480  }
481  w_byte(TYPE_SYMBOL, arg);
482  w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
483  st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
484  if (!NIL_P(encname)) {
485  struct dump_call_arg c_arg;
486  c_arg.limit = 1;
487  c_arg.arg = arg;
488  w_long(1L, arg);
489  w_encoding(encname, &c_arg);
490  }
491  }
492 }
493 
494 static void
495 w_unique(VALUE s, struct dump_arg *arg)
496 {
497  must_not_be_anonymous("class", s);
498  w_symbol(rb_str_intern(s), arg);
499 }
500 
501 static void w_object(VALUE,struct dump_arg*,int);
502 
503 static int
504 hash_each(VALUE key, VALUE value, VALUE v)
505 {
506  struct dump_call_arg *arg = (void *)v;
507  w_object(key, arg->arg, arg->limit);
508  w_object(value, arg->arg, arg->limit);
509  return ST_CONTINUE;
510 }
511 
512 #define SINGLETON_DUMP_UNABLE_P(klass) \
513  (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
514  rb_ivar_count(klass) > 0)
515 
516 static void
517 w_extended(VALUE klass, struct dump_arg *arg, int check)
518 {
519  if (check && RCLASS_SINGLETON_P(klass)) {
520  VALUE origin = RCLASS_ORIGIN(klass);
521  if (SINGLETON_DUMP_UNABLE_P(klass) ||
522  (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) {
523  rb_raise(rb_eTypeError, "singleton can't be dumped");
524  }
525  klass = RCLASS_SUPER(klass);
526  }
527  while (BUILTIN_TYPE(klass) == T_ICLASS) {
528  if (!FL_TEST(klass, RICLASS_IS_ORIGIN) ||
529  BUILTIN_TYPE(RBASIC(klass)->klass) != T_MODULE) {
530  VALUE path = rb_class_name(RBASIC(klass)->klass);
531  w_byte(TYPE_EXTENDED, arg);
532  w_unique(path, arg);
533  }
534  klass = RCLASS_SUPER(klass);
535  }
536 }
537 
538 static void
539 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
540 {
541  VALUE path;
542  st_data_t real_obj;
543  VALUE klass;
544 
545  if (arg->compat_tbl &&
546  st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
547  obj = (VALUE)real_obj;
548  }
549  klass = CLASS_OF(obj);
550  w_extended(klass, arg, check);
551  w_byte(type, arg);
552  path = class2path(rb_class_real(klass));
553  w_unique(path, arg);
554 }
555 
556 static void
557 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
558 {
559  VALUE klass = CLASS_OF(obj);
560 
561  w_extended(klass, arg, TRUE);
562  klass = rb_class_real(klass);
563  if (klass != super) {
564  w_byte(TYPE_UCLASS, arg);
565  w_unique(class2path(klass), arg);
566  }
567 }
568 
569 static bool
570 rb_hash_ruby2_keywords_p(VALUE obj)
571 {
572  return (RHASH(obj)->basic.flags & RHASH_PASS_AS_KEYWORDS) != 0;
573 }
574 
575 static void
576 rb_hash_ruby2_keywords(VALUE obj)
577 {
578  RHASH(obj)->basic.flags |= RHASH_PASS_AS_KEYWORDS;
579 }
580 
581 static inline bool
582 to_be_skipped_id(const ID id)
583 {
584  if (id == s_encoding_short) return true;
585  if (id == s_ruby2_keywords_flag) return true;
586  if (id == rb_id_encoding()) return true;
587  return !rb_id2str(id);
588 }
589 
/* Argument block passed to w_obj_each() through rb_ivar_foreach(). */
590 struct w_ivar_arg {
/* context of the object whose ivars are being written */
591  struct dump_call_arg *dump;
/* starts at the announced ivar count; decremented for each ivar written */
592  st_data_t num_ivar;
593 };
594 
595 static int
596 w_obj_each(ID id, VALUE value, st_data_t a)
597 {
598  struct w_ivar_arg *ivarg = (struct w_ivar_arg *)a;
599  struct dump_call_arg *arg = ivarg->dump;
600 
601  if (to_be_skipped_id(id)) {
602  if (id == s_encoding_short) {
603  rb_warn("instance variable '"name_s_encoding_short"' on class %"PRIsVALUE" is not dumped",
604  CLASS_OF(arg->obj));
605  }
606  if (id == s_ruby2_keywords_flag) {
607  rb_warn("instance variable '"name_s_ruby2_keywords_flag"' on class %"PRIsVALUE" is not dumped",
608  CLASS_OF(arg->obj));
609  }
610  return ST_CONTINUE;
611  }
612  --ivarg->num_ivar;
613  w_symbol(ID2SYM(id), arg->arg);
614  w_object(value, arg->arg, arg->limit);
615  return ST_CONTINUE;
616 }
617 
618 static int
619 obj_count_ivars(ID id, VALUE val, st_data_t a)
620 {
621  if (!to_be_skipped_id(id) && UNLIKELY(!++*(st_index_t *)a)) {
622  rb_raise(rb_eRuntimeError, "too many instance variables");
623  }
624  return ST_CONTINUE;
625 }
626 
627 static VALUE
628 encoding_name(VALUE obj, struct dump_arg *arg)
629 {
630  if (rb_enc_capable(obj)) {
631  int encidx = rb_enc_get_index(obj);
632  rb_encoding *enc = 0;
633  st_data_t name;
634 
635  if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
636  return Qnil;
637  }
638 
639  /* special treatment for US-ASCII and UTF-8 */
640  if (encidx == rb_usascii_encindex()) {
641  return Qfalse;
642  }
643  else if (encidx == rb_utf8_encindex()) {
644  return Qtrue;
645  }
646 
647  if (arg->encodings ?
648  !st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc), &name) :
649  (arg->encodings = st_init_strcasetable(), 1)) {
650  name = (st_data_t)rb_str_new_cstr(rb_enc_name(enc));
651  st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name);
652  }
653  return (VALUE)name;
654  }
655  else {
656  return Qnil;
657  }
658 }
659 
660 static int
661 w_encoding(VALUE encname, struct dump_call_arg *arg)
662 {
663  int limit = arg->limit;
664  if (limit >= 0) ++limit;
665  switch (encname) {
666  case Qfalse:
667  case Qtrue:
668  w_symbol(ID2SYM(s_encoding_short), arg->arg);
669  w_object(encname, arg->arg, limit);
670  return 1;
671  case Qnil:
672  return 0;
673  }
674  w_symbol(ID2SYM(rb_id_encoding()), arg->arg);
675  w_object(encname, arg->arg, limit);
676  return 1;
677 }
678 
/*
 * Returns the number of entries the trailing ivar section of obj will
 * contain: one if encname is not nil, one more for a hash carrying the
 * keywords flag, plus every instance variable that is not skipped.
 *
 * Instance variables of T_OBJECT, T_CLASS and T_MODULE are not counted
 * here.  For all other objects, *ivobj is set to obj when the resulting
 * count is non-zero; otherwise *ivobj is left untouched.
 */
679 static st_index_t
680 has_ivars(VALUE obj, VALUE encname, VALUE *ivobj)
681 {
682  st_index_t num = !NIL_P(encname);
683 
/* special constants have no builtin type; go straight to the generic count */
684  if (SPECIAL_CONST_P(obj)) goto generic;
685  switch (BUILTIN_TYPE(obj)) {
686  case T_OBJECT:
687  case T_CLASS:
688  case T_MODULE:
689  break; /* counted elsewhere */
690  case T_HASH:
691  if (rb_hash_ruby2_keywords_p(obj)) ++num;
692  /* fall through */
693  default:
694  generic:
695  rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
696  if (num) *ivobj = obj;
697  }
698 
699  return num;
700 }
701 
702 static void
703 w_ivar_each(VALUE obj, st_index_t num, struct dump_call_arg *arg)
704 {
705  shape_id_t shape_id = rb_shape_get_shape_id(arg->obj);
706  struct w_ivar_arg ivarg = {arg, num};
707  if (!num) return;
708  rb_ivar_foreach(obj, w_obj_each, (st_data_t)&ivarg);
709 
710  if (shape_id != rb_shape_get_shape_id(arg->obj)) {
711  rb_shape_t * expected_shape = rb_shape_get_shape_by_id(shape_id);
712  rb_shape_t * actual_shape = rb_shape_get_shape(arg->obj);
713 
714  // If the shape tree got _shorter_ then we probably removed an IV
715  // If the shape tree got longer, then we probably added an IV.
716  // The exception message might not be accurate when someone adds and
717  // removes the same number of IVs, but they will still get an exception
718  if (rb_shape_depth(expected_shape) > rb_shape_depth(actual_shape)) {
719  rb_raise(rb_eRuntimeError, "instance variable removed from %"PRIsVALUE" instance",
720  CLASS_OF(arg->obj));
721  }
722  else {
723  rb_raise(rb_eRuntimeError, "instance variable added to %"PRIsVALUE" instance",
724  CLASS_OF(arg->obj));
725  }
726  }
727 }
728 
729 static void
730 w_ivar(st_index_t num, VALUE ivobj, VALUE encname, struct dump_call_arg *arg)
731 {
732  w_long(num, arg->arg);
733  num -= w_encoding(encname, arg);
734  if (RB_TYPE_P(ivobj, T_HASH) && rb_hash_ruby2_keywords_p(ivobj)) {
735  int limit = arg->limit;
736  if (limit >= 0) ++limit;
737  w_symbol(ID2SYM(s_ruby2_keywords_flag), arg->arg);
738  w_object(Qtrue, arg->arg, limit);
739  num--;
740  }
741  if (!UNDEF_P(ivobj) && num) {
742  w_ivar_each(ivobj, num, arg);
743  }
744 }
745 
746 static void
747 w_objivar(VALUE obj, struct dump_call_arg *arg)
748 {
749  st_data_t num = 0;
750 
751  rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
752  w_long(num, arg->arg);
753  w_ivar_each(obj, num, arg);
754 }
755 
756 #if SIZEOF_LONG > 4
757 // Optimized dump for fixnum larger than 31-bits
/*
 * Writes the fixnum obj in the TYPE_BIGNUM layout: tag, sign byte, number
 * of short units, then the magnitude as short units, least significant
 * first.  w_object() calls this for fixnums outside the 32-bit range.
 */
758 static void
759 w_bigfixnum(VALUE obj, struct dump_arg *arg)
760 {
761  RUBY_ASSERT(FIXNUM_P(obj));
762 
763  w_byte(TYPE_BIGNUM, arg);
764 
765 #if SIZEOF_LONG == SIZEOF_VALUE
766  long num, slen_num;
767  num = FIX2LONG(obj);
768 #else
769  long long num, slen_num;
770  num = NUM2LL(obj);
771 #endif
772 
773  char sign = num < 0 ? '-' : '+';
774  w_byte(sign, arg);
775 
776  // Guaranteed not to overflow, as FIXNUM is 1-bit less than long
777  if (num < 0) num = -num;
778 
779  // calculate the size in shorts
780  int slen = 0;
781  {
782  slen_num = num;
783  while (slen_num) {
784  slen++;
785  slen_num = SHORTDN(slen_num);
786  }
787  }
788 
789  RUBY_ASSERT(slen > 0 && slen <= SIZEOF_LONG / 2);
790 
791  w_long((long)slen, arg);
792 
/* emit the magnitude one short unit at a time, lowest unit first */
793  for (int i = 0; i < slen; i++) {
794  w_short(num & SHORTMASK, arg);
795  num = SHORTDN(num);
796  }
797 
798  // We aren't adding this object to the link table, but we need to increment
799  // the index.
800  arg->num_entries++;
801 
802  RUBY_ASSERT(num == 0);
803 }
804 #endif
805 
806 static void
807 w_remember(VALUE obj, struct dump_arg *arg)
808 {
809  st_add_direct(arg->data, obj, arg->num_entries++);
810 }
811 
/*
 * Serializes obj (and, recursively, everything it references) into arg.
 *
 * limit is the remaining nesting depth: 0 raises ArgumentError, a positive
 * value is decremented before children are dumped, and a negative value is
 * never decremented.
 *
 * nil, true, false, fixnums and symbols are written directly.  Any other
 * object is first looked up in arg->data and written as a TYPE_LINK when
 * already seen.  Otherwise, in order of precedence: flonums are written as
 * floats; objects responding to marshal_dump are written as
 * TYPE_USRMARSHAL; objects responding to _dump as TYPE_USERDEF; everything
 * else according to its builtin type.  When has_ivars() reports entries,
 * the record is prefixed with TYPE_IVAR and followed by an ivar section.
 *
 * Raises TypeError for objects that cannot be dumped.
 */
812 static void
813 w_object(VALUE obj, struct dump_arg *arg, int limit)
814 {
815  struct dump_call_arg c_arg;
816  VALUE ivobj = Qundef;
817  st_data_t num;
818  st_index_t hasiv = 0;
819  VALUE encname = Qnil;
820 
821  if (limit == 0) {
822  rb_raise(rb_eArgError, "exceed depth limit");
823  }
824 
825  if (NIL_P(obj)) {
826  w_byte(TYPE_NIL, arg);
827  }
828  else if (obj == Qtrue) {
829  w_byte(TYPE_TRUE, arg);
830  }
831  else if (obj == Qfalse) {
832  w_byte(TYPE_FALSE, arg);
833  }
834  else if (FIXNUM_P(obj)) {
835 #if SIZEOF_LONG <= 4
836  w_byte(TYPE_FIXNUM, arg);
837  w_long(FIX2INT(obj), arg);
838 #else
/* same range test as in ruby_marshal_write_long(); fixnums outside it
 * are written in bignum layout */
839  if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
840  w_byte(TYPE_FIXNUM, arg);
841  w_long(FIX2LONG(obj), arg);
842  }
843  else {
844  w_bigfixnum(obj, arg);
845  }
846 #endif
847  }
848  else if (SYMBOL_P(obj)) {
849  w_symbol(obj, arg);
850  }
851  else {
/* already dumped: emit a back-reference and stop */
852  if (st_lookup(arg->data, obj, &num)) {
853  w_byte(TYPE_LINK, arg);
854  w_long((long)num, arg);
855  return;
856  }
857 
858  if (limit > 0) limit--;
859  c_arg.limit = limit;
860  c_arg.arg = arg;
861  c_arg.obj = obj;
862 
863  if (FLONUM_P(obj)) {
864  w_remember(obj, arg);
865  w_byte(TYPE_FLOAT, arg);
866  w_float(RFLOAT_VALUE(obj), arg);
867  return;
868  }
869 
870  VALUE v;
871 
872  if (!RBASIC_CLASS(obj)) {
873  rb_raise(rb_eTypeError, "can't dump internal %s",
874  rb_builtin_type_name(BUILTIN_TYPE(obj)));
875  }
876 
/* user-defined marshal_dump: the object is remembered before the hook
 * runs, then the hook's result is dumped under the object's class */
877  if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
878  w_remember(obj, arg);
879 
880  v = dump_funcall(arg, obj, s_mdump, 0, 0);
881  w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
882  w_object(v, arg, limit);
883  return;
884  }
/* user-defined _dump(limit): must return a String; the ivars of that
 * string, when it has any, take precedence over those of obj */
885  if (rb_obj_respond_to(obj, s_dump, TRUE)) {
886  VALUE ivobj2 = Qundef;
887  st_index_t hasiv2;
888  VALUE encname2;
889 
890  v = INT2NUM(limit);
891  v = dump_funcall(arg, obj, s_dump, 1, &v);
892  if (!RB_TYPE_P(v, T_STRING)) {
893  rb_raise(rb_eTypeError, "_dump() must return string");
894  }
895  hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
896  hasiv2 = has_ivars(v, (encname2 = encoding_name(v, arg)), &ivobj2);
897  if (hasiv2) {
898  hasiv = hasiv2;
899  ivobj = ivobj2;
900  encname = encname2;
901  }
902  if (hasiv) w_byte(TYPE_IVAR, arg);
903  w_class(TYPE_USERDEF, obj, arg, FALSE);
904  w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
905  if (hasiv) {
906  w_ivar(hasiv, ivobj, encname, &c_arg);
907  }
/* in this branch the object is remembered only after it has been written */
908  w_remember(obj, arg);
909  return;
910  }
911 
912  w_remember(obj, arg);
913 
914  hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
915  {
/* if the object's allocator has a compat registration, dump the
 * object returned by its dumper instead, and record substitute ->
 * real object so that w_class() writes the real object's class */
916  st_data_t compat_data;
917  rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
918  if (st_lookup(compat_allocator_tbl,
919  (st_data_t)allocator,
920  &compat_data)) {
921  marshal_compat_t *compat = (marshal_compat_t*)compat_data;
922  VALUE real_obj = obj;
923  obj = compat->dumper(real_obj);
924  if (!arg->compat_tbl) {
925  arg->compat_tbl = rb_init_identtable();
926  }
927  st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
928  if (obj != real_obj && UNDEF_P(ivobj)) hasiv = 0;
929  }
930  }
931  if (hasiv) w_byte(TYPE_IVAR, arg);
932 
/* body of the record, by builtin type */
933  switch (BUILTIN_TYPE(obj)) {
934  case T_CLASS:
935  if (FL_TEST(obj, FL_SINGLETON)) {
936  rb_raise(rb_eTypeError, "singleton class can't be dumped");
937  }
938  w_byte(TYPE_CLASS, arg);
939  {
940  VALUE path = class2path(obj);
941  w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
942  RB_GC_GUARD(path);
943  }
944  break;
945 
946  case T_MODULE:
947  w_byte(TYPE_MODULE, arg);
948  {
949  VALUE path = class2path(obj);
950  w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
951  RB_GC_GUARD(path);
952  }
953  break;
954 
955  case T_FLOAT:
956  w_byte(TYPE_FLOAT, arg);
957  w_float(RFLOAT_VALUE(obj), arg);
958  break;
959 
960  case T_BIGNUM:
961  w_byte(TYPE_BIGNUM, arg);
962  {
963  char sign = BIGNUM_SIGN(obj) ? '+' : '-';
964  size_t len = BIGNUM_LEN(obj);
965  size_t slen;
966  size_t j;
/* SHORTLEN() may expand to a use of this variable `d` */
967  BDIGIT *d = BIGNUM_DIGITS(obj);
968 
969  slen = SHORTLEN(len);
970  if (LONG_MAX < slen) {
971  rb_raise(rb_eTypeError, "too big Bignum can't be dumped");
972  }
973 
974  w_byte(sign, arg);
975  w_long((long)slen, arg);
976  for (j = 0; j < len; j++) {
977 #if SIZEOF_BDIGIT > SIZEOF_SHORT
978  BDIGIT num = *d;
979  int i;
980 
/* split each digit into short units; stop early on the top
 * digit once its remaining part is zero */
981  for (i=0; i<SIZEOF_BDIGIT; i+=SIZEOF_SHORT) {
982  w_short(num & SHORTMASK, arg);
983  num = SHORTDN(num);
984  if (j == len - 1 && num == 0) break;
985  }
986 #else
987  w_short(*d, arg);
988 #endif
989  d++;
990  }
991  }
992  break;
993 
994  case T_STRING:
995  w_uclass(obj, rb_cString, arg);
996  w_byte(TYPE_STRING, arg);
997  w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
998  break;
999 
1000  case T_REGEXP:
1001  w_uclass(obj, rb_cRegexp, arg);
1002  w_byte(TYPE_REGEXP, arg);
1003  {
1004  int opts = rb_reg_options(obj);
1005  w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
1006  w_byte((char)opts, arg);
1007  }
1008  break;
1009 
1010  case T_ARRAY:
1011  w_uclass(obj, rb_cArray, arg);
1012  w_byte(TYPE_ARRAY, arg);
1013  {
1014  long i, len = RARRAY_LEN(obj);
1015 
1016  w_long(len, arg);
/* the length has already been written, so the array must not
 * change size while its elements are dumped */
1017  for (i=0; i<RARRAY_LEN(obj); i++) {
1018  w_object(RARRAY_AREF(obj, i), arg, limit);
1019  if (len != RARRAY_LEN(obj)) {
1020  rb_raise(rb_eRuntimeError, "array modified during dump");
1021  }
1022  }
1023  }
1024  break;
1025 
1026  case T_HASH:
1027  w_uclass(obj, rb_cHash, arg);
1028  if (rb_hash_compare_by_id_p(obj)) {
1029  w_byte(TYPE_UCLASS, arg);
1030  w_symbol(rb_sym_intern_ascii_cstr("Hash"), arg);
1031  }
1032  if (NIL_P(RHASH_IFNONE(obj))) {
1033  w_byte(TYPE_HASH, arg);
1034  }
1035  else if (FL_TEST(obj, RHASH_PROC_DEFAULT)) {
1036  rb_raise(rb_eTypeError, "can't dump hash with default proc");
1037  }
1038  else {
1039  w_byte(TYPE_HASH_DEF, arg);
1040  }
1041  w_long(rb_hash_size_num(obj), arg);
1042  rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
/* a non-nil default value follows the key/value pairs */
1043  if (!NIL_P(RHASH_IFNONE(obj))) {
1044  w_object(RHASH_IFNONE(obj), arg, limit);
1045  }
1046  break;
1047 
1048  case T_STRUCT:
1049  w_class(TYPE_STRUCT, obj, arg, TRUE);
1050  {
1051  long len = RSTRUCT_LEN(obj);
1052  VALUE mem;
1053  long i;
1054 
1055  w_long(len, arg);
1056  mem = rb_struct_members(obj);
1057  for (i=0; i<len; i++) {
1058  w_symbol(RARRAY_AREF(mem, i), arg);
1059  w_object(RSTRUCT_GET(obj, i), arg, limit);
1060  }
1061  }
1062  break;
1063 
1064  case T_OBJECT:
1065  w_class(TYPE_OBJECT, obj, arg, TRUE);
1066  w_objivar(obj, &c_arg);
1067  break;
1068 
1069  case T_DATA:
1070  {
1071  VALUE v;
1072 
1073  if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) {
1074  rb_raise(rb_eTypeError,
1075  "no _dump_data is defined for class %"PRIsVALUE,
1076  rb_obj_class(obj));
1077  }
1078  v = dump_funcall(arg, obj, s_dump_data, 0, 0);
1079  w_class(TYPE_DATA, obj, arg, TRUE);
1080  w_object(v, arg, limit);
1081  }
1082  break;
1083 
1084  default:
1085  rb_raise(rb_eTypeError, "can't dump %"PRIsVALUE,
1086  rb_obj_class(obj));
1087  break;
1088  }
1089  RB_GC_GUARD(obj);
1090  }
/* trailing ivar section announced by the TYPE_IVAR prefix above */
1091  if (hasiv) {
1092  w_ivar(hasiv, ivobj, encname, &c_arg);
1093  }
1094 }
1095 
1096 static void
1097 clear_dump_arg(struct dump_arg *arg)
1098 {
1099  if (!arg->symbols) return;
1100  st_free_table(arg->symbols);
1101  arg->symbols = 0;
1102  st_free_table(arg->data);
1103  arg->data = 0;
1104  arg->num_entries = 0;
1105  if (arg->compat_tbl) {
1106  st_free_table(arg->compat_tbl);
1107  arg->compat_tbl = 0;
1108  }
1109  if (arg->encodings) {
1110  st_free_table(arg->encodings);
1111  arg->encodings = 0;
1112  }
1113 }
1114 
1115 NORETURN(static inline void io_needed(void));
1116 static inline void
1117 io_needed(void)
1118 {
1119  rb_raise(rb_eTypeError, "instance of IO needed");
1120 }
1121 
1122 /*
1123  * call-seq:
1124  * dump( obj [, anIO] , limit=-1 ) -> anIO
1125  *
1126  * Serializes obj and all descendant objects. If anIO is
1127  * specified, the serialized data will be written to it, otherwise the
1128  * data will be returned as a String. If limit is specified, the
1129  * traversal of subobjects will be limited to that depth. If limit is
1130  * negative, no checking of depth will be performed.
1131  *
1132  * class Klass
1133  * def initialize(str)
1134  * @str = str
1135  * end
1136  * def say_hello
1137  * @str
1138  * end
1139  * end
1140  *
1141  * (produces no output)
1142  *
1143  * o = Klass.new("hello\n")
1144  * data = Marshal.dump(o)
1145  * obj = Marshal.load(data)
1146  * obj.say_hello #=> "hello\n"
1147  *
1148  * Marshal can't dump following objects:
1149  * * anonymous Class/Module.
1150  * * objects which are related to system (ex: Dir, File::Stat, IO, File, Socket
1151  * and so on)
1152  * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
1153  * ThreadGroup, Continuation
1154  * * objects which define singleton methods
1155  */
1156 static VALUE
1157 marshal_dump(int argc, VALUE *argv, VALUE _)
1158 {
1159  VALUE obj, port, a1, a2;
1160  int limit = -1;
1161 
1162  port = Qnil;
1163  rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
1164  if (argc == 3) {
1165  if (!NIL_P(a2)) limit = NUM2INT(a2);
1166  if (NIL_P(a1)) io_needed();
1167  port = a1;
1168  }
1169  else if (argc == 2) {
1170  if (FIXNUM_P(a1)) limit = FIX2INT(a1);
1171  else if (NIL_P(a1)) io_needed();
1172  else port = a1;
1173  }
1174  return rb_marshal_dump_limited(obj, port, limit);
1175 }
1176 
1177 VALUE
1178 rb_marshal_dump_limited(VALUE obj, VALUE port, int limit)
1179 {
1180  struct dump_arg *arg;
1181  VALUE wrapper; /* used to avoid memory leak in case of exception */
1182 
1183  wrapper = TypedData_Make_Struct(0, struct dump_arg, &dump_arg_data, arg);
1184  arg->dest = 0;
1185  arg->symbols = st_init_numtable();
1186  arg->data = rb_init_identtable();
1187  arg->num_entries = 0;
1188  arg->compat_tbl = 0;
1189  arg->encodings = 0;
1190  arg->str = rb_str_buf_new(0);
1191  if (!NIL_P(port)) {
1192  if (!rb_respond_to(port, s_write)) {
1193  io_needed();
1194  }
1195  arg->dest = port;
1196  dump_check_funcall(arg, port, s_binmode, 0, 0);
1197  }
1198  else {
1199  port = arg->str;
1200  }
1201 
1202  w_byte(MARSHAL_MAJOR, arg);
1203  w_byte(MARSHAL_MINOR, arg);
1204 
1205  w_object(obj, arg, limit);
1206  if (arg->dest) {
1207  rb_io_write(arg->dest, arg->str);
1208  rb_str_resize(arg->str, 0);
1209  }
1210  clear_dump_arg(arg);
1211  RB_GC_GUARD(wrapper);
1212 
1213  return port;
1214 }
1215 
/* State of one Marshal.load run.
 * NOTE(review): most fields are only used by code outside this chunk; the
 * per-field notes below cover what is visible here. */
1216 struct load_arg {
1217  VALUE src;
1218  char *buf;
1219  long buflen;
1220  long readable;
1221  long offset;
/* also serves as the "still active" flag tested by check_load_arg()
 * and mark_load_arg() */
1222  st_table *symbols;
/* its num_entries supplies the index used by r_entry() and r_prepare() */
1223  st_table *data;
1224  st_table *partial_objects;
1225  VALUE proc;
1226  st_table *compat_tbl;
1227  bool freeze;
1228 };
1229 
1230 static VALUE
1231 check_load_arg(VALUE ret, struct load_arg *arg, const char *name)
1232 {
1233  if (!arg->symbols) {
1234  rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
1235  name);
1236  }
1237  return ret;
1238 }
1239 #define load_funcall(arg, obj, sym, argc, argv) \
1240  check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)
1241 
1242 static void clear_load_arg(struct load_arg *arg);
1243 
1244 static void
1245 mark_load_arg(void *ptr)
1246 {
1247  struct load_arg *p = ptr;
1248  if (!p->symbols)
1249  return;
1250  rb_mark_tbl(p->symbols);
1251  rb_mark_tbl(p->data);
1252  rb_mark_tbl(p->partial_objects);
1253  rb_mark_hash(p->compat_tbl);
1254 }
1255 
/* GC free callback: releases the buffer and tables through clear_load_arg. */
static void
free_load_arg(void *ptr)
{
    struct load_arg *state = ptr;

    clear_load_arg(state);
}
1261 
1262 static size_t
1263 memsize_load_arg(const void *ptr)
1264 {
1265  const struct load_arg *p = (struct load_arg *)ptr;
1266  size_t memsize = 0;
1267  if (p->symbols) memsize += rb_st_memsize(p->symbols);
1268  if (p->data) memsize += rb_st_memsize(p->data);
1269  if (p->partial_objects) memsize += rb_st_memsize(p->partial_objects);
1270  if (p->compat_tbl) memsize += rb_st_memsize(p->compat_tbl);
1271  return memsize;
1272 }
1273 
/*
 * Typed-data descriptor named "load_arg"; it registers mark_load_arg,
 * free_load_arg and memsize_load_arg as the callbacks for the wrapper
 * object created in rb_marshal_load_with_proc.
 */
static const rb_data_type_t load_arg_data = {
    "load_arg",
    {mark_load_arg, free_load_arg, memsize_load_arg,},
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE
};
1279 
/* Registers v in arg->data under the next free index (the table's current entry count). */
#define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
/* Forward declarations: the readers below are mutually recursive. */
static VALUE r_object(struct load_arg *arg);
static VALUE r_symbol(struct load_arg *arg);
1283 
/* Raises ArgumentError ("marshal data too short"); never returns. */
NORETURN(static void too_short(void));
static void
too_short(void)
{
    rb_raise(rb_eArgError, "marshal data too short");
}
1290 
1291 static st_index_t
1292 r_prepare(struct load_arg *arg)
1293 {
1294  st_index_t idx = arg->data->num_entries;
1295 
1296  st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
1297  return idx;
1298 }
1299 
1300 static unsigned char
1301 r_byte1_buffered(struct load_arg *arg)
1302 {
1303  if (arg->buflen == 0) {
1304  long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
1305  VALUE str, n = LONG2NUM(readable);
1306 
1307  str = load_funcall(arg, arg->src, s_read, 1, &n);
1308  if (NIL_P(str)) too_short();
1309  StringValue(str);
1310  memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
1311  arg->offset = 0;
1312  arg->buflen = RSTRING_LEN(str);
1313  }
1314  arg->buflen--;
1315  return arg->buf[arg->offset++];
1316 }
1317 
1318 static int
1319 r_byte(struct load_arg *arg)
1320 {
1321  int c;
1322 
1323  if (RB_TYPE_P(arg->src, T_STRING)) {
1324  if (RSTRING_LEN(arg->src) > arg->offset) {
1325  c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
1326  }
1327  else {
1328  too_short();
1329  }
1330  }
1331  else {
1332  if (arg->readable >0 || arg->buflen > 0) {
1333  c = r_byte1_buffered(arg);
1334  }
1335  else {
1336  VALUE v = load_funcall(arg, arg->src, s_getbyte, 0, 0);
1337  if (NIL_P(v)) rb_eof_error();
1338  c = (unsigned char)NUM2CHR(v);
1339  }
1340  }
1341  return c;
1342 }
1343 
/*
 * Raises TypeError reporting that an encoded integer occupies +size+ bytes,
 * more than SIZEOF_LONG; never returns.
 */
NORETURN(static void long_toobig(int size));

static void
long_toobig(int size)
{
    rb_raise(rb_eTypeError, "long too big for this architecture (size "
             STRINGIZE(SIZEOF_LONG)", given %d)", size);
}
1352 
/*
 * Decodes one variable-length integer.
 *
 * The first byte c is interpreted as a signed char:
 *   c == 0          -> 0
 *   5 <= c <= 127   -> c - 5
 *   -128 <= c <= -5 -> c + 5
 *   1 <= c <= 4     -> c little-endian bytes follow (non-negative value)
 *   -4 <= c <= -1   -> -c little-endian bytes follow; the bytes that are
 *                      not transmitted are all ones (negative value)
 * A byte count larger than sizeof(long) raises TypeError via long_toobig().
 *
 * The value is assembled in an unsigned long: shifting a byte into the top
 * byte of a signed long (or shifting (long)0xff there to build the mask)
 * overflows the signed type, which is undefined behaviour in C.  The final
 * conversion back to long yields the same two's-complement bit pattern.
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long x;
    int c = (signed char)r_byte(arg);
    int i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        x = 0;
        for (i=0;i<c;i++) {
            x |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        x = ~0UL;
        for (i=0;i<c;i++) {
            /* clear byte i, then drop the transmitted byte into place */
            x &= ~(0xffUL << (8*i));
            x |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)x;
}
1385 
1386 long
1387 ruby_marshal_read_long(const char **buf, long len)
1388 {
1389  long x;
1390  struct RString src;
1391  struct load_arg arg;
1392  memset(&arg, 0, sizeof(arg));
1393  arg.src = rb_setup_fake_str(&src, *buf, len, 0);
1394  x = r_long(&arg);
1395  *buf += arg.offset;
1396  return x;
1397 }
1398 
1399 static VALUE
1400 r_bytes1(long len, struct load_arg *arg)
1401 {
1402  VALUE str, n = LONG2NUM(len);
1403 
1404  str = load_funcall(arg, arg->src, s_read, 1, &n);
1405  if (NIL_P(str)) too_short();
1406  StringValue(str);
1407  if (RSTRING_LEN(str) != len) too_short();
1408 
1409  return str;
1410 }
1411 
1412 static VALUE
1413 r_bytes1_buffered(long len, struct load_arg *arg)
1414 {
1415  VALUE str;
1416 
1417  if (len <= arg->buflen) {
1418  str = rb_str_new(arg->buf+arg->offset, len);
1419  arg->offset += len;
1420  arg->buflen -= len;
1421  }
1422  else {
1423  long buflen = arg->buflen;
1424  long readable = arg->readable + 1;
1425  long tmp_len, read_len, need_len = len - buflen;
1426  VALUE tmp, n;
1427 
1428  readable = readable < BUFSIZ ? readable : BUFSIZ;
1429  read_len = need_len > readable ? need_len : readable;
1430  n = LONG2NUM(read_len);
1431  tmp = load_funcall(arg, arg->src, s_read, 1, &n);
1432  if (NIL_P(tmp)) too_short();
1433  StringValue(tmp);
1434 
1435  tmp_len = RSTRING_LEN(tmp);
1436 
1437  if (tmp_len < need_len) too_short();
1438 
1439  str = rb_str_new(arg->buf+arg->offset, buflen);
1440  rb_str_cat(str, RSTRING_PTR(tmp), need_len);
1441 
1442  if (tmp_len > need_len) {
1443  buflen = tmp_len - need_len;
1444  memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
1445  arg->buflen = buflen;
1446  }
1447  else {
1448  arg->buflen = 0;
1449  }
1450  arg->offset = 0;
1451  }
1452 
1453  return str;
1454 }
1455 
1456 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1457 
1458 static VALUE
1459 r_bytes0(long len, struct load_arg *arg)
1460 {
1461  VALUE str;
1462 
1463  if (len == 0) return rb_str_new(0, 0);
1464  if (RB_TYPE_P(arg->src, T_STRING)) {
1465  if (RSTRING_LEN(arg->src) - arg->offset >= len) {
1466  str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
1467  arg->offset += len;
1468  }
1469  else {
1470  too_short();
1471  }
1472  }
1473  else {
1474  if (arg->readable > 0 || arg->buflen > 0) {
1475  str = r_bytes1_buffered(len, arg);
1476  }
1477  else {
1478  str = r_bytes1(len, arg);
1479  }
1480  }
1481  return str;
1482 }
1483 
/*
 * Returns 1 when the l bytes at p are identical to the nlen bytes at name,
 * 0 otherwise.  Neither buffer needs to be NUL-terminated.
 *
 * The lengths are compared first, so no byte is read when they differ, and
 * a negative l never matches.  Two zero-length names compare equal; the
 * comparison length is passed to memcmp unmodified, so nlen == 0 cannot
 * wrap around to a huge size.
 */
static inline int
name_equal(const char *name, size_t nlen, const char *p, long l)
{
    if (l < 0 || (size_t)l != nlen) return 0;
    return memcmp(p, name, nlen) == 0;
}
1490 
1491 static int
1492 sym2encidx(VALUE sym, VALUE val)
1493 {
1494  static const char name_encoding[8] = "encoding";
1495  const char *p;
1496  long l;
1497  if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return -1;
1498  RSTRING_GETMEM(sym, p, l);
1499  if (l <= 0) return -1;
1500  if (name_equal(name_encoding, sizeof(name_encoding), p, l)) {
1501  int idx = rb_enc_find_index(StringValueCStr(val));
1502  return idx;
1503  }
1504  if (name_equal(name_s_encoding_short, rb_strlen_lit(name_s_encoding_short), p, l)) {
1505  if (val == Qfalse) return rb_usascii_encindex();
1506  else if (val == Qtrue) return rb_utf8_encindex();
1507  /* bogus ignore */
1508  }
1509  return -1;
1510 }
1511 
1512 static int
1513 symname_equal(VALUE sym, const char *name, size_t nlen)
1514 {
1515  const char *p;
1516  long l;
1517  if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return 0;
1518  RSTRING_GETMEM(sym, p, l);
1519  return name_equal(name, nlen, p, l);
1520 }
1521 
/*
 * BUILD_ASSERT_POSITIVE(n) evaluates to n for a positive constant n and
 * fails to compile for n == 0, because the array type it names then has a
 * negative size: sizeof(char[2n-1]) is 2n-1, and (2n-1+1)/2 is n.
 *
 * symname_equal_lit() compares a name string against a string literal and
 * uses the assertion to reject an empty literal at compile time.
 */
#define BUILD_ASSERT_POSITIVE(n) \
    /* make 0 negative to workaround the "zero size array" GCC extension, */ \
    ((sizeof(char [2*(ssize_t)(n)-1])+1)/2) /* assuming no overflow */
#define symname_equal_lit(sym, sym_name) \
    symname_equal(sym, sym_name, BUILD_ASSERT_POSITIVE(rb_strlen_lit(sym_name)))
1527 
1528 static VALUE
1529 r_symlink(struct load_arg *arg)
1530 {
1531  st_data_t sym;
1532  long num = r_long(arg);
1533 
1534  if (!st_lookup(arg->symbols, num, &sym)) {
1535  rb_raise(rb_eArgError, "bad symbol");
1536  }
1537  return (VALUE)sym;
1538 }
1539 
1540 static VALUE
1541 r_symreal(struct load_arg *arg, int ivar)
1542 {
1543  VALUE s = r_bytes(arg);
1544  VALUE sym;
1545  int idx = -1;
1546  st_index_t n = arg->symbols->num_entries;
1547 
1548  if (rb_enc_str_asciionly_p(s)) rb_enc_associate_index(s, ENCINDEX_US_ASCII);
1549  st_insert(arg->symbols, (st_data_t)n, (st_data_t)s);
1550  if (ivar) {
1551  long num = r_long(arg);
1552  while (num-- > 0) {
1553  sym = r_symbol(arg);
1554  idx = sym2encidx(sym, r_object(arg));
1555  }
1556  }
1557  if (idx > 0) {
1558  rb_enc_associate_index(s, idx);
1559  if (is_broken_string(s)) {
1560  rb_raise(rb_eArgError, "invalid byte sequence in %s: %+"PRIsVALUE,
1561  rb_enc_name(rb_enc_from_index(idx)), s);
1562  }
1563  }
1564 
1565  return s;
1566 }
1567 
1568 static VALUE
1569 r_symbol(struct load_arg *arg)
1570 {
1571  int type, ivar = 0;
1572 
1573  again:
1574  switch ((type = r_byte(arg))) {
1575  default:
1576  rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
1577  case TYPE_IVAR:
1578  ivar = 1;
1579  goto again;
1580  case TYPE_SYMBOL:
1581  return r_symreal(arg, ivar);
1582  case TYPE_SYMLINK:
1583  if (ivar) {
1584  rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
1585  }
1586  return r_symlink(arg);
1587  }
1588 }
1589 
1590 static VALUE
1591 r_unique(struct load_arg *arg)
1592 {
1593  return r_symbol(arg);
1594 }
1595 
1596 static VALUE
1597 r_string(struct load_arg *arg)
1598 {
1599  return r_bytes(arg);
1600 }
1601 
1602 static VALUE
1603 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
1604 {
1605  st_data_t real_obj = (st_data_t)v;
1606  if (arg->compat_tbl) {
1607  /* real_obj is kept if not found */
1608  st_lookup(arg->compat_tbl, v, &real_obj);
1609  }
1610  st_insert(arg->data, num, real_obj);
1611  st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue);
1612  return v;
1613 }
1614 
1615 static VALUE
1616 r_fixup_compat(VALUE v, struct load_arg *arg)
1617 {
1618  st_data_t data;
1619  st_data_t key = (st_data_t)v;
1620  if (arg->compat_tbl && st_delete(arg->compat_tbl, &key, &data)) {
1621  VALUE real_obj = (VALUE)data;
1622  rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
1623  if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1624  marshal_compat_t *compat = (marshal_compat_t*)data;
1625  compat->loader(real_obj, v);
1626  }
1627  v = real_obj;
1628  }
1629  return v;
1630 }
1631 
1632 static VALUE
1633 r_post_proc(VALUE v, struct load_arg *arg)
1634 {
1635  if (arg->proc) {
1636  v = load_funcall(arg, arg->proc, s_call, 1, &v);
1637  }
1638  return v;
1639 }
1640 
1641 static VALUE
1642 r_leave(VALUE v, struct load_arg *arg, bool partial)
1643 {
1644  v = r_fixup_compat(v, arg);
1645  if (!partial) {
1646  st_data_t data;
1647  st_data_t key = (st_data_t)v;
1648  st_delete(arg->partial_objects, &key, &data);
1649  if (arg->freeze) {
1650  if (RB_TYPE_P(v, T_MODULE) || RB_TYPE_P(v, T_CLASS)) {
1651  // noop
1652  }
1653  else if (RB_TYPE_P(v, T_STRING)) {
1654  v = rb_str_to_interned_str(v);
1655  }
1656  else {
1657  OBJ_FREEZE(v);
1658  }
1659  }
1660  v = r_post_proc(v, arg);
1661  }
1662  return v;
1663 }
1664 
1665 static int
1666 copy_ivar_i(ID vid, VALUE value, st_data_t arg)
1667 {
1668  VALUE obj = (VALUE)arg;
1669 
1670  if (!rb_ivar_defined(obj, vid))
1671  rb_ivar_set(obj, vid, value);
1672  return ST_CONTINUE;
1673 }
1674 
1675 static VALUE
1676 r_copy_ivar(VALUE v, VALUE data)
1677 {
1678  rb_ivar_foreach(data, copy_ivar_i, (st_data_t)v);
1679  return v;
1680 }
1681 
/*
 * Raises TypeError for an attempt to set instance variables on a class or
 * module.  +type+ must be a string literal (it is pasted into the format
 * string); +str+ is the name shown in the message.
 */
#define override_ivar_error(type, str) \
    rb_raise(rb_eTypeError, \
             "can't override instance variable of "type" '%"PRIsVALUE"'", \
             (str))
1686 
1687 static void
1688 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
1689 {
1690  long len;
1691 
1692  len = r_long(arg);
1693  if (len > 0) {
1694  if (RB_TYPE_P(obj, T_MODULE)) {
1695  override_ivar_error("module", rb_mod_name(obj));
1696  }
1697  else if (RB_TYPE_P(obj, T_CLASS)) {
1698  override_ivar_error("class", rb_class_name(obj));
1699  }
1700  do {
1701  VALUE sym = r_symbol(arg);
1702  VALUE val = r_object(arg);
1703  int idx = sym2encidx(sym, val);
1704  if (idx >= 0) {
1705  if (rb_enc_capable(obj)) {
1706  rb_enc_associate_index(obj, idx);
1707  }
1708  else {
1709  rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1710  }
1711  if (has_encoding) *has_encoding = TRUE;
1712  }
1713  else if (symname_equal_lit(sym, name_s_ruby2_keywords_flag)) {
1714  if (RB_TYPE_P(obj, T_HASH)) {
1715  rb_hash_ruby2_keywords(obj);
1716  }
1717  else {
1718  rb_raise(rb_eArgError, "ruby2_keywords flag is given but %"PRIsVALUE" is not a Hash", obj);
1719  }
1720  }
1721  else {
1722  rb_ivar_set(obj, rb_intern_str(sym), val);
1723  }
1724  } while (--len > 0);
1725  }
1726 }
1727 
1728 static VALUE
1729 path2class(VALUE path)
1730 {
1731  VALUE v = rb_path_to_class(path);
1732 
1733  if (!RB_TYPE_P(v, T_CLASS)) {
1734  rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path);
1735  }
1736  return v;
1737 }
1738 
1739 #define path2module(path) must_be_module(rb_path_to_class(path), path)
1740 
1741 static VALUE
1742 must_be_module(VALUE v, VALUE path)
1743 {
1744  if (!RB_TYPE_P(v, T_MODULE)) {
1745  rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path);
1746  }
1747  return v;
1748 }
1749 
1750 static VALUE
1751 obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass)
1752 {
1753  st_data_t data;
1754  rb_alloc_func_t allocator;
1755 
1756  allocator = rb_get_alloc_func(klass);
1757  if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1758  marshal_compat_t *compat = (marshal_compat_t*)data;
1759  VALUE real_obj = rb_obj_alloc(klass);
1760  VALUE obj = rb_obj_alloc(compat->oldclass);
1761  if (oldclass) *oldclass = compat->oldclass;
1762 
1763  if (!arg->compat_tbl) {
1764  arg->compat_tbl = rb_init_identtable();
1765  }
1766  st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
1767  return obj;
1768  }
1769 
1770  return rb_obj_alloc(klass);
1771 }
1772 
1773 static VALUE
1774 obj_alloc_by_path(VALUE path, struct load_arg *arg)
1775 {
1776  return obj_alloc_by_klass(path2class(path), arg, 0);
1777 }
1778 
1779 static VALUE
1780 append_extmod(VALUE obj, VALUE extmod)
1781 {
1782  long i = RARRAY_LEN(extmod);
1783  while (i > 0) {
1784  VALUE m = RARRAY_AREF(extmod, --i);
1785  rb_extend_object(obj, m);
1786  }
1787  return obj;
1788 }
1789 
/*
 * Raises the "can't override instance variable" TypeError when the
 * enclosing function's ivp pointer is non-NULL and the flag it points to
 * is set; otherwise does nothing.  Relies on a local named ivp at the point
 * of use.
 */
#define prohibit_ivar(type, str) do { \
        if (!ivp || !*ivp) break; \
        override_ivar_error(type, str); \
    } while (0)
1794 
1795 static VALUE r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type);
1796 
1797 static VALUE
1798 r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod)
1799 {
1800  int type = r_byte(arg);
1801  return r_object_for(arg, partial, ivp, extmod, type);
1802 }
1803 
/*
 * Reads the body of one object whose type byte +type+ has already been
 * consumed, and returns the loaded object.
 *
 * arg     - load state
 * partial - true while an enclosing wrapper (ivar, extended, ...) still has
 *           work to do on the object; it is passed to r_leave(), which
 *           skips the final steps (partial-object bookkeeping, freezing,
 *           post proc) when it is set
 * ivp     - NULL, or a pointer to a flag telling that instance variables
 *           follow; readers that consume them themselves reset the flag
 * extmod  - nil, or a hidden array of modules collected from TYPE_EXTENDED
 *           prefixes
 * type    - the type byte
 *
 * Raises ArgumentError or TypeError on malformed input.
 */
static VALUE
r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type)
{
    /* constructor used by the hash cases; TYPE_UCLASS may swap in the identity-hash one */
    VALUE (*hash_new_with_size)(st_index_t) = rb_hash_new_with_size;
    VALUE v = Qnil;
    long id;
    st_data_t link;

    switch (type) {
      case TYPE_LINK:
        /* reference to an object registered earlier in arg->data */
        id = r_long(arg);
        if (!st_lookup(arg->data, (st_data_t)id, &link)) {
            rb_raise(rb_eArgError, "dump format error (unlinked)");
        }
        v = (VALUE)link;
        /* the post proc is only applied to objects that are no longer partial */
        if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) {
            v = r_post_proc(v, arg);
        }
        break;

      case TYPE_IVAR:
        {
            int ivar = TRUE;
            v = r_object0(arg, true, &ivar, extmod);
            /* the flag is still set when the inner reader left the ivars unread */
            if (ivar) r_ivar(v, NULL, arg);
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_EXTENDED:
        {
            VALUE path = r_unique(arg);
            VALUE m = rb_path_to_class(path);
            if (NIL_P(extmod)) extmod = rb_ary_hidden_new(0);

            if (RB_TYPE_P(m, T_CLASS)) { /* prepended */
                VALUE c;

                v = r_object0(arg, true, 0, Qnil);
                c = CLASS_OF(v);
                if (c != m || FL_TEST(c, FL_SINGLETON)) {
                    rb_raise(rb_eArgError,
                             "prepended class %"PRIsVALUE" differs from class %"PRIsVALUE,
                             path, rb_class_name(c));
                }
                c = rb_singleton_class(v);
                while (RARRAY_LEN(extmod) > 0) {
                    m = rb_ary_pop(extmod);
                    rb_prepend_module(c, m);
                }
            }
            else {
                must_be_module(m, path);
                rb_ary_push(extmod, m);

                v = r_object0(arg, true, 0, extmod);
                /* apply whatever modules the inner reader has not consumed */
                while (RARRAY_LEN(extmod) > 0) {
                    m = rb_ary_pop(extmod);
                    rb_extend_object(v, m);
                }
            }
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_UCLASS:
        {
            VALUE c = path2class(r_unique(arg));

            if (FL_TEST(c, FL_SINGLETON)) {
                rb_raise(rb_eTypeError, "singleton can't be loaded");
            }
            type = r_byte(arg);
            if ((c == rb_cHash) &&
                /* Hack for compare_by_identity */
                (type == TYPE_HASH || type == TYPE_HASH_DEF)) {
                hash_new_with_size = rb_ident_hash_new_with_size;
                goto type_hash;
            }
            v = r_object_for(arg, partial, 0, extmod, type);
            if (RB_SPECIAL_CONST_P(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
                goto format_error;
            }
            /* the named class must produce the same internal type as the loaded value */
            if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
                VALUE tmp = rb_obj_alloc(c);

                if (TYPE(v) != TYPE(tmp)) goto format_error;
            }
            RBASIC_SET_CLASS(v, c);
        }
        break;

      format_error:
        rb_raise(rb_eArgError, "dump format error (user class)");

      case TYPE_NIL:
        v = Qnil;
        v = r_leave(v, arg, false);
        break;

      case TYPE_TRUE:
        v = Qtrue;
        v = r_leave(v, arg, false);
        break;

      case TYPE_FALSE:
        v = Qfalse;
        v = r_leave(v, arg, false);
        break;

      case TYPE_FIXNUM:
        {
            long i = r_long(arg);
            v = LONG2FIX(i);
        }
        v = r_leave(v, arg, false);
        break;

      case TYPE_FLOAT:
        {
            double d;
            VALUE str = r_bytes(arg);
            const char *ptr = RSTRING_PTR(str);

            /* the three non-finite values are spelled out as words */
            if (strcmp(ptr, "nan") == 0) {
                d = nan("");
            }
            else if (strcmp(ptr, "inf") == 0) {
                d = HUGE_VAL;
            }
            else if (strcmp(ptr, "-inf") == 0) {
                d = -HUGE_VAL;
            }
            else {
                char *e;
                d = strtod(ptr, &e);
                /* bytes after the parsed number are handed to load_mantissa */
                d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
            }
            v = DBL2NUM(d);
            v = r_entry(v, arg);
            v = r_leave(v, arg, false);
        }
        break;

      case TYPE_BIGNUM:
        {
            long len;
            VALUE data;
            int sign;

            /* sign byte, then len 16-bit little-endian digits */
            sign = r_byte(arg);
            len = r_long(arg);

            if (SIZEOF_VALUE >= 8 && len <= 4) {
                // Representable within uintptr, likely FIXNUM
                VALUE num = 0;
                for (int i = 0; i < len; i++) {
                    num |= (VALUE)r_byte(arg) << (i * 16);
                    num |= (VALUE)r_byte(arg) << (i * 16 + 8);
                }
#if SIZEOF_VALUE == SIZEOF_LONG
                v = ULONG2NUM(num);
#else
                v = ULL2NUM(num);
#endif
                if (sign == '-') {
                    v = rb_int_uminus(v);
                }
            }
            else {
                data = r_bytes0(len * 2, arg);
                v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
                    INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
                rb_str_resize(data, 0L);
            }
            v = r_entry(v, arg);
            v = r_leave(v, arg, false);
        }
        break;

      case TYPE_STRING:
        v = r_entry(r_string(arg), arg);
        v = r_leave(v, arg, partial);
        break;

      case TYPE_REGEXP:
        {
            VALUE str = r_bytes(arg);
            int options = r_byte(arg);
            int has_encoding = FALSE;
            /* reserve the link index now; the ivars read below may register objects */
            st_index_t idx = r_prepare(arg);

            if (ivp) {
                r_ivar(str, &has_encoding, arg);
                *ivp = FALSE;
            }
            if (!has_encoding) {
                /* 1.8 compatibility; remove escapes undefined in 1.8 */
                char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
                long len = RSTRING_LEN(str);
                long bs = 0;
                for (; len-- > 0; *dst++ = *src++) {
                    switch (*src) {
                      case '\\': bs++; break;
                      case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                      case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
                      case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
                      case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
                      case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
                        /* odd number of preceding backslashes: overwrite the last one */
                        if (bs & 1) --dst;
                        /* fall through */
                      default: bs = 0; break;
                    }
                }
                rb_str_set_len(str, dst - ptr);
            }
            VALUE regexp = rb_reg_new_str(str, options);
            r_copy_ivar(regexp, str);

            v = r_entry0(regexp, idx, arg);
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_ARRAY:
        {
            long len = r_long(arg);

            v = rb_ary_new2(len);
            v = r_entry(v, arg);
            /* NOTE(review): readable appears to track how many more elements must follow -- confirm */
            arg->readable += len - 1;
            while (len--) {
                rb_ary_push(v, r_object(arg));
                arg->readable--;
            }
            v = r_leave(v, arg, partial);
            arg->readable++;
        }
        break;

      case TYPE_HASH:
      case TYPE_HASH_DEF:
      type_hash:
        {
            long len = r_long(arg);

            v = hash_new_with_size(len);
            v = r_entry(v, arg);
            /* two objects (key and value) per entry */
            arg->readable += (len - 1) * 2;
            while (len--) {
                VALUE key = r_object(arg);
                VALUE value = r_object(arg);
                rb_hash_aset(v, key, value);
                arg->readable -= 2;
            }
            arg->readable += 2;
            if (type == TYPE_HASH_DEF) {
                /* one more object follows: the hash's default value */
                RHASH_SET_IFNONE(v, r_object(arg));
            }
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_STRUCT:
        {
            VALUE mem, values;
            long i;
            VALUE slot;
            st_index_t idx = r_prepare(arg);
            VALUE klass = path2class(r_unique(arg));
            long len = r_long(arg);

            v = rb_obj_alloc(klass);
            if (!RB_TYPE_P(v, T_STRUCT)) {
                rb_raise(rb_eTypeError, "class %"PRIsVALUE" not a struct", rb_class_name(klass));
            }
            mem = rb_struct_s_members(klass);
            if (RARRAY_LEN(mem) != len) {
                rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (struct size differs)",
                         rb_class_name(klass));
            }

            arg->readable += (len - 1) * 2;
            v = r_entry0(v, idx, arg);
            values = rb_ary_new2(len);
            {
                /* keyword_init structs get their members collected in a single hash argument */
                VALUE keywords = Qfalse;
                if (RTEST(rb_struct_s_keyword_init(klass))) {
                    keywords = rb_hash_new();
                    rb_ary_push(values, keywords);
                }

                for (i=0; i<len; i++) {
                    VALUE n = rb_sym2str(RARRAY_AREF(mem, i));
                    slot = r_symbol(arg);

                    /* member names must match the class definition, in order */
                    if (!rb_str_equal(n, slot)) {
                        rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (:%"PRIsVALUE" for :%"PRIsVALUE")",
                                 rb_class_name(klass),
                                 slot, n);
                    }
                    if (keywords) {
                        rb_hash_aset(keywords, RARRAY_AREF(mem, i), r_object(arg));
                    }
                    else {
                        rb_ary_push(values, r_object(arg));
                    }
                    arg->readable -= 2;
                }
            }
            rb_struct_initialize(v, values);
            v = r_leave(v, arg, partial);
            arg->readable += 2;
        }
        break;

      case TYPE_USERDEF:
        {
            VALUE name = r_unique(arg);
            VALUE klass = path2class(name);
            VALUE data;
            st_data_t d;

            if (!rb_obj_respond_to(klass, s_load, TRUE)) {
                rb_raise(rb_eTypeError, "class %"PRIsVALUE" needs to have method '_load'",
                         name);
            }
            data = r_string(arg);
            if (ivp) {
                /* pending ivars belong to the data string, not to the result */
                r_ivar(data, NULL, arg);
                *ivp = FALSE;
            }
            v = load_funcall(arg, klass, s_load, 1, &data);
            v = r_entry(v, arg);
            if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) {
                marshal_compat_t *compat = (marshal_compat_t*)d;
                v = compat->loader(klass, v);
            }
            if (!partial) {
                if (arg->freeze) {
                    OBJ_FREEZE(v);
                }
                v = r_post_proc(v, arg);
            }
        }
        break;

      case TYPE_USRMARSHAL:
        {
            VALUE name = r_unique(arg);
            VALUE klass = path2class(name);
            VALUE oldclass = 0;
            VALUE data;

            v = obj_alloc_by_klass(klass, arg, &oldclass);
            if (!NIL_P(extmod)) {
                /* for the case marshal_load is overridden */
                append_extmod(v, extmod);
            }
            if (!rb_obj_respond_to(v, s_mload, TRUE)) {
                rb_raise(rb_eTypeError, "instance of %"PRIsVALUE" needs to have method 'marshal_load'",
                         name);
            }
            v = r_entry(v, arg);
            data = r_object(arg);
            load_funcall(arg, v, s_mload, 1, &data);
            v = r_fixup_compat(v, arg);
            v = r_copy_ivar(v, data);
            if (arg->freeze) {
                OBJ_FREEZE(v);
            }
            v = r_post_proc(v, arg);
            if (!NIL_P(extmod)) {
                /* a compat stand-in was extended above; extend the real object as well */
                if (oldclass) append_extmod(v, extmod);
                rb_ary_clear(extmod);
            }
        }
        break;

      case TYPE_OBJECT:
        {
            st_index_t idx = r_prepare(arg);
            v = obj_alloc_by_path(r_unique(arg), arg);
            if (!RB_TYPE_P(v, T_OBJECT)) {
                rb_raise(rb_eArgError, "dump format error");
            }
            v = r_entry0(v, idx, arg);
            r_ivar(v, NULL, arg);
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_DATA:
        {
            VALUE name = r_unique(arg);
            VALUE klass = path2class(name);
            VALUE oldclass = 0;
            VALUE r;

            v = obj_alloc_by_klass(klass, arg, &oldclass);
            if (!RB_TYPE_P(v, T_DATA)) {
                rb_raise(rb_eArgError, "dump format error");
            }
            v = r_entry(v, arg);
            if (!rb_obj_respond_to(v, s_load_data, TRUE)) {
                rb_raise(rb_eTypeError,
                         "class %"PRIsVALUE" needs to have instance method '_load_data'",
                         name);
            }
            r = r_object0(arg, partial, 0, extmod);
            load_funcall(arg, v, s_load_data, 1, &r);
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_MODULE_OLD:
        {
            VALUE str = r_bytes(arg);

            /* old format: the path may name either a class or a module */
            v = rb_path_to_class(str);
            prohibit_ivar("class/module", str);
            v = r_entry(v, arg);
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_CLASS:
        {
            VALUE str = r_bytes(arg);

            v = path2class(str);
            prohibit_ivar("class", str);
            v = r_entry(v, arg);
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_MODULE:
        {
            VALUE str = r_bytes(arg);

            v = path2module(str);
            prohibit_ivar("module", str);
            v = r_entry(v, arg);
            v = r_leave(v, arg, partial);
        }
        break;

      case TYPE_SYMBOL:
        if (ivp) {
            /* the symbol reader consumes the ivars (its encoding) itself */
            v = r_symreal(arg, *ivp);
            *ivp = FALSE;
        }
        else {
            v = r_symreal(arg, 0);
        }
        v = rb_str_intern(v);
        v = r_leave(v, arg, partial);
        break;

      case TYPE_SYMLINK:
        v = rb_str_intern(r_symlink(arg));
        break;

      default:
        rb_raise(rb_eArgError, "dump format error(0x%x)", type);
        break;
    }

    /* Qundef is what r_prepare() stores in a slot that has been reserved but not filled yet */
    if (UNDEF_P(v)) {
        rb_raise(rb_eArgError, "dump format error (bad link)");
    }

    return v;
}
2279 
2280 static VALUE
2281 r_object(struct load_arg *arg)
2282 {
2283  return r_object0(arg, false, 0, Qnil);
2284 }
2285 
2286 static void
2287 clear_load_arg(struct load_arg *arg)
2288 {
2289  xfree(arg->buf);
2290  arg->buf = NULL;
2291  arg->buflen = 0;
2292  arg->offset = 0;
2293  arg->readable = 0;
2294  if (!arg->symbols) return;
2295  st_free_table(arg->symbols);
2296  arg->symbols = 0;
2297  st_free_table(arg->data);
2298  arg->data = 0;
2299  st_free_table(arg->partial_objects);
2300  arg->partial_objects = 0;
2301  if (arg->compat_tbl) {
2302  st_free_table(arg->compat_tbl);
2303  arg->compat_tbl = 0;
2304  }
2305 }
2306 
2307 VALUE
2308 rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze)
2309 {
2310  int major, minor;
2311  VALUE v;
2312  VALUE wrapper; /* used to avoid memory leak in case of exception */
2313  struct load_arg *arg;
2314 
2315  v = rb_check_string_type(port);
2316  if (!NIL_P(v)) {
2317  port = v;
2318  }
2319  else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
2320  rb_check_funcall(port, s_binmode, 0, 0);
2321  }
2322  else {
2323  io_needed();
2324  }
2325  wrapper = TypedData_Make_Struct(0, struct load_arg, &load_arg_data, arg);
2326  arg->src = port;
2327  arg->offset = 0;
2328  arg->symbols = st_init_numtable();
2329  arg->data = rb_init_identtable();
2330  arg->partial_objects = rb_init_identtable();
2331  arg->compat_tbl = 0;
2332  arg->proc = 0;
2333  arg->readable = 0;
2334  arg->freeze = freeze;
2335 
2336  if (NIL_P(v))
2337  arg->buf = xmalloc(BUFSIZ);
2338  else
2339  arg->buf = 0;
2340 
2341  major = r_byte(arg);
2342  minor = r_byte(arg);
2343  if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
2344  clear_load_arg(arg);
2345  rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
2346 \tformat version %d.%d required; %d.%d given",
2347  MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2348  }
2349  if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
2350  rb_warn("incompatible marshal file format (can be read)\n\
2351 \tformat version %d.%d required; %d.%d given",
2352  MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2353  }
2354 
2355  if (!NIL_P(proc)) arg->proc = proc;
2356  v = r_object(arg);
2357  clear_load_arg(arg);
2358  RB_GC_GUARD(wrapper);
2359 
2360  return v;
2361 }
2362 
2363 static VALUE
2364 marshal_load(rb_execution_context_t *ec, VALUE mod, VALUE source, VALUE proc, VALUE freeze)
2365 {
2366  return rb_marshal_load_with_proc(source, proc, RTEST(freeze));
2367 }
2368 
2369 #include "marshal.rbinc"
2370 
2371 /*
2372  * The marshaling library converts collections of Ruby objects into a
2373  * byte stream, allowing them to be stored outside the currently
2374  * active script. This data may subsequently be read and the original
2375  * objects reconstituted.
2376  *
2377  * Marshaled data has major and minor version numbers stored along
2378  * with the object information. In normal use, marshaling can only
2379  * load data written with the same major version number and an equal
2380  * or lower minor version number. If Ruby's ``verbose'' flag is set
2381  * (normally using -d, -v, -w, or --verbose), a warning is issued when
2382  * the minor numbers do not match exactly. Marshal versioning is independent of
2383  * Ruby's version numbers. You can extract the version by reading the
2384  * first two bytes of marshaled data.
2385  *
2386  * str = Marshal.dump("thing")
2387  * RUBY_VERSION #=> "1.9.0"
2388  * str[0].ord #=> 4
2389  * str[1].ord #=> 8
2390  *
2391  * Some objects cannot be dumped: if the objects to be dumped include
2392  * bindings, procedure or method objects, instances of class IO, or
2393  * singleton objects, a TypeError will be raised.
2394  *
2395  * If your class has special serialization needs (for example, if you
2396  * want to serialize in some specific format), or if it contains
2397  * objects that would otherwise not be serializable, you can implement
2398  * your own serialization strategy.
2399  *
2400  * There are two methods of doing this, your object can define either
2401  * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2402  * precedence over _dump if both are defined. marshal_dump may result in
2403  * smaller Marshal strings.
2404  *
2405  * == Security considerations
2406  *
2407  * By design, Marshal.load can deserialize almost any class loaded into the
2408  * Ruby process. In many cases this can lead to remote code execution if the
2409  * Marshal data is loaded from an untrusted source.
2410  *
2411  * As a result, Marshal is not suitable as a general-purpose serialization
2412  * format, and you should never unmarshal user-supplied input or other untrusted
2413  * data.
2414  *
2415  * If you need to deserialize untrusted data, use JSON or another serialization
2416  * format that is only able to load simple, 'primitive' types such as String,
2417  * Array, Hash, etc. Never allow user input to specify arbitrary types to
2418  * deserialize into.
2419  *
2420  * == marshal_dump and marshal_load
2421  *
2422  * When dumping an object the method marshal_dump will be called.
2423  * marshal_dump must return a result containing the information necessary for
2424  * marshal_load to reconstitute the object. The result can be any object.
2425  *
2426  * When loading an object dumped using marshal_dump the object is first
2427  * allocated then marshal_load is called with the result from marshal_dump.
2428  * marshal_load must recreate the object from the information in the result.
2429  *
2430  * Example:
2431  *
2432  * class MyObj
2433  * def initialize name, version, data
2434  * @name = name
2435  * @version = version
2436  * @data = data
2437  * end
2438  *
2439  * def marshal_dump
2440  * [@name, @version]
2441  * end
2442  *
2443  * def marshal_load array
2444  * @name, @version = array
2445  * end
2446  * end
2447  *
2448  * == _dump and _load
2449  *
2450  * Use _dump and _load when you need to allocate the object you're restoring
2451  * yourself.
2452  *
2453  * When dumping an object the instance method _dump is called with an Integer
2454  * which indicates the maximum depth of objects to dump (a value of -1 implies
2455  * that you should disable depth checking). _dump must return a String
2456  * containing the information necessary to reconstitute the object.
2457  *
2458  * The class method _load should take a String and use it to return an object
2459  * of the same class.
2460  *
2461  * Example:
2462  *
2463  * class MyObj
2464  * def initialize name, version, data
2465  * @name = name
2466  * @version = version
2467  * @data = data
2468  * end
2469  *
2470  * def _dump level
2471  * [@name, @version].join ':'
2472  * end
2473  *
2474  * def self._load args
2475  * new(*args.split(':'))
2476  * end
2477  * end
2478  *
2479  * Since Marshal.dump outputs a string you can have _dump return a Marshal
2480  * string which is Marshal.loaded in _load for complex objects.
2481  */
void
Init_marshal(void)
{
    /* Create the Marshal module; everything below is attached to it. */
    VALUE rb_mMarshal = rb_define_module("Marshal");
    /* set_id(x) stores rb_intern_const(name_x) into the variable x. */
#define set_id(sym) sym = rb_intern_const(name_##sym)
    set_id(s_dump);
    set_id(s_load);
    set_id(s_mdump);
    set_id(s_mload);
    set_id(s_dump_data);
    set_id(s_load_data);
    set_id(s_alloc);
    set_id(s_call);
    set_id(s_getbyte);
    set_id(s_read);
    set_id(s_write);
    set_id(s_binmode);
    set_id(s_encoding_short);
    set_id(s_ruby2_keywords_flag);

    /* Marshal.dump is backed by marshal_dump; -1 is passed as the arity. */
    rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);

    /* major version */
    rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
    /* minor version */
    rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
}
2509 
2510 static int
2511 marshal_compat_table_mark_i(st_data_t key, st_data_t value, st_data_t _)
2512 {
2513  marshal_compat_t *p = (marshal_compat_t *)value;
2514  rb_gc_mark_movable(p->newclass);
2515  rb_gc_mark_movable(p->oldclass);
2516  return ST_CONTINUE;
2517 }
2518 
2519 static void
2520 marshal_compat_table_mark(void *tbl)
2521 {
2522  if (!tbl) return;
2523  st_foreach(tbl, marshal_compat_table_mark_i, 0);
2524 }
2525 
2526 static int
2527 marshal_compat_table_free_i(st_data_t key, st_data_t value, st_data_t _)
2528 {
2529  xfree((marshal_compat_t *)value);
2530  return ST_CONTINUE;
2531 }
2532 
2533 static void
2534 marshal_compat_table_free(void *data)
2535 {
2536  st_foreach(data, marshal_compat_table_free_i, 0);
2537  st_free_table(data);
2538 }
2539 
2540 static size_t
2541 marshal_compat_table_memsize(const void *data)
2542 {
2543  return st_memsize(data) + sizeof(marshal_compat_t) * st_table_size(data);
2544 }
2545 
2546 static int
2547 marshal_compat_table_compact_i(st_data_t key, st_data_t value, st_data_t _)
2548 {
2549  marshal_compat_t *p = (marshal_compat_t *)value;
2550  p->newclass = rb_gc_location(p->newclass);
2551  p->oldclass = rb_gc_location(p->oldclass);
2552  return ST_CONTINUE;
2553 }
2554 
2555 static void
2556 marshal_compat_table_compact(void *tbl)
2557 {
2558  if (!tbl) return;
2559  st_foreach(tbl, marshal_compat_table_compact_i, 0);
2560 }
2561 
/*
 * Typed-data descriptor for the object that wraps the compat allocator
 * table (it is the type passed to TypedData_Wrap_Struct in
 * compat_allocator_table). It connects the wrapper to the mark, free,
 * memsize and compaction callbacks of the table.
 */
static const rb_data_type_t marshal_compat_type = {
    .wrap_struct_name = "marshal_compat_table",
    .function = {
        .dmark = marshal_compat_table_mark,       /* marks newclass/oldclass of each entry */
        .dfree = marshal_compat_table_free,       /* frees the entries, then the table */
        .dsize = marshal_compat_table_memsize,    /* table size plus per-entry structs */
        .dcompact = marshal_compat_table_compact, /* refreshes newclass/oldclass of each entry */
    },
    /*
     * NOTE(review): RUBY_TYPED_WB_PROTECTED presumably obliges whoever
     * stores newclass/oldclass into an entry to go through a write
     * barrier on the wrapper object -- confirm against the code that
     * populates the table.
     */
    .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY,
};
2572 
2573 static st_table *
2574 compat_allocator_table(void)
2575 {
2576  if (compat_allocator_tbl) return compat_allocator_tbl;
2577  compat_allocator_tbl = st_init_numtable();
2578  compat_allocator_tbl_wrapper =
2579  TypedData_Wrap_Struct(0, &marshal_compat_type, compat_allocator_tbl);
2580  rb_vm_register_global_object(compat_allocator_tbl_wrapper);
2581  return compat_allocator_tbl;
2582 }
2583 
2584 VALUE
2585 rb_marshal_dump(VALUE obj, VALUE port)
2586 {
2587  return rb_marshal_dump_limited(obj, port, -1);
2588 }
2589 
2590 VALUE
2591 rb_marshal_load(VALUE port)
2592 {
2593  return rb_marshal_load_with_proc(port, Qnil, false);
2594 }
Defines RBIMPL_HAS_BUILTIN.
int len
Length of the buffer.
Definition: io.h:8