00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "ruby.h"
00014 #include "rubyio.h"
00015 #include "st.h"
00016 #include "util.h"
00017
00018 #include <math.h>
00019 #ifdef HAVE_FLOAT_H
00020 #include <float.h>
00021 #endif
00022 #ifdef HAVE_IEEEFP_H
00023 #include <ieeefp.h>
00024 #endif
00025
/* Bignum digits are written to the stream in units of BITSPERSHORT bits. */
#define BITSPERSHORT (2*CHAR_BIT)
#define SHORTMASK ((1<<BITSPERSHORT)-1)
#define SHORTDN(x) RSHIFT(x,BITSPERSHORT)

#if SIZEOF_SHORT == SIZEOF_BDIGITS
/* One BDIGIT is one short: the digit count is used unchanged. */
#define SHORTLEN(x) (x)
#else
/*
 * Count how many short-sized units are needed for a digit array of `len`
 * BDIGITs: every digit below the top one contributes sizeof(BDIGIT)/2
 * units, the top digit only as many as it takes to shift it down to zero.
 */
static int
shortlen(len, ds)
    long len;
    BDIGIT *ds;
{
    BDIGIT top = ds[len-1];
    int used = 0;

    for (; top; top = SHORTDN(top)) {
        used++;
    }
    return (len - 1)*sizeof(BDIGIT)/2 + used;
}
/* NOTE: the expansion refers to a variable named `d` at the point of use. */
#define SHORTLEN(x) shortlen((x),d)
#endif
00050
/* Format version written as the first two bytes of every dump. */
#define MARSHAL_MAJOR   4
#define MARSHAL_MINOR   8

/* One-byte type tags that introduce each value in the stream. */
#define TYPE_NIL        '0'
#define TYPE_TRUE       'T'
#define TYPE_FALSE      'F'
#define TYPE_FIXNUM     'i'

#define TYPE_EXTENDED   'e'     /* followed by the name of an extending module */
#define TYPE_UCLASS     'C'     /* followed by the name of a user subclass */
#define TYPE_OBJECT     'o'
#define TYPE_DATA       'd'
#define TYPE_USERDEF    'u'     /* payload produced by _dump */
#define TYPE_USRMARSHAL 'U'     /* payload produced by marshal_dump */
#define TYPE_FLOAT      'f'
#define TYPE_BIGNUM     'l'
#define TYPE_STRING     '"'
#define TYPE_REGEXP     '/'
#define TYPE_ARRAY      '['
#define TYPE_HASH       '{'
#define TYPE_HASH_DEF   '}'     /* hash followed by its default value */
#define TYPE_STRUCT     'S'
#define TYPE_MODULE_OLD 'M'
#define TYPE_CLASS      'c'
#define TYPE_MODULE     'm'

#define TYPE_SYMBOL     ':'
#define TYPE_SYMLINK    ';'     /* index of a symbol already seen */

#define TYPE_IVAR       'I'     /* value followed by instance variables */
#define TYPE_LINK       '@'     /* index of an object already seen */
00082
00083 static ID s_dump, s_load, s_mdump, s_mload;
00084 static ID s_dump_data, s_load_data, s_alloc;
00085 static ID s_getc, s_read, s_write, s_binmode;
00086
00087 struct dump_arg {
00088 VALUE obj;
00089 VALUE str, dest;
00090 st_table *symbols;
00091 st_table *data;
00092 int taint;
00093 };
00094
00095 struct dump_call_arg {
00096 VALUE obj;
00097 struct dump_arg *arg;
00098 int limit;
00099 };
00100
00101 static VALUE
00102 class2path(klass)
00103 VALUE klass;
00104 {
00105 VALUE path = rb_class_path(klass);
00106 char *n = RSTRING(path)->ptr;
00107
00108 if (n[0] == '#') {
00109 rb_raise(rb_eTypeError, "can't dump anonymous %s %s",
00110 (TYPE(klass) == T_CLASS ? "class" : "module"),
00111 n);
00112 }
00113 if (rb_path2class(n) != rb_class_real(klass)) {
00114 rb_raise(rb_eTypeError, "%s can't be referred", n);
00115 }
00116 return path;
00117 }
00118
00119 static void w_long (long, struct dump_arg*);
00120
00121 static void
00122 w_nbyte(s, n, arg)
00123 char *s;
00124 int n;
00125 struct dump_arg *arg;
00126 {
00127 VALUE buf = arg->str;
00128 rb_str_buf_cat(buf, s, n);
00129 if (arg->dest && RSTRING(buf)->len >= BUFSIZ) {
00130 if (arg->taint) OBJ_TAINT(buf);
00131 rb_io_write(arg->dest, buf);
00132 rb_str_resize(buf, 0);
00133 }
00134 }
00135
/* Write the single byte `c` to the dump buffer. */
static void
w_byte(c, arg)
    char c;
    struct dump_arg *arg;
{
    char *one = &c;

    w_nbyte(one, 1, arg);
}
00143
/* Write a length-prefixed byte sequence: the count `n`, then `n` bytes of `s`. */
static void
w_bytes(s, n, arg)
    char *s;
    int n;
    struct dump_arg *arg;
{
    w_long(n, arg);     /* length header */
    w_nbyte(s, n, arg); /* payload */
}
00153
/* Write the low 16 bits of `x` as two bytes, least significant byte first. */
static void
w_short(x, arg)
    int x;
    struct dump_arg *arg;
{
    int shift;

    for (shift = 0; shift <= 8; shift += 8) {
        w_byte((x >> shift) & 0xff, arg);
    }
}
00162
/*
 * Write `x` in the variable-length integer encoding:
 *   - 0 is the single byte 0;
 *   - 1..122 is the single byte x+5, -123..-1 the single byte x-5;
 *   - anything else is a count byte (+k for non-negative, -k for negative
 *     values) followed by the k low-order bytes, least significant first.
 *
 * Where long is wider than 4 bytes, values that do not fit in 32 signed
 * bits raise TypeError.
 */
static void
w_long(x, arg)
    long x;
    struct dump_arg *arg;
{
    char buf[sizeof(long)+1];
    int i, len = 0;

#if SIZEOF_LONG > 4
    if (RSHIFT(x, 31) != 0 && RSHIFT(x, 31) != -1) {
        /* big long does not fit in 4 bytes */
        rb_raise(rb_eTypeError, "long too big to dump");
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (0 < x && x < 123) {
        w_byte(x + 5, arg);
        return;
    }
    if (-124 < x && x < 0) {
        w_byte((x - 5)&0xff, arg);
        return;
    }

    /* Peel off bytes until only sign extension (0 or -1) remains. */
    for (len = 1; len < (int)(sizeof(long)+1); len++) {
        buf[len] = x & 0xff;
        x = RSHIFT(x,8);
        if (x == 0 || x == -1) {
            buf[0] = (x == 0) ? len : -len;
            break;
        }
    }
    for (i = 0; i <= len; i++) {
        w_byte(buf[i], arg);
    }
}
00207
#ifdef DBL_MANT_DIG
/* Number of leading mantissa bits left to the decimal text representation. */
#define DECIMAL_MANT (53-16)

/* Width of the chunks in which the remaining mantissa bits are stored. */
#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Store into `buf` the mantissa bits of `d` below the first DECIMAL_MANT
 * bits: a zero byte followed by big-endian chunks of MANT_BITS bits, with
 * trailing zero bytes dropped.  Returns the number of bytes stored, which
 * is 0 when there are no such bits.
 */
static int
save_mantissa(d, buf)
    double d;
    char *buf;
{
    int exp2, used = 0;
    unsigned long chunk;
    double whole, frac;

    frac = modf(ldexp(frexp(fabs(d), &exp2), DECIMAL_MANT), &whole);
    if (!(frac > 0)) return 0;

    buf[used++] = 0;
    do {
        frac = modf(ldexp(frac, MANT_BITS), &whole);
        chunk = (unsigned long)whole;
#if MANT_BITS > 24
        buf[used++] = chunk >> 24;
#endif
#if MANT_BITS > 16
        buf[used++] = chunk >> 16;
#endif
#if MANT_BITS > 8
        buf[used++] = chunk >> 8;
#endif
        buf[used++] = chunk;
    } while (frac > 0);
    while (!buf[used - 1]) --used;
    return used;
}

/*
 * Inverse of save_mantissa(): `d` is the value parsed from the decimal
 * text, `buf`/`len` the bytes that followed it.  When the bytes start with
 * a zero byte and more follow, the first DECIMAL_MANT mantissa bits of `d`
 * are kept and the remaining bits are rebuilt from the stored chunks;
 * otherwise `d` is returned unchanged.
 */
static double
load_mantissa(d, buf, len)
    double d;
    const char *buf;
    int len;
{
    int exp2, negative, scale = 0;
    unsigned long chunk;

    if (--len <= 0 || *buf++) return d;

    negative = d < 0;
    /* keep only the integral part of the scaled mantissa */
    modf(ldexp(frexp(fabs(d), &exp2), DECIMAL_MANT), &d);
    do {
        chunk = 0;
        /* cases fall through on purpose: read up to MANT_BITS/8 bytes */
        switch (len) {
          default: chunk = *buf++ & 0xff;
#if MANT_BITS > 24
          case 3: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
#if MANT_BITS > 16
          case 2: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
#if MANT_BITS > 8
          case 1: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
        }
        scale -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
        d += ldexp((double)chunk, scale);
    } while ((len -= MANT_BITS / 8) > 0);
    d = ldexp(d, exp2 - DECIMAL_MANT);
    if (negative) d = -d;
    return d;
}
#else
/* Without DBL_MANT_DIG no extra mantissa bytes are written or read. */
#define load_mantissa(d, buf, len) (d)
#define save_mantissa(d, buf) 0
#endif
00289
/* Significant digits used for the decimal text of a float. */
#ifdef DBL_DIG
#define FLOAT_DIG (DBL_DIG+2)
#else
#define FLOAT_DIG 17
#endif

/*
 * Write the double `d` as a length-prefixed byte string.
 *
 * Infinities, NaN and zeros are written as the literal texts "inf",
 * "-inf", "nan", "0" and "-0".  Every other value is written as "%.*g"
 * text with FLOAT_DIG digits, followed by whatever bytes save_mantissa()
 * appends.
 */
static void
w_float(d, arg)
    double d;
    struct dump_arg *arg;
{
    char buf[100];
    int len;

    if (isinf(d)) {
        strcpy(buf, (d < 0) ? "-inf" : "inf");
        len = strlen(buf);
    }
    else if (isnan(d)) {
        strcpy(buf, "nan");
        len = strlen(buf);
    }
    else if (d == 0.0) {
        /* 1.0/d tells -0.0 apart from +0.0 */
        strcpy(buf, (1.0/d < 0) ? "-0" : "0");
        len = strlen(buf);
    }
    else {
        /* no locale-dependent formatting is wanted here */
        sprintf(buf, "%.*g", FLOAT_DIG, d);
        len = strlen(buf);
        len += save_mantissa(d, buf + len);
    }
    w_bytes(buf, len, arg);
}
00325
00326 static void
00327 w_symbol(id, arg)
00328 ID id;
00329 struct dump_arg *arg;
00330 {
00331 char *sym = rb_id2name(id);
00332 st_data_t num;
00333
00334 if (st_lookup(arg->symbols, id, &num)) {
00335 w_byte(TYPE_SYMLINK, arg);
00336 w_long((long)num, arg);
00337 }
00338 else {
00339 w_byte(TYPE_SYMBOL, arg);
00340 w_bytes(sym, strlen(sym), arg);
00341 st_add_direct(arg->symbols, id, arg->symbols->num_entries);
00342 }
00343 }
00344
00345 static void
00346 w_unique(s, arg)
00347 char *s;
00348 struct dump_arg *arg;
00349 {
00350 if (s[0] == '#') {
00351 rb_raise(rb_eTypeError, "can't dump anonymous class %s", s);
00352 }
00353 w_symbol(rb_intern(s), arg);
00354 }
00355
00356 static void w_object (VALUE,struct dump_arg*,int);
00357
00358 static int
00359 hash_each(key, value, arg)
00360 VALUE key, value;
00361 struct dump_call_arg *arg;
00362 {
00363 w_object(key, arg->arg, arg->limit);
00364 w_object(value, arg->arg, arg->limit);
00365 return ST_CONTINUE;
00366 }
00367
00368 static void
00369 w_extended(klass, arg, check)
00370 VALUE klass;
00371 struct dump_arg *arg;
00372 int check;
00373 {
00374 char *path;
00375
00376 if (FL_TEST(klass, FL_SINGLETON)) {
00377 if (check && RCLASS(klass)->m_tbl->num_entries ||
00378 (RCLASS(klass)->iv_tbl && RCLASS(klass)->iv_tbl->num_entries > 1)) {
00379 rb_raise(rb_eTypeError, "singleton can't be dumped");
00380 }
00381 klass = RCLASS(klass)->super;
00382 }
00383 while (BUILTIN_TYPE(klass) == T_ICLASS) {
00384 path = rb_class2name(RBASIC(klass)->klass);
00385 w_byte(TYPE_EXTENDED, arg);
00386 w_unique(path, arg);
00387 klass = RCLASS(klass)->super;
00388 }
00389 }
00390
00391 static void
00392 w_class(type, obj, arg, check)
00393 int type;
00394 VALUE obj;
00395 struct dump_arg *arg;
00396 int check;
00397 {
00398 char *path;
00399
00400 VALUE klass = CLASS_OF(obj);
00401 w_extended(klass, arg, check);
00402 w_byte(type, arg);
00403 path = RSTRING(class2path(rb_class_real(klass)))->ptr;
00404 w_unique(path, arg);
00405 }
00406
00407 static void
00408 w_uclass(obj, base_klass, arg)
00409 VALUE obj, base_klass;
00410 struct dump_arg *arg;
00411 {
00412 VALUE klass = CLASS_OF(obj);
00413
00414 w_extended(klass, arg, Qtrue);
00415 klass = rb_class_real(klass);
00416 if (klass != base_klass) {
00417 w_byte(TYPE_UCLASS, arg);
00418 w_unique(RSTRING(class2path(klass))->ptr, arg);
00419 }
00420 }
00421
00422 static int
00423 w_obj_each(id, value, arg)
00424 ID id;
00425 VALUE value;
00426 struct dump_call_arg *arg;
00427 {
00428 w_symbol(id, arg->arg);
00429 w_object(value, arg->arg, arg->limit);
00430 return ST_CONTINUE;
00431 }
00432
00433 static void
00434 w_ivar(tbl, arg)
00435 st_table *tbl;
00436 struct dump_call_arg *arg;
00437 {
00438 if (tbl) {
00439 w_long(tbl->num_entries, arg->arg);
00440 st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
00441 }
00442 else {
00443 w_long(0, arg->arg);
00444 }
00445 }
00446
00447 static void
00448 w_object(obj, arg, limit)
00449 VALUE obj;
00450 struct dump_arg *arg;
00451 int limit;
00452 {
00453 struct dump_call_arg c_arg;
00454 st_table *ivtbl = 0;
00455 st_data_t num;
00456
00457 if (limit == 0) {
00458 rb_raise(rb_eArgError, "exceed depth limit");
00459 }
00460
00461 limit--;
00462 c_arg.limit = limit;
00463 c_arg.arg = arg;
00464
00465 if (st_lookup(arg->data, obj, &num)) {
00466 w_byte(TYPE_LINK, arg);
00467 w_long((long)num, arg);
00468 return;
00469 }
00470
00471 if (ivtbl = rb_generic_ivar_table(obj)) {
00472 w_byte(TYPE_IVAR, arg);
00473 }
00474 if (obj == Qnil) {
00475 w_byte(TYPE_NIL, arg);
00476 }
00477 else if (obj == Qtrue) {
00478 w_byte(TYPE_TRUE, arg);
00479 }
00480 else if (obj == Qfalse) {
00481 w_byte(TYPE_FALSE, arg);
00482 }
00483 else if (FIXNUM_P(obj)) {
00484 #if SIZEOF_LONG <= 4
00485 w_byte(TYPE_FIXNUM, arg);
00486 w_long(FIX2INT(obj), arg);
00487 #else
00488 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
00489 w_byte(TYPE_FIXNUM, arg);
00490 w_long(FIX2LONG(obj), arg);
00491 }
00492 else {
00493 w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
00494 }
00495 #endif
00496 }
00497 else if (SYMBOL_P(obj)) {
00498 w_symbol(SYM2ID(obj), arg);
00499 }
00500 else {
00501 if (OBJ_TAINTED(obj)) arg->taint = Qtrue;
00502
00503 st_add_direct(arg->data, obj, arg->data->num_entries);
00504 if (rb_respond_to(obj, s_mdump)) {
00505 VALUE v;
00506
00507 v = rb_funcall(obj, s_mdump, 0, 0);
00508 w_class(TYPE_USRMARSHAL, obj, arg, Qfalse);
00509 w_object(v, arg, limit);
00510 if (ivtbl) w_ivar(0, &c_arg);
00511 return;
00512 }
00513 if (rb_respond_to(obj, s_dump)) {
00514 VALUE v;
00515
00516 v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
00517 if (TYPE(v) != T_STRING) {
00518 rb_raise(rb_eTypeError, "_dump() must return string");
00519 }
00520 if (!ivtbl && (ivtbl = rb_generic_ivar_table(v))) {
00521 w_byte(TYPE_IVAR, arg);
00522 }
00523 w_class(TYPE_USERDEF, obj, arg, Qfalse);
00524 w_bytes(RSTRING(v)->ptr, RSTRING(v)->len, arg);
00525 if (ivtbl) {
00526 w_ivar(ivtbl, &c_arg);
00527 }
00528 return;
00529 }
00530
00531 switch (BUILTIN_TYPE(obj)) {
00532 case T_CLASS:
00533 if (FL_TEST(obj, FL_SINGLETON)) {
00534 rb_raise(rb_eTypeError, "singleton class can't be dumped");
00535 }
00536 w_byte(TYPE_CLASS, arg);
00537 {
00538 VALUE path = class2path(obj);
00539 w_bytes(RSTRING(path)->ptr, RSTRING(path)->len, arg);
00540 }
00541 break;
00542
00543 case T_MODULE:
00544 w_byte(TYPE_MODULE, arg);
00545 {
00546 VALUE path = class2path(obj);
00547 w_bytes(RSTRING(path)->ptr, RSTRING(path)->len, arg);
00548 }
00549 break;
00550
00551 case T_FLOAT:
00552 w_byte(TYPE_FLOAT, arg);
00553 w_float(RFLOAT(obj)->value, arg);
00554 break;
00555
00556 case T_BIGNUM:
00557 w_byte(TYPE_BIGNUM, arg);
00558 {
00559 char sign = RBIGNUM(obj)->sign ? '+' : '-';
00560 long len = RBIGNUM(obj)->len;
00561 BDIGIT *d = RBIGNUM(obj)->digits;
00562
00563 w_byte(sign, arg);
00564 w_long(SHORTLEN(len), arg);
00565 while (len--) {
00566 #if SIZEOF_BDIGITS > SIZEOF_SHORT
00567 BDIGIT num = *d;
00568 int i;
00569
00570 for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
00571 w_short(num & SHORTMASK, arg);
00572 num = SHORTDN(num);
00573 if (len == 0 && num == 0) break;
00574 }
00575 #else
00576 w_short(*d, arg);
00577 #endif
00578 d++;
00579 }
00580 }
00581 break;
00582
00583 case T_STRING:
00584 w_uclass(obj, rb_cString, arg);
00585 w_byte(TYPE_STRING, arg);
00586 w_bytes(RSTRING(obj)->ptr, RSTRING(obj)->len, arg);
00587 break;
00588
00589 case T_REGEXP:
00590 w_uclass(obj, rb_cRegexp, arg);
00591 w_byte(TYPE_REGEXP, arg);
00592 w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
00593 w_byte(rb_reg_options(obj), arg);
00594 break;
00595
00596 case T_ARRAY:
00597 w_uclass(obj, rb_cArray, arg);
00598 w_byte(TYPE_ARRAY, arg);
00599 {
00600 long len = RARRAY(obj)->len;
00601 VALUE *ptr = RARRAY(obj)->ptr;
00602
00603 w_long(len, arg);
00604 while (len--) {
00605 w_object(*ptr, arg, limit);
00606 ptr++;
00607 }
00608 }
00609 break;
00610
00611 case T_HASH:
00612 w_uclass(obj, rb_cHash, arg);
00613 if (NIL_P(RHASH(obj)->ifnone)) {
00614 w_byte(TYPE_HASH, arg);
00615 }
00616 else if (FL_TEST(obj, FL_USER2)) {
00617
00618 rb_raise(rb_eTypeError, "can't dump hash with default proc");
00619 }
00620 else {
00621 w_byte(TYPE_HASH_DEF, arg);
00622 }
00623 w_long(RHASH(obj)->tbl->num_entries, arg);
00624 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
00625 if (!NIL_P(RHASH(obj)->ifnone)) {
00626 w_object(RHASH(obj)->ifnone, arg, limit);
00627 }
00628 break;
00629
00630 case T_STRUCT:
00631 w_class(TYPE_STRUCT, obj, arg, Qtrue);
00632 {
00633 long len = RSTRUCT(obj)->len;
00634 VALUE mem;
00635 long i;
00636
00637 w_long(len, arg);
00638 mem = rb_struct_members(obj);
00639 for (i=0; i<len; i++) {
00640 w_symbol(SYM2ID(RARRAY(mem)->ptr[i]), arg);
00641 w_object(RSTRUCT(obj)->ptr[i], arg, limit);
00642 }
00643 }
00644 break;
00645
00646 case T_OBJECT:
00647 w_class(TYPE_OBJECT, obj, arg, Qtrue);
00648 w_ivar(ROBJECT(obj)->iv_tbl, &c_arg);
00649 break;
00650
00651 case T_DATA:
00652 {
00653 VALUE v;
00654
00655 w_class(TYPE_DATA, obj, arg, Qtrue);
00656 if (!rb_respond_to(obj, s_dump_data)) {
00657 rb_raise(rb_eTypeError,
00658 "no marshal_dump is defined for class %s",
00659 rb_obj_classname(obj));
00660 }
00661 v = rb_funcall(obj, s_dump_data, 0);
00662 w_object(v, arg, limit);
00663 }
00664 break;
00665
00666 default:
00667 rb_raise(rb_eTypeError, "can't dump %s",
00668 rb_obj_classname(obj));
00669 break;
00670 }
00671 }
00672 if (ivtbl) {
00673 w_ivar(ivtbl, &c_arg);
00674 }
00675 }
00676
00677 static VALUE
00678 dump(arg)
00679 struct dump_call_arg *arg;
00680 {
00681 w_object(arg->obj, arg->arg, arg->limit);
00682 if (arg->arg->dest) {
00683 rb_io_write(arg->arg->dest, arg->arg->str);
00684 rb_str_resize(arg->arg->str, 0);
00685 }
00686 return 0;
00687 }
00688
00689 static VALUE
00690 dump_ensure(arg)
00691 struct dump_arg *arg;
00692 {
00693 st_free_table(arg->symbols);
00694 st_free_table(arg->data);
00695 if (arg->taint) {
00696 OBJ_TAINT(arg->str);
00697 }
00698 return 0;
00699 }
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727 static VALUE
00728 marshal_dump(argc, argv)
00729 int argc;
00730 VALUE* argv;
00731 {
00732 VALUE obj, port, a1, a2;
00733 int limit = -1;
00734 struct dump_arg arg;
00735 struct dump_call_arg c_arg;
00736
00737 port = Qnil;
00738 rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
00739 if (argc == 3) {
00740 if (!NIL_P(a2)) limit = NUM2INT(a2);
00741 if (NIL_P(a1)) goto type_error;
00742 port = a1;
00743 }
00744 else if (argc == 2) {
00745 if (FIXNUM_P(a1)) limit = FIX2INT(a1);
00746 else if (NIL_P(a1)) goto type_error;
00747 else port = a1;
00748 }
00749 arg.dest = 0;
00750 if (!NIL_P(port)) {
00751 if (!rb_respond_to(port, s_write)) {
00752 type_error:
00753 rb_raise(rb_eTypeError, "instance of IO needed");
00754 }
00755 arg.str = rb_str_buf_new(0);
00756 arg.dest = port;
00757 if (rb_respond_to(port, s_binmode)) {
00758 rb_funcall2(port, s_binmode, 0, 0);
00759 }
00760 }
00761 else {
00762 port = rb_str_buf_new(0);
00763 arg.str = port;
00764 }
00765
00766 arg.symbols = st_init_numtable();
00767 arg.data = st_init_numtable();
00768 arg.taint = Qfalse;
00769 c_arg.obj = obj;
00770 c_arg.arg = &arg;
00771 c_arg.limit = limit;
00772
00773 w_byte(MARSHAL_MAJOR, &arg);
00774 w_byte(MARSHAL_MINOR, &arg);
00775
00776 rb_ensure(dump, (VALUE)&c_arg, dump_ensure, (VALUE)&arg);
00777
00778 return port;
00779 }
00780
00781 struct load_arg {
00782 VALUE src;
00783 long offset;
00784 st_table *symbols;
00785 VALUE data;
00786 VALUE proc;
00787 int taint;
00788 };
00789
00790 static VALUE r_object (struct load_arg *arg);
00791
00792 static int
00793 r_byte(arg)
00794 struct load_arg *arg;
00795 {
00796 int c;
00797
00798 if (TYPE(arg->src) == T_STRING) {
00799 if (RSTRING(arg->src)->len > arg->offset) {
00800 c = (unsigned char)RSTRING(arg->src)->ptr[arg->offset++];
00801 }
00802 else {
00803 rb_raise(rb_eArgError, "marshal data too short");
00804 }
00805 }
00806 else {
00807 VALUE src = arg->src;
00808 VALUE v = rb_funcall2(src, s_getc, 0, 0);
00809 if (NIL_P(v)) rb_eof_error();
00810 c = (unsigned char)FIX2INT(v);
00811 }
00812 return c;
00813 }
00814
00815 static void
00816 long_toobig(size)
00817 int size;
00818 {
00819 rb_raise(rb_eTypeError, "long too big for this architecture (size %d, given %d)",
00820 sizeof(long), size);
00821 }
00822
#undef SIGN_EXTEND_CHAR
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else  /* not __STDC__ */
/* As in Harbison and Steele.  */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif

/*
 * Read an integer in the encoding produced by w_long().
 *
 * The first byte, taken as signed, is either 0 (the value 0), a small
 * value offset by 5, or the count of little-endian payload bytes that
 * follow (negated for negative numbers).  A count larger than
 * sizeof(long) raises TypeError through long_toobig().
 */
static long
r_long(arg)
    struct load_arg *arg;
{
    register long value;
    int head = SIGN_EXTEND_CHAR(r_byte(arg));
    long pos;

    if (head == 0) return 0;

    if (head > 0) {
        if (4 < head && head < 128) {
            return head - 5;
        }
        if ((size_t)head > sizeof(long)) long_toobig(head);
        value = 0;
        for (pos = 0; pos < head; pos++) {
            value |= (long)r_byte(arg) << (8*pos);
        }
        return value;
    }

    if (-129 < head && head < -4) {
        return head + 5;
    }
    head = -head;
    if ((size_t)head > sizeof(long)) long_toobig(head);
    /* start from all ones so the bytes not read act as sign extension */
    value = -1;
    for (pos = 0; pos < head; pos++) {
        value &= ~((long)0xff << (8*pos));
        value |= (long)r_byte(arg) << (8*pos);
    }
    return value;
}
00864
00865 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
00866
00867 static VALUE
00868 r_bytes0(len, arg)
00869 long len;
00870 struct load_arg *arg;
00871 {
00872 VALUE str;
00873
00874 if (len == 0) return rb_str_new(0, 0);
00875 if (TYPE(arg->src) == T_STRING) {
00876 if (RSTRING(arg->src)->len > arg->offset) {
00877 str = rb_str_new(RSTRING(arg->src)->ptr+arg->offset, len);
00878 arg->offset += len;
00879 }
00880 else {
00881 too_short:
00882 rb_raise(rb_eArgError, "marshal data too short");
00883 }
00884 }
00885 else {
00886 VALUE src = arg->src;
00887 VALUE n = LONG2NUM(len);
00888 str = rb_funcall2(src, s_read, 1, &n);
00889 if (NIL_P(str)) goto too_short;
00890 StringValue(str);
00891 if (RSTRING(str)->len != len) goto too_short;
00892 if (OBJ_TAINTED(str)) arg->taint = Qtrue;
00893 }
00894 return str;
00895 }
00896
00897 static ID
00898 r_symlink(arg)
00899 struct load_arg *arg;
00900 {
00901 ID id;
00902 long num = r_long(arg);
00903
00904 if (st_lookup(arg->symbols, num, &id)) {
00905 return id;
00906 }
00907 rb_raise(rb_eArgError, "bad symbol");
00908 }
00909
00910 static ID
00911 r_symreal(arg)
00912 struct load_arg *arg;
00913 {
00914 ID id;
00915
00916 id = rb_intern(RSTRING(r_bytes(arg))->ptr);
00917 st_insert(arg->symbols, arg->symbols->num_entries, id);
00918
00919 return id;
00920 }
00921
00922 static ID
00923 r_symbol(arg)
00924 struct load_arg *arg;
00925 {
00926 if (r_byte(arg) == TYPE_SYMLINK) {
00927 return r_symlink(arg);
00928 }
00929 return r_symreal(arg);
00930 }
00931
00932 static char*
00933 r_unique(arg)
00934 struct load_arg *arg;
00935 {
00936 return rb_id2name(r_symbol(arg));
00937 }
00938
00939 static VALUE
00940 r_string(arg)
00941 struct load_arg *arg;
00942 {
00943 return r_bytes(arg);
00944 }
00945
00946 static VALUE
00947 r_entry(v, arg)
00948 VALUE v;
00949 struct load_arg *arg;
00950 {
00951 rb_hash_aset(arg->data, INT2FIX(RHASH(arg->data)->tbl->num_entries), v);
00952 if (arg->taint) OBJ_TAINT(v);
00953 return v;
00954 }
00955
00956 static void
00957 r_ivar(obj, arg)
00958 VALUE obj;
00959 struct load_arg *arg;
00960 {
00961 long len;
00962
00963 len = r_long(arg);
00964 if (len > 0) {
00965 while (len--) {
00966 ID id = r_symbol(arg);
00967 VALUE val = r_object(arg);
00968 rb_ivar_set(obj, id, val);
00969 }
00970 }
00971 }
00972
00973 static VALUE
00974 path2class(path)
00975 char *path;
00976 {
00977 VALUE v = rb_path2class(path);
00978
00979 if (TYPE(v) != T_CLASS) {
00980 rb_raise(rb_eArgError, "%s does not refer class", path);
00981 }
00982 return v;
00983 }
00984
00985 static VALUE
00986 path2module(path)
00987 char *path;
00988 {
00989 VALUE v = rb_path2class(path);
00990
00991 if (TYPE(v) != T_MODULE) {
00992 rb_raise(rb_eArgError, "%s does not refer module", path);
00993 }
00994 return v;
00995 }
00996
00997 static VALUE
00998 r_object0(arg, proc, ivp, extmod)
00999 struct load_arg *arg;
01000 VALUE proc;
01001 int *ivp;
01002 VALUE extmod;
01003 {
01004 VALUE v = Qnil;
01005 int type = r_byte(arg);
01006 long id;
01007
01008 switch (type) {
01009 case TYPE_LINK:
01010 id = r_long(arg);
01011 v = rb_hash_aref(arg->data, LONG2FIX(id));
01012 if (NIL_P(v)) {
01013 rb_raise(rb_eArgError, "dump format error (unlinked)");
01014 }
01015 return v;
01016
01017 case TYPE_IVAR:
01018 {
01019 int ivar = Qtrue;
01020
01021 v = r_object0(arg, 0, &ivar, extmod);
01022 if (ivar) r_ivar(v, arg);
01023 }
01024 break;
01025
01026 case TYPE_EXTENDED:
01027 {
01028 VALUE m = path2module(r_unique(arg));
01029
01030 if (NIL_P(extmod)) extmod = rb_ary_new2(0);
01031 rb_ary_push(extmod, m);
01032
01033 v = r_object0(arg, 0, 0, extmod);
01034 while (RARRAY(extmod)->len > 0) {
01035 m = rb_ary_pop(extmod);
01036 rb_extend_object(v, m);
01037 }
01038 }
01039 break;
01040
01041 case TYPE_UCLASS:
01042 {
01043 VALUE c = path2class(r_unique(arg));
01044
01045 if (FL_TEST(c, FL_SINGLETON)) {
01046 rb_raise(rb_eTypeError, "singleton can't be loaded");
01047 }
01048 v = r_object0(arg, 0, 0, extmod);
01049 if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) {
01050 format_error:
01051 rb_raise(rb_eArgError, "dump format error (user class)");
01052 }
01053 if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
01054 VALUE tmp = rb_obj_alloc(c);
01055
01056 if (TYPE(v) != TYPE(tmp)) goto format_error;
01057 }
01058 RBASIC(v)->klass = c;
01059 }
01060 break;
01061
01062 case TYPE_NIL:
01063 v = Qnil;
01064 break;
01065
01066 case TYPE_TRUE:
01067 v = Qtrue;
01068 break;
01069
01070 case TYPE_FALSE:
01071 v = Qfalse;
01072 break;
01073
01074 case TYPE_FIXNUM:
01075 {
01076 long i = r_long(arg);
01077 v = LONG2FIX(i);
01078 }
01079 break;
01080
01081 case TYPE_FLOAT:
01082 {
01083 double d, t = 0.0;
01084 VALUE str = r_bytes(arg);
01085 const char *ptr = RSTRING(str)->ptr;
01086
01087 if (strcmp(ptr, "nan") == 0) {
01088 d = t / t;
01089 }
01090 else if (strcmp(ptr, "inf") == 0) {
01091 d = 1.0 / t;
01092 }
01093 else if (strcmp(ptr, "-inf") == 0) {
01094 d = -1.0 / t;
01095 }
01096 else {
01097 char *e;
01098 d = strtod(ptr, &e);
01099 d = load_mantissa(d, e, RSTRING(str)->len - (e - ptr));
01100 }
01101 v = rb_float_new(d);
01102 r_entry(v, arg);
01103 }
01104 break;
01105
01106 case TYPE_BIGNUM:
01107 {
01108 long len;
01109 BDIGIT *digits;
01110 volatile VALUE data;
01111
01112 NEWOBJ(big, struct RBignum);
01113 OBJSETUP(big, rb_cBignum, T_BIGNUM);
01114 big->sign = (r_byte(arg) == '+');
01115 len = r_long(arg);
01116 data = r_bytes0(len * 2, arg);
01117 #if SIZEOF_BDIGITS == SIZEOF_SHORT
01118 big->len = len;
01119 #else
01120 big->len = (len + 1) * 2 / sizeof(BDIGIT);
01121 #endif
01122 big->digits = digits = ALLOC_N(BDIGIT, big->len);
01123 MEMCPY(digits, RSTRING(data)->ptr, char, len * 2);
01124 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01125 MEMZERO((char *)digits + len * 2, char,
01126 big->len * sizeof(BDIGIT) - len * 2);
01127 #endif
01128 len = big->len;
01129 while (len > 0) {
01130 unsigned char *p = (unsigned char *)digits;
01131 BDIGIT num = 0;
01132 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01133 int shift = 0;
01134 int i;
01135
01136 for (i=0; i<SIZEOF_BDIGITS; i++) {
01137 num |= (int)p[i] << shift;
01138 shift += 8;
01139 }
01140 #else
01141 num = p[0] | (p[1] << 8);
01142 #endif
01143 *digits++ = num;
01144 len--;
01145 }
01146 v = rb_big_norm((VALUE)big);
01147 r_entry(v, arg);
01148 }
01149 break;
01150
01151 case TYPE_STRING:
01152 v = r_entry(r_string(arg), arg);
01153 break;
01154
01155 case TYPE_REGEXP:
01156 {
01157 volatile VALUE str = r_bytes(arg);
01158 int options = r_byte(arg);
01159 v = r_entry(rb_reg_new(RSTRING(str)->ptr, RSTRING(str)->len, options), arg);
01160 }
01161 break;
01162
01163 case TYPE_ARRAY:
01164 {
01165 volatile long len = r_long(arg);
01166
01167 v = rb_ary_new2(len);
01168 r_entry(v, arg);
01169 while (len--) {
01170 rb_ary_push(v, r_object(arg));
01171 }
01172 }
01173 break;
01174
01175 case TYPE_HASH:
01176 case TYPE_HASH_DEF:
01177 {
01178 long len = r_long(arg);
01179
01180 v = rb_hash_new();
01181 r_entry(v, arg);
01182 while (len--) {
01183 VALUE key = r_object(arg);
01184 VALUE value = r_object(arg);
01185 rb_hash_aset(v, key, value);
01186 }
01187 if (type == TYPE_HASH_DEF) {
01188 RHASH(v)->ifnone = r_object(arg);
01189 }
01190 }
01191 break;
01192
01193 case TYPE_STRUCT:
01194 {
01195 VALUE klass, mem, values;
01196 volatile long i;
01197 long len;
01198 ID slot;
01199
01200 klass = path2class(r_unique(arg));
01201 mem = rb_struct_s_members(klass);
01202 if (mem == Qnil) {
01203 rb_raise(rb_eTypeError, "uninitialized struct");
01204 }
01205 len = r_long(arg);
01206
01207 values = rb_ary_new2(len);
01208 for (i=0; i<len; i++) {
01209 rb_ary_push(values, Qnil);
01210 }
01211 v = rb_struct_alloc(klass, values);
01212 r_entry(v, arg);
01213 for (i=0; i<len; i++) {
01214 slot = r_symbol(arg);
01215
01216 if (RARRAY(mem)->ptr[i] != ID2SYM(slot)) {
01217 rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
01218 rb_class2name(klass),
01219 rb_id2name(slot),
01220 rb_id2name(SYM2ID(RARRAY(mem)->ptr[i])));
01221 }
01222 rb_struct_aset(v, LONG2FIX(i), r_object(arg));
01223 }
01224 }
01225 break;
01226
01227 case TYPE_USERDEF:
01228 {
01229 VALUE klass = path2class(r_unique(arg));
01230 VALUE data;
01231
01232 if (!rb_respond_to(klass, s_load)) {
01233 rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
01234 rb_class2name(klass));
01235 }
01236 data = r_string(arg);
01237 if (ivp) {
01238 r_ivar(data, arg);
01239 *ivp = Qfalse;
01240 }
01241 v = rb_funcall(klass, s_load, 1, data);
01242 r_entry(v, arg);
01243 }
01244 break;
01245
01246 case TYPE_USRMARSHAL:
01247 {
01248 VALUE klass = path2class(r_unique(arg));
01249 VALUE data;
01250
01251 v = rb_obj_alloc(klass);
01252 if (! NIL_P(extmod)) {
01253 while (RARRAY(extmod)->len > 0) {
01254 VALUE m = rb_ary_pop(extmod);
01255 rb_extend_object(v, m);
01256 }
01257 }
01258 if (!rb_respond_to(v, s_mload)) {
01259 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'",
01260 rb_class2name(klass));
01261 }
01262 r_entry(v, arg);
01263 data = r_object(arg);
01264 rb_funcall(v, s_mload, 1, data);
01265 }
01266 break;
01267
01268 case TYPE_OBJECT:
01269 {
01270 VALUE klass = path2class(r_unique(arg));
01271
01272 v = rb_obj_alloc(klass);
01273 if (TYPE(v) != T_OBJECT) {
01274 rb_raise(rb_eArgError, "dump format error");
01275 }
01276 r_entry(v, arg);
01277 r_ivar(v, arg);
01278 }
01279 break;
01280
01281 case TYPE_DATA:
01282 {
01283 VALUE klass = path2class(r_unique(arg));
01284 if (rb_respond_to(klass, s_alloc)) {
01285 static int warn = Qtrue;
01286 if (warn) {
01287 rb_warn("define `allocate' instead of `_alloc'");
01288 warn = Qfalse;
01289 }
01290 v = rb_funcall(klass, s_alloc, 0);
01291 }
01292 else {
01293 v = rb_obj_alloc(klass);
01294 }
01295 if (TYPE(v) != T_DATA) {
01296 rb_raise(rb_eArgError, "dump format error");
01297 }
01298 r_entry(v, arg);
01299 if (!rb_respond_to(v, s_load_data)) {
01300 rb_raise(rb_eTypeError,
01301 "class %s needs to have instance method `_load_data'",
01302 rb_class2name(klass));
01303 }
01304 rb_funcall(v, s_load_data, 1, r_object0(arg, 0, 0, extmod));
01305 }
01306 break;
01307
01308 case TYPE_MODULE_OLD:
01309 {
01310 volatile VALUE str = r_bytes(arg);
01311
01312 v = rb_path2class(RSTRING(str)->ptr);
01313 r_entry(v, arg);
01314 }
01315 break;
01316
01317 case TYPE_CLASS:
01318 {
01319 volatile VALUE str = r_bytes(arg);
01320
01321 v = path2class(RSTRING(str)->ptr);
01322 r_entry(v, arg);
01323 }
01324 break;
01325
01326 case TYPE_MODULE:
01327 {
01328 volatile VALUE str = r_bytes(arg);
01329
01330 v = path2module(RSTRING(str)->ptr);
01331 r_entry(v, arg);
01332 }
01333 break;
01334
01335 case TYPE_SYMBOL:
01336 v = ID2SYM(r_symreal(arg));
01337 break;
01338
01339 case TYPE_SYMLINK:
01340 return ID2SYM(r_symlink(arg));
01341
01342 default:
01343 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
01344 break;
01345 }
01346 if (proc) {
01347 rb_funcall(proc, rb_intern("call"), 1, v);
01348 }
01349 return v;
01350 }
01351
01352 static VALUE
01353 r_object(arg)
01354 struct load_arg *arg;
01355 {
01356 return r_object0(arg, arg->proc, 0, Qnil);
01357 }
01358
01359 static VALUE
01360 load(arg)
01361 struct load_arg *arg;
01362 {
01363 return r_object(arg);
01364 }
01365
01366 static VALUE
01367 load_ensure(arg)
01368 struct load_arg *arg;
01369 {
01370 st_free_table(arg->symbols);
01371 return 0;
01372 }
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385 static VALUE
01386 marshal_load(argc, argv)
01387 int argc;
01388 VALUE *argv;
01389 {
01390 VALUE port, proc;
01391 int major, minor;
01392 VALUE v;
01393 struct load_arg arg;
01394
01395 rb_scan_args(argc, argv, "11", &port, &proc);
01396 if (rb_respond_to(port, rb_intern("to_str"))) {
01397 arg.taint = OBJ_TAINTED(port);
01398 StringValue(port);
01399 }
01400 else if (rb_respond_to(port, s_getc) && rb_respond_to(port, s_read)) {
01401 if (rb_respond_to(port, s_binmode)) {
01402 rb_funcall2(port, s_binmode, 0, 0);
01403 }
01404 arg.taint = Qtrue;
01405 }
01406 else {
01407 rb_raise(rb_eTypeError, "instance of IO needed");
01408 }
01409 arg.src = port;
01410 arg.offset = 0;
01411
01412 major = r_byte(&arg);
01413 minor = r_byte(&arg);
01414 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
01415 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
01416 \tformat version %d.%d required; %d.%d given",
01417 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01418 }
01419 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
01420 rb_warn("incompatible marshal file format (can be read)\n\
01421 \tformat version %d.%d required; %d.%d given",
01422 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01423 }
01424
01425 arg.symbols = st_init_numtable();
01426 arg.data = rb_hash_new();
01427 if (NIL_P(proc)) arg.proc = 0;
01428 else arg.proc = proc;
01429 v = rb_ensure(load, (VALUE)&arg, load_ensure, (VALUE)&arg);
01430
01431 return v;
01432 }
01433
01434
01435
01436
01437
01438
01439
01440
01441
01442
01443
01444
01445
01446
01447
01448
01449
01450
01451
01452
01453
01454
01455
01456
01457
01458
01459
01460
01461
01462
01463
01464
01465
01466
01467 void
01468 Init_marshal()
01469 {
01470 VALUE rb_mMarshal = rb_define_module("Marshal");
01471
01472 s_dump = rb_intern("_dump");
01473 s_load = rb_intern("_load");
01474 s_mdump = rb_intern("marshal_dump");
01475 s_mload = rb_intern("marshal_load");
01476 s_dump_data = rb_intern("_dump_data");
01477 s_load_data = rb_intern("_load_data");
01478 s_alloc = rb_intern("_alloc");
01479 s_getc = rb_intern("getc");
01480 s_read = rb_intern("read");
01481 s_write = rb_intern("write");
01482 s_binmode = rb_intern("binmode");
01483
01484 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
01485 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
01486 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
01487
01488 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
01489 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
01490 }
01491
01492 VALUE
01493 rb_marshal_dump(obj, port)
01494 VALUE obj, port;
01495 {
01496 int argc = 1;
01497 VALUE argv[2];
01498
01499 argv[0] = obj;
01500 argv[1] = port;
01501 if (!NIL_P(port)) argc = 2;
01502 return marshal_dump(argc, argv);
01503 }
01504
01505 VALUE
01506 rb_marshal_load(port)
01507 VALUE port;
01508 {
01509 return marshal_load(1, &port);
01510 }
01511