00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "ruby.h"
00016 #include "re.h"
00017
/*
 * Shorthand for the start / end offset stored for register `no`.
 * Both expand against a variable named `regs`, which must be in scope
 * wherever the macros are used.
 */
#define BEG(no) regs->beg[no]
#define END(no) regs->end[no]
00020
00021 #include <math.h>
00022 #include <ctype.h>
00023
00024 #ifdef HAVE_UNISTD_H
00025 #include <unistd.h>
00026 #endif
00027
00028 VALUE rb_cString;
00029
00030 #define STR_TMPLOCK FL_USER1
00031 #define STR_ASSOC FL_USER3
00032 #define STR_NOCAPA (ELTS_SHARED|STR_ASSOC)
00033
00034 #define RESIZE_CAPA(str,capacity) do {\
00035 REALLOC_N(RSTRING(str)->ptr, char, (capacity)+1);\
00036 if (!FL_TEST(str, STR_NOCAPA))\
00037 RSTRING(str)->aux.capa = (capacity);\
00038 } while (0)
00039
00040 VALUE rb_fs;
00041
00042 static inline void
00043 str_mod_check(s, p, len)
00044 VALUE s;
00045 char *p;
00046 long len;
00047 {
00048 if (RSTRING(s)->ptr != p || RSTRING(s)->len != len) {
00049 rb_raise(rb_eRuntimeError, "string modified");
00050 }
00051 }
00052
00053 static inline void
00054 str_frozen_check(s)
00055 VALUE s;
00056 {
00057 if (OBJ_FROZEN(s)) {
00058 rb_raise(rb_eRuntimeError, "string frozen");
00059 }
00060 }
00061
00062 static VALUE str_alloc (VALUE);
00063 static VALUE
00064 str_alloc(klass)
00065 VALUE klass;
00066 {
00067 NEWOBJ(str, struct RString);
00068 OBJSETUP(str, klass, T_STRING);
00069
00070 str->ptr = 0;
00071 str->len = 0;
00072 str->aux.capa = 0;
00073
00074 return (VALUE)str;
00075 }
00076
00077 static VALUE
00078 str_new(klass, ptr, len)
00079 VALUE klass;
00080 const char *ptr;
00081 long len;
00082 {
00083 VALUE str;
00084
00085 if (len < 0) {
00086 rb_raise(rb_eArgError, "negative string size (or size too big)");
00087 }
00088
00089 str = str_alloc(klass);
00090 RSTRING(str)->len = len;
00091 RSTRING(str)->aux.capa = len;
00092 RSTRING(str)->ptr = ALLOC_N(char,len+1);
00093 if (ptr) {
00094 memcpy(RSTRING(str)->ptr, ptr, len);
00095 }
00096 RSTRING(str)->ptr[len] = '\0';
00097 return str;
00098 }
00099
00100 VALUE
00101 rb_str_new(ptr, len)
00102 const char *ptr;
00103 long len;
00104 {
00105 return str_new(rb_cString, ptr, len);
00106 }
00107
00108 VALUE
00109 rb_str_new2(ptr)
00110 const char *ptr;
00111 {
00112 if (!ptr) {
00113 rb_raise(rb_eArgError, "NULL pointer given");
00114 }
00115 return rb_str_new(ptr, strlen(ptr));
00116 }
00117
00118 VALUE
00119 rb_tainted_str_new(ptr, len)
00120 const char *ptr;
00121 long len;
00122 {
00123 VALUE str = rb_str_new(ptr, len);
00124
00125 OBJ_TAINT(str);
00126 return str;
00127 }
00128
00129 VALUE
00130 rb_tainted_str_new2(ptr)
00131 const char *ptr;
00132 {
00133 VALUE str = rb_str_new2(ptr);
00134
00135 OBJ_TAINT(str);
00136 return str;
00137 }
00138
00139 static VALUE
00140 str_new3(klass, str)
00141 VALUE klass, str;
00142 {
00143 VALUE str2 = str_alloc(klass);
00144
00145 RSTRING(str2)->len = RSTRING(str)->len;
00146 RSTRING(str2)->ptr = RSTRING(str)->ptr;
00147 RSTRING(str2)->aux.shared = str;
00148 FL_SET(str2, ELTS_SHARED);
00149 OBJ_INFECT(str2, str);
00150
00151 return str2;
00152 }
00153
00154 VALUE
00155 rb_str_new3(str)
00156 VALUE str;
00157 {
00158 return str_new3(rb_obj_class(str), str);
00159 }
00160
00161 static VALUE
00162 str_new4(klass, str)
00163 VALUE klass, str;
00164 {
00165 VALUE str2 = str_alloc(klass);
00166
00167 RSTRING(str2)->len = RSTRING(str)->len;
00168 RSTRING(str2)->ptr = RSTRING(str)->ptr;
00169 if (FL_TEST(str, ELTS_SHARED)) {
00170 FL_SET(str2, ELTS_SHARED);
00171 RSTRING(str2)->aux.shared = RSTRING(str)->aux.shared;
00172 }
00173 else {
00174 FL_SET(str, ELTS_SHARED);
00175 RSTRING(str)->aux.shared = str2;
00176 }
00177
00178 return str2;
00179 }
00180
00181 VALUE
00182 rb_str_new4(orig)
00183 VALUE orig;
00184 {
00185 VALUE klass, str;
00186
00187 if (OBJ_FROZEN(orig)) return orig;
00188 klass = rb_obj_class(orig);
00189 if (FL_TEST(orig, ELTS_SHARED) && (str = RSTRING(orig)->aux.shared) && klass == RBASIC(str)->klass) {
00190 long ofs;
00191 ofs = RSTRING(str)->len - RSTRING(orig)->len;
00192 if (ofs > 0) {
00193 str = str_new3(klass, str);
00194 RSTRING(str)->ptr += ofs;
00195 RSTRING(str)->len -= ofs;
00196 }
00197 }
00198 else if (FL_TEST(orig, STR_ASSOC)) {
00199 str = str_new(klass, RSTRING(orig)->ptr, RSTRING(orig)->len);
00200 }
00201 else {
00202 str = str_new4(klass, orig);
00203 }
00204 OBJ_INFECT(str, orig);
00205 OBJ_FREEZE(str);
00206 return str;
00207 }
00208
00209 VALUE
00210 rb_str_new5(obj, ptr, len)
00211 VALUE obj;
00212 const char *ptr;
00213 long len;
00214 {
00215 return str_new(rb_obj_class(obj), ptr, len);
00216 }
00217
00218 #define STR_BUF_MIN_SIZE 128
00219
00220 VALUE
00221 rb_str_buf_new(capa)
00222 long capa;
00223 {
00224 VALUE str = str_alloc(rb_cString);
00225
00226 if (capa < STR_BUF_MIN_SIZE) {
00227 capa = STR_BUF_MIN_SIZE;
00228 }
00229 RSTRING(str)->ptr = 0;
00230 RSTRING(str)->len = 0;
00231 RSTRING(str)->aux.capa = capa;
00232 RSTRING(str)->ptr = ALLOC_N(char, capa+1);
00233 RSTRING(str)->ptr[0] = '\0';
00234
00235 return str;
00236 }
00237
00238 VALUE
00239 rb_str_buf_new2(ptr)
00240 const char *ptr;
00241 {
00242 VALUE str;
00243 long len = strlen(ptr);
00244
00245 str = rb_str_buf_new(len);
00246 rb_str_buf_cat(str, ptr, len);
00247
00248 return str;
00249 }
00250
00251 VALUE
00252 rb_str_to_str(str)
00253 VALUE str;
00254 {
00255 return rb_convert_type(str, T_STRING, "String", "to_str");
00256 }
00257
00258 static void
00259 rb_str_shared_replace(str, str2)
00260 VALUE str, str2;
00261 {
00262 if (str == str2) return;
00263 rb_str_modify(str);
00264 if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr);
00265 if (NIL_P(str2)) {
00266 RSTRING(str)->ptr = 0;
00267 RSTRING(str)->len = 0;
00268 RSTRING(str)->aux.capa = 0;
00269 FL_UNSET(str, STR_NOCAPA);
00270 return;
00271 }
00272 RSTRING(str)->ptr = RSTRING(str2)->ptr;
00273 RSTRING(str)->len = RSTRING(str2)->len;
00274 FL_UNSET(str, STR_NOCAPA);
00275 if (FL_TEST(str2, STR_NOCAPA)) {
00276 FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
00277 RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
00278 }
00279 else {
00280 RSTRING(str)->aux.capa = RSTRING(str2)->aux.capa;
00281 }
00282 RSTRING(str2)->ptr = 0;
00283 RSTRING(str2)->len = 0;
00284 RSTRING(str2)->aux.capa = 0;
00285 FL_UNSET(str2, STR_NOCAPA);
00286 if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
00287 }
00288
00289 static ID id_to_s;
00290
00291 VALUE
00292 rb_obj_as_string(obj)
00293 VALUE obj;
00294 {
00295 VALUE str;
00296
00297 if (TYPE(obj) == T_STRING) {
00298 return obj;
00299 }
00300 str = rb_funcall(obj, id_to_s, 0);
00301 if (TYPE(str) != T_STRING)
00302 return rb_any_to_s(obj);
00303 if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
00304 return str;
00305 }
00306
00307 static VALUE rb_str_replace (VALUE, VALUE);
00308
00309 VALUE
00310 rb_str_dup(str)
00311 VALUE str;
00312 {
00313 VALUE dup = str_alloc(rb_obj_class(str));
00314 rb_str_replace(dup, str);
00315 return dup;
00316 }
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326 static VALUE
00327 rb_str_init(argc, argv, str)
00328 int argc;
00329 VALUE *argv;
00330 VALUE str;
00331 {
00332 VALUE orig;
00333
00334 if (rb_scan_args(argc, argv, "01", &orig) == 1)
00335 rb_str_replace(str, orig);
00336 return str;
00337 }
00338
00339
00340
00341
00342
00343
00344
00345
00346 static VALUE
00347 rb_str_length(str)
00348 VALUE str;
00349 {
00350 return LONG2NUM(RSTRING(str)->len);
00351 }
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363 static VALUE
00364 rb_str_empty(str)
00365 VALUE str;
00366 {
00367 if (RSTRING(str)->len == 0)
00368 return Qtrue;
00369 return Qfalse;
00370 }
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382 VALUE
00383 rb_str_plus(str1, str2)
00384 VALUE str1, str2;
00385 {
00386 VALUE str3;
00387
00388 StringValue(str2);
00389 str3 = rb_str_new(0, RSTRING(str1)->len+RSTRING(str2)->len);
00390 memcpy(RSTRING(str3)->ptr, RSTRING(str1)->ptr, RSTRING(str1)->len);
00391 memcpy(RSTRING(str3)->ptr + RSTRING(str1)->len,
00392 RSTRING(str2)->ptr, RSTRING(str2)->len);
00393 RSTRING(str3)->ptr[RSTRING(str3)->len] = '\0';
00394
00395 if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
00396 OBJ_TAINT(str3);
00397 return str3;
00398 }
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 VALUE
00411 rb_str_times(str, times)
00412 VALUE str;
00413 VALUE times;
00414 {
00415 VALUE str2;
00416 long i, len;
00417
00418 len = NUM2LONG(times);
00419 if (len < 0) {
00420 rb_raise(rb_eArgError, "negative argument");
00421 }
00422 if (len && LONG_MAX/len < RSTRING(str)->len) {
00423 rb_raise(rb_eArgError, "argument too big");
00424 }
00425
00426 str2 = rb_str_new5(str,0, len *= RSTRING(str)->len);
00427 for (i = 0; i < len; i += RSTRING(str)->len) {
00428 memcpy(RSTRING(str2)->ptr + i,
00429 RSTRING(str)->ptr, RSTRING(str)->len);
00430 }
00431 RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0';
00432
00433 OBJ_INFECT(str2, str);
00434
00435 return str2;
00436 }
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452 static VALUE
00453 rb_str_format(str, arg)
00454 VALUE str, arg;
00455 {
00456 VALUE *argv;
00457
00458 if (TYPE(arg) == T_ARRAY) {
00459 argv = ALLOCA_N(VALUE, RARRAY(arg)->len + 1);
00460 argv[0] = str;
00461 MEMCPY(argv+1, RARRAY(arg)->ptr, VALUE, RARRAY(arg)->len);
00462 return rb_f_sprintf(RARRAY(arg)->len+1, argv);
00463 }
00464
00465 argv = ALLOCA_N(VALUE, 2);
00466 argv[0] = str;
00467 argv[1] = arg;
00468 return rb_f_sprintf(2, argv);
00469 }
00470
00471 static int
00472 str_independent(str)
00473 VALUE str;
00474 {
00475 if (FL_TEST(str, STR_TMPLOCK)) {
00476 rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
00477 }
00478 if (OBJ_FROZEN(str)) rb_error_frozen("string");
00479 if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
00480 rb_raise(rb_eSecurityError, "Insecure: can't modify string");
00481 if (!FL_TEST(str, ELTS_SHARED)) return 1;
00482 return 0;
00483 }
00484
00485 static void
00486 str_make_independent(str)
00487 VALUE str;
00488 {
00489 char *ptr;
00490
00491 ptr = ALLOC_N(char, RSTRING(str)->len+1);
00492 if (RSTRING(str)->ptr) {
00493 memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len);
00494 }
00495 ptr[RSTRING(str)->len] = 0;
00496 RSTRING(str)->ptr = ptr;
00497 RSTRING(str)->aux.capa = RSTRING(str)->len;
00498 FL_UNSET(str, STR_NOCAPA);
00499 }
00500
00501 void
00502 rb_str_modify(str)
00503 VALUE str;
00504 {
00505 if (!str_independent(str))
00506 str_make_independent(str);
00507 }
00508
00509 void
00510 rb_str_associate(str, add)
00511 VALUE str, add;
00512 {
00513 if (FL_TEST(str, STR_ASSOC)) {
00514
00515 rb_ary_concat(RSTRING(str)->aux.shared, add);
00516 }
00517 else {
00518 if (FL_TEST(str, ELTS_SHARED)) {
00519 str_make_independent(str);
00520 }
00521 else if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
00522 RESIZE_CAPA(str, RSTRING(str)->len);
00523 }
00524 RSTRING(str)->aux.shared = add;
00525 FL_SET(str, STR_ASSOC);
00526 }
00527 }
00528
00529 VALUE
00530 rb_str_associated(str)
00531 VALUE str;
00532 {
00533 if (FL_TEST(str, STR_ASSOC)) {
00534 return RSTRING(str)->aux.shared;
00535 }
00536 return Qfalse;
00537 }
00538
00539 static char *null_str = "";
00540
00541 VALUE
00542 rb_string_value(ptr)
00543 volatile VALUE *ptr;
00544 {
00545 VALUE s = *ptr;
00546 if (TYPE(s) != T_STRING) {
00547 s = rb_str_to_str(s);
00548 *ptr = s;
00549 }
00550 if (!RSTRING(s)->ptr) {
00551 FL_SET(s, ELTS_SHARED);
00552 RSTRING(s)->ptr = null_str;
00553 }
00554 return s;
00555 }
00556
00557 char *
00558 rb_string_value_ptr(ptr)
00559 volatile VALUE *ptr;
00560 {
00561 return RSTRING(rb_string_value(ptr))->ptr;
00562 }
00563
00564 char *
00565 rb_string_value_cstr(ptr)
00566 volatile VALUE *ptr;
00567 {
00568 VALUE str = rb_string_value(ptr);
00569 char *s = RSTRING(str)->ptr;
00570
00571 if (!s || RSTRING(str)->len != strlen(s)) {
00572 rb_raise(rb_eArgError, "string contains null byte");
00573 }
00574 return s;
00575 }
00576
00577 VALUE
00578 rb_check_string_type(str)
00579 VALUE str;
00580 {
00581 str = rb_check_convert_type(str, T_STRING, "String", "to_str");
00582 if (!NIL_P(str) && !RSTRING(str)->ptr) {
00583 FL_SET(str, ELTS_SHARED);
00584 RSTRING(str)->ptr = null_str;
00585 }
00586 return str;
00587 }
00588
00589 VALUE
00590 rb_str_substr(str, beg, len)
00591 VALUE str;
00592 long beg, len;
00593 {
00594 VALUE str2;
00595
00596 if (len < 0) return Qnil;
00597 if (beg > RSTRING(str)->len) return Qnil;
00598 if (beg < 0) {
00599 beg += RSTRING(str)->len;
00600 if (beg < 0) return Qnil;
00601 }
00602 if (beg + len > RSTRING(str)->len) {
00603 len = RSTRING(str)->len - beg;
00604 }
00605 if (len < 0) {
00606 len = 0;
00607 }
00608 if (len == 0) {
00609 str2 = rb_str_new5(str,0,0);
00610 }
00611 else if (len > sizeof(struct RString)/2 &&
00612 beg + len == RSTRING(str)->len && !FL_TEST(str, STR_ASSOC)) {
00613 str2 = rb_str_new3(rb_str_new4(str));
00614 RSTRING(str2)->ptr += RSTRING(str2)->len - len;
00615 RSTRING(str2)->len = len;
00616 }
00617 else {
00618 str2 = rb_str_new5(str, RSTRING(str)->ptr+beg, len);
00619 }
00620 OBJ_INFECT(str2, str);
00621
00622 return str2;
00623 }
00624
00625 VALUE
00626 rb_str_freeze(str)
00627 VALUE str;
00628 {
00629 return rb_obj_freeze(str);
00630 }
00631
00632 VALUE
00633 rb_str_dup_frozen(str)
00634 VALUE str;
00635 {
00636 if (FL_TEST(str, ELTS_SHARED) && RSTRING(str)->aux.shared) {
00637 VALUE shared = RSTRING(str)->aux.shared;
00638 if (RSTRING(shared)->len == RSTRING(str)->len) {
00639 OBJ_FREEZE(shared);
00640 return shared;
00641 }
00642 }
00643 if (OBJ_FROZEN(str)) return str;
00644 str = rb_str_dup(str);
00645 OBJ_FREEZE(str);
00646 return str;
00647 }
00648
00649 VALUE
00650 rb_str_locktmp(str)
00651 VALUE str;
00652 {
00653 if (FL_TEST(str, STR_TMPLOCK)) {
00654 rb_raise(rb_eRuntimeError, "temporal locking already locked string");
00655 }
00656 FL_SET(str, STR_TMPLOCK);
00657 return str;
00658 }
00659
00660 VALUE
00661 rb_str_unlocktmp(str)
00662 VALUE str;
00663 {
00664 if (!FL_TEST(str, STR_TMPLOCK)) {
00665 rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
00666 }
00667 FL_UNSET(str, STR_TMPLOCK);
00668 return str;
00669 }
00670
00671 VALUE
00672 rb_str_resize(str, len)
00673 VALUE str;
00674 long len;
00675 {
00676 if (len < 0) {
00677 rb_raise(rb_eArgError, "negative string size (or size too big)");
00678 }
00679
00680 rb_str_modify(str);
00681 if (len != RSTRING(str)->len) {
00682 if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) {
00683 REALLOC_N(RSTRING(str)->ptr, char, len+1);
00684 if (!FL_TEST(str, STR_NOCAPA)) {
00685 RSTRING(str)->aux.capa = len;
00686 }
00687 }
00688 RSTRING(str)->len = len;
00689 RSTRING(str)->ptr[len] = '\0';
00690 }
00691 return str;
00692 }
00693
00694 VALUE
00695 rb_str_buf_cat(str, ptr, len)
00696 VALUE str;
00697 const char *ptr;
00698 long len;
00699 {
00700 long capa, total;
00701
00702 if (len == 0) return str;
00703 if (len < 0) {
00704 rb_raise(rb_eArgError, "negative string size (or size too big)");
00705 }
00706 rb_str_modify(str);
00707 if (FL_TEST(str, STR_ASSOC)) {
00708 FL_UNSET(str, STR_ASSOC);
00709 capa = RSTRING(str)->aux.capa = RSTRING(str)->len;
00710 }
00711 else {
00712 capa = RSTRING(str)->aux.capa;
00713 }
00714 total = RSTRING(str)->len+len;
00715 if (capa <= total) {
00716 while (total > capa) {
00717 capa = (capa + 1) * 2;
00718 }
00719 RESIZE_CAPA(str, capa);
00720 }
00721 memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
00722 RSTRING(str)->len = total;
00723 RSTRING(str)->ptr[total] = '\0';
00724
00725 return str;
00726 }
00727
00728 VALUE
00729 rb_str_buf_cat2(str, ptr)
00730 VALUE str;
00731 const char *ptr;
00732 {
00733 return rb_str_buf_cat(str, ptr, strlen(ptr));
00734 }
00735
00736 VALUE
00737 rb_str_cat(str, ptr, len)
00738 VALUE str;
00739 const char *ptr;
00740 long len;
00741 {
00742 if (len < 0) {
00743 rb_raise(rb_eArgError, "negative string size (or size too big)");
00744 }
00745 if (FL_TEST(str, STR_ASSOC)) {
00746 rb_str_modify(str);
00747 REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len+len);
00748 memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
00749 RSTRING(str)->len += len;
00750 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
00751 return str;
00752 }
00753
00754 return rb_str_buf_cat(str, ptr, len);
00755 }
00756
00757 VALUE
00758 rb_str_cat2(str, ptr)
00759 VALUE str;
00760 const char *ptr;
00761 {
00762 return rb_str_cat(str, ptr, strlen(ptr));
00763 }
00764
00765 VALUE
00766 rb_str_buf_append(str, str2)
00767 VALUE str, str2;
00768 {
00769 long capa, len;
00770
00771 rb_str_modify(str);
00772 if (FL_TEST(str, STR_ASSOC)) {
00773 FL_UNSET(str, STR_ASSOC);
00774 capa = RSTRING(str)->aux.capa = RSTRING(str)->len;
00775 }
00776 else {
00777 capa = RSTRING(str)->aux.capa;
00778 }
00779 len = RSTRING(str)->len+RSTRING(str2)->len;
00780 if (capa <= len) {
00781 while (len > capa) {
00782 capa = (capa + 1) * 2;
00783 }
00784 RESIZE_CAPA(str, capa);
00785 }
00786 memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
00787 RSTRING(str2)->ptr, RSTRING(str2)->len);
00788 RSTRING(str)->len += RSTRING(str2)->len;
00789 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
00790 OBJ_INFECT(str, str2);
00791
00792 return str;
00793 }
00794
00795 VALUE
00796 rb_str_append(str, str2)
00797 VALUE str, str2;
00798 {
00799 StringValue(str2);
00800 rb_str_modify(str);
00801 if (RSTRING(str2)->len > 0) {
00802 if (FL_TEST(str, STR_ASSOC)) {
00803 long len = RSTRING(str)->len+RSTRING(str2)->len;
00804 REALLOC_N(RSTRING(str)->ptr, char, len+1);
00805 memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
00806 RSTRING(str2)->ptr, RSTRING(str2)->len);
00807 RSTRING(str)->ptr[len] = '\0';
00808 RSTRING(str)->len = len;
00809 }
00810 else {
00811 return rb_str_buf_append(str, str2);
00812 }
00813 }
00814 OBJ_INFECT(str, str2);
00815 return str;
00816 }
00817
00818
00819
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835 VALUE
00836 rb_str_concat(str1, str2)
00837 VALUE str1, str2;
00838 {
00839 if (FIXNUM_P(str2)) {
00840 int i = FIX2INT(str2);
00841 if (0 <= i && i <= 0xff) {
00842 char c = i;
00843 return rb_str_cat(str1, &c, 1);
00844 }
00845 }
00846 str1 = rb_str_append(str1, str2);
00847
00848 return str1;
00849 }
00850
00851 int
00852 rb_str_hash(str)
00853 VALUE str;
00854 {
00855 register long len = RSTRING(str)->len;
00856 register char *p = RSTRING(str)->ptr;
00857 register int key = 0;
00858
00859 #ifdef HASH_ELFHASH
00860 register unsigned int g;
00861
00862 while (len--) {
00863 key = (key << 4) + *p++;
00864 if (g = key & 0xF0000000)
00865 key ^= g >> 24;
00866 key &= ~g;
00867 }
00868 #elif HASH_PERL
00869 while (len--) {
00870 key += *p++;
00871 key += (key << 10);
00872 key ^= (key >> 6);
00873 }
00874 key += (key << 3);
00875 key ^= (key >> 11);
00876 key += (key << 15);
00877 #else
00878 while (len--) {
00879 key = key*65599 + *p;
00880 p++;
00881 }
00882 key = key + (key>>5);
00883 #endif
00884 return key;
00885 }
00886
00887
00888
00889
00890
00891
00892
00893
00894 static VALUE
00895 rb_str_hash_m(str)
00896 VALUE str;
00897 {
00898 int key = rb_str_hash(str);
00899 return INT2FIX(key);
00900 }
00901
00902 #define lesser(a,b) (((a)>(b))?(b):(a))
00903
00904 int
00905 rb_str_cmp(str1, str2)
00906 VALUE str1, str2;
00907 {
00908 long len;
00909 int retval;
00910
00911 len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
00912 retval = rb_memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
00913 if (retval == 0) {
00914 if (RSTRING(str1)->len == RSTRING(str2)->len) return 0;
00915 if (RSTRING(str1)->len > RSTRING(str2)->len) return 1;
00916 return -1;
00917 }
00918 if (retval > 0) return 1;
00919 return -1;
00920 }
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932 static VALUE
00933 rb_str_equal(str1, str2)
00934 VALUE str1, str2;
00935 {
00936 if (str1 == str2) return Qtrue;
00937 if (TYPE(str2) != T_STRING) {
00938 if (!rb_respond_to(str2, rb_intern("to_str"))) {
00939 return Qfalse;
00940 }
00941 return rb_equal(str2, str1);
00942 }
00943 if (RSTRING(str1)->len == RSTRING(str2)->len &&
00944 rb_str_cmp(str1, str2) == 0) {
00945 return Qtrue;
00946 }
00947 return Qfalse;
00948 }
00949
/* True when `p` lies before `e` and points at one of '$', '@' or '{'. */
#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
00951
00952
00953
00954
00955
00956
00957
00958
00959 static VALUE
00960 rb_str_eql(str1, str2)
00961 VALUE str1, str2;
00962 {
00963 if (TYPE(str2) != T_STRING || RSTRING(str1)->len != RSTRING(str2)->len)
00964 return Qfalse;
00965
00966 if (memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr,
00967 lesser(RSTRING(str1)->len, RSTRING(str2)->len)) == 0)
00968 return Qtrue;
00969
00970 return Qfalse;
00971 }
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998 static VALUE
00999 rb_str_cmp_m(str1, str2)
01000 VALUE str1, str2;
01001 {
01002 long result;
01003
01004 if (TYPE(str2) != T_STRING) {
01005 if (!rb_respond_to(str2, rb_intern("to_str"))) {
01006 return Qnil;
01007 }
01008 else if (!rb_respond_to(str2, rb_intern("<=>"))) {
01009 return Qnil;
01010 }
01011 else {
01012 VALUE tmp = rb_funcall(str2, rb_intern("<=>"), 1, str1);
01013
01014 if (NIL_P(tmp)) return Qnil;
01015 if (!FIXNUM_P(tmp)) {
01016 return rb_funcall(LONG2FIX(0), '-', 1, tmp);
01017 }
01018 result = -FIX2LONG(tmp);
01019 }
01020 }
01021 else {
01022 result = rb_str_cmp(str1, str2);
01023 }
01024 return LONG2NUM(result);
01025 }
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039 static VALUE
01040 rb_str_casecmp(str1, str2)
01041 VALUE str1, str2;
01042 {
01043 long len;
01044 int retval;
01045
01046 StringValue(str2);
01047 len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
01048 retval = rb_memcicmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
01049 if (retval == 0) {
01050 if (RSTRING(str1)->len == RSTRING(str2)->len) return INT2FIX(0);
01051 if (RSTRING(str1)->len > RSTRING(str2)->len) return INT2FIX(1);
01052 return INT2FIX(-1);
01053 }
01054 if (retval == 0) return INT2FIX(0);
01055 if (retval > 0) return INT2FIX(1);
01056 return INT2FIX(-1);
01057 }
01058
01059 static long
01060 rb_str_index(str, sub, offset)
01061 VALUE str, sub;
01062 long offset;
01063 {
01064 long pos;
01065
01066 if (offset < 0) {
01067 offset += RSTRING(str)->len;
01068 if (offset < 0) return -1;
01069 }
01070 if (RSTRING(str)->len - offset < RSTRING(sub)->len) return -1;
01071 if (RSTRING(sub)->len == 0) return offset;
01072 pos = rb_memsearch(RSTRING(sub)->ptr, RSTRING(sub)->len,
01073 RSTRING(str)->ptr+offset, RSTRING(str)->len-offset);
01074 if (pos < 0) return pos;
01075 return pos + offset;
01076 }
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092
01093
01094
01095
01096
01097 static VALUE
01098 rb_str_index_m(argc, argv, str)
01099 int argc;
01100 VALUE *argv;
01101 VALUE str;
01102 {
01103 VALUE sub;
01104 VALUE initpos;
01105 long pos;
01106
01107 if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
01108 pos = NUM2LONG(initpos);
01109 }
01110 else {
01111 pos = 0;
01112 }
01113 if (pos < 0) {
01114 pos += RSTRING(str)->len;
01115 if (pos < 0) {
01116 if (TYPE(sub) == T_REGEXP) {
01117 rb_backref_set(Qnil);
01118 }
01119 return Qnil;
01120 }
01121 }
01122
01123 switch (TYPE(sub)) {
01124 case T_REGEXP:
01125 pos = rb_reg_adjust_startpos(sub, str, pos, 0);
01126 pos = rb_reg_search(sub, str, pos, 0);
01127 break;
01128
01129 case T_FIXNUM:
01130 {
01131 int c = FIX2INT(sub);
01132 long len = RSTRING(str)->len;
01133 unsigned char *p = RSTRING(str)->ptr;
01134
01135 for (;pos<len;pos++) {
01136 if (p[pos] == c) return LONG2NUM(pos);
01137 }
01138 return Qnil;
01139 }
01140
01141 default: {
01142 VALUE tmp;
01143
01144 tmp = rb_check_string_type(sub);
01145 if (NIL_P(tmp)) {
01146 rb_raise(rb_eTypeError, "type mismatch: %s given",
01147 rb_obj_classname(sub));
01148 }
01149 sub = tmp;
01150 }
01151
01152 case T_STRING:
01153 pos = rb_str_index(str, sub, pos);
01154 break;
01155 }
01156
01157 if (pos == -1) return Qnil;
01158 return LONG2NUM(pos);
01159 }
01160
01161 static long
01162 rb_str_rindex(str, sub, pos)
01163 VALUE str, sub;
01164 long pos;
01165 {
01166 long len = RSTRING(sub)->len;
01167 char *s, *sbeg, *t;
01168
01169
01170 if (RSTRING(str)->len < len) return -1;
01171 if (RSTRING(str)->len - pos < len) {
01172 pos = RSTRING(str)->len - len;
01173 }
01174 sbeg = RSTRING(str)->ptr;
01175 s = RSTRING(str)->ptr + pos;
01176 t = RSTRING(sub)->ptr;
01177 if (len) {
01178 while (sbeg <= s) {
01179 if (rb_memcmp(s, t, len) == 0) {
01180 return s - RSTRING(str)->ptr;
01181 }
01182 s--;
01183 }
01184 return -1;
01185 }
01186 else {
01187 return pos;
01188 }
01189 }
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211 static VALUE
01212 rb_str_rindex_m(argc, argv, str)
01213 int argc;
01214 VALUE *argv;
01215 VALUE str;
01216 {
01217 VALUE sub;
01218 VALUE position;
01219 long pos;
01220
01221 if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) {
01222 pos = NUM2LONG(position);
01223 if (pos < 0) {
01224 pos += RSTRING(str)->len;
01225 if (pos < 0) {
01226 if (TYPE(sub) == T_REGEXP) {
01227 rb_backref_set(Qnil);
01228 }
01229 return Qnil;
01230 }
01231 }
01232 if (pos > RSTRING(str)->len) pos = RSTRING(str)->len;
01233 }
01234 else {
01235 pos = RSTRING(str)->len;
01236 }
01237
01238 switch (TYPE(sub)) {
01239 case T_REGEXP:
01240 if (RREGEXP(sub)->len) {
01241 pos = rb_reg_adjust_startpos(sub, str, pos, 1);
01242 pos = rb_reg_search(sub, str, pos, 1);
01243 }
01244 if (pos >= 0) return LONG2NUM(pos);
01245 break;
01246
01247 case T_STRING:
01248 pos = rb_str_rindex(str, sub, pos);
01249 if (pos >= 0) return LONG2NUM(pos);
01250 break;
01251
01252 case T_FIXNUM:
01253 {
01254 int c = FIX2INT(sub);
01255 unsigned char *p = RSTRING(str)->ptr + pos;
01256 unsigned char *pbeg = RSTRING(str)->ptr;
01257
01258 if (pos == RSTRING(str)->len) {
01259 if (pos == 0) return Qnil;
01260 --p;
01261 }
01262 while (pbeg <= p) {
01263 if (*p == c) return LONG2NUM((char*)p - RSTRING(str)->ptr);
01264 p--;
01265 }
01266 return Qnil;
01267 }
01268
01269 default:
01270 rb_raise(rb_eTypeError, "type mismatch: %s given",
01271 rb_obj_classname(sub));
01272 }
01273 return Qnil;
01274 }
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292 static VALUE
01293 rb_str_match(x, y)
01294 VALUE x, y;
01295 {
01296 switch (TYPE(y)) {
01297 case T_STRING:
01298 rb_raise(rb_eTypeError, "type mismatch: String given");
01299
01300 case T_REGEXP:
01301 return rb_reg_match(y, x);
01302
01303 default:
01304 return rb_funcall(y, rb_intern("=~"), 1, x);
01305 }
01306 }
01307
01308
01309 static VALUE get_pat (VALUE, int);
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325 static VALUE
01326 rb_str_match_m(str, re)
01327 VALUE str, re;
01328 {
01329 return rb_funcall(get_pat(re, 0), rb_intern("match"), 1, str);
01330 }
01331
/*
 * Advance the ASCII digit or letter at *s to its successor in place.
 *
 * Returns 0 when no carry is produced (this includes characters that are
 * not ASCII alphanumerics, which are left untouched).  On wrap-around the
 * character is reset to the first of its range and the character to
 * insert as a carry is returned: '1' for '9', 'a' for 'z', 'A' for 'Z'.
 */
static char
succ_char(s)
    char *s;
{
    char c = *s;

    /* wrap-around cases first */
    if (c == '9') {
        *s = '0';
        return '1';
    }
    if (c == 'z') {
        *s = 'a';
        return 'a';
    }
    if (c == 'Z') {
        *s = 'A';
        return 'A';
    }

    /* plain increment inside one of the three ranges */
    if (('0' <= c && c < '9') ||
        ('a' <= c && c < 'z') ||
        ('A' <= c && c < 'Z')) {
        *s = c + 1;
    }
    return 0;
}
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383 static VALUE
01384 rb_str_succ(orig)
01385 VALUE orig;
01386 {
01387 VALUE str;
01388 char *sbeg, *s;
01389 int c = -1;
01390 long n = 0;
01391
01392 str = rb_str_new5(orig, RSTRING(orig)->ptr, RSTRING(orig)->len);
01393 OBJ_INFECT(str, orig);
01394 if (RSTRING(str)->len == 0) return str;
01395
01396 sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
01397
01398 while (sbeg <= s) {
01399 if (ISALNUM(*s)) {
01400 if ((c = succ_char(s)) == 0) break;
01401 n = s - sbeg;
01402 }
01403 s--;
01404 }
01405 if (c == -1) {
01406 sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
01407 c = '\001';
01408 while (sbeg <= s) {
01409 if ((*s += 1) != 0) break;
01410 s--;
01411 }
01412 }
01413 if (s < sbeg) {
01414 RESIZE_CAPA(str, RSTRING(str)->len + 1);
01415 s = RSTRING(str)->ptr + n;
01416 memmove(s+1, s, RSTRING(str)->len - n);
01417 *s = c;
01418 RSTRING(str)->len += 1;
01419 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
01420 }
01421
01422 return str;
01423 }
01424
01425
01426
01427
01428
01429
01430
01431
01432
01433
01434
01435 static VALUE
01436 rb_str_succ_bang(str)
01437 VALUE str;
01438 {
01439 rb_str_shared_replace(str, rb_str_succ(str));
01440
01441 return str;
01442 }
01443
01444 VALUE
01445 rb_str_upto(beg, end, excl)
01446 VALUE beg, end;
01447 int excl;
01448 {
01449 VALUE current, after_end;
01450 ID succ = rb_intern("succ");
01451 int n;
01452
01453 StringValue(end);
01454 n = rb_str_cmp(beg, end);
01455 if (n > 0 || (excl && n == 0)) return beg;
01456 after_end = rb_funcall(end, succ, 0, 0);
01457 current = beg;
01458 while (!rb_str_equal(current, after_end)) {
01459 rb_yield(current);
01460 if (!excl && rb_str_equal(current, end)) break;
01461 current = rb_funcall(current, succ, 0, 0);
01462 StringValue(current);
01463 if (excl && rb_str_equal(current, end)) break;
01464 StringValue(current);
01465 if (RSTRING(current)->len > RSTRING(end)->len)
01466 break;
01467 }
01468
01469 return beg;
01470 }
01471
01472
01473
01474
01475
01476
01477
01478
01479
01480
01481
01482
01483
01484
01485
01486
01487
01488
01489
01490
01491
01492
01493 static VALUE
01494 rb_str_upto_m(beg, end)
01495 VALUE beg, end;
01496 {
01497 return rb_str_upto(beg, end, Qfalse);
01498 }
01499
01500 static VALUE
01501 rb_str_subpat(str, re, nth)
01502 VALUE str, re;
01503 int nth;
01504 {
01505 if (rb_reg_search(re, str, 0, 0) >= 0) {
01506 return rb_reg_nth_match(nth, rb_backref_get());
01507 }
01508 return Qnil;
01509 }
01510
01511 static VALUE
01512 rb_str_aref(str, indx)
01513 VALUE str;
01514 VALUE indx;
01515 {
01516 long idx;
01517
01518 switch (TYPE(indx)) {
01519 case T_FIXNUM:
01520 idx = FIX2LONG(indx);
01521
01522 num_index:
01523 if (idx < 0) {
01524 idx = RSTRING(str)->len + idx;
01525 }
01526 if (idx < 0 || RSTRING(str)->len <= idx) {
01527 return Qnil;
01528 }
01529 return INT2FIX(RSTRING(str)->ptr[idx] & 0xff);
01530
01531 case T_REGEXP:
01532 return rb_str_subpat(str, indx, 0);
01533
01534 case T_STRING:
01535 if (rb_str_index(str, indx, 0) != -1)
01536 return rb_str_dup(indx);
01537 return Qnil;
01538
01539 default:
01540
01541 {
01542 long beg, len;
01543 VALUE tmp;
01544
01545 switch (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 0)) {
01546 case Qfalse:
01547 break;
01548 case Qnil:
01549 return Qnil;
01550 default:
01551 tmp = rb_str_substr(str, beg, len);
01552 OBJ_INFECT(tmp, indx);
01553 return tmp;
01554 }
01555 }
01556 idx = NUM2LONG(indx);
01557 goto num_index;
01558 }
01559 return Qnil;
01560 }
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
01609
01610 static VALUE
01611 rb_str_aref_m(argc, argv, str)
01612 int argc;
01613 VALUE *argv;
01614 VALUE str;
01615 {
01616 if (argc == 2) {
01617 if (TYPE(argv[0]) == T_REGEXP) {
01618 return rb_str_subpat(str, argv[0], NUM2INT(argv[1]));
01619 }
01620 return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
01621 }
01622 if (argc != 1) {
01623 rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
01624 }
01625 return rb_str_aref(str, argv[0]);
01626 }
01627
01628 static void
01629 rb_str_splice(str, beg, len, val)
01630 VALUE str;
01631 long beg, len;
01632 VALUE val;
01633 {
01634 if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
01635
01636 StringValue(val);
01637 rb_str_modify(str);
01638
01639 if (RSTRING(str)->len < beg) {
01640 out_of_range:
01641 rb_raise(rb_eIndexError, "index %ld out of string", beg);
01642 }
01643 if (beg < 0) {
01644 if (-beg > RSTRING(str)->len) {
01645 goto out_of_range;
01646 }
01647 beg += RSTRING(str)->len;
01648 }
01649 if (RSTRING(str)->len < beg + len) {
01650 len = RSTRING(str)->len - beg;
01651 }
01652
01653 if (len < RSTRING(val)->len) {
01654
01655 RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(val)->len - len + 1);
01656 }
01657
01658 if (RSTRING(val)->len != len) {
01659 memmove(RSTRING(str)->ptr + beg + RSTRING(val)->len,
01660 RSTRING(str)->ptr + beg + len,
01661 RSTRING(str)->len - (beg + len));
01662 }
01663 if (RSTRING(str)->len < beg && len < 0) {
01664 MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, -len);
01665 }
01666 if (RSTRING(val)->len > 0) {
01667 memmove(RSTRING(str)->ptr+beg, RSTRING(val)->ptr, RSTRING(val)->len);
01668 }
01669 RSTRING(str)->len += RSTRING(val)->len - len;
01670 if (RSTRING(str)->ptr) {
01671 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
01672 }
01673 OBJ_INFECT(str, val);
01674 }
01675
01676 void
01677 rb_str_update(str, beg, len, val)
01678 VALUE str;
01679 long beg, len;
01680 VALUE val;
01681 {
01682 rb_str_splice(str, beg, len, val);
01683 }
01684
01685 static void
01686 rb_str_subpat_set(str, re, nth, val)
01687 VALUE str, re;
01688 int nth;
01689 VALUE val;
01690 {
01691 VALUE match;
01692 long start, end, len;
01693
01694 if (rb_reg_search(re, str, 0, 0) < 0) {
01695 rb_raise(rb_eIndexError, "regexp not matched");
01696 }
01697 match = rb_backref_get();
01698 if (nth >= RMATCH(match)->regs->num_regs) {
01699 out_of_range:
01700 rb_raise(rb_eIndexError, "index %d out of regexp", nth);
01701 }
01702 if (nth < 0) {
01703 if (-nth >= RMATCH(match)->regs->num_regs) {
01704 goto out_of_range;
01705 }
01706 nth += RMATCH(match)->regs->num_regs;
01707 }
01708
01709 start = RMATCH(match)->BEG(nth);
01710 if (start == -1) {
01711 rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
01712 }
01713 end = RMATCH(match)->END(nth);
01714 len = end - start;
01715 rb_str_splice(str, start, len, val);
01716 }
01717
01718 static VALUE
01719 rb_str_aset(str, indx, val)
01720 VALUE str;
01721 VALUE indx, val;
01722 {
01723 long idx, beg;
01724
01725 switch (TYPE(indx)) {
01726 case T_FIXNUM:
01727 num_index:
01728 idx = FIX2LONG(indx);
01729 if (RSTRING(str)->len <= idx) {
01730 out_of_range:
01731 rb_raise(rb_eIndexError, "index %ld out of string", idx);
01732 }
01733 if (idx < 0) {
01734 if (-idx > RSTRING(str)->len)
01735 goto out_of_range;
01736 idx += RSTRING(str)->len;
01737 }
01738 if (FIXNUM_P(val)) {
01739 rb_str_modify(str);
01740 if (RSTRING(str)->len == idx) {
01741 RSTRING(str)->len += 1;
01742 RESIZE_CAPA(str, RSTRING(str)->len);
01743 }
01744 RSTRING(str)->ptr[idx] = FIX2INT(val) & 0xff;
01745 }
01746 else {
01747 rb_str_splice(str, idx, 1, val);
01748 }
01749 return val;
01750
01751 case T_REGEXP:
01752 rb_str_subpat_set(str, indx, 0, val);
01753 return val;
01754
01755 case T_STRING:
01756 beg = rb_str_index(str, indx, 0);
01757 if (beg < 0) {
01758 rb_raise(rb_eIndexError, "string not matched");
01759 }
01760 rb_str_splice(str, beg, RSTRING(indx)->len, val);
01761 return val;
01762
01763 default:
01764
01765 {
01766 long beg, len;
01767 if (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 2)) {
01768 rb_str_splice(str, beg, len, val);
01769 return val;
01770 }
01771 }
01772 idx = NUM2LONG(indx);
01773 goto num_index;
01774 }
01775 }
01776
01777
01778
01779
01780
01781
01782
01783
01784
01785
01786
01787
01788
01789
01790
01791
01792
01793
01794
01795
01796
01797
01798
01799
01800
01801
01802 static VALUE
01803 rb_str_aset_m(argc, argv, str)
01804 int argc;
01805 VALUE *argv;
01806 VALUE str;
01807 {
01808 if (argc == 3) {
01809 if (TYPE(argv[0]) == T_REGEXP) {
01810 rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
01811 }
01812 else {
01813 rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
01814 }
01815 return argv[2];
01816 }
01817 if (argc != 2) {
01818 rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
01819 }
01820 return rb_str_aset(str, argv[0], argv[1]);
01821 }
01822
01823
01824
01825
01826
01827
01828
01829
01830
01831
01832
01833
01834
01835
01836
01837
01838
01839
01840 static VALUE
01841 rb_str_insert(str, idx, str2)
01842 VALUE str, idx, str2;
01843 {
01844 long pos = NUM2LONG(idx);
01845
01846 if (pos == -1) {
01847 pos = RSTRING(str)->len;
01848 }
01849 else if (pos < 0) {
01850 pos++;
01851 }
01852 rb_str_splice(str, pos, 0, str2);
01853 return str;
01854 }
01855
01856
01857
01858
01859
01860
01861
01862
01863
01864
01865
01866
01867
01868
01869
01870
01871
01872
01873
01874
01875
01876
01877
01878 static VALUE
01879 rb_str_slice_bang(argc, argv, str)
01880 int argc;
01881 VALUE *argv;
01882 VALUE str;
01883 {
01884 VALUE result;
01885 VALUE buf[3];
01886 int i;
01887
01888 if (argc < 1 || 2 < argc) {
01889 rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
01890 }
01891 for (i=0; i<argc; i++) {
01892 buf[i] = argv[i];
01893 }
01894 buf[i] = rb_str_new(0,0);
01895 result = rb_str_aref_m(argc, buf, str);
01896 if (!NIL_P(result)) {
01897 rb_str_aset_m(argc+1, buf, str);
01898 }
01899 return result;
01900 }
01901
01902 static VALUE
01903 get_pat(pat, quote)
01904 VALUE pat;
01905 int quote;
01906 {
01907 VALUE val;
01908
01909 switch (TYPE(pat)) {
01910 case T_REGEXP:
01911 return pat;
01912
01913 case T_STRING:
01914 break;
01915
01916 default:
01917 val = rb_check_string_type(pat);
01918 if (NIL_P(val)) {
01919 Check_Type(pat, T_REGEXP);
01920 }
01921 pat = val;
01922 }
01923
01924 if (quote) {
01925 pat = rb_reg_quote(pat);
01926 }
01927
01928 return rb_reg_regcomp(pat);
01929 }
01930
01931
01932
01933
01934
01935
01936
01937
01938
01939
01940
01941
01942 static VALUE
01943 rb_str_sub_bang(argc, argv, str)
01944 int argc;
01945 VALUE *argv;
01946 VALUE str;
01947 {
01948 VALUE pat, repl, match;
01949 struct re_registers *regs;
01950 int iter = 0;
01951 int tainted = 0;
01952 long plen;
01953
01954 if (argc == 1 && rb_block_given_p()) {
01955 iter = 1;
01956 }
01957 else if (argc == 2) {
01958 repl = argv[1];
01959 StringValue(repl);
01960 if (OBJ_TAINTED(repl)) tainted = 1;
01961 }
01962 else {
01963 rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
01964 }
01965
01966 pat = get_pat(argv[0], 1);
01967 if (rb_reg_search(pat, str, 0, 0) >= 0) {
01968 rb_str_modify(str);
01969 match = rb_backref_get();
01970 regs = RMATCH(match)->regs;
01971
01972 if (iter) {
01973 char *p = RSTRING(str)->ptr; long len = RSTRING(str)->len;
01974
01975 rb_match_busy(match);
01976 repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
01977 str_mod_check(str, p, len);
01978 str_frozen_check(str);
01979 rb_backref_set(match);
01980 }
01981 else {
01982 repl = rb_reg_regsub(repl, str, regs);
01983 }
01984 if (OBJ_TAINTED(repl)) tainted = 1;
01985 plen = END(0) - BEG(0);
01986 if (RSTRING(repl)->len > plen) {
01987 RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(repl)->len - plen);
01988 }
01989 if (RSTRING(repl)->len != plen) {
01990 memmove(RSTRING(str)->ptr + BEG(0) + RSTRING(repl)->len,
01991 RSTRING(str)->ptr + BEG(0) + plen,
01992 RSTRING(str)->len - BEG(0) - plen);
01993 }
01994 memcpy(RSTRING(str)->ptr + BEG(0),
01995 RSTRING(repl)->ptr, RSTRING(repl)->len);
01996 RSTRING(str)->len += RSTRING(repl)->len - plen;
01997 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
01998 if (tainted) OBJ_TAINT(str);
01999
02000 return str;
02001 }
02002 return Qnil;
02003 }
02004
02005
02006
02007
02008
02009
02010
02011
02012
02013
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034
02035
02036 static VALUE
02037 rb_str_sub(argc, argv, str)
02038 int argc;
02039 VALUE *argv;
02040 VALUE str;
02041 {
02042 str = rb_str_dup(str);
02043 rb_str_sub_bang(argc, argv, str);
02044 return str;
02045 }
02046
02047 static VALUE
02048 str_gsub(argc, argv, str, bang)
02049 int argc;
02050 VALUE *argv;
02051 VALUE str;
02052 int bang;
02053 {
02054 VALUE pat, val, repl, match, dest;
02055 struct re_registers *regs;
02056 long beg, n;
02057 long offset, blen, slen, len;
02058 int iter = 0;
02059 char *buf, *bp, *sp, *cp;
02060 int tainted = 0;
02061
02062 if (argc == 1 && rb_block_given_p()) {
02063 iter = 1;
02064 }
02065 else if (argc == 2) {
02066 repl = argv[1];
02067 StringValue(repl);
02068 if (OBJ_TAINTED(repl)) tainted = 1;
02069 }
02070 else {
02071 rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
02072 }
02073
02074 pat = get_pat(argv[0], 1);
02075 offset=0; n=0;
02076 beg = rb_reg_search(pat, str, 0, 0);
02077 if (beg < 0) {
02078 if (bang) return Qnil;
02079 return rb_str_dup(str);
02080 }
02081
02082 blen = RSTRING(str)->len + 30;
02083 dest = str_new(0, 0, blen);
02084 buf = RSTRING(dest)->ptr;
02085 bp = buf;
02086 sp = cp = RSTRING(str)->ptr;
02087 slen = RSTRING(str)->len;
02088
02089 rb_str_locktmp(dest);
02090 while (beg >= 0) {
02091 n++;
02092 match = rb_backref_get();
02093 regs = RMATCH(match)->regs;
02094 if (iter) {
02095 rb_match_busy(match);
02096 val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
02097 str_mod_check(str, sp, slen);
02098 if (bang) str_frozen_check(str);
02099 if (val == dest) {
02100 rb_raise(rb_eRuntimeError, "block should not cheat");
02101 }
02102 rb_backref_set(match);
02103 }
02104 else {
02105 val = rb_reg_regsub(repl, str, regs);
02106 }
02107 if (OBJ_TAINTED(val)) tainted = 1;
02108 len = (bp - buf) + (beg - offset) + RSTRING(val)->len + 3;
02109 if (blen < len) {
02110 while (blen < len) blen *= 2;
02111 len = bp - buf;
02112 RESIZE_CAPA(dest, blen);
02113 RSTRING(dest)->len = blen;
02114 buf = RSTRING(dest)->ptr;
02115 bp = buf + len;
02116 }
02117 len = beg - offset;
02118 memcpy(bp, cp, len);
02119 bp += len;
02120 memcpy(bp, RSTRING(val)->ptr, RSTRING(val)->len);
02121 bp += RSTRING(val)->len;
02122 offset = END(0);
02123 if (BEG(0) == END(0)) {
02124
02125
02126
02127
02128 if (RSTRING(str)->len <= END(0)) break;
02129 len = mbclen2(RSTRING(str)->ptr[END(0)], pat);
02130 memcpy(bp, RSTRING(str)->ptr+END(0), len);
02131 bp += len;
02132 offset = END(0) + len;
02133 }
02134 cp = RSTRING(str)->ptr + offset;
02135 if (offset > RSTRING(str)->len) break;
02136 beg = rb_reg_search(pat, str, offset, 0);
02137 }
02138 if (RSTRING(str)->len > offset) {
02139 len = bp - buf;
02140 if (blen - len < RSTRING(str)->len - offset) {
02141 blen = len + RSTRING(str)->len - offset;
02142 RESIZE_CAPA(dest, blen);
02143 buf = RSTRING(dest)->ptr;
02144 bp = buf + len;
02145 }
02146 memcpy(bp, cp, RSTRING(str)->len - offset);
02147 bp += RSTRING(str)->len - offset;
02148 }
02149 rb_backref_set(match);
02150 *bp = '\0';
02151 rb_str_unlocktmp(dest);
02152 if (bang) {
02153 if (str_independent(str)) {
02154 free(RSTRING(str)->ptr);
02155 }
02156 FL_UNSET(str, STR_NOCAPA);
02157 RSTRING(str)->ptr = buf;
02158 RSTRING(str)->aux.capa = blen;
02159 RSTRING(dest)->ptr = 0;
02160 RSTRING(dest)->len = 0;
02161 }
02162 else {
02163 RBASIC(dest)->klass = rb_obj_class(str);
02164 OBJ_INFECT(dest, str);
02165 str = dest;
02166 }
02167 RSTRING(str)->len = bp - buf;
02168
02169 if (tainted) OBJ_TAINT(str);
02170 return str;
02171 }
02172
02173
02174
02175
02176
02177
02178
02179
02180
02181
02182
02183 static VALUE
02184 rb_str_gsub_bang(argc, argv, str)
02185 int argc;
02186 VALUE *argv;
02187 VALUE str;
02188 {
02189 return str_gsub(argc, argv, str, 1);
02190 }
02191
02192
02193
02194
02195
02196
02197
02198
02199
02200
02201
02202
02203
02204
02205
02206
02207
02208
02209
02210
02211
02212
02213
02214
02215
02216
02217
02218
02219
02220
02221
02222
02223
02224 static VALUE
02225 rb_str_gsub(argc, argv, str)
02226 int argc;
02227 VALUE *argv;
02228 VALUE str;
02229 {
02230 return str_gsub(argc, argv, str, 0);
02231 }
02232
02233
02234
02235
02236
02237
02238
02239
02240
02241
02242
02243
02244
02245 static VALUE
02246 rb_str_replace(str, str2)
02247 VALUE str, str2;
02248 {
02249 if (str == str2) return str;
02250
02251 StringValue(str2);
02252 if (FL_TEST(str2, ELTS_SHARED)) {
02253 if (str_independent(str)) {
02254 free(RSTRING(str)->ptr);
02255 }
02256 RSTRING(str)->len = RSTRING(str2)->len;
02257 RSTRING(str)->ptr = RSTRING(str2)->ptr;
02258 FL_SET(str, ELTS_SHARED);
02259 FL_UNSET(str, STR_ASSOC);
02260 RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
02261 }
02262 else {
02263 rb_str_modify(str);
02264 rb_str_resize(str, RSTRING(str2)->len);
02265 memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len);
02266 if (FL_TEST(str2, STR_ASSOC)) {
02267 FL_SET(str, STR_ASSOC);
02268 RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
02269 }
02270 }
02271
02272 OBJ_INFECT(str, str2);
02273 return str;
02274 }
02275
02276 static VALUE
02277 uscore_get()
02278 {
02279 VALUE line;
02280
02281 line = rb_lastline_get();
02282 if (TYPE(line) != T_STRING) {
02283 rb_raise(rb_eTypeError, "$_ value need to be String (%s given)",
02284 NIL_P(line) ? "nil" : rb_obj_classname(line));
02285 }
02286 return line;
02287 }
02288
02289
02290
02291
02292
02293
02294
02295
02296
02297 static VALUE
02298 rb_f_sub_bang(argc, argv)
02299 int argc;
02300 VALUE *argv;
02301 {
02302 return rb_str_sub_bang(argc, argv, uscore_get());
02303 }
02304
02305
02306
02307
02308
02309
02310
02311
02312
02313
02314 static VALUE
02315 rb_f_sub(argc, argv)
02316 int argc;
02317 VALUE *argv;
02318 {
02319 VALUE str = rb_str_dup(uscore_get());
02320
02321 if (NIL_P(rb_str_sub_bang(argc, argv, str)))
02322 return str;
02323 rb_lastline_set(str);
02324 return str;
02325 }
02326
02327
02328
02329
02330
02331
02332
02333
02334
02335
02336
02337
02338
02339
02340 static VALUE
02341 rb_f_gsub_bang(argc, argv)
02342 int argc;
02343 VALUE *argv;
02344 {
02345 return rb_str_gsub_bang(argc, argv, uscore_get());
02346 }
02347
02348
02349
02350
02351
02352
02353
02354
02355
02356
02357
02358
02359
02360
02361 static VALUE
02362 rb_f_gsub(argc, argv)
02363 int argc;
02364 VALUE *argv;
02365 {
02366 VALUE str = rb_str_dup(uscore_get());
02367
02368 if (NIL_P(rb_str_gsub_bang(argc, argv, str)))
02369 return str;
02370 rb_lastline_set(str);
02371 return str;
02372 }
02373
02374
02375
02376
02377
02378
02379
02380
02381
02382 static VALUE
02383 rb_str_reverse_bang(str)
02384 VALUE str;
02385 {
02386 char *s, *e;
02387 char c;
02388
02389 if (RSTRING(str)->len > 1) {
02390 rb_str_modify(str);
02391 s = RSTRING(str)->ptr;
02392 e = s + RSTRING(str)->len - 1;
02393 while (s < e) {
02394 c = *s;
02395 *s++ = *e;
02396 *e-- = c;
02397 }
02398 }
02399 return str;
02400 }
02401
02402
02403
02404
02405
02406
02407
02408
02409
02410
02411
02412 static VALUE
02413 rb_str_reverse(str)
02414 VALUE str;
02415 {
02416 VALUE obj;
02417 char *s, *e, *p;
02418
02419 if (RSTRING(str)->len <= 1) return rb_str_dup(str);
02420
02421 obj = rb_str_new5(str, 0, RSTRING(str)->len);
02422 s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1;
02423 p = RSTRING(obj)->ptr;
02424
02425 while (e >= s) {
02426 *p++ = *e--;
02427 }
02428 OBJ_INFECT(obj, str);
02429
02430 return obj;
02431 }
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447 static VALUE
02448 rb_str_include(str, arg)
02449 VALUE str, arg;
02450 {
02451 long i;
02452
02453 if (FIXNUM_P(arg)) {
02454 if (memchr(RSTRING(str)->ptr, FIX2INT(arg), RSTRING(str)->len))
02455 return Qtrue;
02456 return Qfalse;
02457 }
02458
02459 StringValue(arg);
02460 i = rb_str_index(str, arg, 0);
02461
02462 if (i == -1) return Qfalse;
02463 return Qtrue;
02464 }
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487
02488 static VALUE
02489 rb_str_to_i(argc, argv, str)
02490 int argc;
02491 VALUE *argv;
02492 VALUE str;
02493 {
02494 VALUE b;
02495 int base;
02496
02497 rb_scan_args(argc, argv, "01", &b);
02498 if (argc == 0) base = 10;
02499 else base = NUM2INT(b);
02500
02501 if (base < 0) {
02502 rb_raise(rb_eArgError, "illegal radix %d", base);
02503 }
02504 return rb_str_to_inum(str, base, Qfalse);
02505 }
02506
02507
02508
02509
02510
02511
02512
02513
02514
02515
02516
02517
02518
02519
02520
02521
02522 static VALUE
02523 rb_str_to_f(str)
02524 VALUE str;
02525 {
02526 return rb_float_new(rb_str_to_dbl(str, Qfalse));
02527 }
02528
02529
02530
02531
02532
02533
02534
02535
02536
02537
02538 static VALUE
02539 rb_str_to_s(str)
02540 VALUE str;
02541 {
02542 if (rb_obj_class(str) != rb_cString) {
02543 VALUE dup = str_alloc(rb_cString);
02544 rb_str_replace(dup, str);
02545 return dup;
02546 }
02547 return str;
02548 }
02549
02550
02551
02552
02553
02554
02555
02556
02557
02558
02559
02560
02561
02562 VALUE
02563 rb_str_inspect(str)
02564 VALUE str;
02565 {
02566 char *p, *pend;
02567 VALUE result = rb_str_buf_new2("\"");
02568 char s[5];
02569
02570 p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
02571 while (p < pend) {
02572 char c = *p++;
02573 if (ismbchar(c) && p < pend) {
02574 int len = mbclen(c);
02575 rb_str_buf_cat(result, p - 1, len);
02576 p += len - 1;
02577 }
02578 else if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
02579 s[0] = '\\'; s[1] = c;
02580 rb_str_buf_cat(result, s, 2);
02581 }
02582 else if (ISPRINT(c)) {
02583 s[0] = c;
02584 rb_str_buf_cat(result, s, 1);
02585 }
02586 else if (c == '\n') {
02587 s[0] = '\\'; s[1] = 'n';
02588 rb_str_buf_cat(result, s, 2);
02589 }
02590 else if (c == '\r') {
02591 s[0] = '\\'; s[1] = 'r';
02592 rb_str_buf_cat(result, s, 2);
02593 }
02594 else if (c == '\t') {
02595 s[0] = '\\'; s[1] = 't';
02596 rb_str_buf_cat(result, s, 2);
02597 }
02598 else if (c == '\f') {
02599 s[0] = '\\'; s[1] = 'f';
02600 rb_str_buf_cat(result, s, 2);
02601 }
02602 else if (c == '\013') {
02603 s[0] = '\\'; s[1] = 'v';
02604 rb_str_buf_cat(result, s, 2);
02605 }
02606 else if (c == '\007') {
02607 s[0] = '\\'; s[1] = 'a';
02608 rb_str_buf_cat(result, s, 2);
02609 }
02610 else if (c == 033) {
02611 s[0] = '\\'; s[1] = 'e';
02612 rb_str_buf_cat(result, s, 2);
02613 }
02614 else {
02615 sprintf(s, "\\%03o", c & 0377);
02616 rb_str_buf_cat2(result, s);
02617 }
02618 }
02619 rb_str_buf_cat2(result, "\"");
02620
02621 OBJ_INFECT(result, str);
02622 return result;
02623 }
02624
02625
02626
02627
02628
02629
02630
02631
02632
02633
02634 VALUE
02635 rb_str_dump(str)
02636 VALUE str;
02637 {
02638 long len;
02639 char *p, *pend;
02640 char *q, *qend;
02641 VALUE result;
02642
02643 len = 2;
02644 p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
02645 while (p < pend) {
02646 char c = *p++;
02647 switch (c) {
02648 case '"': case '\\':
02649 case '\n': case '\r':
02650 case '\t': case '\f':
02651 case '\013': case '\007': case '\033':
02652 len += 2;
02653 break;
02654
02655 case '#':
02656 len += IS_EVSTR(p, pend) ? 2 : 1;
02657 break;
02658
02659 default:
02660 if (ISPRINT(c)) {
02661 len++;
02662 }
02663 else {
02664 len += 4;
02665 }
02666 break;
02667 }
02668 }
02669
02670 result = rb_str_new5(str, 0, len);
02671 p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
02672 q = RSTRING(result)->ptr; qend = q + len;
02673
02674 *q++ = '"';
02675 while (p < pend) {
02676 char c = *p++;
02677
02678 if (c == '"' || c == '\\') {
02679 *q++ = '\\';
02680 *q++ = c;
02681 }
02682 else if (c == '#') {
02683 if (IS_EVSTR(p, pend)) *q++ = '\\';
02684 *q++ = '#';
02685 }
02686 else if (ISPRINT(c)) {
02687 *q++ = c;
02688 }
02689 else if (c == '\n') {
02690 *q++ = '\\';
02691 *q++ = 'n';
02692 }
02693 else if (c == '\r') {
02694 *q++ = '\\';
02695 *q++ = 'r';
02696 }
02697 else if (c == '\t') {
02698 *q++ = '\\';
02699 *q++ = 't';
02700 }
02701 else if (c == '\f') {
02702 *q++ = '\\';
02703 *q++ = 'f';
02704 }
02705 else if (c == '\013') {
02706 *q++ = '\\';
02707 *q++ = 'v';
02708 }
02709 else if (c == '\007') {
02710 *q++ = '\\';
02711 *q++ = 'a';
02712 }
02713 else if (c == '\033') {
02714 *q++ = '\\';
02715 *q++ = 'e';
02716 }
02717 else {
02718 *q++ = '\\';
02719 sprintf(q, "%03o", c&0xff);
02720 q += 3;
02721 }
02722 }
02723 *q++ = '"';
02724
02725 OBJ_INFECT(result, str);
02726 return result;
02727 }
02728
02729
02730
02731
02732
02733
02734
02735
02736
02737
02738 static VALUE
02739 rb_str_upcase_bang(str)
02740 VALUE str;
02741 {
02742 char *s, *send;
02743 int modify = 0;
02744
02745 rb_str_modify(str);
02746 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
02747 while (s < send) {
02748 if (ismbchar(*s)) {
02749 s+=mbclen(*s) - 1;
02750 }
02751 else if (ISLOWER(*s)) {
02752 *s = toupper(*s);
02753 modify = 1;
02754 }
02755 s++;
02756 }
02757
02758 if (modify) return str;
02759 return Qnil;
02760 }
02761
02762
02763
02764
02765
02766
02767
02768
02769
02770
02771
02772
02773
02774 static VALUE
02775 rb_str_upcase(str)
02776 VALUE str;
02777 {
02778 str = rb_str_dup(str);
02779 rb_str_upcase_bang(str);
02780 return str;
02781 }
02782
02783
02784
02785
02786
02787
02788
02789
02790
02791
02792 static VALUE
02793 rb_str_downcase_bang(str)
02794 VALUE str;
02795 {
02796 char *s, *send;
02797 int modify = 0;
02798
02799 rb_str_modify(str);
02800 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
02801 while (s < send) {
02802 if (ismbchar(*s)) {
02803 s+=mbclen(*s) - 1;
02804 }
02805 else if (ISUPPER(*s)) {
02806 *s = tolower(*s);
02807 modify = 1;
02808 }
02809 s++;
02810 }
02811
02812 if (modify) return str;
02813 return Qnil;
02814 }
02815
02816
02817
02818
02819
02820
02821
02822
02823
02824
02825
02826
02827
02828 static VALUE
02829 rb_str_downcase(str)
02830 VALUE str;
02831 {
02832 str = rb_str_dup(str);
02833 rb_str_downcase_bang(str);
02834 return str;
02835 }
02836
02837
02838
02839
02840
02841
02842
02843
02844
02845
02846
02847
02848
02849
02850
02851 static VALUE
02852 rb_str_capitalize_bang(str)
02853 VALUE str;
02854 {
02855 char *s, *send;
02856 int modify = 0;
02857
02858 rb_str_modify(str);
02859 if (RSTRING(str)->len == 0 || !RSTRING(str)->ptr) return Qnil;
02860 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
02861 if (ISLOWER(*s)) {
02862 *s = toupper(*s);
02863 modify = 1;
02864 }
02865 while (++s < send) {
02866 if (ismbchar(*s)) {
02867 s+=mbclen(*s) - 1;
02868 }
02869 else if (ISUPPER(*s)) {
02870 *s = tolower(*s);
02871 modify = 1;
02872 }
02873 }
02874 if (modify) return str;
02875 return Qnil;
02876 }
02877
02878
02879
02880
02881
02882
02883
02884
02885
02886
02887
02888
02889
02890
02891 static VALUE
02892 rb_str_capitalize(str)
02893 VALUE str;
02894 {
02895 str = rb_str_dup(str);
02896 rb_str_capitalize_bang(str);
02897 return str;
02898 }
02899
02900
02901
02902
02903
02904
02905
02906
02907
02908
02909 static VALUE
02910 rb_str_swapcase_bang(str)
02911 VALUE str;
02912 {
02913 char *s, *send;
02914 int modify = 0;
02915
02916 rb_str_modify(str);
02917 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
02918 while (s < send) {
02919 if (ismbchar(*s)) {
02920 s+=mbclen(*s) - 1;
02921 }
02922 else if (ISUPPER(*s)) {
02923 *s = tolower(*s);
02924 modify = 1;
02925 }
02926 else if (ISLOWER(*s)) {
02927 *s = toupper(*s);
02928 modify = 1;
02929 }
02930 s++;
02931 }
02932
02933 if (modify) return str;
02934 return Qnil;
02935 }
02936
02937
02938
02939
02940
02941
02942
02943
02944
02945
02946
02947
02948
02949 static VALUE
02950 rb_str_swapcase(str)
02951 VALUE str;
02952 {
02953 str = rb_str_dup(str);
02954 rb_str_swapcase_bang(str);
02955 return str;
02956 }
02957
typedef unsigned char *USTR;

/*
 * Cursor over a tr-style character specification.
 *
 *   gen     non-zero while a range "x-y" is being expanded
 *   now     the character most recently produced
 *   max     upper bound of the range being expanded
 *   p/pend  current position in, and end of, the specification
 */
struct tr {
    int gen, now, max;
    char *p, *pend;
};

/*
 * Return the next character (0..255) described by the specification
 * in +t+, or -1 when it is exhausted.
 *
 * A backslash that is not the last byte makes the following byte
 * literal.  "x-y" with x < y expands to every character from x to y
 * inclusive; a reversed range (x > y) produces nothing.  A '-' in the
 * last position is an ordinary character.
 *
 * A one-character range such as "a-a" produces that character exactly
 * once and leaves t->now equal to it.  Range expansion is only entered
 * when the bounds differ; otherwise the character would be delivered
 * twice and t->now would end up one past the range, which matters to
 * callers that reuse t->now after the specification is exhausted.
 */
static int
trnext(t)
    struct tr *t;
{
    for (;;) {
        if (!t->gen) {
            if (t->p == t->pend) return -1;
            if (t->p < t->pend - 1 && *t->p == '\\') {
                t->p++;
            }
            t->now = *(USTR)t->p++;
            if (t->p < t->pend - 1 && *t->p == '-') {
                int hi;

                t->p++;
                /* the check above guarantees a byte follows the '-' */
                hi = *(USTR)t->p++;
                if (t->now > hi) {
                    continue;	/* reversed range: contributes nothing */
                }
                if (t->now < hi) {
                    t->gen = 1;
                    t->max = hi;
                }
                /* t->now == hi: single character, no expansion needed */
            }
            return t->now;
        }
        else if (++t->now < t->max) {
            return t->now;
        }
        else {
            t->gen = 0;
            return t->max;
        }
    }
}
02998
02999 static VALUE rb_str_delete_bang (int,VALUE*,VALUE);
03000
03001 static VALUE
03002 tr_trans(str, src, repl, sflag)
03003 VALUE str, src, repl;
03004 int sflag;
03005 {
03006 struct tr trsrc, trrepl;
03007 int cflag = 0;
03008 int trans[256];
03009 int i, c, modify = 0;
03010 char *s, *send;
03011
03012 StringValue(src);
03013 StringValue(repl);
03014 if (RSTRING(str)->len == 0 || !RSTRING(str)->ptr) return Qnil;
03015 trsrc.p = RSTRING(src)->ptr; trsrc.pend = trsrc.p + RSTRING(src)->len;
03016 if (RSTRING(src)->len >= 2 && RSTRING(src)->ptr[0] == '^') {
03017 cflag++;
03018 trsrc.p++;
03019 }
03020 if (RSTRING(repl)->len == 0) {
03021 return rb_str_delete_bang(1, &src, str);
03022 }
03023 trrepl.p = RSTRING(repl)->ptr;
03024 trrepl.pend = trrepl.p + RSTRING(repl)->len;
03025 trsrc.gen = trrepl.gen = 0;
03026 trsrc.now = trrepl.now = 0;
03027 trsrc.max = trrepl.max = 0;
03028
03029 if (cflag) {
03030 for (i=0; i<256; i++) {
03031 trans[i] = 1;
03032 }
03033 while ((c = trnext(&trsrc)) >= 0) {
03034 trans[c & 0xff] = -1;
03035 }
03036 while ((c = trnext(&trrepl)) >= 0)
03037 ;
03038 for (i=0; i<256; i++) {
03039 if (trans[i] >= 0) {
03040 trans[i] = trrepl.now;
03041 }
03042 }
03043 }
03044 else {
03045 int r;
03046
03047 for (i=0; i<256; i++) {
03048 trans[i] = -1;
03049 }
03050 while ((c = trnext(&trsrc)) >= 0) {
03051 r = trnext(&trrepl);
03052 if (r == -1) r = trrepl.now;
03053 trans[c & 0xff] = r;
03054 }
03055 }
03056
03057 rb_str_modify(str);
03058 s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
03059 if (sflag) {
03060 char *t = s;
03061 int c0, last = -1;
03062
03063 while (s < send) {
03064 c0 = *s++;
03065 if ((c = trans[c0 & 0xff]) >= 0) {
03066 if (last == c) continue;
03067 last = c;
03068 *t++ = c & 0xff;
03069 modify = 1;
03070 }
03071 else {
03072 last = -1;
03073 *t++ = c0;
03074 }
03075 }
03076 if (RSTRING(str)->len > (t - RSTRING(str)->ptr)) {
03077 RSTRING(str)->len = (t - RSTRING(str)->ptr);
03078 modify = 1;
03079 *t = '\0';
03080 }
03081 }
03082 else {
03083 while (s < send) {
03084 if ((c = trans[*s & 0xff]) >= 0) {
03085 *s = c & 0xff;
03086 modify = 1;
03087 }
03088 s++;
03089 }
03090 }
03091
03092 if (modify) return str;
03093 return Qnil;
03094 }
03095
03096
03097
03098
03099
03100
03101
03102
03103
03104
03105
03106 static VALUE
03107 rb_str_tr_bang(str, src, repl)
03108 VALUE str, src, repl;
03109 {
03110 return tr_trans(str, src, repl, 0);
03111 }
03112
03113
03114
03115
03116
03117
03118
03119
03120
03121
03122
03123
03124
03125
03126
03127
03128
03129
03130
03131 static VALUE
03132 rb_str_tr(str, src, repl)
03133 VALUE str, src, repl;
03134 {
03135 str = rb_str_dup(str);
03136 tr_trans(str, src, repl, 0);
03137 return str;
03138 }
03139
03140 static void
03141 tr_setup_table(str, table, init)
03142 VALUE str;
03143 char table[256];
03144 int init;
03145 {
03146 char buf[256];
03147 struct tr tr;
03148 int i, c;
03149 int cflag = 0;
03150
03151 tr.p = RSTRING(str)->ptr; tr.pend = tr.p + RSTRING(str)->len;
03152 tr.gen = tr.now = tr.max = 0;
03153 if (RSTRING(str)->len > 1 && RSTRING(str)->ptr[0] == '^') {
03154 cflag = 1;
03155 tr.p++;
03156 }
03157
03158 if (init) {
03159 for (i=0; i<256; i++) {
03160 table[i] = 1;
03161 }
03162 }
03163 for (i=0; i<256; i++) {
03164 buf[i] = cflag;
03165 }
03166 while ((c = trnext(&tr)) >= 0) {
03167 buf[c & 0xff] = !cflag;
03168 }
03169 for (i=0; i<256; i++) {
03170 table[i] = table[i] && buf[i];
03171 }
03172 }
03173
03174
03175
03176
03177
03178
03179
03180
03181
03182
03183 static VALUE
03184 rb_str_delete_bang(argc, argv, str)
03185 int argc;
03186 VALUE *argv;
03187 VALUE str;
03188 {
03189 char *s, *send, *t;
03190 char squeez[256];
03191 int modify = 0;
03192 int init = 1;
03193 int i;
03194
03195 if (argc < 1) {
03196 rb_raise(rb_eArgError, "wrong number of arguments");
03197 }
03198 for (i=0; i<argc; i++) {
03199 VALUE s = argv[i];
03200
03201 StringValue(s);
03202 tr_setup_table(s, squeez, init);
03203 init = 0;
03204 }
03205
03206 rb_str_modify(str);
03207 s = t = RSTRING(str)->ptr;
03208 if (!s || RSTRING(str)->len == 0) return Qnil;
03209 send = s + RSTRING(str)->len;
03210 while (s < send) {
03211 if (squeez[*s & 0xff])
03212 modify = 1;
03213 else
03214 *t++ = *s;
03215 s++;
03216 }
03217 *t = '\0';
03218 RSTRING(str)->len = t - RSTRING(str)->ptr;
03219
03220 if (modify) return str;
03221 return Qnil;
03222 }
03223
03224
03225
03226
03227
03228
03229
03230
03231
03232
03233
03234
03235
03236
03237
03238
03239 static VALUE
03240 rb_str_delete(argc, argv, str)
03241 int argc;
03242 VALUE *argv;
03243 VALUE str;
03244 {
03245 str = rb_str_dup(str);
03246 rb_str_delete_bang(argc, argv, str);
03247 return str;
03248 }
03249
03250
03251
03252
03253
03254
03255
03256
03257
03258
03259 static VALUE
03260 rb_str_squeeze_bang(argc, argv, str)
03261 int argc;
03262 VALUE *argv;
03263 VALUE str;
03264 {
03265 char squeez[256];
03266 char *s, *send, *t;
03267 int c, save, modify = 0;
03268 int init = 1;
03269 int i;
03270
03271 if (argc == 0) {
03272 for (i=0; i<256; i++) {
03273 squeez[i] = 1;
03274 }
03275 }
03276 else {
03277 for (i=0; i<argc; i++) {
03278 VALUE s = argv[i];
03279
03280 StringValue(s);
03281 tr_setup_table(s, squeez, init);
03282 init = 0;
03283 }
03284 }
03285
03286 rb_str_modify(str);
03287 s = t = RSTRING(str)->ptr;
03288 if (!s || RSTRING(str)->len == 0) return Qnil;
03289 send = s + RSTRING(str)->len;
03290 save = -1;
03291 while (s < send) {
03292 c = *s++ & 0xff;
03293 if (c != save || !squeez[c]) {
03294 *t++ = save = c;
03295 }
03296 }
03297 *t = '\0';
03298 if (t - RSTRING(str)->ptr != RSTRING(str)->len) {
03299 RSTRING(str)->len = t - RSTRING(str)->ptr;
03300 modify = 1;
03301 }
03302
03303 if (modify) return str;
03304 return Qnil;
03305 }
03306
03307
03308
03309
03310
03311
03312
03313
03314
03315
03316
03317
03318
03319
03320
03321
03322
03323 static VALUE
03324 rb_str_squeeze(argc, argv, str)
03325 int argc;
03326 VALUE *argv;
03327 VALUE str;
03328 {
03329 str = rb_str_dup(str);
03330 rb_str_squeeze_bang(argc, argv, str);
03331 return str;
03332 }
03333
03334
03335
03336
03337
03338
03339
03340
03341
03342
03343 static VALUE
03344 rb_str_tr_s_bang(str, src, repl)
03345 VALUE str, src, repl;
03346 {
03347 return tr_trans(str, src, repl, 1);
03348 }
03349
03350
03351
03352
03353
03354
03355
03356
03357
03358
03359
03360
03361
03362
03363
03364 static VALUE
03365 rb_str_tr_s(str, src, repl)
03366 VALUE str, src, repl;
03367 {
03368 str = rb_str_dup(str);
03369 tr_trans(str, src, repl, 1);
03370 return str;
03371 }
03372
03373
03374
03375
03376
03377
03378
03379
03380
03381
03382
03383
03384
03385
03386
03387
03388
03389
03390 static VALUE
03391 rb_str_count(argc, argv, str)
03392 int argc;
03393 VALUE *argv;
03394 VALUE str;
03395 {
03396 char table[256];
03397 char *s, *send;
03398 int init = 1;
03399 int i;
03400
03401 if (argc < 1) {
03402 rb_raise(rb_eArgError, "wrong number of arguments");
03403 }
03404 for (i=0; i<argc; i++) {
03405 VALUE s = argv[i];
03406
03407 StringValue(s);
03408 tr_setup_table(s, table, init);
03409 init = 0;
03410 }
03411
03412 s = RSTRING(str)->ptr;
03413 if (!s || RSTRING(str)->len == 0) return INT2FIX(0);
03414 send = s + RSTRING(str)->len;
03415 i = 0;
03416 while (s < send) {
03417 if (table[*s++ & 0xff]) {
03418 i++;
03419 }
03420 }
03421 return INT2NUM(i);
03422 }
03423
03424
03425
03426
03427
03428
03429
03430
03431
03432
03433
03434
03435
03436
03437
03438
03439
03440
03441
03442
03443
03444
03445
03446
03447
03448
03449
03450
03451
03452
03453
03454
03455
03456
03457
03458
03459
03460
03461
03462
03463
03464
03465
/*
 * Splits str into an array of substrings.
 *
 * Arguments (both optional, parsed with rb_scan_args "02"):
 *   spat  - separator pattern.  nil/absent falls back to the global rb_fs;
 *           if that is nil too, whitespace ("awk") splitting is used.
 *           A one-character string " " also selects awk splitting; any
 *           other one-character string is quoted and compiled to a regexp;
 *           everything else goes through get_pat().
 *   limit - converted with NUM2INT into lim.  lim <= 0 resets limit to nil
 *           (lim keeps its value and is consulted again at the end);
 *           lim == 1 returns immediately with either an empty array (empty
 *           str) or a one-element array holding str itself.
 *
 * Returns the result array.  When no limit applies and lim == 0, trailing
 * empty strings are popped from the result before returning.
 */
03466 static VALUE
03467 rb_str_split_m(argc, argv, str)
03468 int argc;
03469 VALUE *argv;
03470 VALUE str;
03471 {
03472 VALUE spat;
03473 VALUE limit;
03474 int awk_split = Qfalse;
03475 long beg, end, i = 0;
03476 int lim = 0;
03477 VALUE result, tmp;
03478
03479 if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
03480 lim = NUM2INT(limit);
/* non-positive limit: behave as "no limit" in the loops below */
03481 if (lim <= 0) limit = Qnil;
03482 else if (lim == 1) {
03483 if (RSTRING(str)->len == 0)
03484 return rb_ary_new2(0);
03485 return rb_ary_new3(1, str);
03486 }
/* i counts fields against lim; it starts at 1 when a limit was supplied */
03487 i = 1;
03488 }
03489
03490 if (NIL_P(spat)) {
03491 if (!NIL_P(rb_fs)) {
03492 spat = rb_fs;
/* jumps into the else-branch below to classify the rb_fs value */
03493 goto fs_set;
03494 }
03495 awk_split = Qtrue;
03496 }
03497 else {
03498 fs_set:
03499 if (TYPE(spat) == T_STRING && RSTRING(spat)->len == 1) {
03500 if (RSTRING(spat)->ptr[0] == ' ') {
03501 awk_split = Qtrue;
03502 }
03503 else {
03504 spat = rb_reg_regcomp(rb_reg_quote(spat));
03505 }
03506 }
03507 else {
03508 spat = get_pat(spat, 1);
03509 }
03510 }
03511
03512 result = rb_ary_new();
03513 beg = 0;
03514 if (awk_split) {
/* whitespace splitting: beg/end are byte offsets of the current field */
03515 char *ptr = RSTRING(str)->ptr;
03516 long len = RSTRING(str)->len;
03517 char *eptr = ptr + len;
03518 int skip = 1;
03519
03520 for (end = beg = 0; ptr<eptr; ptr++) {
03521 if (skip) {
/* still skipping whitespace in front of the next field */
03522 if (ISSPACE(*ptr)) {
03523 beg++;
03524 }
03525 else {
03526 end = beg+1;
03527 skip = 0;
/* limit reached: stop here, the tail is appended after the loop */
03528 if (!NIL_P(limit) && lim <= i) break;
03529 }
03530 }
03531 else {
03532 if (ISSPACE(*ptr)) {
03533 rb_ary_push(result, rb_str_substr(str, beg, end-beg));
03534 skip = 1;
03535 beg = end + 1;
03536 if (!NIL_P(limit)) ++i;
03537 }
03538 else {
03539 end++;
03540 }
03541 }
03542 }
03543 }
03544 else {
/* regexp splitting: start is the next search offset, beg the field start */
03545 long start = beg;
03546 long idx;
03547 int last_null = 0;
03548 struct re_registers *regs;
03549
03550 while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
03551 regs = RMATCH(rb_backref_get())->regs;
/* empty match exactly at the search position */
03552 if (start == end && BEG(0) == END(0)) {
03553 if (!RSTRING(str)->ptr) {
03554 rb_ary_push(result, rb_str_new("", 0));
03555 break;
03556 }
03557 else if (last_null == 1) {
/* second empty match in a row: emit one character as a field */
03558 rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING(str)->ptr[beg],spat)));
03559 beg = start;
03560 }
03561 else {
/* first empty match: advance one character and search again */
03562 start += mbclen2(RSTRING(str)->ptr[start],spat);
03563 last_null = 1;
03564 continue;
03565 }
03566 }
03567 else {
03568 rb_ary_push(result, rb_str_substr(str, beg, end-beg));
03569 beg = start = END(0);
03570 }
03571 last_null = 0;
03572
/* capture groups of the separator that took part in the match are appended too */
03573 for (idx=1; idx < regs->num_regs; idx++) {
03574 if (BEG(idx) == -1) continue;
03575 if (BEG(idx) == END(idx))
03576 tmp = rb_str_new5(str, 0, 0);
03577 else
03578 tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx));
03579 rb_ary_push(result, tmp);
03580 }
03581 if (!NIL_P(limit) && lim <= ++i) break;
03582 }
03583 }
/* append whatever follows the last separator (possibly an empty string) */
03584 if (RSTRING(str)->len > 0 && (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0)) {
03585 if (RSTRING(str)->len == beg)
03586 tmp = rb_str_new5(str, 0, 0);
03587 else
03588 tmp = rb_str_substr(str, beg, RSTRING(str)->len-beg);
03589 rb_ary_push(result, tmp);
03590 }
/* no limit argument (or an explicit 0): drop trailing empty fields */
03591 if (NIL_P(limit) && lim == 0) {
03592 while (RARRAY(result)->len > 0 &&
03593 RSTRING(RARRAY(result)->ptr[RARRAY(result)->len-1])->len == 0)
03594 rb_ary_pop(result);
03595 }
03596
03597 return result;
03598 }
03599
03600 VALUE
03601 rb_str_split(str, sep0)
03602 VALUE str;
03603 const char *sep0;
03604 {
03605 VALUE sep;
03606
03607 StringValue(str);
03608 sep = rb_str_new2(sep0);
03609 return rb_str_split_m(1, &sep, str);
03610 }
03611
03612
03613
03614
03615
03616
03617
03618
03619
03620 static VALUE
03621 rb_f_split(argc, argv)
03622 int argc;
03623 VALUE *argv;
03624 {
03625 return rb_str_split_m(argc, argv, uscore_get());
03626 }
03627
03628
03629
03630
03631
03632
03633
03634
03635
03636
03637
03638
03639
03640
03641
03642
03643
03644
03645
03646
03647
03648
03649
03650
03651
03652
03653
03654
03655
03656
03657
03658
03659
03660
03661 static VALUE
03662 rb_str_each_line(argc, argv, str)
03663 int argc;
03664 VALUE *argv;
03665 VALUE str;
03666 {
03667 VALUE rs;
03668 int newline;
03669 char *p = RSTRING(str)->ptr, *pend = p + RSTRING(str)->len, *s;
03670 char *ptr = p;
03671 long len = RSTRING(str)->len, rslen;
03672 VALUE line;
03673
03674 if (rb_scan_args(argc, argv, "01", &rs) == 0) {
03675 rs = rb_rs;
03676 }
03677
03678 if (NIL_P(rs)) {
03679 rb_yield(str);
03680 return str;
03681 }
03682 StringValue(rs);
03683 rslen = RSTRING(rs)->len;
03684 if (rslen == 0) {
03685 newline = '\n';
03686 }
03687 else {
03688 newline = RSTRING(rs)->ptr[rslen-1];
03689 }
03690
03691 for (s = p, p += rslen; p < pend; p++) {
03692 if (rslen == 0 && *p == '\n') {
03693 if (*++p != '\n') continue;
03694 while (*p == '\n') p++;
03695 }
03696 if (RSTRING(str)->ptr < p && p[-1] == newline &&
03697 (rslen <= 1 ||
03698 rb_memcmp(RSTRING(rs)->ptr, p-rslen, rslen) == 0)) {
03699 line = rb_str_new5(str, s, p - s);
03700 OBJ_INFECT(line, str);
03701 rb_yield(line);
03702 str_mod_check(str, ptr, len);
03703 s = p;
03704 }
03705 }
03706
03707 if (s != pend) {
03708 if (p > pend) p = pend;
03709 line = rb_str_new5(str, s, p - s);
03710 OBJ_INFECT(line, str);
03711 rb_yield(line);
03712 }
03713
03714 return str;
03715 }
03716
03717
03718
03719
03720
03721
03722
03723
03724
03725
03726
03727
03728
03729
03730
03731 static VALUE
03732 rb_str_each_byte(str)
03733 VALUE str;
03734 {
03735 long i;
03736
03737 for (i=0; i<RSTRING(str)->len; i++) {
03738 rb_yield(INT2FIX(RSTRING(str)->ptr[i] & 0xff));
03739 }
03740 return str;
03741 }
03742
03743
03744
03745
03746
03747
03748
03749
03750
03751
03752
03753 static VALUE
03754 rb_str_chop_bang(str)
03755 VALUE str;
03756 {
03757 if (RSTRING(str)->len > 0) {
03758 rb_str_modify(str);
03759 RSTRING(str)->len--;
03760 if (RSTRING(str)->ptr[RSTRING(str)->len] == '\n') {
03761 if (RSTRING(str)->len > 0 &&
03762 RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') {
03763 RSTRING(str)->len--;
03764 }
03765 }
03766 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
03767 return str;
03768 }
03769 return Qnil;
03770 }
03771
03772
03773
03774
03775
03776
03777
03778
03779
03780
03781
03782
03783
03784
03785
03786
03787
03788
03789
03790 static VALUE
03791 rb_str_chop(str)
03792 VALUE str;
03793 {
03794 str = rb_str_dup(str);
03795 rb_str_chop_bang(str);
03796 return str;
03797 }
03798
03799
03800
03801
03802
03803
03804
03805
03806
03807
03808
03809
03810
03811
03812
03813
03814
03815
03816
03817 static VALUE
03818 rb_f_chop_bang(str)
03819 VALUE str;
03820 {
03821 return rb_str_chop_bang(uscore_get());
03822 }
03823
03824
03825
03826
03827
03828
03829
03830
03831
03832
03833
03834
03835
03836
03837
03838
03839
03840
03841
03842 static VALUE
03843 rb_f_chop()
03844 {
03845 VALUE str = uscore_get();
03846
03847 if (RSTRING(str)->len > 0) {
03848 str = rb_str_dup(str);
03849 rb_str_chop_bang(str);
03850 rb_lastline_set(str);
03851 }
03852 return str;
03853 }
03854
03855
03856
03857
03858
03859
03860
03861
03862
03863
03864 static VALUE
03865 rb_str_chomp_bang(argc, argv, str)
03866 int argc;
03867 VALUE *argv;
03868 VALUE str;
03869 {
03870 VALUE rs;
03871 int newline;
03872 char *p;
03873 long len, rslen;
03874
03875 if (rb_scan_args(argc, argv, "01", &rs) == 0) {
03876 len = RSTRING(str)->len;
03877 if (len == 0) return Qnil;
03878 p = RSTRING(str)->ptr;
03879 rs = rb_rs;
03880 if (rs == rb_default_rs) {
03881 smart_chomp:
03882 rb_str_modify(str);
03883 if (RSTRING(str)->ptr[len-1] == '\n') {
03884 RSTRING(str)->len--;
03885 if (RSTRING(str)->len > 0 &&
03886 RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') {
03887 RSTRING(str)->len--;
03888 }
03889 }
03890 else if (RSTRING(str)->ptr[len-1] == '\r') {
03891 RSTRING(str)->len--;
03892 }
03893 else {
03894 return Qnil;
03895 }
03896 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
03897 return str;
03898 }
03899 }
03900 if (NIL_P(rs)) return Qnil;
03901 StringValue(rs);
03902 len = RSTRING(str)->len;
03903 if (len == 0) return Qnil;
03904 p = RSTRING(str)->ptr;
03905 rslen = RSTRING(rs)->len;
03906 if (rslen == 0) {
03907 while (len>0 && p[len-1] == '\n') {
03908 len--;
03909 if (len>0 && p[len-1] == '\r')
03910 len--;
03911 }
03912 if (len < RSTRING(str)->len) {
03913 rb_str_modify(str);
03914 RSTRING(str)->len = len;
03915 RSTRING(str)->ptr[len] = '\0';
03916 return str;
03917 }
03918 return Qnil;
03919 }
03920 if (rslen > len) return Qnil;
03921 newline = RSTRING(rs)->ptr[rslen-1];
03922 if (rslen == 1 && newline == '\n')
03923 goto smart_chomp;
03924
03925 if (p[len-1] == newline &&
03926 (rslen <= 1 ||
03927 rb_memcmp(RSTRING(rs)->ptr, p+len-rslen, rslen) == 0)) {
03928 rb_str_modify(str);
03929 RSTRING(str)->len -= rslen;
03930 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
03931 return str;
03932 }
03933 return Qnil;
03934 }
03935
03936
03937
03938
03939
03940
03941
03942
03943
03944
03945
03946
03947
03948
03949
03950
03951
03952
03953
03954
03955
03956 static VALUE
03957 rb_str_chomp(argc, argv, str)
03958 int argc;
03959 VALUE *argv;
03960 VALUE str;
03961 {
03962 str = rb_str_dup(str);
03963 rb_str_chomp_bang(argc, argv, str);
03964 return str;
03965 }
03966
03967
03968
03969
03970
03971
03972
03973
03974
03975
03976
03977
03978
03979
03980
03981
03982 static VALUE
03983 rb_f_chomp_bang(argc, argv)
03984 int argc;
03985 VALUE *argv;
03986 {
03987 return rb_str_chomp_bang(argc, argv, uscore_get());
03988 }
03989
03990
03991
03992
03993
03994
03995
03996
03997
03998
03999
04000
04001
04002
04003
04004
04005
04006
04007 static VALUE
04008 rb_f_chomp(argc, argv)
04009 int argc;
04010 VALUE *argv;
04011 {
04012 VALUE str = uscore_get();
04013 VALUE dup = rb_str_dup(str);
04014
04015 if (NIL_P(rb_str_chomp_bang(argc, argv, dup)))
04016 return str;
04017 rb_lastline_set(dup);
04018 return dup;
04019 }
04020
04021
04022
04023
04024
04025
04026
04027
04028
04029
04030
04031
04032
04033
04034 static VALUE
04035 rb_str_lstrip_bang(str)
04036 VALUE str;
04037 {
04038 char *s, *t, *e;
04039
04040 s = RSTRING(str)->ptr;
04041 if (!s || RSTRING(str)->len == 0) return Qnil;
04042 e = t = s + RSTRING(str)->len;
04043
04044 while (s < t && ISSPACE(*s)) s++;
04045
04046 if (s > RSTRING(str)->ptr) {
04047 rb_str_modify(str);
04048 RSTRING(str)->len = t-s;
04049 memmove(RSTRING(str)->ptr, s, RSTRING(str)->len);
04050 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
04051 return str;
04052 }
04053 return Qnil;
04054 }
04055
04056
04057
04058
04059
04060
04061
04062
04063
04064
04065
04066
04067
04068 static VALUE
04069 rb_str_lstrip(str)
04070 VALUE str;
04071 {
04072 str = rb_str_dup(str);
04073 rb_str_lstrip_bang(str);
04074 return str;
04075 }
04076
04077
04078
04079
04080
04081
04082
04083
04084
04085
04086
04087
04088
04089
04090 static VALUE
04091 rb_str_rstrip_bang(str)
04092 VALUE str;
04093 {
04094 char *s, *t, *e;
04095
04096 s = RSTRING(str)->ptr;
04097 if (!s || RSTRING(str)->len == 0) return Qnil;
04098 e = t = s + RSTRING(str)->len;
04099
04100
04101 while (s < t && t[-1] == '\0') t--;
04102
04103
04104 while (s < t && ISSPACE(*(t-1))) t--;
04105
04106 if (t < e) {
04107 rb_str_modify(str);
04108 RSTRING(str)->len = t-s;
04109 RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
04110 return str;
04111 }
04112 return Qnil;
04113 }
04114
04115
04116
04117
04118
04119
04120
04121
04122
04123
04124
04125
04126
04127 static VALUE
04128 rb_str_rstrip(str)
04129 VALUE str;
04130 {
04131 str = rb_str_dup(str);
04132 rb_str_rstrip_bang(str);
04133 return str;
04134 }
04135
04136
04137
04138
04139
04140
04141
04142
04143
04144
04145 static VALUE
04146 rb_str_strip_bang(str)
04147 VALUE str;
04148 {
04149 VALUE l = rb_str_lstrip_bang(str);
04150 VALUE r = rb_str_rstrip_bang(str);
04151
04152 if (NIL_P(l) && NIL_P(r)) return Qnil;
04153 return str;
04154 }
04155
04156
04157
04158
04159
04160
04161
04162
04163
04164
04165
04166
04167 static VALUE
04168 rb_str_strip(str)
04169 VALUE str;
04170 {
04171 str = rb_str_dup(str);
04172 rb_str_strip_bang(str);
04173 return str;
04174 }
04175
04176 static VALUE
04177 scan_once(str, pat, start)
04178 VALUE str, pat;
04179 long *start;
04180 {
04181 VALUE result, match;
04182 struct re_registers *regs;
04183 long i;
04184
04185 if (rb_reg_search(pat, str, *start, 0) >= 0) {
04186 match = rb_backref_get();
04187 regs = RMATCH(match)->regs;
04188 if (BEG(0) == END(0)) {
04189
04190
04191
04192 if (RSTRING(str)->len > END(0))
04193 *start = END(0)+mbclen2(RSTRING(str)->ptr[END(0)],pat);
04194 else
04195 *start = END(0)+1;
04196 }
04197 else {
04198 *start = END(0);
04199 }
04200 if (regs->num_regs == 1) {
04201 return rb_reg_nth_match(0, match);
04202 }
04203 result = rb_ary_new2(regs->num_regs);
04204 for (i=1; i < regs->num_regs; i++) {
04205 rb_ary_push(result, rb_reg_nth_match(i, match));
04206 }
04207
04208 return result;
04209 }
04210 return Qnil;
04211 }
04212
04213
04214
04215
04216
04217
04218
04219
04220
04221
04222
04223
04224
04225
04226
04227
04228
04229
04230
04231
04232
04233
04234
04235
04236
04237
04238
04239
04240
04241
04242
04243
04244
04245 static VALUE
04246 rb_str_scan(str, pat)
04247 VALUE str, pat;
04248 {
04249 VALUE result;
04250 long start = 0;
04251 VALUE match = Qnil;
04252
04253 pat = get_pat(pat, 1);
04254 if (!rb_block_given_p()) {
04255 VALUE ary = rb_ary_new();
04256
04257 while (!NIL_P(result = scan_once(str, pat, &start))) {
04258 match = rb_backref_get();
04259 rb_ary_push(ary, result);
04260 }
04261 rb_backref_set(match);
04262 return ary;
04263 }
04264
04265 while (!NIL_P(result = scan_once(str, pat, &start))) {
04266 match = rb_backref_get();
04267 rb_match_busy(match);
04268 rb_yield(result);
04269 rb_backref_set(match);
04270 }
04271 rb_backref_set(match);
04272 return str;
04273 }
04274
04275
04276
04277
04278
04279
04280
04281
04282
04283
04284 static VALUE
04285 rb_f_scan(self, pat)
04286 VALUE self, pat;
04287 {
04288 return rb_str_scan(uscore_get(), pat);
04289 }
04290
04291
04292
04293
04294
04295
04296
04297
04298
04299
04300
04301
04302
04303
04304
04305
04306 static VALUE
04307 rb_str_hex(str)
04308 VALUE str;
04309 {
04310 return rb_str_to_inum(str, 16, Qfalse);
04311 }
04312
04313
04314
04315
04316
04317
04318
04319
04320
04321
04322
04323
04324
04325
04326
04327
04328 static VALUE
04329 rb_str_oct(str)
04330 VALUE str;
04331 {
04332 return rb_str_to_inum(str, -8, Qfalse);
04333 }
04334
04335
04336
04337
04338
04339
04340
04341
04342
04343
04344
04345
04346 static VALUE
04347 rb_str_crypt(str, salt)
04348 VALUE str, salt;
04349 {
04350 extern char *crypt();
04351 VALUE result;
04352 char *s;
04353
04354 StringValue(salt);
04355 if (RSTRING(salt)->len < 2)
04356 rb_raise(rb_eArgError, "salt too short(need >=2 bytes)");
04357
04358 if (RSTRING(str)->ptr) s = RSTRING(str)->ptr;
04359 else s = "";
04360 result = rb_str_new2(crypt(s, RSTRING(salt)->ptr));
04361 OBJ_INFECT(result, str);
04362 OBJ_INFECT(result, salt);
04363 return result;
04364 }
04365
04366
04367
04368
04369
04370
04371
04372
04373
04374
04375
04376
04377
04378
04379
04380
04381
04382
04383
04384
04385
04386
04387 VALUE
04388 rb_str_intern(s)
04389 VALUE s;
04390 {
04391 volatile VALUE str = s;
04392 ID id;
04393
04394 if (!RSTRING(str)->ptr || RSTRING(str)->len == 0) {
04395 rb_raise(rb_eArgError, "interning empty string");
04396 }
04397 if (strlen(RSTRING(str)->ptr) != RSTRING(str)->len)
04398 rb_raise(rb_eArgError, "symbol string may not contain `\\0'");
04399 id = rb_intern(RSTRING(str)->ptr);
04400 return ID2SYM(id);
04401 }
04402
04403
04404
04405
04406
04407
04408
04409
04410
04411
04412
04413
04414
04415 static VALUE
04416 rb_str_sum(argc, argv, str)
04417 int argc;
04418 VALUE *argv;
04419 VALUE str;
04420 {
04421 VALUE vbits;
04422 int bits;
04423 char *ptr, *p, *pend;
04424 long len;
04425
04426 if (rb_scan_args(argc, argv, "01", &vbits) == 0) {
04427 bits = 16;
04428 }
04429 else bits = NUM2INT(vbits);
04430
04431 ptr = p = RSTRING(str)->ptr;
04432 len = RSTRING(str)->len;
04433 pend = p + len;
04434 if (bits >= sizeof(long)*CHAR_BIT) {
04435 VALUE sum = INT2FIX(0);
04436
04437 while (p < pend) {
04438 str_mod_check(str, ptr, len);
04439 sum = rb_funcall(sum, '+', 1, INT2FIX((unsigned char)*p));
04440 p++;
04441 }
04442 if (bits != 0) {
04443 VALUE mod;
04444
04445 mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
04446 mod = rb_funcall(mod, '-', 1, INT2FIX(1));
04447 sum = rb_funcall(sum, '&', 1, mod);
04448 }
04449 return sum;
04450 }
04451 else {
04452 unsigned long sum = 0;
04453
04454 while (p < pend) {
04455 str_mod_check(str, ptr, len);
04456 sum += (unsigned char)*p;
04457 p++;
04458 }
04459 if (bits != 0) {
04460 sum &= (((unsigned long)1)<<bits)-1;
04461 }
04462 return rb_int2inum(sum);
04463 }
04464 }
04465
04466 static VALUE
04467 rb_str_justify(argc, argv, str, jflag)
04468 int argc;
04469 VALUE *argv;
04470 VALUE str;
04471 char jflag;
04472 {
04473 VALUE w;
04474 long width, flen = 0;
04475 VALUE res;
04476 char *p, *pend, *f = " ";
04477 long n;
04478 VALUE pad;
04479
04480 rb_scan_args(argc, argv, "11", &w, &pad);
04481 width = NUM2LONG(w);
04482 if (argc == 2) {
04483 StringValue(pad);
04484 f = RSTRING(pad)->ptr;
04485 flen = RSTRING(pad)->len;
04486 if (flen == 0) {
04487 rb_raise(rb_eArgError, "zero width padding");
04488 }
04489 }
04490 if (width < 0 || RSTRING(str)->len >= width) return rb_str_dup(str);
04491 res = rb_str_new5(str, 0, width);
04492 p = RSTRING(res)->ptr;
04493 if (jflag != 'l') {
04494 n = width - RSTRING(str)->len;
04495 pend = p + ((jflag == 'r') ? n : n/2);
04496 if (flen <= 1) {
04497 while (p < pend) {
04498 *p++ = *f;
04499 }
04500 }
04501 else {
04502 char *q = f;
04503 while (p + flen <= pend) {
04504 memcpy(p,f,flen);
04505 p += flen;
04506 }
04507 while (p < pend) {
04508 *p++ = *q++;
04509 }
04510 }
04511 }
04512 memcpy(p, RSTRING(str)->ptr, RSTRING(str)->len);
04513 if (jflag != 'r') {
04514 p += RSTRING(str)->len; pend = RSTRING(res)->ptr + width;
04515 if (flen <= 1) {
04516 while (p < pend) {
04517 *p++ = *f;
04518 }
04519 }
04520 else {
04521 while (p + flen <= pend) {
04522 memcpy(p,f,flen);
04523 p += flen;
04524 }
04525 while (p < pend) {
04526 *p++ = *f++;
04527 }
04528 }
04529 }
04530 OBJ_INFECT(res, str);
04531 if (flen > 0) OBJ_INFECT(res, pad);
04532 return res;
04533 }
04534
04535
04536
04537
04538
04539
04540
04541
04542
04543
04544
04545
04546
04547
04548
04549 static VALUE
04550 rb_str_ljust(argc, argv, str)
04551 int argc;
04552 VALUE *argv;
04553 VALUE str;
04554 {
04555 return rb_str_justify(argc, argv, str, 'l');
04556 }
04557
04558
04559
04560
04561
04562
04563
04564
04565
04566
04567
04568
04569
04570
04571
04572 static VALUE
04573 rb_str_rjust(argc, argv, str)
04574 int argc;
04575 VALUE *argv;
04576 VALUE str;
04577 {
04578 return rb_str_justify(argc, argv, str, 'r');
04579 }
04580
04581
04582
04583
04584
04585
04586
04587
04588
04589
04590
04591
04592
04593
04594
04595 static VALUE
04596 rb_str_center(argc, argv, str)
04597 int argc;
04598 VALUE *argv;
04599 VALUE str;
04600 {
04601 return rb_str_justify(argc, argv, str, 'c');
04602 }
04603
04604 void
04605 rb_str_setter(val, id, var)
04606 VALUE val;
04607 ID id;
04608 VALUE *var;
04609 {
04610 if (!NIL_P(val) && TYPE(val) != T_STRING) {
04611 rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
04612 }
04613 *var = val;
04614 }
04615
04616
04617
04618
04619
04620
04621
04622
04623
04624
04625
04626
04627
04628
04629
/*
 * Sets up the String class: defines the class under rb_cObject, mixes in
 * rb_mComparable and rb_mEnumerable, installs str_alloc as allocator and
 * registers the instance methods, the global functions and the
 * field-separator variables listed below.
 *
 * The last argument of rb_define_method / rb_define_global_function is
 * the arity; the handlers registered with -1 that are defined in this
 * file take (argc, argv, str).
 */
04630 void
04631 Init_String()
04632 {
04633 rb_cString = rb_define_class("String", rb_cObject);
04634 rb_include_module(rb_cString, rb_mComparable);
04635 rb_include_module(rb_cString, rb_mEnumerable);
04636 rb_define_alloc_func(rb_cString, str_alloc);
/* construction, comparison, operators, indexing, searching */
04637 rb_define_method(rb_cString, "initialize", rb_str_init, -1);
04638 rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
04639 rb_define_method(rb_cString, "<=>", rb_str_cmp_m, 1);
04640 rb_define_method(rb_cString, "==", rb_str_equal, 1);
04641 rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
04642 rb_define_method(rb_cString, "hash", rb_str_hash_m, 0);
04643 rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
04644 rb_define_method(rb_cString, "+", rb_str_plus, 1);
04645 rb_define_method(rb_cString, "*", rb_str_times, 1);
04646 rb_define_method(rb_cString, "%", rb_str_format, 1);
04647 rb_define_method(rb_cString, "[]", rb_str_aref_m, -1);
04648 rb_define_method(rb_cString, "[]=", rb_str_aset_m, -1);
04649 rb_define_method(rb_cString, "insert", rb_str_insert, 2);
/* "length" and "size" share one implementation */
04650 rb_define_method(rb_cString, "length", rb_str_length, 0);
04651 rb_define_method(rb_cString, "size", rb_str_length, 0);
04652 rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
04653 rb_define_method(rb_cString, "=~", rb_str_match, 1);
04654 rb_define_method(rb_cString, "match", rb_str_match_m, 1);
/* "next"/"next!" are aliases of "succ"/"succ!" */
04655 rb_define_method(rb_cString, "succ", rb_str_succ, 0);
04656 rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0);
04657 rb_define_method(rb_cString, "next", rb_str_succ, 0);
04658 rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0);
04659 rb_define_method(rb_cString, "upto", rb_str_upto_m, 1);
04660 rb_define_method(rb_cString, "index", rb_str_index_m, -1);
04661 rb_define_method(rb_cString, "rindex", rb_str_rindex_m, -1);
04662 rb_define_method(rb_cString, "replace", rb_str_replace, 1);
04663
/* conversions and printable representations */
04664 rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
04665 rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
04666 rb_define_method(rb_cString, "to_s", rb_str_to_s, 0);
04667 rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
04668 rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
04669 rb_define_method(rb_cString, "dump", rb_str_dump, 0);
04670
/* case conversion, copying variants */
04671 rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
04672 rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
04673 rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
04674 rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
04675
/* case conversion, in-place variants */
04676 rb_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0);
04677 rb_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0);
04678 rb_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0);
04679 rb_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0);
04680
04681 rb_define_method(rb_cString, "hex", rb_str_hex, 0);
04682 rb_define_method(rb_cString, "oct", rb_str_oct, 0);
04683 rb_define_method(rb_cString, "split", rb_str_split_m, -1);
04684 rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
04685 rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
/* "concat" and "<<" share one implementation */
04686 rb_define_method(rb_cString, "concat", rb_str_concat, 1);
04687 rb_define_method(rb_cString, "<<", rb_str_concat, 1);
04688 rb_define_method(rb_cString, "crypt", rb_str_crypt, 1);
/* "intern" and "to_sym" share one implementation */
04689 rb_define_method(rb_cString, "intern", rb_str_intern, 0);
04690 rb_define_method(rb_cString, "to_sym", rb_str_intern, 0);
04691
04692 rb_define_method(rb_cString, "include?", rb_str_include, 1);
04693
04694 rb_define_method(rb_cString, "scan", rb_str_scan, 1);
04695
/* padding: all three end up in rb_str_justify() */
04696 rb_define_method(rb_cString, "ljust", rb_str_ljust, -1);
04697 rb_define_method(rb_cString, "rjust", rb_str_rjust, -1);
04698 rb_define_method(rb_cString, "center", rb_str_center, -1);
04699
/* substitution and trimming, copying variants */
04700 rb_define_method(rb_cString, "sub", rb_str_sub, -1);
04701 rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
04702 rb_define_method(rb_cString, "chop", rb_str_chop, 0);
04703 rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
04704 rb_define_method(rb_cString, "strip", rb_str_strip, 0);
04705 rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
04706 rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
04707
/* substitution and trimming, in-place variants */
04708 rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1);
04709 rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1);
04710 rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0);
04711 rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1);
04712 rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0);
04713 rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0);
04714 rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0);
04715
/* character-set operations, copying variants */
04716 rb_define_method(rb_cString, "tr", rb_str_tr, 2);
04717 rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2);
04718 rb_define_method(rb_cString, "delete", rb_str_delete, -1);
04719 rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
04720 rb_define_method(rb_cString, "count", rb_str_count, -1);
04721
/* character-set operations, in-place variants */
04722 rb_define_method(rb_cString, "tr!", rb_str_tr_bang, 2);
04723 rb_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2);
04724 rb_define_method(rb_cString, "delete!", rb_str_delete_bang, -1);
04725 rb_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1);
04726
/* iteration; "each" is an alias of "each_line" */
04727 rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
04728 rb_define_method(rb_cString, "each", rb_str_each_line, -1);
04729 rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
04730
04731 rb_define_method(rb_cString, "sum", rb_str_sum, -1);
04732
/* global functions; the rb_f_* handlers defined in this file work on uscore_get() */
04733 rb_define_global_function("sub", rb_f_sub, -1);
04734 rb_define_global_function("gsub", rb_f_gsub, -1);
04735
04736 rb_define_global_function("sub!", rb_f_sub_bang, -1);
04737 rb_define_global_function("gsub!", rb_f_gsub_bang, -1);
04738
04739 rb_define_global_function("chop", rb_f_chop, 0);
04740 rb_define_global_function("chop!", rb_f_chop_bang, 0);
04741
04742 rb_define_global_function("chomp", rb_f_chomp, -1);
04743 rb_define_global_function("chomp!", rb_f_chomp_bang, -1);
04744
04745 rb_define_global_function("split", rb_f_split, -1);
04746 rb_define_global_function("scan", rb_f_scan, 1);
04747
/* "slice" shares the implementation of "[]" */
04748 rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
04749 rb_define_method(rb_cString, "slice!", rb_str_slice_bang, -1);
04750
04751 id_to_s = rb_intern("to_s");
04752
/* default field separator consulted by rb_str_split_m(); both variables name the same storage */
04753 rb_fs = Qnil;
04754 rb_define_variable("$;", &rb_fs);
04755 rb_define_variable("$-F", &rb_fs);
04756 }
04757