Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Globals

string.c

Go to the documentation of this file.
00001 /**********************************************************************
00002 
00003   string.c -
00004 
00005   $Author: matz $
00006   $Date: 2005/10/27 08:19:20 $
00007   created at: Mon Aug  9 17:12:58 JST 1993
00008 
00009   Copyright (C) 1993-2003 Yukihiro Matsumoto
00010   Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
00011   Copyright (C) 2000  Information-technology Promotion Agency, Japan
00012 
00013 **********************************************************************/
00014 
00015 #include "ruby.h"
00016 #include "re.h"
00017 
00018 #define BEG(no) regs->beg[no]
00019 #define END(no) regs->end[no]
00020 
00021 #include <math.h>
00022 #include <ctype.h>
00023 
00024 #ifdef HAVE_UNISTD_H
00025 #include <unistd.h>
00026 #endif
00027 
00028 VALUE rb_cString;
00029 
00030 #define STR_TMPLOCK FL_USER1
00031 #define STR_ASSOC   FL_USER3
00032 #define STR_NOCAPA  (ELTS_SHARED|STR_ASSOC)
00033 
/*
 * RESIZE_CAPA(str, capacity): reallocate str's buffer to capacity+1 chars
 * with REALLOC_N, then record the new capacity in aux.capa unless one of
 * the STR_NOCAPA flags (ELTS_SHARED or STR_ASSOC) is set on str.
 * Both arguments are expanded more than once, so pass side-effect-free
 * expressions.
 */
00034 #define RESIZE_CAPA(str,capacity) do {\
00035     REALLOC_N(RSTRING(str)->ptr, char, (capacity)+1);\
00036     if (!FL_TEST(str, STR_NOCAPA))\
00037         RSTRING(str)->aux.capa = (capacity);\
00038 } while (0)
00039 
00040 VALUE rb_fs;
00041 
00042 static inline void
00043 str_mod_check(s, p, len)
00044     VALUE s;
00045     char *p;
00046     long len;
00047 {
00048     if (RSTRING(s)->ptr != p || RSTRING(s)->len != len) {
00049         rb_raise(rb_eRuntimeError, "string modified");
00050     }
00051 }
00052 
00053 static inline void
00054 str_frozen_check(s)
00055     VALUE s;
00056 {
00057     if (OBJ_FROZEN(s)) {
00058         rb_raise(rb_eRuntimeError, "string frozen");
00059     }
00060 }
00061 
00062 static VALUE str_alloc (VALUE);
00063 static VALUE
00064 str_alloc(klass)
00065     VALUE klass;
00066 {
00067     NEWOBJ(str, struct RString);
00068     OBJSETUP(str, klass, T_STRING);
00069 
00070     str->ptr = 0;
00071     str->len = 0;
00072     str->aux.capa = 0;
00073 
00074     return (VALUE)str;
00075 }
00076 
00077 static VALUE
00078 str_new(klass, ptr, len)
00079     VALUE klass;
00080     const char *ptr;
00081     long len;
00082 {
00083     VALUE str;
00084 
00085     if (len < 0) {
00086         rb_raise(rb_eArgError, "negative string size (or size too big)");
00087     }
00088 
00089     str = str_alloc(klass);
00090     RSTRING(str)->len = len;
00091     RSTRING(str)->aux.capa = len;
00092     RSTRING(str)->ptr = ALLOC_N(char,len+1);
00093     if (ptr) {
00094         memcpy(RSTRING(str)->ptr, ptr, len);
00095     }
00096     RSTRING(str)->ptr[len] = '\0';
00097     return str;
00098 }
00099 
00100 VALUE
00101 rb_str_new(ptr, len)
00102     const char *ptr;
00103     long len;
00104 {
00105     return str_new(rb_cString, ptr, len);
00106 }
00107 
00108 VALUE
00109 rb_str_new2(ptr)
00110     const char *ptr;
00111 {
00112     if (!ptr) {
00113         rb_raise(rb_eArgError, "NULL pointer given");
00114     }
00115     return rb_str_new(ptr, strlen(ptr));
00116 }
00117 
00118 VALUE
00119 rb_tainted_str_new(ptr, len)
00120     const char *ptr;
00121     long len;
00122 {
00123     VALUE str = rb_str_new(ptr, len);
00124 
00125     OBJ_TAINT(str);
00126     return str;
00127 }
00128 
00129 VALUE
00130 rb_tainted_str_new2(ptr)
00131     const char *ptr;
00132 {
00133     VALUE str = rb_str_new2(ptr);
00134 
00135     OBJ_TAINT(str);
00136     return str;
00137 }
00138 
00139 static VALUE
00140 str_new3(klass, str)
00141     VALUE klass, str;
00142 {
00143     VALUE str2 = str_alloc(klass);
00144 
00145     RSTRING(str2)->len = RSTRING(str)->len;
00146     RSTRING(str2)->ptr = RSTRING(str)->ptr;
00147     RSTRING(str2)->aux.shared = str;
00148     FL_SET(str2, ELTS_SHARED);
00149     OBJ_INFECT(str2, str);
00150 
00151     return str2;
00152 }
00153 
00154 VALUE
00155 rb_str_new3(str)
00156     VALUE str;
00157 {
00158     return str_new3(rb_obj_class(str), str);
00159 }
00160 
00161 static VALUE
00162 str_new4(klass, str)
00163     VALUE klass, str;
00164 {
00165     VALUE str2 = str_alloc(klass);
00166 
00167     RSTRING(str2)->len = RSTRING(str)->len;
00168     RSTRING(str2)->ptr = RSTRING(str)->ptr;
00169     if (FL_TEST(str, ELTS_SHARED)) {
00170         FL_SET(str2, ELTS_SHARED);
00171         RSTRING(str2)->aux.shared = RSTRING(str)->aux.shared;
00172     }
00173     else {
00174         FL_SET(str, ELTS_SHARED);
00175         RSTRING(str)->aux.shared = str2;
00176     }
00177 
00178     return str2;
00179 }
00180 
00181 VALUE
00182 rb_str_new4(orig)
00183     VALUE orig;
00184 {
00185     VALUE klass, str;
00186 
00187     if (OBJ_FROZEN(orig)) return orig;
00188     klass = rb_obj_class(orig);
00189     if (FL_TEST(orig, ELTS_SHARED) && (str = RSTRING(orig)->aux.shared) && klass == RBASIC(str)->klass) {
00190         long ofs;
00191         ofs = RSTRING(str)->len - RSTRING(orig)->len;
00192         if (ofs > 0) {
00193             str = str_new3(klass, str);
00194             RSTRING(str)->ptr += ofs;
00195             RSTRING(str)->len -= ofs;
00196         }
00197     }
00198     else if (FL_TEST(orig, STR_ASSOC)) {
00199         str = str_new(klass, RSTRING(orig)->ptr, RSTRING(orig)->len);
00200     }
00201     else {
00202         str = str_new4(klass, orig);
00203     }
00204     OBJ_INFECT(str, orig);
00205     OBJ_FREEZE(str);
00206     return str;
00207 }
00208 
00209 VALUE
00210 rb_str_new5(obj, ptr, len)
00211     VALUE obj;
00212     const char *ptr;
00213     long len;
00214 {
00215     return str_new(rb_obj_class(obj), ptr, len);
00216 }
00217 
00218 #define STR_BUF_MIN_SIZE 128
00219 
00220 VALUE
00221 rb_str_buf_new(capa)
00222     long capa;
00223 {
00224     VALUE str = str_alloc(rb_cString);
00225 
00226     if (capa < STR_BUF_MIN_SIZE) {
00227         capa = STR_BUF_MIN_SIZE;
00228     }
00229     RSTRING(str)->ptr = 0;
00230     RSTRING(str)->len = 0;
00231     RSTRING(str)->aux.capa = capa;
00232     RSTRING(str)->ptr = ALLOC_N(char, capa+1);
00233     RSTRING(str)->ptr[0] = '\0';
00234 
00235     return str;
00236 }
00237 
00238 VALUE
00239 rb_str_buf_new2(ptr)
00240     const char *ptr;
00241 {
00242     VALUE str;
00243     long len = strlen(ptr);
00244 
00245     str = rb_str_buf_new(len);
00246     rb_str_buf_cat(str, ptr, len);
00247 
00248     return str;
00249 }
00250 
00251 VALUE
00252 rb_str_to_str(str)
00253     VALUE str;
00254 {
00255     return rb_convert_type(str, T_STRING, "String", "to_str");
00256 }
00257 
00258 static void
00259 rb_str_shared_replace(str, str2)
00260     VALUE str, str2;
00261 {
00262     if (str == str2) return;
00263     rb_str_modify(str);
00264     if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr);
00265     if (NIL_P(str2)) {
00266         RSTRING(str)->ptr = 0;
00267         RSTRING(str)->len = 0;
00268         RSTRING(str)->aux.capa = 0;
00269         FL_UNSET(str, STR_NOCAPA);
00270         return;
00271     }
00272     RSTRING(str)->ptr = RSTRING(str2)->ptr;
00273     RSTRING(str)->len = RSTRING(str2)->len;
00274     FL_UNSET(str, STR_NOCAPA);
00275     if (FL_TEST(str2, STR_NOCAPA)) {
00276         FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
00277         RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
00278     }
00279     else {
00280         RSTRING(str)->aux.capa = RSTRING(str2)->aux.capa;
00281     }
00282     RSTRING(str2)->ptr = 0;     /* abandon str2 */
00283     RSTRING(str2)->len = 0;
00284     RSTRING(str2)->aux.capa = 0;
00285     FL_UNSET(str2, STR_NOCAPA);
00286     if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
00287 }
00288 
00289 static ID id_to_s;
00290 
00291 VALUE
00292 rb_obj_as_string(obj)
00293     VALUE obj;
00294 {
00295     VALUE str;
00296 
00297     if (TYPE(obj) == T_STRING) {
00298         return obj;
00299     }
00300     str = rb_funcall(obj, id_to_s, 0);
00301     if (TYPE(str) != T_STRING)
00302         return rb_any_to_s(obj);
00303     if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
00304     return str;
00305 }
00306 
00307 static VALUE rb_str_replace (VALUE, VALUE);
00308 
00309 VALUE
00310 rb_str_dup(str)
00311     VALUE str;
00312 {
00313     VALUE dup = str_alloc(rb_obj_class(str));
00314     rb_str_replace(dup, str);
00315     return dup;
00316 }
00317 
00318 
00319 /*
00320  *  call-seq:
00321  *     String.new(str="")   => new_str
00322  *  
00323  *  Returns a new string object containing a copy of <i>str</i>.
00324  */
00325 
00326 static VALUE
00327 rb_str_init(argc, argv, str)
00328     int argc;
00329     VALUE *argv;
00330     VALUE str;
00331 {
00332     VALUE orig;
00333 
00334     if (rb_scan_args(argc, argv, "01", &orig) == 1)
00335         rb_str_replace(str, orig);
00336     return str;
00337 }
00338 
00339 /*
00340  *  call-seq:
00341  *     str.length   => integer
00342  *  
00343  *  Returns the length of <i>str</i>.
00344  */
00345 
00346 static VALUE
00347 rb_str_length(str)
00348     VALUE str;
00349 {
00350     return LONG2NUM(RSTRING(str)->len);
00351 }
00352 
00353 /*
00354  *  call-seq:
00355  *     str.empty?   => true or false
00356  *  
00357  *  Returns <code>true</code> if <i>str</i> has a length of zero.
00358  *     
00359  *     "hello".empty?   #=> false
00360  *     "".empty?        #=> true
00361  */
00362 
00363 static VALUE
00364 rb_str_empty(str)
00365     VALUE str;
00366 {
00367     if (RSTRING(str)->len == 0)
00368         return Qtrue;
00369     return Qfalse;
00370 }
00371 
00372 /*
00373  *  call-seq:
00374  *     str + other_str   => new_str
00375  *  
00376  *  Concatenation---Returns a new <code>String</code> containing
00377  *  <i>other_str</i> concatenated to <i>str</i>.
00378  *     
00379  *     "Hello from " + self.to_s   #=> "Hello from main"
00380  */
00381 
00382 VALUE
00383 rb_str_plus(str1, str2)
00384     VALUE str1, str2;
00385 {
00386     VALUE str3;
00387 
00388     StringValue(str2);
00389     str3 = rb_str_new(0, RSTRING(str1)->len+RSTRING(str2)->len);
00390     memcpy(RSTRING(str3)->ptr, RSTRING(str1)->ptr, RSTRING(str1)->len);
00391     memcpy(RSTRING(str3)->ptr + RSTRING(str1)->len,
00392            RSTRING(str2)->ptr, RSTRING(str2)->len);
00393     RSTRING(str3)->ptr[RSTRING(str3)->len] = '\0';
00394 
00395     if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
00396         OBJ_TAINT(str3);
00397     return str3;
00398 }
00399 
00400 /*
00401  *  call-seq:
00402  *     str * integer   => new_str
00403  *  
00404  *  Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
00405  *  the receiver.
00406  *     
00407  *     "Ho! " * 3   #=> "Ho! Ho! Ho! "
00408  */
00409 
00410 VALUE
00411 rb_str_times(str, times)
00412     VALUE str;
00413     VALUE times;
00414 {
00415     VALUE str2;
00416     long i, len;
00417 
00418     len = NUM2LONG(times);
00419     if (len < 0) {
00420         rb_raise(rb_eArgError, "negative argument");
00421     }
00422     if (len && LONG_MAX/len <  RSTRING(str)->len) {
00423         rb_raise(rb_eArgError, "argument too big");
00424     }
00425 
00426     str2 = rb_str_new5(str,0, len *= RSTRING(str)->len);
00427     for (i = 0; i < len; i += RSTRING(str)->len) {
00428         memcpy(RSTRING(str2)->ptr + i,
00429                RSTRING(str)->ptr, RSTRING(str)->len);
00430     }
00431     RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0';
00432 
00433     OBJ_INFECT(str2, str);
00434 
00435     return str2;
00436 }
00437 
00438 /*
00439  *  call-seq:
00440  *     str % arg   => new_str
00441  *  
00442  *  Format---Uses <i>str</i> as a format specification, and returns the result
00443  *  of applying it to <i>arg</i>. If the format specification contains more than
00444  *  one substitution, then <i>arg</i> must be an <code>Array</code> containing
00445  *  the values to be substituted. See <code>Kernel::sprintf</code> for details
00446  *  of the format string.
00447  *     
00448  *     "%05d" % 123                       #=> "00123"
00449  *     "%-5s: %08x" % [ "ID", self.id ]   #=> "ID   : 200e14d6"
00450  */
00451 
00452 static VALUE
00453 rb_str_format(str, arg)
00454     VALUE str, arg;
00455 {
00456     VALUE *argv;
00457 
00458     if (TYPE(arg) == T_ARRAY) {
00459         argv = ALLOCA_N(VALUE, RARRAY(arg)->len + 1);
00460         argv[0] = str;
00461         MEMCPY(argv+1, RARRAY(arg)->ptr, VALUE, RARRAY(arg)->len);
00462         return rb_f_sprintf(RARRAY(arg)->len+1, argv);
00463     }
00464 
00465     argv = ALLOCA_N(VALUE, 2);
00466     argv[0] = str;
00467     argv[1] = arg;
00468     return rb_f_sprintf(2, argv);
00469 }
00470 
00471 static int
00472 str_independent(str)
00473     VALUE str;
00474 {
00475     if (FL_TEST(str, STR_TMPLOCK)) {
00476         rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
00477     }
00478     if (OBJ_FROZEN(str)) rb_error_frozen("string");
00479     if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
00480         rb_raise(rb_eSecurityError, "Insecure: can't modify string");
00481     if (!FL_TEST(str, ELTS_SHARED)) return 1;
00482     return 0;
00483 }
00484 
00485 static void
00486 str_make_independent(str)
00487     VALUE str;
00488 {
00489     char *ptr;
00490 
00491     ptr = ALLOC_N(char, RSTRING(str)->len+1);
00492     if (RSTRING(str)->ptr) {
00493         memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len);
00494     }
00495     ptr[RSTRING(str)->len] = 0;
00496     RSTRING(str)->ptr = ptr;
00497     RSTRING(str)->aux.capa = RSTRING(str)->len;
00498     FL_UNSET(str, STR_NOCAPA);
00499 }
00500 
00501 void
00502 rb_str_modify(str)
00503     VALUE str;
00504 {
00505     if (!str_independent(str))
00506         str_make_independent(str);
00507 }
00508 
00509 void
00510 rb_str_associate(str, add)
00511     VALUE str, add;
00512 {
00513     if (FL_TEST(str, STR_ASSOC)) {
00514         /* already associated */
00515         rb_ary_concat(RSTRING(str)->aux.shared, add);
00516     }
00517     else {
00518         if (FL_TEST(str, ELTS_SHARED)) {
00519             str_make_independent(str);
00520         }
00521         else if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
00522             RESIZE_CAPA(str, RSTRING(str)->len);
00523         }
00524         RSTRING(str)->aux.shared = add;
00525         FL_SET(str, STR_ASSOC);
00526     }
00527 }
00528 
00529 VALUE
00530 rb_str_associated(str)
00531     VALUE str;
00532 {
00533     if (FL_TEST(str, STR_ASSOC)) {
00534         return RSTRING(str)->aux.shared;
00535     }
00536     return Qfalse;
00537 }
00538 
00539 static char *null_str = "";
00540 
00541 VALUE
00542 rb_string_value(ptr)
00543     volatile VALUE *ptr;
00544 {
00545     VALUE s = *ptr;
00546     if (TYPE(s) != T_STRING) {
00547         s = rb_str_to_str(s);
00548         *ptr = s;
00549     }
00550     if (!RSTRING(s)->ptr) {
00551         FL_SET(s, ELTS_SHARED);
00552         RSTRING(s)->ptr = null_str;
00553     }
00554     return s;
00555 }
00556 
00557 char *
00558 rb_string_value_ptr(ptr)
00559     volatile VALUE *ptr;
00560 {
00561     return RSTRING(rb_string_value(ptr))->ptr;
00562 }
00563 
00564 char *
00565 rb_string_value_cstr(ptr)
00566     volatile VALUE *ptr;
00567 {
00568     VALUE str = rb_string_value(ptr);
00569     char *s = RSTRING(str)->ptr;
00570 
00571     if (!s || RSTRING(str)->len != strlen(s)) {
00572         rb_raise(rb_eArgError, "string contains null byte");
00573     }
00574     return s;
00575 }
00576 
00577 VALUE
00578 rb_check_string_type(str)
00579     VALUE str;
00580 {
00581     str = rb_check_convert_type(str, T_STRING, "String", "to_str");
00582     if (!NIL_P(str) && !RSTRING(str)->ptr) {
00583         FL_SET(str, ELTS_SHARED);
00584         RSTRING(str)->ptr = null_str;
00585     }
00586     return str;
00587 }
00588 
00589 VALUE
00590 rb_str_substr(str, beg, len)
00591     VALUE str;
00592     long beg, len;
00593 {
00594     VALUE str2;
00595 
00596     if (len < 0) return Qnil;
00597     if (beg > RSTRING(str)->len) return Qnil;
00598     if (beg < 0) {
00599         beg += RSTRING(str)->len;
00600         if (beg < 0) return Qnil;
00601     }
00602     if (beg + len > RSTRING(str)->len) {
00603         len = RSTRING(str)->len - beg;
00604     }
00605     if (len < 0) {
00606         len = 0;
00607     }
00608     if (len == 0) {
00609         str2 = rb_str_new5(str,0,0);
00610     }
00611     else if (len > sizeof(struct RString)/2 &&
00612         beg + len == RSTRING(str)->len && !FL_TEST(str, STR_ASSOC)) {
00613         str2 = rb_str_new3(rb_str_new4(str));
00614         RSTRING(str2)->ptr += RSTRING(str2)->len - len;
00615         RSTRING(str2)->len = len;
00616     }
00617     else {
00618         str2 = rb_str_new5(str, RSTRING(str)->ptr+beg, len);
00619     }
00620     OBJ_INFECT(str2, str);
00621 
00622     return str2;
00623 }
00624 
00625 VALUE
00626 rb_str_freeze(str)
00627     VALUE str;
00628 {
00629     return rb_obj_freeze(str);
00630 }
00631 
00632 VALUE
00633 rb_str_dup_frozen(str)
00634     VALUE str;
00635 {
00636     if (FL_TEST(str, ELTS_SHARED) && RSTRING(str)->aux.shared) {
00637         VALUE shared = RSTRING(str)->aux.shared;
00638         if (RSTRING(shared)->len == RSTRING(str)->len) {
00639             OBJ_FREEZE(shared);
00640             return shared;
00641         }
00642     }
00643     if (OBJ_FROZEN(str)) return str;
00644     str = rb_str_dup(str);
00645     OBJ_FREEZE(str);
00646     return str;
00647 }
00648 
00649 VALUE
00650 rb_str_locktmp(str)
00651     VALUE str;
00652 {
00653     if (FL_TEST(str, STR_TMPLOCK)) {
00654         rb_raise(rb_eRuntimeError, "temporal locking already locked string");
00655     }
00656     FL_SET(str, STR_TMPLOCK);
00657     return str;
00658 }
00659 
00660 VALUE
00661 rb_str_unlocktmp(str)
00662     VALUE str;
00663 {
00664     if (!FL_TEST(str, STR_TMPLOCK)) {
00665         rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
00666     }
00667     FL_UNSET(str, STR_TMPLOCK);
00668     return str;
00669 }
00670 
00671 VALUE
00672 rb_str_resize(str, len)
00673     VALUE str;
00674     long len;
00675 {
00676     if (len < 0) {
00677         rb_raise(rb_eArgError, "negative string size (or size too big)");
00678     }
00679 
00680     rb_str_modify(str);
00681     if (len != RSTRING(str)->len) {
00682         if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) {
00683             REALLOC_N(RSTRING(str)->ptr, char, len+1);
00684             if (!FL_TEST(str, STR_NOCAPA)) {
00685                 RSTRING(str)->aux.capa = len;
00686             }
00687         }
00688         RSTRING(str)->len = len;
00689         RSTRING(str)->ptr[len] = '\0';  /* sentinel */
00690     }
00691     return str;
00692 }
00693 
00694 VALUE
00695 rb_str_buf_cat(str, ptr, len)
00696     VALUE str;
00697     const char *ptr;
00698     long len;
00699 {
00700     long capa, total;
00701 
00702     if (len == 0) return str;
00703     if (len < 0) {
00704         rb_raise(rb_eArgError, "negative string size (or size too big)");
00705     }
00706     rb_str_modify(str);
00707     if (FL_TEST(str, STR_ASSOC)) {
00708         FL_UNSET(str, STR_ASSOC);
00709         capa = RSTRING(str)->aux.capa = RSTRING(str)->len;
00710     }
00711     else {
00712         capa = RSTRING(str)->aux.capa;
00713     }
00714     total = RSTRING(str)->len+len;
00715     if (capa <= total) {
00716         while (total > capa) {
00717             capa = (capa + 1) * 2;
00718         }
00719         RESIZE_CAPA(str, capa);
00720     }
00721     memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
00722     RSTRING(str)->len = total;
00723     RSTRING(str)->ptr[total] = '\0'; /* sentinel */
00724 
00725     return str;
00726 }
00727 
00728 VALUE
00729 rb_str_buf_cat2(str, ptr)
00730     VALUE str;
00731     const char *ptr;
00732 {
00733     return rb_str_buf_cat(str, ptr, strlen(ptr));
00734 }
00735 
00736 VALUE
00737 rb_str_cat(str, ptr, len)
00738     VALUE str;
00739     const char *ptr;
00740     long len;
00741 {
00742     if (len < 0) {
00743         rb_raise(rb_eArgError, "negative string size (or size too big)");
00744     }
00745     if (FL_TEST(str, STR_ASSOC)) {
00746         rb_str_modify(str);
00747         REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len+len);
00748         memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
00749         RSTRING(str)->len += len;
00750         RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
00751         return str;
00752     }
00753 
00754     return rb_str_buf_cat(str, ptr, len);
00755 }
00756 
00757 VALUE
00758 rb_str_cat2(str, ptr)
00759     VALUE str;
00760     const char *ptr;
00761 {
00762     return rb_str_cat(str, ptr, strlen(ptr));
00763 }
00764 
00765 VALUE
00766 rb_str_buf_append(str, str2)
00767     VALUE str, str2;
00768 {
00769     long capa, len;
00770 
00771     rb_str_modify(str);
00772     if (FL_TEST(str, STR_ASSOC)) {
00773         FL_UNSET(str, STR_ASSOC);
00774         capa = RSTRING(str)->aux.capa = RSTRING(str)->len;
00775     }
00776     else {
00777         capa = RSTRING(str)->aux.capa;
00778     }
00779     len = RSTRING(str)->len+RSTRING(str2)->len;
00780     if (capa <= len) {
00781         while (len > capa) {
00782             capa = (capa + 1) * 2;
00783         }
00784         RESIZE_CAPA(str, capa);
00785     }
00786     memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
00787            RSTRING(str2)->ptr, RSTRING(str2)->len);
00788     RSTRING(str)->len += RSTRING(str2)->len;
00789     RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
00790     OBJ_INFECT(str, str2);
00791 
00792     return str;
00793 }
00794 
00795 VALUE
00796 rb_str_append(str, str2)
00797     VALUE str, str2;
00798 {
00799     StringValue(str2);
00800     rb_str_modify(str);
00801     if (RSTRING(str2)->len > 0) {
00802         if (FL_TEST(str, STR_ASSOC)) {
00803             long len = RSTRING(str)->len+RSTRING(str2)->len;
00804             REALLOC_N(RSTRING(str)->ptr, char, len+1);
00805             memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
00806                    RSTRING(str2)->ptr, RSTRING(str2)->len);
00807             RSTRING(str)->ptr[len] = '\0'; /* sentinel */
00808             RSTRING(str)->len = len;
00809         }
00810         else {
00811             return rb_str_buf_append(str, str2);
00812         }
00813     }
00814     OBJ_INFECT(str, str2);
00815     return str;
00816 }
00817 
00818 
00819 /*
00820  *  call-seq:
00821  *     str << fixnum        => str
00822  *     str.concat(fixnum)   => str
00823  *     str << obj           => str
00824  *     str.concat(obj)      => str
00825  *  
00826  *  Append---Concatenates the given object to <i>str</i>. If the object is a
00827  *  <code>Fixnum</code> between 0 and 255, it is converted to a character before
00828  *  concatenation.
00829  *     
00830  *     a = "hello "
00831  *     a << "world"   #=> "hello world"
00832  *     a.concat(33)   #=> "hello world!"
00833  */
00834 
00835 VALUE
00836 rb_str_concat(str1, str2)
00837     VALUE str1, str2;
00838 {
00839     if (FIXNUM_P(str2)) {
00840         int i = FIX2INT(str2);
00841         if (0 <= i && i <= 0xff) { /* byte */
00842             char c = i;
00843             return rb_str_cat(str1, &c, 1);
00844         }
00845     }
00846     str1 = rb_str_append(str1, str2);
00847 
00848     return str1;
00849 }
00850 
00851 int
00852 rb_str_hash(str)
00853     VALUE str;
00854 {
00855     register long len = RSTRING(str)->len;
00856     register char *p = RSTRING(str)->ptr;
00857     register int key = 0;
00858 
00859 #ifdef HASH_ELFHASH
00860     register unsigned int g;
00861 
00862     while (len--) {
00863         key = (key << 4) + *p++;
00864         if (g = key & 0xF0000000)
00865             key ^= g >> 24;
00866         key &= ~g;
00867     }
00868 #elif HASH_PERL
00869     while (len--) {
00870         key += *p++;
00871         key += (key << 10);
00872         key ^= (key >> 6);
00873     }
00874     key += (key << 3);
00875     key ^= (key >> 11);
00876     key += (key << 15);
00877 #else
00878     while (len--) {
00879         key = key*65599 + *p;
00880         p++;
00881     }
00882     key = key + (key>>5);
00883 #endif
00884     return key;
00885 }
00886 
00887 /*
00888  * call-seq:
00889  *    str.hash   => fixnum
00890  *
00891  * Return a hash based on the string's length and content.
00892  */
00893 
00894 static VALUE
00895 rb_str_hash_m(str)
00896     VALUE str;
00897 {
00898     int key = rb_str_hash(str);
00899     return INT2FIX(key);
00900 }
00901 
00902 #define lesser(a,b) (((a)>(b))?(b):(a))
00903 
00904 int
00905 rb_str_cmp(str1, str2)
00906     VALUE str1, str2;
00907 {
00908     long len;
00909     int retval;
00910 
00911     len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
00912     retval = rb_memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
00913     if (retval == 0) {
00914         if (RSTRING(str1)->len == RSTRING(str2)->len) return 0;
00915         if (RSTRING(str1)->len > RSTRING(str2)->len) return 1;
00916         return -1;
00917     }
00918     if (retval > 0) return 1;
00919     return -1;
00920 }
00921 
00922 
00923 /*
00924  *  call-seq:
00925  *     str == obj   => true or false
00926  *  
00927  *  Equality---If <i>obj</i> is not a <code>String</code>, returns
00928  *  <code>false</code>. Otherwise, returns <code>true</code> if <i>str</i>
00929  *  <code><=></code> <i>obj</i> returns zero.
00930  */
00931 
00932 static VALUE
00933 rb_str_equal(str1, str2)
00934     VALUE str1, str2;
00935 {
00936     if (str1 == str2) return Qtrue;
00937     if (TYPE(str2) != T_STRING) {
00938         if (!rb_respond_to(str2, rb_intern("to_str"))) {
00939             return Qfalse;
00940         }
00941         return rb_equal(str2, str1);
00942     }
00943     if (RSTRING(str1)->len == RSTRING(str2)->len &&
00944         rb_str_cmp(str1, str2) == 0) {
00945         return Qtrue;
00946     }
00947     return Qfalse;
00948 }
00949 
00950 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
00951 
00952 /*
00953  * call-seq:
00954  *   str.eql?(other)   => true or false
00955  *
00956  * Two strings are equal if they have the same length and content.
00957  */
00958 
00959 static VALUE
00960 rb_str_eql(str1, str2)
00961     VALUE str1, str2;
00962 {
00963     if (TYPE(str2) != T_STRING || RSTRING(str1)->len != RSTRING(str2)->len)
00964         return Qfalse;
00965 
00966     if (memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr,
00967                lesser(RSTRING(str1)->len, RSTRING(str2)->len)) == 0)
00968         return Qtrue;
00969 
00970     return Qfalse;
00971 }
00972 
00973 /*
00974  *  call-seq:
00975  *     str <=> other_str   => -1, 0, +1
00976  *  
00977  *  Comparison---Returns -1 if <i>other_str</i> is greater than, 0 if
00978  *  <i>other_str</i> is equal to, and +1 if <i>other_str</i> is less than
00979  *  <i>str</i>. If the strings are of different lengths, and the strings are
00980  *  equal when compared up to the shortest length, then the longer string is
00981  *  considered greater than the shorter one. If the variable <code>$=</code> is
00982  *  <code>false</code>, the comparison is based on comparing the binary values
00983  *  of each character in the string. In older versions of Ruby, setting
00984  *  <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
00985  *  in favor of using <code>String#casecmp</code>.
00986  *
00987  *  <code><=></code> is the basis for the methods <code><</code>,
00988  *  <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
00989  *  included from module <code>Comparable</code>.  The method
00990  *  <code>String#==</code> does not use <code>Comparable#==</code>.
00991  *     
00992  *     "abcdef" <=> "abcde"     #=> 1
00993  *     "abcdef" <=> "abcdef"    #=> 0
00994  *     "abcdef" <=> "abcdefg"   #=> -1
00995  *     "abcdef" <=> "ABCDEF"    #=> 1
00996  */
00997 
00998 static VALUE
00999 rb_str_cmp_m(str1, str2)
01000     VALUE str1, str2;
01001 {
01002     long result;
01003 
01004     if (TYPE(str2) != T_STRING) {
01005         if (!rb_respond_to(str2, rb_intern("to_str"))) {
01006             return Qnil;
01007         }
01008         else if (!rb_respond_to(str2, rb_intern("<=>"))) {
01009             return Qnil;
01010         }
01011         else {
01012             VALUE tmp = rb_funcall(str2, rb_intern("<=>"), 1, str1);
01013 
01014             if (NIL_P(tmp)) return Qnil;
01015             if (!FIXNUM_P(tmp)) {
01016                 return rb_funcall(LONG2FIX(0), '-', 1, tmp);
01017             }
01018             result = -FIX2LONG(tmp);
01019         }
01020     }
01021     else {
01022         result = rb_str_cmp(str1, str2);
01023     }
01024     return LONG2NUM(result);
01025 }
01026 
01027 /*
01028  *  call-seq:
01029  *     str.casecmp(other_str)   => -1, 0, +1
01030  *  
01031  *  Case-insensitive version of <code>String#<=></code>.
01032  *     
01033  *     "abcdef".casecmp("abcde")     #=> 1
01034  *     "aBcDeF".casecmp("abcdef")    #=> 0
01035  *     "abcdef".casecmp("abcdefg")   #=> -1
01036  *     "abcdef".casecmp("ABCDEF")    #=> 0
01037  */
01038 
01039 static VALUE
01040 rb_str_casecmp(str1, str2)
01041     VALUE str1, str2;
01042 {
01043     long len;
01044     int retval;
01045 
01046     StringValue(str2);
01047     len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
01048     retval = rb_memcicmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
01049     if (retval == 0) {
01050         if (RSTRING(str1)->len == RSTRING(str2)->len) return INT2FIX(0);
01051         if (RSTRING(str1)->len > RSTRING(str2)->len) return INT2FIX(1);
01052         return INT2FIX(-1);
01053     }
01054     if (retval == 0) return INT2FIX(0);
01055     if (retval > 0) return INT2FIX(1);
01056     return INT2FIX(-1);
01057 }
01058 
01059 static long
01060 rb_str_index(str, sub, offset)
01061     VALUE str, sub;
01062     long offset;
01063 {
01064     long pos;
01065 
01066     if (offset < 0) {
01067         offset += RSTRING(str)->len;
01068         if (offset < 0) return -1;
01069     }
01070     if (RSTRING(str)->len - offset < RSTRING(sub)->len) return -1;
01071     if (RSTRING(sub)->len == 0) return offset;
01072     pos = rb_memsearch(RSTRING(sub)->ptr, RSTRING(sub)->len,
01073                        RSTRING(str)->ptr+offset, RSTRING(str)->len-offset);
01074     if (pos < 0) return pos;
01075     return pos + offset;
01076 }
01077 
01078 
01079 /*
01080  *  call-seq:
01081  *     str.index(substring [, offset])   => fixnum or nil
01082  *     str.index(fixnum [, offset])      => fixnum or nil
01083  *     str.index(regexp [, offset])      => fixnum or nil
01084  *  
01085  *  Returns the index of the first occurrence of the given <i>substring</i>,
01086  *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
01087  *  <code>nil</code> if not found. If the second parameter is present, it
01088  *  specifies the position in the string to begin the search.
01089  *     
01090  *     "hello".index('e')             #=> 1
01091  *     "hello".index('lo')            #=> 3
01092  *     "hello".index('a')             #=> nil
01093  *     "hello".index(101)             #=> 1
01094  *     "hello".index(/[aeiou]/, -3)   #=> 4
01095  */
01096 
01097 static VALUE
01098 rb_str_index_m(argc, argv, str)
01099     int argc;
01100     VALUE *argv;
01101     VALUE str;
01102 {
01103     VALUE sub;
01104     VALUE initpos;
01105     long pos;
01106 
01107     if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
01108         pos = NUM2LONG(initpos);
01109     }
01110     else {
01111         pos = 0;
01112     }
01113     if (pos < 0) {
01114         pos += RSTRING(str)->len;
01115         if (pos < 0) {
01116             if (TYPE(sub) == T_REGEXP) {
01117                 rb_backref_set(Qnil);
01118             }
01119             return Qnil;
01120         }
01121     }
01122 
01123     switch (TYPE(sub)) {
01124       case T_REGEXP:
01125         pos = rb_reg_adjust_startpos(sub, str, pos, 0);
01126         pos = rb_reg_search(sub, str, pos, 0);
01127         break;
01128 
01129       case T_FIXNUM:
01130       {
01131           int c = FIX2INT(sub);
01132           long len = RSTRING(str)->len;
01133           unsigned char *p = RSTRING(str)->ptr;
01134 
01135           for (;pos<len;pos++) {
01136               if (p[pos] == c) return LONG2NUM(pos);
01137           }
01138           return Qnil;
01139       }
01140 
01141       default: {
01142           VALUE tmp;
01143 
01144           tmp = rb_check_string_type(sub);
01145           if (NIL_P(tmp)) {
01146               rb_raise(rb_eTypeError, "type mismatch: %s given",
01147                        rb_obj_classname(sub));
01148           }
01149           sub = tmp;
01150       }
01151         /* fall through */
01152       case T_STRING:
01153         pos = rb_str_index(str, sub, pos);
01154         break;
01155     }
01156 
01157     if (pos == -1) return Qnil;
01158     return LONG2NUM(pos);
01159 }
01160 
01161 static long
01162 rb_str_rindex(str, sub, pos)
01163     VALUE str, sub;
01164     long pos;
01165 {
01166     long len = RSTRING(sub)->len;
01167     char *s, *sbeg, *t;
01168 
01169     /* substring longer than string */
01170     if (RSTRING(str)->len < len) return -1;
01171     if (RSTRING(str)->len - pos < len) {
01172         pos = RSTRING(str)->len - len;
01173     }
01174     sbeg = RSTRING(str)->ptr;
01175     s = RSTRING(str)->ptr + pos;
01176     t = RSTRING(sub)->ptr;
01177     if (len) {
01178         while (sbeg <= s) {
01179             if (rb_memcmp(s, t, len) == 0) {
01180                 return s - RSTRING(str)->ptr;
01181             }
01182             s--;
01183         }
01184         return -1;
01185     }
01186     else {
01187         return pos;
01188     }
01189 }
01190 
01191 
01192 /*
01193  *  call-seq:
01194  *     str.rindex(substring [, fixnum])   => fixnum or nil
01195  *     str.rindex(fixnum [, fixnum])   => fixnum or nil
01196  *     str.rindex(regexp [, fixnum])   => fixnum or nil
01197  *  
01198  *  Returns the index of the last occurrence of the given <i>substring</i>,
01199  *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
01200  *  <code>nil</code> if not found. If the second parameter is present, it
01201  *  specifies the position in the string to end the search---characters beyond
01202  *  this point will not be considered.
01203  *     
01204  *     "hello".rindex('e')             #=> 1
01205  *     "hello".rindex('l')             #=> 3
01206  *     "hello".rindex('a')             #=> nil
01207  *     "hello".rindex(101)             #=> 1
01208  *     "hello".rindex(/[aeiou]/, -2)   #=> 1
01209  */
01210 
01211 static VALUE
01212 rb_str_rindex_m(argc, argv, str)
01213     int argc;
01214     VALUE *argv;
01215     VALUE str;
01216 {
01217     VALUE sub;
01218     VALUE position;
01219     long pos;
01220 
01221     if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) {
01222         pos = NUM2LONG(position);
01223         if (pos < 0) {
01224             pos += RSTRING(str)->len;
01225             if (pos < 0) {
01226                 if (TYPE(sub) == T_REGEXP) {
01227                     rb_backref_set(Qnil);
01228                 }
01229                 return Qnil;
01230             }
01231         }
01232         if (pos > RSTRING(str)->len) pos = RSTRING(str)->len;
01233     }
01234     else {
01235         pos = RSTRING(str)->len;
01236     }
01237 
01238     switch (TYPE(sub)) {
01239       case T_REGEXP:
01240         if (RREGEXP(sub)->len) {
01241             pos = rb_reg_adjust_startpos(sub, str, pos, 1);
01242             pos = rb_reg_search(sub, str, pos, 1);
01243         }
01244         if (pos >= 0) return LONG2NUM(pos);
01245         break;
01246 
01247       case T_STRING:
01248         pos = rb_str_rindex(str, sub, pos);
01249         if (pos >= 0) return LONG2NUM(pos);
01250         break;
01251 
01252       case T_FIXNUM:
01253       {
01254           int c = FIX2INT(sub);
01255           unsigned char *p = RSTRING(str)->ptr + pos;
01256           unsigned char *pbeg = RSTRING(str)->ptr;
01257 
01258           if (pos == RSTRING(str)->len) {
01259               if (pos == 0) return Qnil;
01260               --p;
01261           }
01262           while (pbeg <= p) {
01263               if (*p == c) return LONG2NUM((char*)p - RSTRING(str)->ptr);
01264               p--;
01265           }
01266           return Qnil;
01267       }
01268 
01269       default:
01270         rb_raise(rb_eTypeError, "type mismatch: %s given",
01271                  rb_obj_classname(sub));
01272     }
01273     return Qnil;
01274 }
01275 
01276 /*
01277  *  call-seq:
01278  *     str =~ obj   => fixnum or nil
01279  *  
01280  *  Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
01281  *  against <i>str</i>. If <i>obj</i> is a <code>String</code>, look for it in
01282  *  <i>str</i> (similar to <code>String#index</code>). Returns the position the
01283  *  match starts, or <code>nil</code> if there is no match. Otherwise, invokes
01284  *  <i>obj.=~</i>, passing <i>str</i> as an argument. The default
01285  *  <code>=~</code> in <code>Object</code> returns <code>false</code>.
01286  *     
01287  *     "cat o' 9 tails" =~ '\d'   #=> nil
01288  *     "cat o' 9 tails" =~ /\d/   #=> 7
01289  *     "cat o' 9 tails" =~ 9      #=> false
01290  */
01291 
01292 static VALUE
01293 rb_str_match(x, y)
01294     VALUE x, y;
01295 {
01296     switch (TYPE(y)) {
01297       case T_STRING:
01298         rb_raise(rb_eTypeError, "type mismatch: String given");
01299 
01300       case T_REGEXP:
01301         return rb_reg_match(y, x);
01302 
01303       default:
01304         return rb_funcall(y, rb_intern("=~"), 1, x);
01305     }
01306 }
01307 
01308 
01309 static VALUE get_pat (VALUE, int);
01310 
01311 
01312 /*
01313  *  call-seq:
01314  *     str.match(pattern)   => matchdata or nil
01315  *  
01316  *  Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
01317  *  then invokes its <code>match</code> method on <i>str</i>.
01318  *     
01319  *     'hello'.match('(.)\1')      #=> #<MatchData:0x401b3d30>
01320  *     'hello'.match('(.)\1')[0]   #=> "ll"
01321  *     'hello'.match(/(.)\1/)[0]   #=> "ll"
01322  *     'hello'.match('xx')         #=> nil
01323  */
01324 
01325 static VALUE
01326 rb_str_match_m(str, re)
01327     VALUE str, re;
01328 {
01329     return rb_funcall(get_pat(re, 0), rb_intern("match"), 1, str);
01330 }
01331 
/*
 * Advances the single character at *s to its successor in place.
 *
 * Returns 0 when no carry is produced (including when *s is not an ASCII
 * digit or letter, in which case *s is left untouched).  On wrap-around
 * ('9' -> '0', 'z' -> 'a', 'Z' -> 'A') returns the character that has to
 * be carried to the left: '1', 'a' or 'A' respectively.
 */
static char
succ_char(s)
    char *s;
{
    char c = *s;

    /* wrap-around cases produce a carry */
    switch (c) {
      case '9':
        *s = '0';
        return '1';
      case 'z':
        *s = 'a';
        return 'a';
      case 'Z':
        *s = 'A';
        return 'A';
      default:
        break;
    }

    /* plain increment inside one of the three ranges */
    if (('0' <= c && c < '9') ||
        ('a' <= c && c < 'z') ||
        ('A' <= c && c < 'Z')) {
        *s = c + 1;
    }
    return 0;
}
01356 
01357 
01358 /*
01359  *  call-seq:
01360  *     str.succ   => new_str
01361  *     str.next   => new_str
01362  *  
01363  *  Returns the successor to <i>str</i>. The successor is calculated by
01364  *  incrementing characters starting from the rightmost alphanumeric (or
01365  *  the rightmost character if there are no alphanumerics) in the
01366  *  string. Incrementing a digit always results in another digit, and
01367  *  incrementing a letter results in another letter of the same case.
01368  *  Incrementing nonalphanumerics uses the underlying character set's
01369  *  collating sequence.
01370  *     
01371  *  If the increment generates a ``carry,'' the character to the left of
01372  *  it is incremented. This process repeats until there is no carry,
01373  *  adding an additional character if necessary.
01374  *     
01375  *     "abcd".succ        #=> "abce"
01376  *     "THX1138".succ     #=> "THX1139"
01377  *     "<<koala>>".succ   #=> "<<koalb>>"
01378  *     "1999zzz".succ     #=> "2000aaa"
01379  *     "ZZZ9999".succ     #=> "AAAA0000"
01380  *     "***".succ         #=> "**+"
01381  */
01382 
01383 static VALUE
01384 rb_str_succ(orig)
01385     VALUE orig;
01386 {
01387     VALUE str;
01388     char *sbeg, *s;
01389     int c = -1;
01390     long n = 0;
01391 
01392     str = rb_str_new5(orig, RSTRING(orig)->ptr, RSTRING(orig)->len);
01393     OBJ_INFECT(str, orig);
01394     if (RSTRING(str)->len == 0) return str;
01395 
01396     sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
01397 
01398     while (sbeg <= s) {
01399         if (ISALNUM(*s)) {
01400             if ((c = succ_char(s)) == 0) break;
01401             n = s - sbeg;
01402         }
01403         s--;
01404     }
01405     if (c == -1) {              /* str contains no alnum */
01406         sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
01407         c = '\001';
01408         while (sbeg <= s) {
01409             if ((*s += 1) != 0) break;
01410             s--;
01411         }
01412     }
01413     if (s < sbeg) {
01414         RESIZE_CAPA(str, RSTRING(str)->len + 1);
01415         s = RSTRING(str)->ptr + n;
01416         memmove(s+1, s, RSTRING(str)->len - n);
01417         *s = c;
01418         RSTRING(str)->len += 1;
01419         RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
01420     }
01421 
01422     return str;
01423 }
01424 
01425 
01426 /*
01427  *  call-seq:
01428  *     str.succ!   => str
01429  *     str.next!   => str
01430  *  
01431  *  Equivalent to <code>String#succ</code>, but modifies the receiver in
01432  *  place.
01433  */
01434 
01435 static VALUE
01436 rb_str_succ_bang(str)
01437     VALUE str;
01438 {
01439     rb_str_shared_replace(str, rb_str_succ(str));
01440 
01441     return str;
01442 }
01443 
01444 VALUE
01445 rb_str_upto(beg, end, excl)
01446     VALUE beg, end;
01447     int excl;
01448 {
01449     VALUE current, after_end;
01450     ID succ = rb_intern("succ");
01451     int n;
01452 
01453     StringValue(end);
01454     n = rb_str_cmp(beg, end);
01455     if (n > 0 || (excl && n == 0)) return beg;
01456     after_end = rb_funcall(end, succ, 0, 0);
01457     current = beg;
01458     while (!rb_str_equal(current, after_end)) {
01459         rb_yield(current);
01460         if (!excl && rb_str_equal(current, end)) break;
01461         current = rb_funcall(current, succ, 0, 0);
01462         StringValue(current);
01463         if (excl && rb_str_equal(current, end)) break;
01464         StringValue(current);
01465         if (RSTRING(current)->len > RSTRING(end)->len)
01466             break;
01467     }
01468 
01469     return beg;
01470 }
01471 
01472 
01473 /*
01474  *  call-seq:
01475  *     str.upto(other_str) {|s| block }   => str
01476  *  
01477  *  Iterates through successive values, starting at <i>str</i> and
01478  *  ending at <i>other_str</i> inclusive, passing each value in turn to
01479  *  the block. The <code>String#succ</code> method is used to generate
01480  *  each value.
01481  *     
01482  *     "a8".upto("b6") {|s| print s, ' ' }
01483  *     for s in "a8".."b6"
01484  *       print s, ' '
01485  *     end
01486  *     
01487  *  <em>produces:</em>
01488  *     
01489  *     a8 a9 b0 b1 b2 b3 b4 b5 b6
01490  *     a8 a9 b0 b1 b2 b3 b4 b5 b6
01491  */
01492 
01493 static VALUE
01494 rb_str_upto_m(beg, end)
01495     VALUE beg, end;
01496 {
01497     return rb_str_upto(beg, end, Qfalse);
01498 }
01499 
01500 static VALUE
01501 rb_str_subpat(str, re, nth)
01502     VALUE str, re;
01503     int nth;
01504 {
01505     if (rb_reg_search(re, str, 0, 0) >= 0) {
01506         return rb_reg_nth_match(nth, rb_backref_get());
01507     }
01508     return Qnil;
01509 }
01510 
01511 static VALUE
01512 rb_str_aref(str, indx)
01513     VALUE str;
01514     VALUE indx;
01515 {
01516     long idx;
01517 
01518     switch (TYPE(indx)) {
01519       case T_FIXNUM:
01520         idx = FIX2LONG(indx);
01521 
01522       num_index:
01523         if (idx < 0) {
01524             idx = RSTRING(str)->len + idx;
01525         }
01526         if (idx < 0 || RSTRING(str)->len <= idx) {
01527             return Qnil;
01528         }
01529         return INT2FIX(RSTRING(str)->ptr[idx] & 0xff);
01530 
01531       case T_REGEXP:
01532         return rb_str_subpat(str, indx, 0);
01533 
01534       case T_STRING:
01535         if (rb_str_index(str, indx, 0) != -1)
01536             return rb_str_dup(indx);
01537         return Qnil;
01538 
01539       default:
01540         /* check if indx is Range */
01541         {
01542             long beg, len;
01543             VALUE tmp;
01544 
01545             switch (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 0)) {
01546               case Qfalse:
01547                 break;
01548               case Qnil:
01549                 return Qnil;
01550               default:
01551                 tmp = rb_str_substr(str, beg, len);
01552                 OBJ_INFECT(tmp, indx);
01553                 return tmp;
01554             }
01555         }
01556         idx = NUM2LONG(indx);
01557         goto num_index;
01558     }
01559     return Qnil;                /* not reached */
01560 }
01561 
01562 
01563 /*
01564  *  call-seq:
01565  *     str[fixnum]                 => fixnum or nil
01566  *     str[fixnum, fixnum]         => new_str or nil
01567  *     str[range]                  => new_str or nil
01568  *     str[regexp]                 => new_str or nil
01569  *     str[regexp, fixnum]         => new_str or nil
01570  *     str[other_str]              => new_str or nil
01571  *     str.slice(fixnum)           => fixnum or nil
01572  *     str.slice(fixnum, fixnum)   => new_str or nil
01573  *     str.slice(range)            => new_str or nil
01574  *     str.slice(regexp)           => new_str or nil
01575  *     str.slice(regexp, fixnum)   => new_str or nil
01576  *     str.slice(other_str)        => new_str or nil
01577  *  
01578  *  Element Reference---If passed a single <code>Fixnum</code>, returns the code
01579  *  of the character at that position. If passed two <code>Fixnum</code>
01580  *  objects, returns a substring starting at the offset given by the first, and
01581  *  a length given by the second. If given a range, a substring containing
01582  *  characters at offsets given by the range is returned. In all three cases, if
01583  *  an offset is negative, it is counted from the end of <i>str</i>. Returns
01584  *  <code>nil</code> if the initial offset falls outside the string, the length
01585  *  is negative, or the beginning of the range is greater than the end.
01586  *     
01587  *  If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
01588  *  returned. If a numeric parameter follows the regular expression, that
01589  *  component of the <code>MatchData</code> is returned instead. If a
01590  *  <code>String</code> is given, that string is returned if it occurs in
01591  *  <i>str</i>. In both cases, <code>nil</code> is returned if there is no
01592  *  match.
01593  *     
01594  *     a = "hello there"
01595  *     a[1]                   #=> 101
01596  *     a[1,3]                 #=> "ell"
01597  *     a[1..3]                #=> "ell"
01598  *     a[-3,2]                #=> "er"
01599  *     a[-4..-2]              #=> "her"
01600  *     a[12..-1]              #=> nil
01601  *     a[-2..-4]              #=> ""
01602  *     a[/[aeiou](.)\1/]      #=> "ell"
01603  *     a[/[aeiou](.)\1/, 0]   #=> "ell"
01604  *     a[/[aeiou](.)\1/, 1]   #=> "l"
01605  *     a[/[aeiou](.)\1/, 2]   #=> nil
01606  *     a["lo"]                #=> "lo"
01607  *     a["bye"]               #=> nil
01608  */
01609 
01610 static VALUE
01611 rb_str_aref_m(argc, argv, str)
01612     int argc;
01613     VALUE *argv;
01614     VALUE str;
01615 {
01616     if (argc == 2) {
01617         if (TYPE(argv[0]) == T_REGEXP) {
01618             return rb_str_subpat(str, argv[0], NUM2INT(argv[1]));
01619         }
01620         return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
01621     }
01622     if (argc != 1) {
01623         rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
01624     }
01625     return rb_str_aref(str, argv[0]);
01626 }
01627 
01628 static void
01629 rb_str_splice(str, beg, len, val)
01630     VALUE str;
01631     long beg, len;
01632     VALUE val;
01633 {
01634     if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
01635 
01636     StringValue(val);
01637     rb_str_modify(str);
01638 
01639     if (RSTRING(str)->len < beg) {
01640       out_of_range:
01641         rb_raise(rb_eIndexError, "index %ld out of string", beg);
01642     }
01643     if (beg < 0) {
01644         if (-beg > RSTRING(str)->len) {
01645             goto out_of_range;
01646         }
01647         beg += RSTRING(str)->len;
01648     }
01649     if (RSTRING(str)->len < beg + len) {
01650         len = RSTRING(str)->len - beg;
01651     }
01652 
01653     if (len < RSTRING(val)->len) {
01654         /* expand string */
01655         RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(val)->len - len + 1);
01656     }
01657 
01658     if (RSTRING(val)->len != len) {
01659         memmove(RSTRING(str)->ptr + beg + RSTRING(val)->len,
01660                 RSTRING(str)->ptr + beg + len,
01661                 RSTRING(str)->len - (beg + len));
01662     }
01663     if (RSTRING(str)->len < beg && len < 0) {
01664         MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, -len);
01665     }
01666     if (RSTRING(val)->len > 0) {
01667         memmove(RSTRING(str)->ptr+beg, RSTRING(val)->ptr, RSTRING(val)->len);
01668     }
01669     RSTRING(str)->len += RSTRING(val)->len - len;
01670     if (RSTRING(str)->ptr) {
01671         RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
01672     }
01673     OBJ_INFECT(str, val);
01674 }
01675 
01676 void
01677 rb_str_update(str, beg, len, val)
01678     VALUE str;
01679     long beg, len;
01680     VALUE val;
01681 {
01682     rb_str_splice(str, beg, len, val);
01683 }
01684 
01685 static void
01686 rb_str_subpat_set(str, re, nth, val)
01687     VALUE str, re;
01688     int nth;
01689     VALUE val;
01690 {
01691     VALUE match;
01692     long start, end, len;
01693 
01694     if (rb_reg_search(re, str, 0, 0) < 0) {
01695         rb_raise(rb_eIndexError, "regexp not matched");
01696     }
01697     match = rb_backref_get();
01698     if (nth >= RMATCH(match)->regs->num_regs) {
01699       out_of_range:
01700         rb_raise(rb_eIndexError, "index %d out of regexp", nth);
01701     }
01702     if (nth < 0) {
01703         if (-nth >= RMATCH(match)->regs->num_regs) {
01704             goto out_of_range;
01705         }
01706         nth += RMATCH(match)->regs->num_regs;
01707     }
01708 
01709     start = RMATCH(match)->BEG(nth);
01710     if (start == -1) {
01711         rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
01712     }
01713     end = RMATCH(match)->END(nth);
01714     len = end - start;
01715     rb_str_splice(str, start, len, val);
01716 }
01717 
01718 static VALUE
01719 rb_str_aset(str, indx, val)
01720     VALUE str;
01721     VALUE indx, val;
01722 {
01723     long idx, beg;
01724 
01725     switch (TYPE(indx)) {
01726       case T_FIXNUM:
01727       num_index:
01728         idx = FIX2LONG(indx);
01729         if (RSTRING(str)->len <= idx) {
01730           out_of_range:
01731             rb_raise(rb_eIndexError, "index %ld out of string", idx);
01732         }
01733         if (idx < 0) {
01734             if (-idx > RSTRING(str)->len)
01735                 goto out_of_range;
01736             idx += RSTRING(str)->len;
01737         }
01738         if (FIXNUM_P(val)) {
01739             rb_str_modify(str);
01740             if (RSTRING(str)->len == idx) {
01741                 RSTRING(str)->len += 1;
01742                 RESIZE_CAPA(str, RSTRING(str)->len);
01743             }
01744             RSTRING(str)->ptr[idx] = FIX2INT(val) & 0xff;
01745         }
01746         else {
01747             rb_str_splice(str, idx, 1, val);
01748         }
01749         return val;
01750 
01751       case T_REGEXP:
01752         rb_str_subpat_set(str, indx, 0, val);
01753         return val;
01754 
01755       case T_STRING:
01756         beg = rb_str_index(str, indx, 0);
01757         if (beg < 0) {
01758             rb_raise(rb_eIndexError, "string not matched");
01759         }
01760         rb_str_splice(str, beg, RSTRING(indx)->len, val);
01761         return val;
01762 
01763       default:
01764         /* check if indx is Range */
01765         {
01766             long beg, len;
01767             if (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 2)) {
01768                 rb_str_splice(str, beg, len, val);
01769                 return val;
01770             }
01771         }
01772         idx = NUM2LONG(indx);
01773         goto num_index;
01774     }
01775 }
01776 
01777 /*
01778  *  call-seq:
01779  *     str[fixnum] = fixnum
01780  *     str[fixnum] = new_str
01781  *     str[fixnum, fixnum] = new_str
01782  *     str[range] = aString
01783  *     str[regexp] = new_str
01784  *     str[regexp, fixnum] = new_str
01785  *     str[other_str] = new_str
01786  *  
01787  *  Element Assignment---Replaces some or all of the content of <i>str</i>. The
01788  *  portion of the string affected is determined using the same criteria as
01789  *  <code>String#[]</code>. If the replacement string is not the same length as
01790  *  the text it is replacing, the string will be adjusted accordingly. If the
01791  *  regular expression or string used as the index doesn't match a position
01792  *  in the string, <code>IndexError</code> is raised. If the regular expression
01793  *  form is used, the optional second <code>Fixnum</code> allows you to specify
01794  *  which portion of the match to replace (effectively using the
01795  *  <code>MatchData</code> indexing rules). The forms that take a
01796  *  <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
01797  *  out of range; the <code>Range</code> form will raise a
01798  *  <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
01799  *  forms will raise an <code>IndexError</code> if there is no match.
01800  */
01801 
01802 static VALUE
01803 rb_str_aset_m(argc, argv, str)
01804     int argc;
01805     VALUE *argv;
01806     VALUE str;
01807 {
01808     if (argc == 3) {
01809         if (TYPE(argv[0]) == T_REGEXP) {
01810             rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
01811         }
01812         else {
01813             rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
01814         }
01815         return argv[2];
01816     }
01817     if (argc != 2) {
01818         rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
01819     }
01820     return rb_str_aset(str, argv[0], argv[1]);
01821 }
01822 
01823 /*
01824  *  call-seq:
01825  *     str.insert(index, other_str)   => str
01826  *  
01827  *  Inserts <i>other_str</i> before the character at the given
01828  *  <i>index</i>, modifying <i>str</i>. Negative indices count from the
01829  *  end of the string, and insert <em>after</em> the given character.
01830  *  The intent is to insert <i>other_str</i> so that it starts at the given
01831  *  <i>index</i>.
01832  *     
01833  *     "abcd".insert(0, 'X')    #=> "Xabcd"
01834  *     "abcd".insert(3, 'X')    #=> "abcXd"
01835  *     "abcd".insert(4, 'X')    #=> "abcdX"
01836  *     "abcd".insert(-3, 'X')   #=> "abXcd"
01837  *     "abcd".insert(-1, 'X')   #=> "abcdX"
01838  */
01839 
01840 static VALUE
01841 rb_str_insert(str, idx, str2)
01842     VALUE str, idx, str2;
01843 {
01844     long pos = NUM2LONG(idx);
01845 
01846     if (pos == -1) {
01847         pos = RSTRING(str)->len;
01848     }
01849     else if (pos < 0) {
01850         pos++;
01851     }
01852     rb_str_splice(str, pos, 0, str2);
01853     return str;
01854 }
01855 
01856 /*
01857  *  call-seq:
01858  *     str.slice!(fixnum)           => fixnum or nil
01859  *     str.slice!(fixnum, fixnum)   => new_str or nil
01860  *     str.slice!(range)            => new_str or nil
01861  *     str.slice!(regexp)           => new_str or nil
01862  *     str.slice!(other_str)        => new_str or nil
01863  *  
01864  *  Deletes the specified portion from <i>str</i>, and returns the portion
01865  *  deleted. The forms that take a <code>Fixnum</code> will raise an
01866  *  <code>IndexError</code> if the value is out of range; the <code>Range</code>
01867  *  form will raise a <code>RangeError</code>, and the <code>Regexp</code> and
01868  *  <code>String</code> forms will silently ignore the assignment.
01869  *     
01870  *     string = "this is a string"
01871  *     string.slice!(2)        #=> 105
01872  *     string.slice!(3..6)     #=> " is "
01873  *     string.slice!(/s.*t/)   #=> "sa st"
01874  *     string.slice!("r")      #=> "r"
01875  *     string                  #=> "thing"
01876  */
01877 
01878 static VALUE
01879 rb_str_slice_bang(argc, argv, str)
01880     int argc;
01881     VALUE *argv;
01882     VALUE str;
01883 {
01884     VALUE result;
01885     VALUE buf[3];
01886     int i;
01887 
01888     if (argc < 1 || 2 < argc) {
01889         rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
01890     }
01891     for (i=0; i<argc; i++) {
01892         buf[i] = argv[i];
01893     }
01894     buf[i] = rb_str_new(0,0);
01895     result = rb_str_aref_m(argc, buf, str);
01896     if (!NIL_P(result)) {
01897         rb_str_aset_m(argc+1, buf, str);
01898     }
01899     return result;
01900 }
01901 
01902 static VALUE
01903 get_pat(pat, quote)
01904     VALUE pat;
01905     int quote;
01906 {
01907     VALUE val;
01908 
01909     switch (TYPE(pat)) {
01910       case T_REGEXP:
01911         return pat;
01912 
01913       case T_STRING:
01914         break;
01915 
01916       default:
01917         val = rb_check_string_type(pat);
01918         if (NIL_P(val)) {
01919             Check_Type(pat, T_REGEXP);
01920         }
01921         pat = val;
01922     }
01923 
01924     if (quote) {
01925         pat = rb_reg_quote(pat);
01926     }
01927 
01928     return rb_reg_regcomp(pat);
01929 }
01930 
01931 
01932 /*
01933  *  call-seq:
01934  *     str.sub!(pattern, replacement)          => str or nil
01935  *     str.sub!(pattern) {|match| block }      => str or nil
01936  *  
01937  *  Performs the substitutions of <code>String#sub</code> in place,
01938  *  returning <i>str</i>, or <code>nil</code> if no substitutions were
01939  *  performed.
01940  */
01941 
01942 static VALUE
01943 rb_str_sub_bang(argc, argv, str)
01944     int argc;
01945     VALUE *argv;
01946     VALUE str;
01947 {
01948     VALUE pat, repl, match;
01949     struct re_registers *regs;
01950     int iter = 0;
01951     int tainted = 0;
01952     long plen;
01953 
01954     if (argc == 1 && rb_block_given_p()) {
01955         iter = 1;
01956     }
01957     else if (argc == 2) {
01958         repl = argv[1];
01959         StringValue(repl);
01960         if (OBJ_TAINTED(repl)) tainted = 1;
01961     }
01962     else {
01963         rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
01964     }
01965 
01966     pat = get_pat(argv[0], 1);
01967     if (rb_reg_search(pat, str, 0, 0) >= 0) {
01968         rb_str_modify(str);
01969         match = rb_backref_get();
01970         regs = RMATCH(match)->regs;
01971 
01972         if (iter) {
01973             char *p = RSTRING(str)->ptr; long len = RSTRING(str)->len;
01974 
01975             rb_match_busy(match);
01976             repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
01977             str_mod_check(str, p, len);
01978             str_frozen_check(str);
01979             rb_backref_set(match);
01980         }
01981         else {
01982             repl = rb_reg_regsub(repl, str, regs);
01983         }
01984         if (OBJ_TAINTED(repl)) tainted = 1;
01985         plen = END(0) - BEG(0);
01986         if (RSTRING(repl)->len > plen) {
01987             RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(repl)->len - plen);
01988         }
01989         if (RSTRING(repl)->len != plen) {
01990             memmove(RSTRING(str)->ptr + BEG(0) + RSTRING(repl)->len,
01991                     RSTRING(str)->ptr + BEG(0) + plen,
01992                     RSTRING(str)->len - BEG(0) - plen);
01993         }
01994         memcpy(RSTRING(str)->ptr + BEG(0),
01995                RSTRING(repl)->ptr, RSTRING(repl)->len);
01996         RSTRING(str)->len += RSTRING(repl)->len - plen;
01997         RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
01998         if (tainted) OBJ_TAINT(str);
01999 
02000         return str;
02001     }
02002     return Qnil;
02003 }
02004 
02005 
02006 /*
02007  *  call-seq:
02008  *     str.sub(pattern, replacement)         => new_str
02009  *     str.sub(pattern) {|match| block }     => new_str
02010  *  
02011  *  Returns a copy of <i>str</i> with the <em>first</em> occurrence of
02012  *  <i>pattern</i> replaced with either <i>replacement</i> or the value of the
02013  *  block. The <i>pattern</i> will typically be a <code>Regexp</code>; if it is
02014  *  a <code>String</code> then no regular expression metacharacters will be
02015  *  interpreted (that is <code>/\d/</code> will match a digit, but
02016  *  <code>'\d'</code> will match a backslash followed by a 'd').
02017  *     
02018  *  If the method call specifies <i>replacement</i>, special variables such as
02019  *  <code>$&</code> will not be useful, as substitution into the string occurs
02020  *  before the pattern match starts. However, the sequences <code>\1</code>,
02021  *  <code>\2</code>, etc., may be used.
02022  *     
02023  *  In the block form, the current match string is passed in as a parameter, and
02024  *  variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
02025  *  <code>$&</code>, and <code>$'</code> will be set appropriately. The value
02026  *  returned by the block will be substituted for the match on each call.
02027  *     
02028  *  The result inherits any tainting in the original string or any supplied
02029  *  replacement string.
02030  *     
02031  *     "hello".sub(/[aeiou]/, '*')               #=> "h*llo"
02032  *     "hello".sub(/([aeiou])/, '<\1>')          #=> "h<e>llo"
02033  *     "hello".sub(/./) {|s| s[0].to_s + ' ' }   #=> "104 ello"
02034  */
02035 
02036 static VALUE
02037 rb_str_sub(argc, argv, str)
02038     int argc;
02039     VALUE *argv;
02040     VALUE str;
02041 {
02042     str = rb_str_dup(str);
02043     rb_str_sub_bang(argc, argv, str);
02044     return str;
02045 }
02046 
02047 static VALUE
02048 str_gsub(argc, argv, str, bang)
02049     int argc;
02050     VALUE *argv;
02051     VALUE str;
02052     int bang;
02053 {
02054     VALUE pat, val, repl, match, dest;
02055     struct re_registers *regs;
02056     long beg, n;
02057     long offset, blen, slen, len;
02058     int iter = 0;
02059     char *buf, *bp, *sp, *cp;
02060     int tainted = 0;
02061 
02062     if (argc == 1 && rb_block_given_p()) {
02063         iter = 1;
02064     }
02065     else if (argc == 2) {
02066         repl = argv[1];
02067         StringValue(repl);
02068         if (OBJ_TAINTED(repl)) tainted = 1;
02069     }
02070     else {
02071         rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
02072     }
02073 
02074     pat = get_pat(argv[0], 1);
02075     offset=0; n=0;
02076     beg = rb_reg_search(pat, str, 0, 0);
02077     if (beg < 0) {
02078         if (bang) return Qnil;  /* no match, no substitution */
02079         return rb_str_dup(str);
02080     }
02081 
02082     blen = RSTRING(str)->len + 30; /* len + margin */
02083     dest = str_new(0, 0, blen);
02084     buf = RSTRING(dest)->ptr;
02085     bp = buf;
02086     sp = cp = RSTRING(str)->ptr;
02087     slen = RSTRING(str)->len;
02088 
02089     rb_str_locktmp(dest);
02090     while (beg >= 0) {
02091         n++;
02092         match = rb_backref_get();
02093         regs = RMATCH(match)->regs;
02094         if (iter) {
02095             rb_match_busy(match);
02096             val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
02097             str_mod_check(str, sp, slen);
02098             if (bang) str_frozen_check(str);
02099             if (val == dest) {  /* paranoid chack [ruby-dev:24827] */
02100                 rb_raise(rb_eRuntimeError, "block should not cheat");
02101             }
02102             rb_backref_set(match);
02103         }
02104         else {
02105             val = rb_reg_regsub(repl, str, regs);
02106         }
02107         if (OBJ_TAINTED(val)) tainted = 1;
02108         len = (bp - buf) + (beg - offset) + RSTRING(val)->len + 3;
02109         if (blen < len) {
02110             while (blen < len) blen *= 2;
02111             len = bp - buf;
02112             RESIZE_CAPA(dest, blen);
02113             RSTRING(dest)->len = blen;
02114             buf = RSTRING(dest)->ptr;
02115             bp = buf + len;
02116         }
02117         len = beg - offset;     /* copy pre-match substr */
02118         memcpy(bp, cp, len);
02119         bp += len;
02120         memcpy(bp, RSTRING(val)->ptr, RSTRING(val)->len);
02121         bp += RSTRING(val)->len;
02122         offset = END(0);
02123         if (BEG(0) == END(0)) {
02124             /*
02125              * Always consume at least one character of the input string
02126              * in order to prevent infinite loops.
02127              */
02128             if (RSTRING(str)->len <= END(0)) break;
02129             len = mbclen2(RSTRING(str)->ptr[END(0)], pat);
02130             memcpy(bp, RSTRING(str)->ptr+END(0), len);
02131             bp += len;
02132             offset = END(0) + len;
02133         }
02134         cp = RSTRING(str)->ptr + offset;
02135         if (offset > RSTRING(str)->len) break;
02136         beg = rb_reg_search(pat, str, offset, 0);
02137     }
02138     if (RSTRING(str)->len > offset) {
02139         len = bp - buf;
02140         if (blen - len < RSTRING(str)->len - offset) {
02141             blen = len + RSTRING(str)->len - offset;
02142             RESIZE_CAPA(dest, blen);
02143             buf = RSTRING(dest)->ptr;
02144             bp = buf + len;
02145         }
02146         memcpy(bp, cp, RSTRING(str)->len - offset);
02147         bp += RSTRING(str)->len - offset;
02148     }
02149     rb_backref_set(match);
02150     *bp = '\0';
02151     rb_str_unlocktmp(dest);
02152     if (bang) {
02153         if (str_independent(str)) {
02154             free(RSTRING(str)->ptr);
02155         }
02156         FL_UNSET(str, STR_NOCAPA);
02157         RSTRING(str)->ptr = buf;
02158         RSTRING(str)->aux.capa = blen;
02159         RSTRING(dest)->ptr = 0;
02160         RSTRING(dest)->len = 0;
02161     }
02162     else {
02163         RBASIC(dest)->klass = rb_obj_class(str);
02164         OBJ_INFECT(dest, str);
02165         str = dest;
02166     }
02167     RSTRING(str)->len = bp - buf;
02168 
02169     if (tainted) OBJ_TAINT(str);
02170     return str;
02171 }
02172 
02173 
02174 /*
02175  *  call-seq:
02176  *     str.gsub!(pattern, replacement)        => str or nil
02177  *     str.gsub!(pattern) {|match| block }    => str or nil
02178  *  
02179  *  Performs the substitutions of <code>String#gsub</code> in place, returning
02180  *  <i>str</i>, or <code>nil</code> if no substitutions were performed.
02181  */
02182 
02183 static VALUE
02184 rb_str_gsub_bang(argc, argv, str)
02185     int argc;
02186     VALUE *argv;
02187     VALUE str;
02188 {
02189     return str_gsub(argc, argv, str, 1);
02190 }
02191 
02192 
02193 /*
02194  *  call-seq:
02195  *     str.gsub(pattern, replacement)       => new_str
02196  *     str.gsub(pattern) {|match| block }   => new_str
02197  *  
02198  *  Returns a copy of <i>str</i> with <em>all</em> occurrences of <i>pattern</i>
02199  *  replaced with either <i>replacement</i> or the value of the block. The
02200  *  <i>pattern</i> will typically be a <code>Regexp</code>; if it is a
02201  *  <code>String</code> then no regular expression metacharacters will be
02202  *  interpreted (that is <code>/\d/</code> will match a digit, but
02203  *  <code>'\d'</code> will match a backslash followed by a 'd').
02204  *     
02205  *  If a string is used as the replacement, special variables from the match
02206  *  (such as <code>$&</code> and <code>$1</code>) cannot be substituted into it,
02207  *  as substitution into the string occurs before the pattern match
02208  *  starts. However, the sequences <code>\1</code>, <code>\2</code>, and so on
02209  *  may be used to interpolate successive groups in the match.
02210  *     
02211  *  In the block form, the current match string is passed in as a parameter, and
02212  *  variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
02213  *  <code>$&</code>, and <code>$'</code> will be set appropriately. The value
02214  *  returned by the block will be substituted for the match on each call.
02215  *     
02216  *  The result inherits any tainting in the original string or any supplied
02217  *  replacement string.
02218  *     
02219  *     "hello".gsub(/[aeiou]/, '*')              #=> "h*ll*"
02220  *     "hello".gsub(/([aeiou])/, '<\1>')         #=> "h<e>ll<o>"
02221  *     "hello".gsub(/./) {|s| s[0].to_s + ' '}   #=> "104 101 108 108 111 "
02222  */
02223 
02224 static VALUE
02225 rb_str_gsub(argc, argv, str)
02226     int argc;
02227     VALUE *argv;
02228     VALUE str;
02229 {
02230     return str_gsub(argc, argv, str, 0);
02231 }
02232 
02233 
02234 /*
02235  *  call-seq:
02236  *     str.replace(other_str)   => str
02237  *  
02238  *  Replaces the contents and taintedness of <i>str</i> with the corresponding
02239  *  values in <i>other_str</i>.
02240  *     
02241  *     s = "hello"         #=> "hello"
02242  *     s.replace "world"   #=> "world"
02243  */
02244 
02245 static VALUE
02246 rb_str_replace(str, str2)
02247     VALUE str, str2;
02248 {
02249     if (str == str2) return str;
02250 
02251     StringValue(str2);
02252     if (FL_TEST(str2, ELTS_SHARED)) {
02253         if (str_independent(str)) {
02254             free(RSTRING(str)->ptr);
02255         }
02256         RSTRING(str)->len = RSTRING(str2)->len;
02257         RSTRING(str)->ptr = RSTRING(str2)->ptr;
02258         FL_SET(str, ELTS_SHARED);
02259         FL_UNSET(str, STR_ASSOC);
02260         RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
02261     }
02262     else {
02263         rb_str_modify(str);
02264         rb_str_resize(str, RSTRING(str2)->len);
02265         memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len);
02266         if (FL_TEST(str2, STR_ASSOC)) {
02267             FL_SET(str, STR_ASSOC);
02268             RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
02269         }
02270     }
02271 
02272     OBJ_INFECT(str, str2);
02273     return str;
02274 }
02275 
02276 static VALUE
02277 uscore_get()
02278 {
02279     VALUE line;
02280 
02281     line = rb_lastline_get();
02282     if (TYPE(line) != T_STRING) {
02283         rb_raise(rb_eTypeError, "$_ value need to be String (%s given)",
02284                  NIL_P(line) ? "nil" : rb_obj_classname(line));
02285     }
02286     return line;
02287 }
02288 
02289 /*
02290  *  call-seq:
02291  *     sub!(pattern, replacement)    => $_ or nil
02292  *     sub!(pattern) {|...| block }  => $_ or nil
02293  *  
02294  *  Equivalent to <code>$_.sub!(<i>args</i>)</code>.
02295  */
02296 
02297 static VALUE
02298 rb_f_sub_bang(argc, argv)
02299     int argc;
02300     VALUE *argv;
02301 {
02302     return rb_str_sub_bang(argc, argv, uscore_get());
02303 }
02304 
02305 /*
02306  *  call-seq:
02307  *     sub(pattern, replacement)   => $_
02308  *     sub(pattern) { block }      => $_
02309  *  
02310  *  Equivalent to <code>$_.sub(<i>args</i>)</code>, except that
02311  *  <code>$_</code> will be updated if substitution occurs.
02312  */
02313 
02314 static VALUE
02315 rb_f_sub(argc, argv)
02316     int argc;
02317     VALUE *argv;
02318 {
02319     VALUE str = rb_str_dup(uscore_get());
02320 
02321     if (NIL_P(rb_str_sub_bang(argc, argv, str)))
02322         return str;
02323     rb_lastline_set(str);
02324     return str;
02325 }
02326 
02327 /*
02328  *  call-seq:
02329  *     gsub!(pattern, replacement)    => string or nil
02330  *     gsub!(pattern) {|...| block }  => string or nil
02331  *  
02332  *  Equivalent to <code>Kernel::gsub</code>, except <code>nil</code> is
02333  *  returned if <code>$_</code> is not modified.
02334  *     
02335  *     $_ = "quick brown fox"
02336  *     gsub! /cat/, '*'   #=> nil
02337  *     $_                 #=> "quick brown fox"
02338  */
02339 
02340 static VALUE
02341 rb_f_gsub_bang(argc, argv)
02342     int argc;
02343     VALUE *argv;
02344 {
02345     return rb_str_gsub_bang(argc, argv, uscore_get());
02346 }
02347 
02348 /*
02349  *  call-seq:
02350  *     gsub(pattern, replacement)    => string
02351  *     gsub(pattern) {|...| block }  => string
02352  *  
02353  *  Equivalent to <code>$_.gsub...</code>, except that <code>$_</code>
02354  *  receives the modified result.
02355  *     
02356  *     $_ = "quick brown fox"
02357  *     gsub /[aeiou]/, '*'   #=> "q**ck br*wn f*x"
02358  *     $_                    #=> "q**ck br*wn f*x"
02359  */
02360 
02361 static VALUE
02362 rb_f_gsub(argc, argv)
02363     int argc;
02364     VALUE *argv;
02365 {
02366     VALUE str = rb_str_dup(uscore_get());
02367 
02368     if (NIL_P(rb_str_gsub_bang(argc, argv, str)))
02369         return str;
02370     rb_lastline_set(str);
02371     return str;
02372 }
02373 
02374 
02375 /*
02376  *  call-seq:
02377  *     str.reverse!   => str
02378  *  
02379  *  Reverses <i>str</i> in place.
02380  */
02381 
02382 static VALUE
02383 rb_str_reverse_bang(str)
02384     VALUE str;
02385 {
02386     char *s, *e;
02387     char c;
02388 
02389     if (RSTRING(str)->len > 1) {
02390         rb_str_modify(str);
02391         s = RSTRING(str)->ptr;
02392         e = s + RSTRING(str)->len - 1;
02393         while (s < e) {
02394             c = *s;
02395             *s++ = *e;
02396             *e-- = c;
02397         }
02398     }
02399     return str;
02400 }
02401 
02402 
02403 /*
02404  *  call-seq:
02405  *     str.reverse   => new_str
02406  *  
02407  *  Returns a new string with the characters from <i>str</i> in reverse order.
02408  *     
02409  *     "stressed".reverse   #=> "desserts"
02410  */
02411 
02412 static VALUE
02413 rb_str_reverse(str)
02414     VALUE str;
02415 {
02416     VALUE obj;
02417     char *s, *e, *p;
02418 
02419     if (RSTRING(str)->len <= 1) return rb_str_dup(str);
02420 
02421     obj = rb_str_new5(str, 0, RSTRING(str)->len);
02422     s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1;
02423     p = RSTRING(obj)->ptr;
02424 
02425     while (e >= s) {
02426         *p++ = *e--;
02427     }
02428     OBJ_INFECT(obj, str);
02429 
02430     return obj;
02431 }
02432 
02433 
02434 /*
02435  *  call-seq:
02436  *     str.include? other_str   => true or false
02437  *     str.include? fixnum      => true or false
02438  *  
02439  *  Returns <code>true</code> if <i>str</i> contains the given string or
02440  *  character.
02441  *     
02442  *     "hello".include? "lo"   #=> true
02443  *     "hello".include? "ol"   #=> false
02444  *     "hello".include? ?h     #=> true
02445  */
02446 
02447 static VALUE
02448 rb_str_include(str, arg)
02449     VALUE str, arg;
02450 {
02451     long i;
02452 
02453     if (FIXNUM_P(arg)) {
02454         if (memchr(RSTRING(str)->ptr, FIX2INT(arg), RSTRING(str)->len))
02455             return Qtrue;
02456         return Qfalse;
02457     }
02458 
02459     StringValue(arg);
02460     i = rb_str_index(str, arg, 0);
02461 
02462     if (i == -1) return Qfalse;
02463     return Qtrue;
02464 }
02465 
02466 
02467 /*
02468  *  call-seq:
02469  *     str.to_i(base=10)   => integer
02470  *  
02471  *  Returns the result of interpreting leading characters in <i>str</i> as an
02472  *  integer base <i>base</i> (2, 8, 10, or 16). Extraneous characters past the
02473  *  end of a valid number are ignored. If there is not a valid number at the
02474  *  start of <i>str</i>, <code>0</code> is returned. This method never raises an
02475  *  exception.
02476  *     
02477  *     "12345".to_i             #=> 12345
02478  *     "99 red balloons".to_i   #=> 99
02479  *     "0a".to_i                #=> 0
02480  *     "0a".to_i(16)            #=> 10
02481  *     "hello".to_i             #=> 0
02482  *     "1100101".to_i(2)        #=> 101
02483  *     "1100101".to_i(8)        #=> 294977
02484  *     "1100101".to_i(10)       #=> 1100101
02485  *     "1100101".to_i(16)       #=> 17826049
02486  */
02487 
02488 static VALUE
02489 rb_str_to_i(argc, argv, str)
02490     int argc;
02491     VALUE *argv;
02492     VALUE str;
02493 {
02494     VALUE b;
02495     int base;
02496 
02497     rb_scan_args(argc, argv, "01", &b);
02498     if (argc == 0) base = 10;
02499     else base = NUM2INT(b);
02500 
02501     if (base < 0) {
02502         rb_raise(rb_eArgError, "illegal radix %d", base);
02503     }
02504     return rb_str_to_inum(str, base, Qfalse);
02505 }
02506 
02507 
02508 /*
02509  *  call-seq:
02510  *     str.to_f   => float
02511  *  
02512  *  Returns the result of interpreting leading characters in <i>str</i> as a
02513  *  floating point number. Extraneous characters past the end of a valid number
02514  *  are ignored. If there is not a valid number at the start of <i>str</i>,
02515  *  <code>0.0</code> is returned. This method never raises an exception.
02516  *     
02517  *     "123.45e1".to_f        #=> 1234.5
02518  *     "45.67 degrees".to_f   #=> 45.67
02519  *     "thx1138".to_f         #=> 0.0
02520  */
02521 
02522 static VALUE
02523 rb_str_to_f(str)
02524     VALUE str;
02525 {
02526     return rb_float_new(rb_str_to_dbl(str, Qfalse));
02527 }
02528 
02529 
02530 /*
02531  *  call-seq:
02532  *     str.to_s     => str
02533  *     str.to_str   => str
02534  *  
02535  *  Returns the receiver.
02536  */
02537 
02538 static VALUE
02539 rb_str_to_s(str)
02540     VALUE str;
02541 {
02542     if (rb_obj_class(str) != rb_cString) {
02543         VALUE dup = str_alloc(rb_cString);
02544         rb_str_replace(dup, str);
02545         return dup;
02546     }
02547     return str;
02548 }
02549 
02550 /*
02551  * call-seq:
02552  *   str.inspect   => string
02553  *
02554  * Returns a printable version of _str_, with special characters
02555  * escaped.
02556  *
02557  *    str = "hello"
02558  *    str[3] = 8
02559  *    str.inspect       #=> "hel\010o"
02560  */
02561 
02562 VALUE
02563 rb_str_inspect(str)
02564     VALUE str;
02565 {
02566     char *p, *pend;
02567     VALUE result = rb_str_buf_new2("\"");
02568     char s[5];
02569 
02570     p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
02571     while (p < pend) {
02572         char c = *p++;
02573         if (ismbchar(c) && p < pend) {
02574             int len = mbclen(c);
02575             rb_str_buf_cat(result, p - 1, len);
02576             p += len - 1;
02577         }
02578         else if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
02579             s[0] = '\\'; s[1] = c;
02580             rb_str_buf_cat(result, s, 2);
02581         }
02582         else if (ISPRINT(c)) {
02583             s[0] = c;
02584             rb_str_buf_cat(result, s, 1);
02585         }
02586         else if (c == '\n') {
02587             s[0] = '\\'; s[1] = 'n';
02588             rb_str_buf_cat(result, s, 2);
02589         }
02590         else if (c == '\r') {
02591             s[0] = '\\'; s[1] = 'r';
02592             rb_str_buf_cat(result, s, 2);
02593         }
02594         else if (c == '\t') {
02595             s[0] = '\\'; s[1] = 't';
02596             rb_str_buf_cat(result, s, 2);
02597         }
02598         else if (c == '\f') {
02599             s[0] = '\\'; s[1] = 'f';
02600             rb_str_buf_cat(result, s, 2);
02601         }
02602         else if (c == '\013') {
02603             s[0] = '\\'; s[1] = 'v';
02604             rb_str_buf_cat(result, s, 2);
02605         }
02606         else if (c == '\007') {
02607             s[0] = '\\'; s[1] = 'a';
02608             rb_str_buf_cat(result, s, 2);
02609         }
02610         else if (c == 033) {
02611             s[0] = '\\'; s[1] = 'e';
02612             rb_str_buf_cat(result, s, 2);
02613         }
02614         else {
02615             sprintf(s, "\\%03o", c & 0377);
02616             rb_str_buf_cat2(result, s);
02617         }
02618     }
02619     rb_str_buf_cat2(result, "\"");
02620 
02621     OBJ_INFECT(result, str);
02622     return result;
02623 }
02624 
02625 
02626 /*
02627  *  call-seq:
02628  *     str.dump   => new_str
02629  *  
02630  *  Produces a version of <i>str</i> with all nonprinting characters replaced by
02631  *  <code>\nnn</code> notation and all special characters escaped.
02632  */
02633 
02634 VALUE
02635 rb_str_dump(str)
02636     VALUE str;
02637 {
02638     long len;
02639     char *p, *pend;
02640     char *q, *qend;
02641     VALUE result;
02642 
02643     len = 2;                    /* "" */
02644     p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
02645     while (p < pend) {
02646         char c = *p++;
02647         switch (c) {
02648           case '"':  case '\\':
02649           case '\n': case '\r':
02650           case '\t': case '\f':
02651           case '\013': case '\007': case '\033':
02652             len += 2;
02653             break;
02654 
02655           case '#':
02656             len += IS_EVSTR(p, pend) ? 2 : 1;
02657             break;
02658 
02659           default:
02660             if (ISPRINT(c)) {
02661                 len++;
02662             }
02663             else {
02664                 len += 4;               /* \nnn */
02665             }
02666             break;
02667         }
02668     }
02669 
02670     result = rb_str_new5(str, 0, len);
02671     p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
02672     q = RSTRING(result)->ptr; qend = q + len;
02673 
02674     *q++ = '"';
02675     while (p < pend) {
02676         char c = *p++;
02677 
02678         if (c == '"' || c == '\\') {
02679             *q++ = '\\';
02680             *q++ = c;
02681         }
02682         else if (c == '#') {
02683             if (IS_EVSTR(p, pend)) *q++ = '\\';
02684             *q++ = '#';
02685         }
02686         else if (ISPRINT(c)) {
02687             *q++ = c;
02688         }
02689         else if (c == '\n') {
02690             *q++ = '\\';
02691             *q++ = 'n';
02692         }
02693         else if (c == '\r') {
02694             *q++ = '\\';
02695             *q++ = 'r';
02696         }
02697         else if (c == '\t') {
02698             *q++ = '\\';
02699             *q++ = 't';
02700         }
02701         else if (c == '\f') {
02702             *q++ = '\\';
02703             *q++ = 'f';
02704         }
02705         else if (c == '\013') {
02706             *q++ = '\\';
02707             *q++ = 'v';
02708         }
02709         else if (c == '\007') {
02710             *q++ = '\\';
02711             *q++ = 'a';
02712         }
02713         else if (c == '\033') {
02714             *q++ = '\\';
02715             *q++ = 'e';
02716         }
02717         else {
02718             *q++ = '\\';
02719             sprintf(q, "%03o", c&0xff);
02720             q += 3;
02721         }
02722     }
02723     *q++ = '"';
02724 
02725     OBJ_INFECT(result, str);
02726     return result;
02727 }
02728 
02729 
02730 /*
02731  *  call-seq:
02732  *     str.upcase!   => str or nil
02733  *  
02734  *  Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
02735  *  were made.
02736  */
02737 
02738 static VALUE
02739 rb_str_upcase_bang(str)
02740     VALUE str;
02741 {
02742     char *s, *send;
02743     int modify = 0;
02744 
02745     rb_str_modify(str);
02746     s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
02747     while (s < send) {
02748         if (ismbchar(*s)) {
02749             s+=mbclen(*s) - 1;
02750         }
02751         else if (ISLOWER(*s)) {
02752             *s = toupper(*s);
02753             modify = 1;
02754         }
02755         s++;
02756     }
02757 
02758     if (modify) return str;
02759     return Qnil;
02760 }
02761 
02762 
02763 /*
02764  *  call-seq:
02765  *     str.upcase   => new_str
02766  *  
02767  *  Returns a copy of <i>str</i> with all lowercase letters replaced with their
02768  *  uppercase counterparts. The operation is locale insensitive---only
02769  *  characters ``a'' to ``z'' are affected.
02770  *     
02771  *     "hEllO".upcase   #=> "HELLO"
02772  */
02773 
02774 static VALUE
02775 rb_str_upcase(str)
02776     VALUE str;
02777 {
02778     str = rb_str_dup(str);
02779     rb_str_upcase_bang(str);
02780     return str;
02781 }
02782 
02783 
02784 /*
02785  *  call-seq:
02786  *     str.downcase!   => str or nil
02787  *  
02788  *  Downcases the contents of <i>str</i>, returning <code>nil</code> if no
02789  *  changes were made.
02790  */
02791 
02792 static VALUE
02793 rb_str_downcase_bang(str)
02794     VALUE str;
02795 {
02796     char *s, *send;
02797     int modify = 0;
02798 
02799     rb_str_modify(str);
02800     s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
02801     while (s < send) {
02802         if (ismbchar(*s)) {
02803             s+=mbclen(*s) - 1;
02804         }
02805         else if (ISUPPER(*s)) {
02806             *s = tolower(*s);
02807             modify = 1;
02808         }
02809         s++;
02810     }
02811 
02812     if (modify) return str;
02813     return Qnil;
02814 }
02815 
02816 
02817 /*
02818  *  call-seq:
02819  *     str.downcase   => new_str
02820  *  
02821  *  Returns a copy of <i>str</i> with all uppercase letters replaced with their
02822  *  lowercase counterparts. The operation is locale insensitive---only
02823  *  characters ``A'' to ``Z'' are affected.
02824  *     
02825  *     "hEllO".downcase   #=> "hello"
02826  */
02827 
02828 static VALUE
02829 rb_str_downcase(str)
02830     VALUE str;
02831 {
02832     str = rb_str_dup(str);
02833     rb_str_downcase_bang(str);
02834     return str;
02835 }
02836 
02837 
02838 /*
02839  *  call-seq:
02840  *     str.capitalize!   => str or nil
02841  *  
02842  *  Modifies <i>str</i> by converting the first character to uppercase and the
02843  *  remainder to lowercase. Returns <code>nil</code> if no changes are made.
02844  *     
02845  *     a = "hello"
02846  *     a.capitalize!   #=> "Hello"
02847  *     a               #=> "Hello"
02848  *     a.capitalize!   #=> nil
02849  */
02850 
02851 static VALUE
02852 rb_str_capitalize_bang(str)
02853     VALUE str;
02854 {
02855     char *s, *send;
02856     int modify = 0;
02857 
02858     rb_str_modify(str);
02859     if (RSTRING(str)->len == 0 || !RSTRING(str)->ptr) return Qnil;
02860     s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
02861     if (ISLOWER(*s)) {
02862         *s = toupper(*s);
02863         modify = 1;
02864     }
02865     while (++s < send) {
02866         if (ismbchar(*s)) {
02867             s+=mbclen(*s) - 1;
02868         }
02869         else if (ISUPPER(*s)) {
02870             *s = tolower(*s);
02871             modify = 1;
02872         }
02873     }
02874     if (modify) return str;
02875     return Qnil;
02876 }
02877 
02878 
02879 /*
02880  *  call-seq:
02881  *     str.capitalize   => new_str
02882  *  
02883  *  Returns a copy of <i>str</i> with the first character converted to uppercase
02884  *  and the remainder to lowercase.
02885  *     
02886  *     "hello".capitalize    #=> "Hello"
02887  *     "HELLO".capitalize    #=> "Hello"
02888  *     "123ABC".capitalize   #=> "123abc"
02889  */
02890 
02891 static VALUE
02892 rb_str_capitalize(str)
02893     VALUE str;
02894 {
02895     str = rb_str_dup(str);
02896     rb_str_capitalize_bang(str);
02897     return str;
02898 }
02899 
02900 
02901 /*
02902  *  call-seq:
02903  *     str.swapcase!   => str or nil
02904  *  
02905  *  Equivalent to <code>String#swapcase</code>, but modifies the receiver in
02906  *  place, returning <i>str</i>, or <code>nil</code> if no changes were made.
02907  */
02908 
02909 static VALUE
02910 rb_str_swapcase_bang(str)
02911     VALUE str;
02912 {
02913     char *s, *send;
02914     int modify = 0;
02915 
02916     rb_str_modify(str);
02917     s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
02918     while (s < send) {
02919         if (ismbchar(*s)) {
02920             s+=mbclen(*s) - 1;
02921         }
02922         else if (ISUPPER(*s)) {
02923             *s = tolower(*s);
02924             modify = 1;
02925         }
02926         else if (ISLOWER(*s)) {
02927             *s = toupper(*s);
02928             modify = 1;
02929         }
02930         s++;
02931     }
02932 
02933     if (modify) return str;
02934     return Qnil;
02935 }
02936 
02937 
02938 /*
02939  *  call-seq:
02940  *     str.swapcase   => new_str
02941  *  
02942  *  Returns a copy of <i>str</i> with uppercase alphabetic characters converted
02943  *  to lowercase and lowercase characters converted to uppercase.
02944  *     
02945  *     "Hello".swapcase          #=> "hELLO"
02946  *     "cYbEr_PuNk11".swapcase   #=> "CyBeR_pUnK11"
02947  */
02948 
02949 static VALUE
02950 rb_str_swapcase(str)
02951     VALUE str;
02952 {
02953     str = rb_str_dup(str);
02954     rb_str_swapcase_bang(str);
02955     return str;
02956 }
02957 
typedef unsigned char *USTR;

/*
 * Cursor over a tr-style character specification.
 *   p, pend  - read position and end of the specification
 *   gen      - non-zero while a c1-c2 range is being expanded
 *   now, max - last value produced and the upper bound of the open range
 */
struct tr {
    int gen, now, max;
    char *p, *pend;
};

/*
 * Produce the next byte value described by the specification, or -1 once
 * it is exhausted.  A backslash that is not the last byte makes the
 * following byte literal; "c1-c2" yields every value from c1 to c2, and a
 * range whose start is greater than its end is skipped altogether.
 */
static int
trnext(t)
    struct tr *t;
{
    while (1) {
        if (t->gen) {
            /* in the middle of a range: step, finishing on the bound */
            t->now++;
            if (t->now < t->max) return t->now;
            t->gen = 0;
            return t->max;
        }

        if (t->p == t->pend) return -1;

        /* a backslash followed by something escapes that something */
        if (t->p < t->pend - 1 && *t->p == '\\') {
            t->p++;
        }
        t->now = *(USTR)t->p++;

        /* a '-' only opens a range when another byte follows it */
        if (!(t->p < t->pend - 1 && *t->p == '-')) {
            return t->now;
        }
        t->p++;
        if (!(t->p < t->pend)) {
            return t->now;
        }
        if (t->now > *(USTR)t->p) {
            /* descending range: drop it and read on */
            t->p++;
            continue;
        }
        t->gen = 1;
        t->max = *(USTR)t->p++;
        return t->now;
    }
}
02998 
02999 static VALUE rb_str_delete_bang (int,VALUE*,VALUE);
03000 
03001 static VALUE
03002 tr_trans(str, src, repl, sflag)
03003     VALUE str, src, repl;
03004     int sflag;
03005 {
03006     struct tr trsrc, trrepl;
03007     int cflag = 0;
03008     int trans[256];
03009     int i, c, modify = 0;
03010     char *s, *send;
03011 
03012     StringValue(src);
03013     StringValue(repl);
03014     if (RSTRING(str)->len == 0 || !RSTRING(str)->ptr) return Qnil;
03015     trsrc.p = RSTRING(src)->ptr; trsrc.pend = trsrc.p + RSTRING(src)->len;
03016     if (RSTRING(src)->len >= 2 && RSTRING(src)->ptr[0] == '^') {
03017         cflag++;
03018         trsrc.p++;
03019     }
03020     if (RSTRING(repl)->len == 0) {
03021         return rb_str_delete_bang(1, &src, str);
03022     }
03023     trrepl.p = RSTRING(repl)->ptr;
03024     trrepl.pend = trrepl.p + RSTRING(repl)->len;
03025     trsrc.gen = trrepl.gen = 0;
03026     trsrc.now = trrepl.now = 0;
03027     trsrc.max = trrepl.max = 0;
03028 
03029     if (cflag) {
03030         for (i=0; i<256; i++) {
03031             trans[i] = 1;
03032         }
03033         while ((c = trnext(&trsrc)) >= 0) {
03034             trans[c & 0xff] = -1;
03035         }
03036         while ((c = trnext(&trrepl)) >= 0)
03037             /* retrieve last replacer */;
03038         for (i=0; i<256; i++) {
03039             if (trans[i] >= 0) {
03040                 trans[i] = trrepl.now;
03041             }
03042         }
03043     }
03044     else {
03045         int r;
03046 
03047         for (i=0; i<256; i++) {
03048             trans[i] = -1;
03049         }
03050         while ((c = trnext(&trsrc)) >= 0) {
03051             r = trnext(&trrepl);
03052             if (r == -1) r = trrepl.now;
03053             trans[c & 0xff] = r;
03054         }
03055     }
03056 
03057     rb_str_modify(str);
03058     s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
03059     if (sflag) {
03060         char *t = s;
03061         int c0, last = -1;
03062 
03063         while (s < send) {
03064             c0 = *s++;
03065             if ((c = trans[c0 & 0xff]) >= 0) {
03066                 if (last == c) continue;
03067                 last = c;
03068                 *t++ = c & 0xff;
03069                 modify = 1;
03070             }
03071             else {
03072                 last = -1;
03073                 *t++ = c0;
03074             }
03075         }
03076         if (RSTRING(str)->len > (t - RSTRING(str)->ptr)) {
03077             RSTRING(str)->len = (t - RSTRING(str)->ptr);
03078             modify = 1;
03079             *t = '\0';
03080         }
03081     }
03082     else {
03083         while (s < send) {
03084             if ((c = trans[*s & 0xff]) >= 0) {
03085                 *s = c & 0xff;
03086                 modify = 1;
03087             }
03088             s++;
03089         }
03090     }
03091 
03092     if (modify) return str;
03093     return Qnil;
03094 }
03095 
03096 
03097 /*
03098  *  call-seq:
03099  *     str.tr!(from_str, to_str)   => str or nil
03100  *  
03101  *  Translates <i>str</i> in place, using the same rules as
03102  *  <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
03103  *  changes were made.
03104  */
03105 
03106 static VALUE
03107 rb_str_tr_bang(str, src, repl)
03108     VALUE str, src, repl;
03109 {
03110     return tr_trans(str, src, repl, 0);
03111 }
03112 
03113 
03114 /*
03115  *  call-seq:
03116  *     str.tr(from_str, to_str)   => new_str
03117  *  
03118  *  Returns a copy of <i>str</i> with the characters in <i>from_str</i> replaced
03119  *  by the corresponding characters in <i>to_str</i>. If <i>to_str</i> is
03120  *  shorter than <i>from_str</i>, it is padded with its last character. Both
03121  *  strings may use the c1--c2 notation to denote ranges of characters, and
03122  *  <i>from_str</i> may start with a <code>^</code>, which denotes all
03123  *  characters except those listed.
03124  *     
03125  *     "hello".tr('aeiou', '*')    #=> "h*ll*"
03126  *     "hello".tr('^aeiou', '*')   #=> "*e**o"
03127  *     "hello".tr('el', 'ip')      #=> "hippo"
03128  *     "hello".tr('a-y', 'b-z')    #=> "ifmmp"
03129  */
03130 
03131 static VALUE
03132 rb_str_tr(str, src, repl)
03133     VALUE str, src, repl;
03134 {
03135     str = rb_str_dup(str);
03136     tr_trans(str, src, repl, 0);
03137     return str;
03138 }
03139 
03140 static void
03141 tr_setup_table(str, table, init)
03142     VALUE str;
03143     char table[256];
03144     int init;
03145 {
03146     char buf[256];
03147     struct tr tr;
03148     int i, c;
03149     int cflag = 0;
03150 
03151     tr.p = RSTRING(str)->ptr; tr.pend = tr.p + RSTRING(str)->len;
03152     tr.gen = tr.now = tr.max = 0;
03153     if (RSTRING(str)->len > 1 && RSTRING(str)->ptr[0] == '^') {
03154         cflag = 1;
03155         tr.p++;
03156     }
03157 
03158     if (init) {
03159         for (i=0; i<256; i++) {
03160             table[i] = 1;
03161         }
03162     }
03163     for (i=0; i<256; i++) {
03164         buf[i] = cflag;
03165     }
03166     while ((c = trnext(&tr)) >= 0) {
03167         buf[c & 0xff] = !cflag;
03168     }
03169     for (i=0; i<256; i++) {
03170         table[i] = table[i] && buf[i];
03171     }
03172 }
03173 
03174 
03175 /*
03176  *  call-seq:
03177  *     str.delete!([other_str]+)   => str or nil
03178  *  
03179  *  Performs a <code>delete</code> operation in place, returning <i>str</i>, or
03180  *  <code>nil</code> if <i>str</i> was not modified.
03181  */
03182 
03183 static VALUE
03184 rb_str_delete_bang(argc, argv, str)
03185     int argc;
03186     VALUE *argv;
03187     VALUE str;
03188 {
03189     char *s, *send, *t;
03190     char squeez[256];
03191     int modify = 0;
03192     int init = 1;
03193     int i;
03194 
03195     if (argc < 1) {
03196         rb_raise(rb_eArgError, "wrong number of arguments");
03197     }
03198     for (i=0; i<argc; i++) {
03199         VALUE s = argv[i];
03200 
03201         StringValue(s);
03202         tr_setup_table(s, squeez, init);
03203         init = 0;
03204     }
03205 
03206     rb_str_modify(str);
03207     s = t = RSTRING(str)->ptr;
03208     if (!s || RSTRING(str)->len == 0) return Qnil;
03209     send = s + RSTRING(str)->len;
03210     while (s < send) {
03211         if (squeez[*s & 0xff])
03212             modify = 1;
03213         else
03214             *t++ = *s;
03215         s++;
03216     }
03217     *t = '\0';
03218     RSTRING(str)->len = t - RSTRING(str)->ptr;
03219 
03220     if (modify) return str;
03221     return Qnil;
03222 }
03223 
03224 
03225 /*
03226  *  call-seq:
03227  *     str.delete([other_str]+)   => new_str
03228  *  
03229  *  Returns a copy of <i>str</i> with all characters in the intersection of its
03230  *  arguments deleted. Uses the same rules for building the set of characters as
03231  *  <code>String#count</code>.
03232  *     
03233  *     "hello".delete "l","lo"        #=> "heo"
03234  *     "hello".delete "lo"            #=> "he"
03235  *     "hello".delete "aeiou", "^e"   #=> "hell"
03236  *     "hello".delete "ej-m"          #=> "ho"
03237  */
03238 
03239 static VALUE
03240 rb_str_delete(argc, argv, str)
03241     int argc;
03242     VALUE *argv;
03243     VALUE str;
03244 {
03245     str = rb_str_dup(str);
03246     rb_str_delete_bang(argc, argv, str);
03247     return str;
03248 }
03249 
03250 
03251 /*
03252  *  call-seq:
03253  *     str.squeeze!([other_str]*)   => str or nil
03254  *  
03255  *  Squeezes <i>str</i> in place, returning either <i>str</i>, or
03256  *  <code>nil</code> if no changes were made.
03257  */
03258 
03259 static VALUE
03260 rb_str_squeeze_bang(argc, argv, str)
03261     int argc;
03262     VALUE *argv;
03263     VALUE str;
03264 {
03265     char squeez[256];
03266     char *s, *send, *t;
03267     int c, save, modify = 0;
03268     int init = 1;
03269     int i;
03270 
03271     if (argc == 0) {
03272         for (i=0; i<256; i++) {
03273             squeez[i] = 1;
03274         }
03275     }
03276     else {
03277         for (i=0; i<argc; i++) {
03278             VALUE s = argv[i];
03279 
03280             StringValue(s);
03281             tr_setup_table(s, squeez, init);
03282             init = 0;
03283         }
03284     }
03285 
03286     rb_str_modify(str);
03287     s = t = RSTRING(str)->ptr;
03288     if (!s || RSTRING(str)->len == 0) return Qnil;
03289     send = s + RSTRING(str)->len;
03290     save = -1;
03291     while (s < send) {
03292         c = *s++ & 0xff;
03293         if (c != save || !squeez[c]) {
03294             *t++ = save = c;
03295         }
03296     }
03297     *t = '\0';
03298     if (t - RSTRING(str)->ptr != RSTRING(str)->len) {
03299         RSTRING(str)->len = t - RSTRING(str)->ptr;
03300         modify = 1;
03301     }
03302 
03303     if (modify) return str;
03304     return Qnil;
03305 }
03306 
03307 
03308 /*
03309  *  call-seq:
03310  *     str.squeeze([other_str]*)    => new_str
03311  *  
03312  *  Builds a set of characters from the <i>other_str</i> parameter(s) using the
03313  *  procedure described for <code>String#count</code>. Returns a new string
03314  *  where runs of the same character that occur in this set are replaced by a
03315  *  single character. If no arguments are given, all runs of identical
03316  *  characters are replaced by a single character.
03317  *     
03318  *     "yellow moon".squeeze                  #=> "yelow mon"
03319  *     "  now   is  the".squeeze(" ")         #=> " now is the"
03320  *     "putters shoot balls".squeeze("m-z")   #=> "puters shot balls"
03321  */
03322 
03323 static VALUE
03324 rb_str_squeeze(argc, argv, str)
03325     int argc;
03326     VALUE *argv;
03327     VALUE str;
03328 {
03329     str = rb_str_dup(str);
03330     rb_str_squeeze_bang(argc, argv, str);
03331     return str;
03332 }
03333 
03334 
03335 /*
03336  *  call-seq:
03337  *     str.tr_s!(from_str, to_str)   => str or nil
03338  *  
03339  *  Performs <code>String#tr_s</code> processing on <i>str</i> in place,
03340  *  returning <i>str</i>, or <code>nil</code> if no changes were made.
03341  */
03342 
03343 static VALUE
03344 rb_str_tr_s_bang(str, src, repl)
03345     VALUE str, src, repl;
03346 {
03347     return tr_trans(str, src, repl, 1);
03348 }
03349 
03350 
03351 /*
03352  *  call-seq:
03353  *     str.tr_s(from_str, to_str)   => new_str
03354  *  
03355  *  Processes a copy of <i>str</i> as described under <code>String#tr</code>,
03356  *  then removes duplicate characters in regions that were affected by the
03357  *  translation.
03358  *     
03359  *     "hello".tr_s('l', 'r')     #=> "hero"
03360  *     "hello".tr_s('el', '*')    #=> "h*o"
03361  *     "hello".tr_s('el', 'hx')   #=> "hhxo"
03362  */
03363 
03364 static VALUE
03365 rb_str_tr_s(str, src, repl)
03366     VALUE str, src, repl;
03367 {
03368     str = rb_str_dup(str);
03369     tr_trans(str, src, repl, 1);
03370     return str;
03371 }
03372 
03373 
03374 /*
03375  *  call-seq:
03376  *     str.count([other_str]+)   => fixnum
03377  *  
03378  *  Each <i>other_str</i> parameter defines a set of characters to count.  The
03379  *  intersection of these sets defines the characters to count in
03380  *  <i>str</i>. Any <i>other_str</i> that starts with a caret (^) is
03381  *  negated. The sequence c1--c2 means all characters between c1 and c2.
03382  *     
03383  *     a = "hello world"
03384  *     a.count "lo"            #=> 5
03385  *     a.count "lo", "o"       #=> 2
03386  *     a.count "hello", "^l"   #=> 4
03387  *     a.count "ej-m"          #=> 4
03388  */
03389 
03390 static VALUE
03391 rb_str_count(argc, argv, str)
03392     int argc;
03393     VALUE *argv;
03394     VALUE str;
03395 {
03396     char table[256];
03397     char *s, *send;
03398     int init = 1;
03399     int i;
03400 
03401     if (argc < 1) {
03402         rb_raise(rb_eArgError, "wrong number of arguments");
03403     }
03404     for (i=0; i<argc; i++) {
03405         VALUE s = argv[i];
03406 
03407         StringValue(s);
03408         tr_setup_table(s, table, init);
03409         init = 0;
03410     }
03411 
03412     s = RSTRING(str)->ptr;
03413     if (!s || RSTRING(str)->len == 0) return INT2FIX(0);
03414     send = s + RSTRING(str)->len;
03415     i = 0;
03416     while (s < send) {
03417         if (table[*s++ & 0xff]) {
03418             i++;
03419         }
03420     }
03421     return INT2NUM(i);
03422 }
03423 
03424 
03425 /*
03426  *  call-seq:
03427  *     str.split(pattern=$;, [limit])   => anArray
03428  *  
03429  *  Divides <i>str</i> into substrings based on a delimiter, returning an array
03430  *  of these substrings.
03431  *     
03432  *  If <i>pattern</i> is a <code>String</code>, then its contents are used as
03433  *  the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
03434  *  space, <i>str</i> is split on whitespace, with leading whitespace and runs
03435  *  of contiguous whitespace characters ignored.
03436  *     
03437  *  If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
03438  *  pattern matches. Whenever the pattern matches a zero-length string,
03439  *  <i>str</i> is split into individual characters.
03440  *     
03441  *  If <i>pattern</i> is omitted, the value of <code>$;</code> is used.  If
03442  *  <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
03443  *  split on whitespace as if ` ' were specified.
03444  *     
03445  *  If the <i>limit</i> parameter is omitted, trailing null fields are
03446  *  suppressed. If <i>limit</i> is a positive number, at most that number of
03447  *  fields will be returned (if <i>limit</i> is <code>1</code>, the entire
03448  *  string is returned as the only entry in an array). If negative, there is no
03449  *  limit to the number of fields returned, and trailing null fields are not
03450  *  suppressed.
03451  *     
03452  *     " now's  the time".split        #=> ["now's", "the", "time"]
03453  *     " now's  the time".split(' ')   #=> ["now's", "the", "time"]
03454  *     " now's  the time".split(/ /)   #=> ["", "now's", "", "the", "time"]
03455  *     "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
03456  *     "hello".split(//)               #=> ["h", "e", "l", "l", "o"]
03457  *     "hello".split(//, 3)            #=> ["h", "e", "llo"]
03458  *     "hi mom".split(%r{\s*})         #=> ["h", "i", "m", "o", "m"]
03459  *     
03460  *     "mellow yellow".split("ello")   #=> ["m", "w y", "w"]
03461  *     "1,2,,3,4,,".split(',')         #=> ["1", "2", "", "3", "4"]
03462  *     "1,2,,3,4,,".split(',', 4)      #=> ["1", "2", "", "3,4,,"]
03463  *     "1,2,,3,4,,".split(',', -4)     #=> ["1", "2", "", "3", "4", "", ""]
03464  */
03465 
03466 static VALUE
03467 rb_str_split_m(argc, argv, str)
03468     int argc;
03469     VALUE *argv;
03470     VALUE str;
03471 {
03472     VALUE spat;
03473     VALUE limit;
03474     int awk_split = Qfalse;
03475     long beg, end, i = 0;
03476     int lim = 0;
03477     VALUE result, tmp;
03478 
          /*
           * Limit handling (only when both optional arguments were given):
           *   - lim <= 0: `limit' is reset to nil, while `lim' keeps its value
           *     so the checks at the end can still tell 0 from negative;
           *   - lim == 1: the whole string is the single field (an empty
           *     string gives an empty array);
           *   - in every other two-argument case `i', the field counter,
           *     starts at 1.
           */
03479     if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
03480         lim = NUM2INT(limit);
03481         if (lim <= 0) limit = Qnil;
03482         else if (lim == 1) {
03483             if (RSTRING(str)->len == 0)
03484                 return rb_ary_new2(0);
03485             return rb_ary_new3(1, str);
03486         }
03487         i = 1;
03488     }
03489 
          /*
           * Pick the splitting mode.  A nil pattern falls back to rb_fs when
           * that is set (jumping into the else branch below), otherwise
           * awk-style whitespace splitting is used.  A one-byte string selects
           * awk-style splitting when it is a space and is otherwise quoted and
           * compiled as a regexp; every other pattern goes through get_pat().
           */
03490     if (NIL_P(spat)) {
03491         if (!NIL_P(rb_fs)) {
03492             spat = rb_fs;
03493             goto fs_set;
03494         }
03495         awk_split = Qtrue;
03496     }
03497     else {
03498       fs_set:
03499         if (TYPE(spat) == T_STRING && RSTRING(spat)->len == 1) {
03500             if (RSTRING(spat)->ptr[0] == ' ') {
03501                 awk_split = Qtrue;
03502             }
03503             else {
03504                 spat = rb_reg_regcomp(rb_reg_quote(spat));
03505             }
03506         }
03507         else {
03508             spat = get_pat(spat, 1);
03509         }
03510     }
03511 
03512     result = rb_ary_new();
03513     beg = 0;
03514     if (awk_split) {
03515         char *ptr = RSTRING(str)->ptr;
03516         long len = RSTRING(str)->len;
03517         char *eptr = ptr + len;
03518         int skip = 1;
03519 
              /*
               * Two-state scan: `skip' is 1 while passing over whitespace in
               * front of a field and 0 while inside one; [beg, end) is the
               * field being collected.
               */
03520         for (end = beg = 0; ptr<eptr; ptr++) {
03521             if (skip) {
03522                 if (ISSPACE(*ptr)) {
03523                     beg++;
03524                 }
03525                 else {
03526                     end = beg+1;
03527                     skip = 0;
                          /* limit reached: stop here; the rest of the string
                             is pushed from `beg' after the loop */
03528                     if (!NIL_P(limit) && lim <= i) break;
03529                 }
03530             }
03531             else {
03532                 if (ISSPACE(*ptr)) {
03533                     rb_ary_push(result, rb_str_substr(str, beg, end-beg));
03534                     skip = 1;
03535                     beg = end + 1;
03536                     if (!NIL_P(limit)) ++i;
03537                 }
03538                 else {
03539                     end++;
03540                 }
03541             }
03542         }
03543     }
03544     else {
03545         long start = beg;
03546         long idx;
03547         int last_null = 0;
03548         struct re_registers *regs;
03549 
              /*
               * `start' is where the next search begins, `beg' where the
               * pending field begins.  BEG()/END() read the registers of the
               * most recent match through `regs'.
               */
03550         while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
03551             regs = RMATCH(rb_backref_get())->regs;
                  /*
                   * Empty match exactly at the search position.  The first
                   * time, advance `start' by mbclen2() of the byte there and
                   * search again (last_null = 1).  If that retry is again an
                   * empty match at `start', emit the character at `beg' as a
                   * field of its own.
                   */
03552             if (start == end && BEG(0) == END(0)) {
03553                 if (!RSTRING(str)->ptr) {
03554                     rb_ary_push(result, rb_str_new("", 0));
03555                     break;
03556                 }
03557                 else if (last_null == 1) {
03558                     rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING(str)->ptr[beg],spat)));
03559                     beg = start;
03560                 }
03561                 else {
03562                     start += mbclen2(RSTRING(str)->ptr[start],spat);
03563                     last_null = 1;
03564                     continue;
03565                 }
03566             }
03567             else {
                      /* ordinary match: the field is everything from `beg' up
                         to the match; continue after the match */
03568                 rb_ary_push(result, rb_str_substr(str, beg, end-beg));
03569                 beg = start = END(0);
03570             }
03571             last_null = 0;
03572 
                  /* registers 1..n are appended as extra fields; a register
                     whose start is -1 is skipped */
03573             for (idx=1; idx < regs->num_regs; idx++) {
03574                 if (BEG(idx) == -1) continue;
03575                 if (BEG(idx) == END(idx))
03576                     tmp = rb_str_new5(str, 0, 0);
03577                 else
03578                     tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx));
03579                 rb_ary_push(result, tmp);
03580             }
03581             if (!NIL_P(limit) && lim <= ++i) break;
03582         }
03583     }
          /*
           * Push whatever follows the last separator.  An empty remainder
           * (beg at the end of the string) is pushed only when a positive
           * limit is in force or `lim' is negative.
           */
03584     if (RSTRING(str)->len > 0 && (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0)) {
03585         if (RSTRING(str)->len == beg)
03586             tmp = rb_str_new5(str, 0, 0);
03587         else
03588             tmp = rb_str_substr(str, beg, RSTRING(str)->len-beg);
03589         rb_ary_push(result, tmp);
03590     }
          /* `limit' nil together with lim == 0 (no limit given, or a limit of
             zero): strip trailing empty fields */
03591     if (NIL_P(limit) && lim == 0) {
03592         while (RARRAY(result)->len > 0 &&
03593                RSTRING(RARRAY(result)->ptr[RARRAY(result)->len-1])->len == 0)
03594             rb_ary_pop(result);
03595     }
03596 
03597     return result;
03598 }
03599 
03600 VALUE
03601 rb_str_split(str, sep0)
03602     VALUE str;
03603     const char *sep0;
03604 {
03605     VALUE sep;
03606 
03607     StringValue(str);
03608     sep = rb_str_new2(sep0);
03609     return rb_str_split_m(1, &sep, str);
03610 }
03611 
03612 /*
03613  *  call-seq:
03614  *     split([pattern [, limit]])    => array
03615  *  
03616  *  Equivalent to <code>$_.split(<i>pattern</i>, <i>limit</i>)</code>.
03617  *  See <code>String#split</code>.
03618  */
03619 
03620 static VALUE
03621 rb_f_split(argc, argv)
03622     int argc;
03623     VALUE *argv;
03624 {
03625     return rb_str_split_m(argc, argv, uscore_get());
03626 }
03627 
03628 /*
03629  *  call-seq:
03630  *     str.each(separator=$/) {|substr| block }        => str
03631  *     str.each_line(separator=$/) {|substr| block }   => str
03632  *  
03633  *  Splits <i>str</i> using the supplied parameter as the record separator
03634  *  (<code>$/</code> by default), passing each substring in turn to the supplied
03635  *  block. If a zero-length record separator is supplied, the string is split on
03636  *  <code>\n</code> characters, except that multiple successive newlines are
03637  *  appended together.
03638  *     
03639  *     print "Example one\n"
03640  *     "hello\nworld".each {|s| p s}
03641  *     print "Example two\n"
03642  *     "hello\nworld".each('l') {|s| p s}
03643  *     print "Example three\n"
03644  *     "hello\n\n\nworld".each('') {|s| p s}
03645  *     
03646  *  <em>produces:</em>
03647  *     
03648  *     Example one
03649  *     "hello\n"
03650  *     "world"
03651  *     Example two
03652  *     "hel"
03653  *     "l"
03654  *     "o\nworl"
03655  *     "d"
03656  *     Example three
03657  *     "hello\n\n\n"
03658  *     "world"
03659  */
03660 
03661 static VALUE
03662 rb_str_each_line(argc, argv, str)
03663     int argc;
03664     VALUE *argv;
03665     VALUE str;
03666 {
03667     VALUE rs;
03668     int newline;
03669     char *p = RSTRING(str)->ptr, *pend = p + RSTRING(str)->len, *s;
          /* `ptr' and `len' snapshot the buffer and length so that
             str_mod_check() can detect a block that modified the string */
03670     char *ptr = p;
03671     long len = RSTRING(str)->len, rslen;
03672     VALUE line;
03673 
          /* no argument: use rb_rs as the record separator */
03674     if (rb_scan_args(argc, argv, "01", &rs) == 0) {
03675         rs = rb_rs;
03676     }
03677 
          /* nil separator: the whole string is yielded as one record */
03678     if (NIL_P(rs)) {
03679         rb_yield(str);
03680         return str;
03681     }
03682     StringValue(rs);
03683     rslen = RSTRING(rs)->len;
          /* `newline' is the byte that ends a record: '\n' for an empty
             separator, otherwise the separator's last byte */
03684     if (rslen == 0) {
03685         newline = '\n';
03686     }
03687     else {
03688         newline = RSTRING(rs)->ptr[rslen-1];
03689     }
03690 
          /*
           * `s' marks the start of the pending record and `p' is the candidate
           * end (one past the last byte of the record).  `p' starts rslen
           * bytes in.
           */
03691     for (s = p, p += rslen; p < pend; p++) {
              /* empty separator: a '\n' not followed by another '\n' does not
                 end a record; a run of two or more is consumed whole */
03692         if (rslen == 0 && *p == '\n') {
03693             if (*++p != '\n') continue;
03694             while (*p == '\n') p++;
03695         }
              /* cheap test on the byte before `p' first; separators longer
                 than one byte are then compared in full */
03696         if (RSTRING(str)->ptr < p && p[-1] == newline &&
03697             (rslen <= 1 ||
03698              rb_memcmp(RSTRING(rs)->ptr, p-rslen, rslen) == 0)) {
03699             line = rb_str_new5(str, s, p - s);
03700             OBJ_INFECT(line, str);
03701             rb_yield(line);
                  /* raises "string modified" if the block changed the buffer
                     pointer or the length */
03702             str_mod_check(str, ptr, len);
03703             s = p;
03704         }
03705     }
03706 
          /* yield what is left after the last separator; `p' may have run past
             the end of the string, so it is clamped to `pend' first */
03707     if (s != pend) {
03708         if (p > pend) p = pend;
03709         line = rb_str_new5(str, s, p - s);
03710         OBJ_INFECT(line, str);
03711         rb_yield(line);
03712     }
03713 
03714     return str;
03715 }
03716 
03717 
03718 /*
03719  *  call-seq:
03720  *     str.each_byte {|fixnum| block }    => str
03721  *  
03722  *  Passes each byte in <i>str</i> to the given block.
03723  *     
03724  *     "hello".each_byte {|c| print c, ' ' }
03725  *     
03726  *  <em>produces:</em>
03727  *     
03728  *     104 101 108 108 111
03729  */
03730 
03731 static VALUE
03732 rb_str_each_byte(str)
03733     VALUE str;
03734 {
03735     long i;
03736 
03737     for (i=0; i<RSTRING(str)->len; i++) {
03738         rb_yield(INT2FIX(RSTRING(str)->ptr[i] & 0xff));
03739     }
03740     return str;
03741 }
03742 
03743 
03744 /*
03745  *  call-seq:
03746  *     str.chop!   => str or nil
03747  *  
03748  *  Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
03749  *  or <code>nil</code> if <i>str</i> is the empty string.  See also
03750  *  <code>String#chomp!</code>.
03751  */
03752 
03753 static VALUE
03754 rb_str_chop_bang(str)
03755     VALUE str;
03756 {
03757     if (RSTRING(str)->len > 0) {
03758         rb_str_modify(str);
03759         RSTRING(str)->len--;
03760         if (RSTRING(str)->ptr[RSTRING(str)->len] == '\n') {
03761             if (RSTRING(str)->len > 0 &&
03762                 RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') {
03763                 RSTRING(str)->len--;
03764             }
03765         }
03766         RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
03767         return str;
03768     }
03769     return Qnil;
03770 }
03771 
03772 
03773 /*
03774  *  call-seq:
03775  *     str.chop   => new_str
03776  *  
03777  *  Returns a new <code>String</code> with the last character removed.  If the
03778  *  string ends with <code>\r\n</code>, both characters are removed. Applying
03779  *  <code>chop</code> to an empty string returns an empty
03780  *  string. <code>String#chomp</code> is often a safer alternative, as it leaves
03781  *  the string unchanged if it doesn't end in a record separator.
03782  *     
03783  *     "string\r\n".chop   #=> "string"
03784  *     "string\n\r".chop   #=> "string\n"
03785  *     "string\n".chop     #=> "string"
03786  *     "string".chop       #=> "strin"
03787  *     "x".chop.chop       #=> ""
03788  */
03789 
03790 static VALUE
03791 rb_str_chop(str)
03792     VALUE str;
03793 {
03794     str = rb_str_dup(str);
03795     rb_str_chop_bang(str);
03796     return str;
03797 }
03798 
03799 
03800 /*
03801  *  call-seq:
03802  *     chop!    => $_ or nil
03803  *  
03804  *  Equivalent to <code>$_.chop!</code>.
03805  *     
03806  *     a  = "now\r\n"
03807  *     $_ = a
03808  *     chop!   #=> "now"
03809  *     chop!   #=> "no"
03810  *     chop!   #=> "n"
03811  *     chop!   #=> ""
03812  *     chop!   #=> nil
03813  *     $_      #=> ""
03814  *     a       #=> ""
03815  */
03816 
03817 static VALUE
03818 rb_f_chop_bang(str)
03819     VALUE str;
03820 {
03821     return rb_str_chop_bang(uscore_get());
03822 }
03823 
03824 /*
03825  *  call-seq:
03826  *     chop   => string
03827  *  
03828  *  Equivalent to <code>($_.dup).chop!</code>, except <code>nil</code>
03829  *  is never returned. See <code>String#chop!</code>.
03830  *     
03831  *     a  =  "now\r\n"
03832  *     $_ = a
03833  *     chop   #=> "now"
03834  *     $_     #=> "now"
03835  *     chop   #=> "no"
03836  *     chop   #=> "n"
03837  *     chop   #=> ""
03838  *     chop   #=> ""
03839  *     a      #=> "now\r\n"
03840  */
03841 
03842 static VALUE
03843 rb_f_chop()
03844 {
03845     VALUE str = uscore_get();
03846 
03847     if (RSTRING(str)->len > 0) {
03848         str = rb_str_dup(str);
03849         rb_str_chop_bang(str);
03850         rb_lastline_set(str);
03851     }
03852     return str;
03853 }
03854 
03855 
03856 /*
03857  *  call-seq:
03858  *     str.chomp!(separator=$/)   => str or nil
03859  *  
03860  *  Modifies <i>str</i> in place as described for <code>String#chomp</code>,
03861  *  returning <i>str</i>, or <code>nil</code> if no modifications were made.
03862  */
03863 
03864 static VALUE
03865 rb_str_chomp_bang(argc, argv, str)
03866     int argc;
03867     VALUE *argv;
03868     VALUE str;
03869 {
03870     VALUE rs;
03871     int newline;
03872     char *p;
03873     long len, rslen;
03874 
03875     if (rb_scan_args(argc, argv, "01", &rs) == 0) {
03876         len = RSTRING(str)->len;
03877         if (len == 0) return Qnil;
03878         p = RSTRING(str)->ptr;
03879         rs = rb_rs;
03880         if (rs == rb_default_rs) {
03881           smart_chomp:
03882             rb_str_modify(str);
03883             if (RSTRING(str)->ptr[len-1] == '\n') {
03884                 RSTRING(str)->len--;
03885                 if (RSTRING(str)->len > 0 &&
03886                     RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') {
03887                     RSTRING(str)->len--;
03888                 }
03889             }
03890             else if (RSTRING(str)->ptr[len-1] == '\r') {
03891                 RSTRING(str)->len--;
03892             }
03893             else {
03894                 return Qnil;
03895             }
03896             RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
03897             return str;
03898         }
03899     }
03900     if (NIL_P(rs)) return Qnil;
03901     StringValue(rs);
03902     len = RSTRING(str)->len;
03903     if (len == 0) return Qnil;
03904     p = RSTRING(str)->ptr;
03905     rslen = RSTRING(rs)->len;
03906     if (rslen == 0) {
03907         while (len>0 && p[len-1] == '\n') {
03908             len--;
03909             if (len>0 && p[len-1] == '\r')
03910                 len--;
03911         }
03912         if (len < RSTRING(str)->len) {
03913             rb_str_modify(str);
03914             RSTRING(str)->len = len;
03915             RSTRING(str)->ptr[len] = '\0';
03916             return str;
03917         }
03918         return Qnil;
03919     }
03920     if (rslen > len) return Qnil;
03921     newline = RSTRING(rs)->ptr[rslen-1];
03922     if (rslen == 1 && newline == '\n')
03923         goto smart_chomp;
03924 
03925     if (p[len-1] == newline &&
03926         (rslen <= 1 ||
03927          rb_memcmp(RSTRING(rs)->ptr, p+len-rslen, rslen) == 0)) {
03928         rb_str_modify(str);
03929         RSTRING(str)->len -= rslen;
03930         RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
03931         return str;
03932     }
03933     return Qnil;
03934 }
03935 
03936 
03937 /*
03938  *  call-seq:
03939  *     str.chomp(separator=$/)   => new_str
03940  *  
03941  *  Returns a new <code>String</code> with the given record separator removed
03942  *  from the end of <i>str</i> (if present). If <code>$/</code> has not been
03943  *  changed from the default Ruby record separator, then <code>chomp</code> also
03944  *  removes carriage return characters (that is, it will remove <code>\n</code>,
03945  *  <code>\r</code>, and <code>\r\n</code>).
03946  *     
03947  *     "hello".chomp            #=> "hello"
03948  *     "hello\n".chomp          #=> "hello"
03949  *     "hello\r\n".chomp        #=> "hello"
03950  *     "hello\n\r".chomp        #=> "hello\n"
03951  *     "hello\r".chomp          #=> "hello"
03952  *     "hello \n there".chomp   #=> "hello \n there"
03953  *     "hello".chomp("llo")     #=> "he"
03954  */
03955 
03956 static VALUE
03957 rb_str_chomp(argc, argv, str)
03958     int argc;
03959     VALUE *argv;
03960     VALUE str;
03961 {
03962     str = rb_str_dup(str);
03963     rb_str_chomp_bang(argc, argv, str);
03964     return str;
03965 }
03966 
03967 /*
03968  *  call-seq:
03969  *     chomp!             => $_ or nil
03970  *     chomp!(string)     => $_ or nil
03971  *  
03972  *  Equivalent to <code>$_.chomp!(<em>string</em>)</code>. See
03973  *  <code>String#chomp!</code>
03974  *     
03975  *     $_ = "now\n"
03976  *     chomp!       #=> "now"
03977  *     $_           #=> "now"
03978  *     chomp! "x"   #=> nil
03979  *     $_           #=> "now"
03980  */
03981 
03982 static VALUE
03983 rb_f_chomp_bang(argc, argv)
03984     int argc;
03985     VALUE *argv;
03986 {
03987     return rb_str_chomp_bang(argc, argv, uscore_get());
03988 }
03989 
03990 /*
03991  *  call-seq:
03992  *     chomp            => $_
03993  *     chomp(string)    => $_
03994  *  
03995  *  Equivalent to <code>$_ = $_.chomp(<em>string</em>)</code>. See
03996  *  <code>String#chomp</code>.
03997  *     
03998  *     $_ = "now\n"
03999  *     chomp         #=> "now"
04000  *     $_            #=> "now"
04001  *     chomp "ow"    #=> "n"
04002  *     $_            #=> "n"
04003  *     chomp "xxx"   #=> "n"
04004  *     $_            #=> "n"
04005  */
04006 
04007 static VALUE
04008 rb_f_chomp(argc, argv)
04009     int argc;
04010     VALUE *argv;
04011 {
04012     VALUE str = uscore_get();
04013     VALUE dup = rb_str_dup(str);
04014 
04015     if (NIL_P(rb_str_chomp_bang(argc, argv, dup)))
04016         return str;
04017     rb_lastline_set(dup);
04018     return dup;
04019 }
04020 
04021 
04022 /*
04023  *  call-seq:
04024  *     str.lstrip!   => self or nil
04025  *  
04026  *  Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
04027  *  change was made. See also <code>String#rstrip!</code> and
04028  *  <code>String#strip!</code>.
04029  *     
04030  *     "  hello  ".lstrip!  #=> "hello  "
04031  *     "hello".lstrip!      #=> nil
04032  */
04033 
04034 static VALUE
04035 rb_str_lstrip_bang(str)
04036     VALUE str;
04037 {
04038     char *s, *t, *e;
04039 
04040     s = RSTRING(str)->ptr;
04041     if (!s || RSTRING(str)->len == 0) return Qnil;
04042     e = t = s + RSTRING(str)->len;
04043     /* remove spaces at head */
04044     while (s < t && ISSPACE(*s)) s++;
04045 
04046     if (s > RSTRING(str)->ptr) {
04047         rb_str_modify(str);
04048         RSTRING(str)->len = t-s;
04049         memmove(RSTRING(str)->ptr, s, RSTRING(str)->len);
04050         RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
04051         return str;
04052     }
04053     return Qnil;
04054 }
04055 
04056 
04057 /*
04058  *  call-seq:
04059  *     str.lstrip   => new_str
04060  *  
04061  *  Returns a copy of <i>str</i> with leading whitespace removed. See also
04062  *  <code>String#rstrip</code> and <code>String#strip</code>.
04063  *     
04064  *     "  hello  ".lstrip   #=> "hello  "
04065  *     "hello".lstrip       #=> "hello"
04066  */
04067 
04068 static VALUE
04069 rb_str_lstrip(str)
04070     VALUE str;
04071 {
04072     str = rb_str_dup(str);
04073     rb_str_lstrip_bang(str);
04074     return str;
04075 }
04076 
04077 
04078 /*
04079  *  call-seq:
04080  *     str.rstrip!   => self or nil
04081  *  
04082  *  Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
04083  *  no change was made. See also <code>String#lstrip!</code> and
04084  *  <code>String#strip!</code>.
04085  *     
04086  *     "  hello  ".rstrip!  #=> "  hello"
04087  *     "hello".rstrip!      #=> nil
04088  */
04089 
04090 static VALUE
04091 rb_str_rstrip_bang(str)
04092     VALUE str;
04093 {
04094     char *s, *t, *e;
04095 
04096     s = RSTRING(str)->ptr;
04097     if (!s || RSTRING(str)->len == 0) return Qnil;
04098     e = t = s + RSTRING(str)->len;
04099 
04100     /* remove trailing '\0's */
04101     while (s < t && t[-1] == '\0') t--;
04102 
04103     /* remove trailing spaces */
04104     while (s < t && ISSPACE(*(t-1))) t--;
04105 
04106     if (t < e) {
04107         rb_str_modify(str);
04108         RSTRING(str)->len = t-s;
04109         RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
04110         return str;
04111     }
04112     return Qnil;
04113 }
04114 
04115 
04116 /*
04117  *  call-seq:
04118  *     str.rstrip   => new_str
04119  *  
04120  *  Returns a copy of <i>str</i> with trailing whitespace removed. See also
04121  *  <code>String#lstrip</code> and <code>String#strip</code>.
04122  *     
04123  *     "  hello  ".rstrip   #=> "  hello"
04124  *     "hello".rstrip       #=> "hello"
04125  */
04126 
04127 static VALUE
04128 rb_str_rstrip(str)
04129     VALUE str;
04130 {
04131     str = rb_str_dup(str);
04132     rb_str_rstrip_bang(str);
04133     return str;
04134 }
04135 
04136 
04137 /*
04138  *  call-seq:
04139  *     str.strip!   => str or nil
04140  *  
04141  *  Removes leading and trailing whitespace from <i>str</i>. Returns
04142  *  <code>nil</code> if <i>str</i> was not altered.
04143  */
04144 
04145 static VALUE
04146 rb_str_strip_bang(str)
04147     VALUE str;
04148 {
04149     VALUE l = rb_str_lstrip_bang(str);
04150     VALUE r = rb_str_rstrip_bang(str);
04151 
04152     if (NIL_P(l) && NIL_P(r)) return Qnil;
04153     return str;
04154 }
04155 
04156 
04157 /*
04158  *  call-seq:
04159  *     str.strip   => new_str
04160  *  
04161  *  Returns a copy of <i>str</i> with leading and trailing whitespace removed.
04162  *     
04163  *     "    hello    ".strip   #=> "hello"
04164  *     "\tgoodbye\r\n".strip   #=> "goodbye"
04165  */
04166 
04167 static VALUE
04168 rb_str_strip(str)
04169     VALUE str;
04170 {
04171     str = rb_str_dup(str);
04172     rb_str_strip_bang(str);
04173     return str;
04174 }
04175 
04176 static VALUE
04177 scan_once(str, pat, start)
04178     VALUE str, pat;
04179     long *start;
04180 {
04181     VALUE result, match;
04182     struct re_registers *regs;
04183     long i;
04184 
04185     if (rb_reg_search(pat, str, *start, 0) >= 0) {
04186         match = rb_backref_get();
04187         regs = RMATCH(match)->regs;
04188         if (BEG(0) == END(0)) {
04189             /*
04190              * Always consume at least one character of the input string
04191              */
04192             if (RSTRING(str)->len > END(0))
04193                 *start = END(0)+mbclen2(RSTRING(str)->ptr[END(0)],pat);
04194             else
04195                 *start = END(0)+1;
04196         }
04197         else {
04198             *start = END(0);
04199         }
04200         if (regs->num_regs == 1) {
04201             return rb_reg_nth_match(0, match);
04202         }
04203         result = rb_ary_new2(regs->num_regs);
04204         for (i=1; i < regs->num_regs; i++) {
04205             rb_ary_push(result, rb_reg_nth_match(i, match));
04206         }
04207 
04208         return result;
04209     }
04210     return Qnil;
04211 }
04212 
04213 
04214 /*
04215  *  call-seq:
04216  *     str.scan(pattern)                         => array
04217  *     str.scan(pattern) {|match, ...| block }   => str
04218  *  
04219  *  Both forms iterate through <i>str</i>, matching the pattern (which may be a
04220  *  <code>Regexp</code> or a <code>String</code>). For each match, a result is
04221  *  generated and either added to the result array or passed to the block. If
04222  *  the pattern contains no groups, each individual result consists of the
04223  *  matched string, <code>$&</code>.  If the pattern contains groups, each
04224  *  individual result is itself an array containing one entry per group.
04225  *     
04226  *     a = "cruel world"
04227  *     a.scan(/\w+/)        #=> ["cruel", "world"]
04228  *     a.scan(/.../)        #=> ["cru", "el ", "wor"]
04229  *     a.scan(/(...)/)      #=> [["cru"], ["el "], ["wor"]]
04230  *     a.scan(/(..)(..)/)   #=> [["cr", "ue"], ["l ", "wo"]]
04231  *     
04232  *  And the block form:
04233  *     
04234  *     a.scan(/\w+/) {|w| print "<<#{w}>> " }
04235  *     print "\n"
04236  *     a.scan(/(.)(.)/) {|a,b| print b, a }
04237  *     print "\n"
04238  *     
04239  *  <em>produces:</em>
04240  *     
04241  *     <<cruel>> <<world>>
04242  *     rceu lowlr
04243  */
04244 
04245 static VALUE
04246 rb_str_scan(str, pat)
04247     VALUE str, pat;
04248 {
04249     VALUE result;
04250     long start = 0;
04251     VALUE match = Qnil;
04252 
04253     pat = get_pat(pat, 1);
04254     if (!rb_block_given_p()) {
04255         VALUE ary = rb_ary_new();
04256 
04257         while (!NIL_P(result = scan_once(str, pat, &start))) {
04258             match = rb_backref_get();
04259             rb_ary_push(ary, result);
04260         }
04261         rb_backref_set(match);
04262         return ary;
04263     }
04264 
04265     while (!NIL_P(result = scan_once(str, pat, &start))) {
04266         match = rb_backref_get();
04267         rb_match_busy(match);
04268         rb_yield(result);
04269         rb_backref_set(match);  /* restore $~ value */
04270     }
04271     rb_backref_set(match);
04272     return str;
04273 }
04274 
04275 /*
04276  *  call-seq:
04277  *     scan(pattern)                   => array
04278  *     scan(pattern) {|match, ...| block }    => $_
04279  *  
04280  *  Equivalent to calling <code>$_.scan</code>. See
04281  *  <code>String#scan</code>.
04282  */
04283 
04284 static VALUE
04285 rb_f_scan(self, pat)
04286     VALUE self, pat;
04287 {
04288     return rb_str_scan(uscore_get(), pat);
04289 }
04290 
04291 
04292 /*
04293  *  call-seq:
04294  *     str.hex   => integer
04295  *  
04296  *  Treats leading characters from <i>str</i> as a string of hexadecimal digits
04297  *  (with an optional sign and an optional <code>0x</code>) and returns the
04298  *  corresponding number. Zero is returned on error.
04299  *     
04300  *     "0x0a".hex     #=> 10
04301  *     "-1234".hex    #=> -4660
04302  *     "0".hex        #=> 0
04303  *     "wombat".hex   #=> 0
04304  */
04305 
04306 static VALUE
04307 rb_str_hex(str)
04308     VALUE str;
04309 {
04310     return rb_str_to_inum(str, 16, Qfalse);
04311 }
04312 
04313 
04314 /*
04315  *  call-seq:
04316  *     str.oct   => integer
04317  *  
04318  *  Treats leading characters of <i>str</i> as a string of octal digits (with an
04319  *  optional sign) and returns the corresponding number.  Returns 0 if the
04320  *  conversion fails.
04321  *     
04322  *     "123".oct       #=> 83
04323  *     "-377".oct      #=> -255
04324  *     "bad".oct       #=> 0
04325  *     "0377bad".oct   #=> 255
04326  */
04327 
04328 static VALUE
04329 rb_str_oct(str)
04330     VALUE str;
04331 {
04332     return rb_str_to_inum(str, -8, Qfalse);
04333 }
04334 
04335 
04336 /*
04337  *  call-seq:
04338  *     str.crypt(other_str)   => new_str
04339  *  
04340  *  Applies a one-way cryptographic hash to <i>str</i> by invoking the standard
04341  *  library function <code>crypt</code>. The argument is the salt string, which
04342  *  should be two characters long, each character drawn from
04343  *  <code>[a-zA-Z0-9./]</code>.
04344  */
04345 
04346 static VALUE
04347 rb_str_crypt(str, salt)
04348     VALUE str, salt;
04349 {
04350     extern char *crypt();
04351     VALUE result;
04352     char *s;
04353 
04354     StringValue(salt);
04355     if (RSTRING(salt)->len < 2)
04356         rb_raise(rb_eArgError, "salt too short(need >=2 bytes)");
04357 
04358     if (RSTRING(str)->ptr) s = RSTRING(str)->ptr;
04359     else s = "";
04360     result = rb_str_new2(crypt(s, RSTRING(salt)->ptr));
04361     OBJ_INFECT(result, str);
04362     OBJ_INFECT(result, salt);
04363     return result;
04364 }
04365 
04366 
04367 /*
04368  *  call-seq:
04369  *     str.intern   => symbol
04370  *     str.to_sym   => symbol
04371  *  
04372  *  Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
04373  *  symbol if it did not previously exist. See <code>Symbol#id2name</code>.
04374  *     
04375  *     "Koala".intern         #=> :Koala
04376  *     s = 'cat'.to_sym       #=> :cat
04377  *     s == :cat              #=> true
04378  *     s = '@cat'.to_sym      #=> :@cat
04379  *     s == :@cat             #=> true
04380  *
04381  *  This can also be used to create symbols that cannot be represented using the
04382  *  <code>:xxx</code> notation.
04383  *     
04384  *     'cat and dog'.to_sym   #=> :"cat and dog"
04385  */
04386 
04387 VALUE
04388 rb_str_intern(s)
04389     VALUE s;
04390 {
04391     volatile VALUE str = s;
04392     ID id;
04393 
04394     if (!RSTRING(str)->ptr || RSTRING(str)->len == 0) {
04395         rb_raise(rb_eArgError, "interning empty string");
04396     }
04397     if (strlen(RSTRING(str)->ptr) != RSTRING(str)->len)
04398         rb_raise(rb_eArgError, "symbol string may not contain `\\0'");
04399     id = rb_intern(RSTRING(str)->ptr);
04400     return ID2SYM(id);
04401 }
04402 
04403 
04404 /*
04405  *  call-seq:
04406  *     str.sum(n=16)   => integer
04407  *  
04408  *  Returns a basic <em>n</em>-bit checksum of the characters in <i>str</i>,
04409  *  where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
04410  *  to 16. The result is simply the sum of the binary value of each character in
04411  *  <i>str</i> masked with <code>2**n - 1</code> (that is, taken modulo
04412  *  <code>2**n</code>). This is not a particularly good checksum.
04413  */
04414 
04415 static VALUE
04416 rb_str_sum(argc, argv, str)
04417     int argc;
04418     VALUE *argv;
04419     VALUE str;
04420 {
04421     VALUE vbits;
04422     int bits;
04423     char *ptr, *p, *pend;
04424     long len;
04425 
04426     if (rb_scan_args(argc, argv, "01", &vbits) == 0) {
04427         bits = 16;
04428     }
04429     else bits = NUM2INT(vbits);
04430 
04431     ptr = p = RSTRING(str)->ptr;
04432     len = RSTRING(str)->len;
04433     pend = p + len;
04434     if (bits >= sizeof(long)*CHAR_BIT) {
04435         VALUE sum = INT2FIX(0);
04436 
04437         while (p < pend) {
04438             str_mod_check(str, ptr, len);
04439             sum = rb_funcall(sum, '+', 1, INT2FIX((unsigned char)*p));
04440             p++;
04441         }
04442         if (bits != 0) {
04443             VALUE mod;
04444 
04445             mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
04446             mod = rb_funcall(mod, '-', 1, INT2FIX(1));
04447             sum = rb_funcall(sum, '&', 1, mod);
04448         }
04449         return sum;
04450     }
04451     else {
04452        unsigned long sum = 0;
04453 
04454         while (p < pend) {
04455             str_mod_check(str, ptr, len);
04456             sum += (unsigned char)*p;
04457             p++;
04458         }
04459         if (bits != 0) {
04460            sum &= (((unsigned long)1)<<bits)-1;
04461         }
04462         return rb_int2inum(sum);
04463     }
04464 }
04465 
04466 static VALUE
04467 rb_str_justify(argc, argv, str, jflag)
04468     int argc;
04469     VALUE *argv;
04470     VALUE str;
04471     char jflag;
04472 {
04473     VALUE w;
04474     long width, flen = 0;
04475     VALUE res;
04476     char *p, *pend, *f = " ";
04477     long n;
04478     VALUE pad;
04479 
04480     rb_scan_args(argc, argv, "11", &w, &pad);
04481     width = NUM2LONG(w);
04482     if (argc == 2) {
04483         StringValue(pad);
04484         f = RSTRING(pad)->ptr;
04485         flen = RSTRING(pad)->len;
04486         if (flen == 0) {
04487             rb_raise(rb_eArgError, "zero width padding");
04488         }
04489     }
04490     if (width < 0 || RSTRING(str)->len >= width) return rb_str_dup(str);
04491     res = rb_str_new5(str, 0, width);
04492     p = RSTRING(res)->ptr;
04493     if (jflag != 'l') {
04494         n = width - RSTRING(str)->len;
04495         pend = p + ((jflag == 'r') ? n : n/2);
04496         if (flen <= 1) {
04497             while (p < pend) {
04498                 *p++ = *f;
04499             }
04500         }
04501         else {
04502             char *q = f;
04503             while (p + flen <= pend) {
04504                 memcpy(p,f,flen);
04505                 p += flen;
04506             }
04507             while (p < pend) {
04508                 *p++ = *q++;
04509             }
04510         }
04511     }
04512     memcpy(p, RSTRING(str)->ptr, RSTRING(str)->len);
04513     if (jflag != 'r') {
04514         p += RSTRING(str)->len; pend = RSTRING(res)->ptr + width;
04515         if (flen <= 1) {
04516             while (p < pend) {
04517                 *p++ = *f;
04518             }
04519         }
04520         else {
04521             while (p + flen <= pend) {
04522                 memcpy(p,f,flen);
04523                 p += flen;
04524             }
04525             while (p < pend) {
04526                 *p++ = *f++;
04527             }
04528         }
04529     }
04530     OBJ_INFECT(res, str);
04531     if (flen > 0) OBJ_INFECT(res, pad);
04532     return res;
04533 }
04534 
04535 
04536 /*
04537  *  call-seq:
04538  *     str.ljust(integer, padstr=' ')   => new_str
04539  *  
04540  *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
04541  *  <code>String</code> of length <i>integer</i> with <i>str</i> left justified
04542  *  and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
04543  *     
04544  *     "hello".ljust(4)            #=> "hello"
04545  *     "hello".ljust(20)           #=> "hello               "
04546  *     "hello".ljust(20, '1234')   #=> "hello123412341234123"
04547  */
04548 
04549 static VALUE
04550 rb_str_ljust(argc, argv, str)
04551     int argc;
04552     VALUE *argv;
04553     VALUE str;
04554 {
04555     return rb_str_justify(argc, argv, str, 'l');
04556 }
04557 
04558 
04559 /*
04560  *  call-seq:
04561  *     str.rjust(integer, padstr=' ')   => new_str
04562  *  
04563  *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
04564  *  <code>String</code> of length <i>integer</i> with <i>str</i> right justified
04565  *  and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
04566  *     
04567  *     "hello".rjust(4)            #=> "hello"
04568  *     "hello".rjust(20)           #=> "               hello"
04569  *     "hello".rjust(20, '1234')   #=> "123412341234123hello"
04570  */
04571 
04572 static VALUE
04573 rb_str_rjust(argc, argv, str)
04574     int argc;
04575     VALUE *argv;
04576     VALUE str;
04577 {
04578     return rb_str_justify(argc, argv, str, 'r');
04579 }
04580 
04581 
04582 /*
04583  *  call-seq:
04584  *     str.center(integer, padstr=' ')   => new_str
04585  *  
04586  *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
04587  *  <code>String</code> of length <i>integer</i> with <i>str</i> centered and
04588  *  padded with <i>padstr</i>; otherwise, returns <i>str</i>.
04589  *     
04590  *     "hello".center(4)         #=> "hello"
04591  *     "hello".center(20)        #=> "       hello        "
04592  *     "hello".center(20, '123') #=> "1231231hello12312312"
04593  */
04594 
04595 static VALUE
04596 rb_str_center(argc, argv, str)
04597     int argc;
04598     VALUE *argv;
04599     VALUE str;
04600 {
04601     return rb_str_justify(argc, argv, str, 'c');
04602 }
04603 
04604 void
04605 rb_str_setter(val, id, var)
04606     VALUE val;
04607     ID id;
04608     VALUE *var;
04609 {
04610     if (!NIL_P(val) && TYPE(val) != T_STRING) {
04611         rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
04612     }
04613     *var = val;
04614 }
04615 
04616 
04617 /*
04618  *  A <code>String</code> object holds and manipulates an arbitrary sequence of
04619  *  bytes, typically representing characters. String objects may be created
04620  *  using <code>String::new</code> or as literals.
04621  *     
04622  *  Because of aliasing issues, users of strings should be aware of the methods
04623  *  that modify the contents of a <code>String</code> object.  Typically,
04624  *  methods with names ending in ``!'' modify their receiver, while those
04625  *  without a ``!'' return a new <code>String</code>.  However, there are
04626  *  exceptions, such as <code>String#[]=</code>.
04627  *     
04628  */
04629 
04630 void
04631 Init_String()
04632 {
          /*
           * Defines the String class and registers its methods, the
           * related global functions and the field-separator variables.
           *
           * The last argument of each registration is the arity; -1 is
           * used for handlers written in the (argc, argv, str) form, as
           * rb_str_sum and rb_str_justify's wrappers are in this file.
           */

          /* Class setup: subclass of Object, mixes in Comparable and
           * Enumerable, instances allocated by str_alloc. */
04633     rb_cString  = rb_define_class("String", rb_cObject);
04634     rb_include_module(rb_cString, rb_mComparable);
04635     rb_include_module(rb_cString, rb_mEnumerable);
04636     rb_define_alloc_func(rb_cString, str_alloc);
          /* initialize_copy shares its handler with replace. */
04637     rb_define_method(rb_cString, "initialize", rb_str_init, -1);
04638     rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
          /* Comparison, hashing and operators. */
04639     rb_define_method(rb_cString, "<=>", rb_str_cmp_m, 1);
04640     rb_define_method(rb_cString, "==", rb_str_equal, 1);
04641     rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
04642     rb_define_method(rb_cString, "hash", rb_str_hash_m, 0);
04643     rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
04644     rb_define_method(rb_cString, "+", rb_str_plus, 1);
04645     rb_define_method(rb_cString, "*", rb_str_times, 1);
04646     rb_define_method(rb_cString, "%", rb_str_format, 1);
          /* Element access and basic queries; size is an alias of length. */
04647     rb_define_method(rb_cString, "[]", rb_str_aref_m, -1);
04648     rb_define_method(rb_cString, "[]=", rb_str_aset_m, -1);
04649     rb_define_method(rb_cString, "insert", rb_str_insert, 2);
04650     rb_define_method(rb_cString, "length", rb_str_length, 0);
04651     rb_define_method(rb_cString, "size", rb_str_length, 0);
04652     rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
04653     rb_define_method(rb_cString, "=~", rb_str_match, 1);
04654     rb_define_method(rb_cString, "match", rb_str_match_m, 1);
          /* next / next! share their handlers with succ / succ!. */
04655     rb_define_method(rb_cString, "succ", rb_str_succ, 0);
04656     rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0);
04657     rb_define_method(rb_cString, "next", rb_str_succ, 0);
04658     rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0);
04659     rb_define_method(rb_cString, "upto", rb_str_upto_m, 1);
04660     rb_define_method(rb_cString, "index", rb_str_index_m, -1);
04661     rb_define_method(rb_cString, "rindex", rb_str_rindex_m, -1);
04662     rb_define_method(rb_cString, "replace", rb_str_replace, 1);
04663 
          /* Conversions; to_str shares its handler with to_s. */
04664     rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
04665     rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
04666     rb_define_method(rb_cString, "to_s", rb_str_to_s, 0);
04667     rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
04668     rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
04669     rb_define_method(rb_cString, "dump", rb_str_dump, 0);
04670 
          /* Case conversion returning a new string ... */
04671     rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
04672     rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
04673     rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
04674     rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
04675 
          /* ... and the matching "!" variants. */
04676     rb_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0);
04677     rb_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0);
04678     rb_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0);
04679     rb_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0);
04680 
          /* "<<" shares its handler with concat, to_sym with intern. */
04681     rb_define_method(rb_cString, "hex", rb_str_hex, 0);
04682     rb_define_method(rb_cString, "oct", rb_str_oct, 0);
04683     rb_define_method(rb_cString, "split", rb_str_split_m, -1);
04684     rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
04685     rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
04686     rb_define_method(rb_cString, "concat", rb_str_concat, 1);
04687     rb_define_method(rb_cString, "<<", rb_str_concat, 1);
04688     rb_define_method(rb_cString, "crypt", rb_str_crypt, 1);
04689     rb_define_method(rb_cString, "intern", rb_str_intern, 0);
04690     rb_define_method(rb_cString, "to_sym", rb_str_intern, 0);
04691 
04692     rb_define_method(rb_cString, "include?", rb_str_include, 1);
04693 
04694     rb_define_method(rb_cString, "scan", rb_str_scan, 1);
04695 
          /* All three justification methods wrap rb_str_justify. */
04696     rb_define_method(rb_cString, "ljust", rb_str_ljust, -1);
04697     rb_define_method(rb_cString, "rjust", rb_str_rjust, -1);
04698     rb_define_method(rb_cString, "center", rb_str_center, -1);
04699 
          /* Substitution and trimming returning a new string ... */
04700     rb_define_method(rb_cString, "sub", rb_str_sub, -1);
04701     rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
04702     rb_define_method(rb_cString, "chop", rb_str_chop, 0);
04703     rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
04704     rb_define_method(rb_cString, "strip", rb_str_strip, 0);
04705     rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
04706     rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
04707 
          /* ... and the matching "!" variants. */
04708     rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1);
04709     rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1);
04710     rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0);
04711     rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1);
04712     rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0);
04713     rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0);
04714     rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0);
04715 
          /* Character-set operations. */
04716     rb_define_method(rb_cString, "tr", rb_str_tr, 2);
04717     rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2);
04718     rb_define_method(rb_cString, "delete", rb_str_delete, -1);
04719     rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
04720     rb_define_method(rb_cString, "count", rb_str_count, -1);
04721 
04722     rb_define_method(rb_cString, "tr!", rb_str_tr_bang, 2);
04723     rb_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2);
04724     rb_define_method(rb_cString, "delete!", rb_str_delete_bang, -1);
04725     rb_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1);
04726 
          /* Iteration; each shares its handler with each_line. */
04727     rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
04728     rb_define_method(rb_cString, "each", rb_str_each_line, -1);
04729     rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
04730 
04731     rb_define_method(rb_cString, "sum", rb_str_sum, -1);
04732 
          /*
           * Global-function counterparts.  rb_f_scan applies rb_str_scan
           * to the string returned by uscore_get(); the other rb_f_*
           * handlers are defined outside this section -- presumably they
           * follow the same pattern (TODO confirm).
           */
04733     rb_define_global_function("sub", rb_f_sub, -1);
04734     rb_define_global_function("gsub", rb_f_gsub, -1);
04735 
04736     rb_define_global_function("sub!", rb_f_sub_bang, -1);
04737     rb_define_global_function("gsub!", rb_f_gsub_bang, -1);
04738 
04739     rb_define_global_function("chop", rb_f_chop, 0);
04740     rb_define_global_function("chop!", rb_f_chop_bang, 0);
04741 
04742     rb_define_global_function("chomp", rb_f_chomp, -1);
04743     rb_define_global_function("chomp!", rb_f_chomp_bang, -1);
04744 
04745     rb_define_global_function("split", rb_f_split, -1);
04746     rb_define_global_function("scan", rb_f_scan, 1);
04747 
          /* slice shares its handler with []. */
04748     rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
04749     rb_define_method(rb_cString, "slice!", rb_str_slice_bang, -1);
04750 
          /* Look up the ID for "to_s" once and keep it in id_to_s. */
04751     id_to_s = rb_intern("to_s");
04752 
          /* rb_fs starts out as nil and is exposed under two names,
           * $; and $-F, both bound to the same C variable. */
04753     rb_fs = Qnil;
04754     rb_define_variable("$;", &rb_fs);
04755     rb_define_variable("$-F", &rb_fs);
04756 }
04757 

Generated on Wed Jan 18 23:32:07 2006 for Ruby by doxygen 1.3.5