Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Globals

string.c

Go to the documentation of this file.
00001 /**********************************************************************
00002 
00003   string.c -
00004 
00005   $Author: matz $
00006   $Date: 2005/10/27 08:19:20 $
00007   created at: Mon Aug  9 17:12:58 JST 1993
00008 
00009   Copyright (C) 1993-2003 Yukihiro Matsumoto
00010   Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
00011   Copyright (C) 2000  Information-technology Promotion Agency, Japan
00012 
00013 **********************************************************************/
00014 
00015 #include "ruby.h"
00016 #include "re.h"
00017 
00018 #define BEG(no) regs->beg[no]
00019 #define END(no) regs->end[no]
00020 
00021 #include <math.h>
00022 #include <ctype.h>
00023 
00024 #ifdef HAVE_UNISTD_H
00025 #include <unistd.h>
00026 #endif
00027 
00028 VALUE rb_cString;
00029 
00030 #define STR_TMPLOCK FL_USER1
00031 #define STR_ASSOC   FL_USER3
00032 #define STR_NOCAPA  (ELTS_SHARED|STR_ASSOC)
00033 
00034 #define RESIZE_CAPA(str,capacity) do {\
00035     REALLOC_N(RSTRING(str)->ptr, char, (capacity)+1);\
00036     if (!FL_TEST(str, STR_NOCAPA))\
00037         RSTRING(str)->aux.capa = (capacity);\
00038 } while (0)
00039 
00040 VALUE rb_fs;
00041 
00042 static inline void
00043 str_mod_check(s, p, len)
00044     VALUE s;
00045     char *p;
00046     long len;
00047 {
00048     if (RSTRING(s)->ptr != p || RSTRING(s)->len != len) {
00049         rb_raise(rb_eRuntimeError, "string modified");
00050     }
00051 }
00052 
00053 static inline void
00054 str_frozen_check(s)
00055     VALUE s;
00056 {
00057     if (OBJ_FROZEN(s)) {
00058         rb_raise(rb_eRuntimeError, "string frozen");
00059     }
00060 }
00061 
00062 static VALUE str_alloc (VALUE);
00063 static VALUE
00064 str_alloc(klass)
00065     VALUE klass;
00066 {
00067     NEWOBJ(str, struct RString);
00068     OBJSETUP(str, klass, T_STRING);
00069 
00070     str->ptr = 0;
00071     str->len = 0;
00072     str->aux.capa = 0;
00073 
00074     return (VALUE)str;
00075 }
00076 
00077 static VALUE
00078 str_new(klass, ptr, len)
00079     VALUE klass;
00080     const char *ptr;
00081     long len;
00082 {
00083     VALUE str;
00084 
00085     if (len < 0) {
00086         rb_raise(rb_eArgError, "negative string size (or size too big)");
00087     }
00088 
00089     str = str_alloc(klass);
00090     RSTRING(str)->len = len;
00091     RSTRING(str)->aux.capa = len;
00092     RSTRING(str)->ptr = ALLOC_N(char,len+1);
00093     if (ptr) {
00094         memcpy(RSTRING(str)->ptr, ptr, len);
00095     }
00096     RSTRING(str)->ptr[len] = '\0';
00097     return str;
00098 }
00099 
00100 VALUE
00101 rb_str_new(ptr, len)
00102     const char *ptr;
00103     long len;
00104 {
00105     return str_new(rb_cString, ptr, len);
00106 }
00107 
00108 VALUE
00109 rb_str_new2(ptr)
00110     const char *ptr;
00111 {
00112     if (!ptr) {
00113         rb_raise(rb_eArgError, "NULL pointer given");
00114     }
00115     return rb_str_new(ptr, strlen(ptr));
00116 }
00117 
00118 VALUE
00119 rb_tainted_str_new(ptr, len)
00120     const char *ptr;
00121     long len;
00122 {
00123     VALUE str = rb_str_new(ptr, len);
00124 
00125     OBJ_TAINT(str);
00126     return str;
00127 }
00128 
00129 VALUE
00130 rb_tainted_str_new2(ptr)
00131     const char *ptr;
00132 {
00133     VALUE str = rb_str_new2(ptr);
00134 
00135     OBJ_TAINT(str);
00136     return str;
00137 }
00138 
00139 static VALUE
00140 str_new3(klass, str)
00141     VALUE klass, str;
00142 {
00143     VALUE str2 = str_alloc(klass);
00144 
00145     RSTRING(str2)->len = RSTRING(str)->len;
00146     RSTRING(str2)->ptr = RSTRING(str)->ptr;
00147     RSTRING(str2)->aux.shared = str;
00148     FL_SET(str2, ELTS_SHARED);
00149     OBJ_INFECT(str2, str);
00150 
00151     return str2;
00152 }
00153 
00154 VALUE
00155 rb_str_new3(str)
00156     VALUE str;
00157 {
00158     return str_new3(rb_obj_class(str), str);
00159 }
00160 
00161 static VALUE
00162 str_new4(klass, str)
00163     VALUE klass, str;
00164 {
00165     VALUE str2 = str_alloc(klass);
00166 
00167     RSTRING(str2)->len = RSTRING(str)->len;
00168     RSTRING(str2)->ptr = RSTRING(str)->ptr;
00169     if (FL_TEST(str, ELTS_SHARED)) {
00170         FL_SET(str2, ELTS_SHARED);
00171         RSTRING(str2)->aux.shared = RSTRING(str)->aux.shared;
00172     }
00173     else {
00174         FL_SET(str, ELTS_SHARED);
00175         RSTRING(str)->aux.shared = str2;
00176     }
00177 
00178     return str2;
00179 }
00180 
00181 VALUE
00182 rb_str_new4(orig)
00183     VALUE orig;
00184 {
00185     VALUE klass, str;
00186 
00187     if (OBJ_FROZEN(orig)) return orig;
00188     klass = rb_obj_class(orig);
00189     if (FL_TEST(orig, ELTS_SHARED) && (str = RSTRING(orig)->aux.shared) && klass == RBASIC(str)->klass) {
00190         long ofs;
00191         ofs = RSTRING(str)->len - RSTRING(orig)->len;
00192         if (ofs > 0) {
00193             str = str_new3(klass, str);
00194             RSTRING(str)->ptr += ofs;
00195             RSTRING(str)->len -= ofs;
00196         }
00197     }
00198     else if (FL_TEST(orig, STR_ASSOC)) {
00199         str = str_new(klass, RSTRING(orig)->ptr, RSTRING(orig)->len);
00200     }
00201     else {
00202         str = str_new4(klass, orig);
00203     }
00204     OBJ_INFECT(str, orig);
00205     OBJ_FREEZE(str);
00206     return str;
00207 }
00208 
00209 VALUE
00210 rb_str_new5(obj, ptr, len)
00211     VALUE obj;
00212     const char *ptr;
00213     long len;
00214 {
00215     return str_new(rb_obj_class(obj), ptr, len);
00216 }
00217 
00218 #define STR_BUF_MIN_SIZE 128
00219 
00220 VALUE
00221 rb_str_buf_new(capa)
00222     long capa;
00223 {
00224     VALUE str = str_alloc(rb_cString);
00225 
00226     if (capa < STR_BUF_MIN_SIZE) {
00227         capa = STR_BUF_MIN_SIZE;
00228     }
00229     RSTRING(str)->ptr = 0;
00230     RSTRING(str)->len = 0;
00231     RSTRING(str)->aux.capa = capa;
00232     RSTRING(str)->ptr = ALLOC_N(char, capa+1);
00233     RSTRING(str)->ptr[0] = '\0';
00234 
00235     return str;
00236 }
00237 
00238 VALUE
00239 rb_str_buf_new2(ptr)
00240     const char *ptr;
00241 {
00242     VALUE str;
00243     long len = strlen(ptr);
00244 
00245     str = rb_str_buf_new(len);
00246     rb_str_buf_cat(str, ptr, len);
00247 
00248     return str;
00249 }
00250 
00251 VALUE
00252 rb_str_to_str(str)
00253     VALUE str;
00254 {
00255     return rb_convert_type(str, T_STRING, "String", "to_str");
00256 }
00257 
00258 static void
00259 rb_str_shared_replace(str, str2)
00260     VALUE str, str2;
00261 {
00262     if (str == str2) return;
00263     rb_str_modify(str);
00264     if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr);
00265     if (NIL_P(str2)) {
00266         RSTRING(str)->ptr = 0;
00267         RSTRING(str)->len = 0;
00268         RSTRING(str)->aux.capa = 0;
00269         FL_UNSET(str, STR_NOCAPA);
00270         return;
00271     }
00272     RSTRING(str)->ptr = RSTRING(str2)->ptr;
00273     RSTRING(str)->len = RSTRING(str2)->len;
00274     FL_UNSET(str, STR_NOCAPA);
00275     if (FL_TEST(str2, STR_NOCAPA)) {
00276         FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
00277         RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
00278     }
00279     else {
00280         RSTRING(str)->aux.capa = RSTRING(str2)->aux.capa;
00281     }
00282     RSTRING(str2)->ptr = 0;     /* abandon str2 */
00283     RSTRING(str2)->len = 0;
00284     RSTRING(str2)->aux.capa = 0;
00285     FL_UNSET(str2, STR_NOCAPA);
00286     if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
00287 }
00288 
00289 static ID id_to_s;
00290 
00291 VALUE
00292 rb_obj_as_string(obj)
00293     VALUE obj;
00294 {
00295     VALUE str;
00296 
00297     if (TYPE(obj) == T_STRING) {
00298         return obj;
00299     }
00300     str = rb_funcall(obj, id_to_s, 0);
00301     if (TYPE(str) != T_STRING)
00302         return rb_any_to_s(obj);
00303     if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
00304     return str;
00305 }
00306 
00307 static VALUE rb_str_replace (VALUE, VALUE);
00308 
00309 VALUE
00310 rb_str_dup(str)
00311     VALUE str;
00312 {
00313     VALUE dup = str_alloc(rb_obj_class(str));
00314     rb_str_replace(dup, str);
00315     return dup;
00316 }
00317 
00318 
00319 /*
00320  *  call-seq:
00321  *     String.new(str="")   => new_str
00322  *  
00323  *  Returns a new string object containing a copy of <i>str</i>.
00324  */
00325 
00326 static VALUE
00327 rb_str_init(argc, argv, str)
00328     int argc;
00329     VALUE *argv;
00330     VALUE str;
00331 {
00332     VALUE orig;
00333 
00334     if (rb_scan_args(argc, argv, "01", &orig) == 1)
00335         rb_str_replace(str, orig);
00336     return str;
00337 }
00338 
00339 /*
00340  *  call-seq:
00341  *     str.length   => integer
00342  *  
00343  *  Returns the length of <i>str</i>.
00344  */
00345 
00346 static VALUE
00347 rb_str_length(str)
00348     VALUE str;
00349 {
00350     return LONG2NUM(RSTRING(str)->len);
00351 }
00352 
00353 /*
00354  *  call-seq:
00355  *     str.empty?   => true or false
00356  *  
00357  *  Returns <code>true</code> if <i>str</i> has a length of zero.
00358  *     
00359  *     "hello".empty?   #=> false
00360  *     "".empty?        #=> true
00361  */
00362 
00363 static VALUE
00364 rb_str_empty(str)
00365     VALUE str;
00366 {
00367     if (RSTRING(str)->len == 0)
00368         return Qtrue;
00369     return Qfalse;
00370 }
00371 
00372 /*
00373  *  call-seq:
00374  *     str + other_str   => new_str
00375  *  
00376  *  Concatenation---Returns a new <code>String</code> containing
00377  *  <i>other_str</i> concatenated to <i>str</i>.
00378  *     
00379  *     "Hello from " + self.to_s   #=> "Hello from main"
00380  */
00381 
00382 VALUE
00383 rb_str_plus(str1, str2)
00384     VALUE str1, str2;
00385 {
00386     VALUE str3;
00387 
00388     StringValue(str2);
00389     str3 = rb_str_new(0, RSTRING(str1)->len+RSTRING(str2)->len);
00390     memcpy(RSTRING(str3)->ptr, RSTRING(str1)->ptr, RSTRING(str1)->len);
00391     memcpy(RSTRING(str3)->ptr + RSTRING(str1)->len,
00392            RSTRING(str2)->ptr, RSTRING(str2)->len);
00393     RSTRING(str3)->ptr[RSTRING(str3)->len] = '\0';
00394 
00395     if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
00396         OBJ_TAINT(str3);
00397     return str3;
00398 }
00399 
00400 /*
00401  *  call-seq:
00402  *     str * integer   => new_str
00403  *  
00404  *  Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
00405  *  the receiver.
00406  *     
00407  *     "Ho! " * 3   #=> "Ho! Ho! Ho! "
00408  */
00409 
00410 VALUE
00411 rb_str_times(str, times)
00412     VALUE str;
00413     VALUE times;
00414 {
00415     VALUE str2;
00416     long i, len;
00417 
00418     len = NUM2LONG(times);
00419     if (len < 0) {
00420         rb_raise(rb_eArgError, "negative argument");
00421     }
00422     if (len && LONG_MAX/len <  RSTRING(str)->len) {
00423         rb_raise(rb_eArgError, "argument too big");
00424     }
00425 
00426     str2 = rb_str_new5(str,0, len *= RSTRING(str)->len);
00427     for (i = 0; i < len; i += RSTRING(str)->len) {
00428         memcpy(RSTRING(str2)->ptr + i,
00429                RSTRING(str)->ptr, RSTRING(str)->len);
00430     }
00431     RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0';
00432 
00433     OBJ_INFECT(str2, str);
00434 
00435     return str2;
00436 }
00437 
00438 /*
00439  *  call-seq:
00440  *     str % arg   => new_str
00441  *  
00442  *  Format---Uses <i>str</i> as a format specification, and returns the result
00443  *  of applying it to <i>arg</i>. If the format specification contains more than
00444  *  one substitution, then <i>arg</i> must be an <code>Array</code> containing
00445  *  the values to be substituted. See <code>Kernel::sprintf</code> for details
00446  *  of the format string.
00447  *     
00448  *     "%05d" % 123                       #=> "00123"
00449  *     "%-5s: %08x" % [ "ID", self.id ]   #=> "ID   : 200e14d6"
00450  */
00451 
00452 static VALUE
00453 rb_str_format(str, arg)
00454     VALUE str, arg;
00455 {
00456     VALUE *argv;
00457 
00458     if (TYPE(arg) == T_ARRAY) {
00459         argv = ALLOCA_N(VALUE, RARRAY(arg)->len + 1);
00460         argv[0] = str;
00461         MEMCPY(argv+1, RARRAY(arg)->ptr, VALUE, RARRAY(arg)->len);
00462         return rb_f_sprintf(RARRAY(arg)->len+1, argv);
00463     }
00464 
00465     argv = ALLOCA_N(VALUE, 2);
00466     argv[0] = str;
00467     argv[1] = arg;
00468     return rb_f_sprintf(2, argv);
00469 }
00470 
00471 static int
00472 str_independent(str)
00473     VALUE str;
00474 {
00475     if (FL_TEST(str, STR_TMPLOCK)) {
00476         rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
00477     }
00478     if (OBJ_FROZEN(str)) rb_error_frozen("string");
00479     if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
00480         rb_raise(rb_eSecurityError, "Insecure: can't modify string");
00481     if (!FL_TEST(str, ELTS_SHARED)) return 1;
00482     return 0;
00483 }
00484 
00485 static void
00486 str_make_independent(str)
00487     VALUE str;
00488 {
00489     char *ptr;
00490 
00491     ptr = ALLOC_N(char, RSTRING(str)->len+1);
00492     if (RSTRING(str)->ptr) {
00493         memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len);
00494     }
00495     ptr[RSTRING(str)->len] = 0;
00496     RSTRING(str)->ptr = ptr;
00497     RSTRING(str)->aux.capa = RSTRING(str)->len;
00498     FL_UNSET(str, STR_NOCAPA);
00499 }
00500 
00501 void
00502 rb_str_modify(str)
00503     VALUE str;
00504 {
00505     if (!str_independent(str))
00506         str_make_independent(str);
00507 }
00508 
00509 void
00510 rb_str_associate(str, add)
00511     VALUE str, add;
00512 {
00513     if (FL_TEST(str, STR_ASSOC)) {
00514         /* already associated */
00515         rb_ary_concat(RSTRING(str)->aux.shared, add);
00516     }
00517     else {
00518         if (FL_TEST(str, ELTS_SHARED)) {
00519             str_make_independent(str);
00520         }
00521         else if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
00522             RESIZE_CAPA(str, RSTRING(str)->len);
00523         }
00524         RSTRING(str)->aux.shared = add;
00525         FL_SET(str, STR_ASSOC);
00526     }
00527 }
00528 
00529 VALUE
00530 rb_str_associated(str)
00531     VALUE str;
00532 {
00533     if (FL_TEST(str, STR_ASSOC)) {
00534         return RSTRING(str)->aux.shared;
00535     }
00536     return Qfalse;
00537 }
00538 
00539 static char *null_str = "";
00540 
00541 VALUE
00542 rb_string_value(ptr)
00543     volatile VALUE *ptr;
00544 {
00545     VALUE s = *ptr;
00546     if (TYPE(s) != T_STRING) {
00547         s = rb_str_to_str(s);
00548         *ptr = s;
00549     }
00550     if (!RSTRING(s)->ptr) {
00551         FL_SET(s, ELTS_SHARED);
00552         RSTRING(s)->ptr = null_str;
00553     }
00554     return s;
00555 }
00556 
00557 char *
00558 rb_string_value_ptr(ptr)
00559     volatile VALUE *ptr;
00560 {
00561     return RSTRING(rb_string_value(ptr))->ptr;
00562 }
00563 
00564 char *
00565 rb_string_value_cstr(ptr)
00566     volatile VALUE *ptr;
00567 {
00568     VALUE str = rb_string_value(ptr);
00569     char *s = RSTRING(str)->ptr;
00570 
00571     if (!s || RSTRING(str)->len != strlen(s)) {
00572         rb_raise(rb_eArgError, "string contains null byte");
00573     }
00574     return s;
00575 }
00576 
00577 VALUE
00578 rb_check_string_type(str)
00579     VALUE str;
00580 {
00581     str = rb_check_convert_type(str, T_STRING, "String", "to_str");
00582     if (!NIL_P(str) && !RSTRING(str)->ptr) {
00583         FL_SET(str, ELTS_SHARED);
00584         RSTRING(str)->ptr = null_str;
00585     }
00586     return str;
00587 }
00588 
00589 VALUE
00590 rb_str_substr(str, beg, len)
00591     VALUE str;
00592     long beg, len;
00593 {
00594     VALUE str2;
00595 
00596     if (len < 0) return Qnil;
00597     if (beg > RSTRING(str)->len) return Qnil;
00598     if (beg < 0) {
00599         beg += RSTRING(str)->len;
00600         if (beg < 0) return Qnil;
00601     }
00602     if (beg + len > RSTRING(str)->len) {
00603         len = RSTRING(str)->len - beg;
00604     }
00605     if (len < 0) {
00606         len = 0;
00607     }
00608     if (len == 0) {
00609         str2 = rb_str_new5(str,0,0);
00610     }
00611     else if (len > sizeof(struct RString)/2 &&
00612         beg + len == RSTRING(str)->len && !FL_TEST(str, STR_ASSOC)) {
00613         str2 = rb_str_new3(rb_str_new4(str));
00614         RSTRING(str2)->ptr += RSTRING(str2)->len - len;
00615         RSTRING(str2)->len = len;
00616     }
00617     else {
00618         str2 = rb_str_new5(str, RSTRING(str)->ptr+beg, len);
00619     }
00620     OBJ_INFECT(str2, str);
00621 
00622     return str2;
00623 }
00624 
00625 VALUE
00626 rb_str_freeze(str)
00627     VALUE str;
00628 {
00629     return rb_obj_freeze(str);
00630 }
00631 
00632 VALUE
00633 rb_str_dup_frozen(str)
00634     VALUE str;
00635 {
00636     if (FL_TEST(str, ELTS_SHARED) && RSTRING(str)->aux.shared) {
00637         VALUE shared = RSTRING(str)->aux.shared;
00638         if (RSTRING(shared)->len == RSTRING(str)->len) {
00639             OBJ_FREEZE(shared);
00640             return shared;
00641         }
00642     }
00643     if (OBJ_FROZEN(str)) return str;
00644     str = rb_str_dup(str);
00645     OBJ_FREEZE(str);
00646     return str;
00647 }
00648 
00649 VALUE
00650 rb_str_locktmp(str)
00651     VALUE str;
00652 {
00653     if (FL_TEST(str, STR_TMPLOCK)) {
00654         rb_raise(rb_eRuntimeError, "temporal locking already locked string");
00655     }
00656     FL_SET(str, STR_TMPLOCK);
00657     return str;
00658 }
00659 
00660 VALUE
00661 rb_str_unlocktmp(str)
00662     VALUE str;
00663 {
00664     if (!FL_TEST(str, STR_TMPLOCK)) {
00665         rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
00666     }
00667     FL_UNSET(str, STR_TMPLOCK);
00668     return str;
00669 }
00670 
00671 VALUE
00672 rb_str_resize(str, len)
00673     VALUE str;
00674     long len;
00675 {
00676     if (len < 0) {
00677         rb_raise(rb_eArgError, "negative string size (or size too big)");
00678     }
00679 
00680     rb_str_modify(str);
00681     if (len != RSTRING(str)->len) {
00682         if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) {
00683             REALLOC_N(RSTRING(str)->ptr, char, len+1);
00684             if (!FL_TEST(str, STR_NOCAPA)) {
00685                 RSTRING(str)->aux.capa = len;
00686             }
00687         }
00688         RSTRING(str)->len = len;
00689         RSTRING(str)->ptr[len] = '\0';  /* sentinel */
00690     }
00691     return str;
00692 }
00693 
00694 VALUE
00695 rb_str_buf_cat(str, ptr, len)
00696     VALUE str;
00697     const char *ptr;
00698     long len;
00699 {
00700     long capa, total;
00701 
00702     if (len == 0) return str;
00703     if (len < 0) {
00704         rb_raise(rb_eArgError, "negative string size (or size too big)");
00705     }
00706     rb_str_modify(str);
00707     if (FL_TEST(str, STR_ASSOC)) {
00708         FL_UNSET(str, STR_ASSOC);
00709         capa = RSTRING(str)->aux.capa = RSTRING(str)->len;
00710     }
00711     else {
00712         capa = RSTRING(str)->aux.capa;
00713     }
00714     total = RSTRING(str)->len+len;
00715     if (capa <= total) {
00716         while (total > capa) {
00717             capa = (capa + 1) * 2;
00718         }
00719         RESIZE_CAPA(str, capa);
00720     }
00721     memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
00722     RSTRING(str)->len = total;
00723     RSTRING(str)->ptr[total] = '\0'; /* sentinel */
00724 
00725     return str;
00726 }
00727 
00728 VALUE
00729 rb_str_buf_cat2(str, ptr)
00730     VALUE str;
00731     const char *ptr;
00732 {
00733     return rb_str_buf_cat(str, ptr, strlen(ptr));
00734 }
00735 
00736 VALUE
00737 rb_str_cat(str, ptr, len)
00738     VALUE str;
00739     const char *ptr;
00740     long len;
00741 {
00742     if (len < 0) {
00743         rb_raise(rb_eArgError, "negative string size (or size too big)");
00744     }
00745     if (FL_TEST(str, STR_ASSOC)) {
00746         rb_str_modify(str);
00747         REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len+len);
00748         memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
00749         RSTRING(str)->len += len;
00750         RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
00751         return str;
00752     }
00753 
00754     return rb_str_buf_cat(str, ptr, len);
00755 }
00756 
00757 VALUE
00758 rb_str_cat2(str, ptr)
00759     VALUE str;
00760     const char *ptr;
00761 {
00762     return rb_str_cat(str, ptr, strlen(ptr));
00763 }
00764 
00765 VALUE
00766 rb_str_buf_append(str, str2)
00767     VALUE str, str2;
00768 {
00769     long capa, len;
00770 
00771     rb_str_modify(str);
00772     if (FL_TEST(str, STR_ASSOC)) {
00773         FL_UNSET(str, STR_ASSOC);
00774         capa = RSTRING(str)->aux.capa = RSTRING(str)->len;
00775     }
00776     else {
00777         capa = RSTRING(str)->aux.capa;
00778     }
00779     len = RSTRING(str)->len+RSTRING(str2)->len;
00780     if (capa <= len) {
00781         while (len > capa) {
00782             capa = (capa + 1) * 2;
00783         }
00784         RESIZE_CAPA(str, capa);
00785     }
00786     memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
00787            RSTRING(str2)->ptr, RSTRING(str2)->len);
00788     RSTRING(str)->len += RSTRING(str2)->len;
00789     RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
00790     OBJ_INFECT(str, str2);
00791 
00792     return str;
00793 }
00794 
00795 VALUE
00796 rb_str_append(str, str2)
00797     VALUE str, str2;
00798 {
00799     StringValue(str2);
00800     rb_str_modify(str);
00801     if (RSTRING(str2)->len > 0) {
00802         if (FL_TEST(str, STR_ASSOC)) {
00803             long len = RSTRING(str)->len+RSTRING(str2)->len;
00804             REALLOC_N(RSTRING(str)->ptr, char, len+1);
00805             memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
00806                    RSTRING(str2)->ptr, RSTRING(str2)->len);
00807             RSTRING(str)->ptr[len] = '\0'; /* sentinel */
00808             RSTRING(str)->len = len;
00809         }
00810         else {
00811             return rb_str_buf_append(str, str2);
00812         }
00813     }
00814     OBJ_INFECT(str, str2);
00815     return str;
00816 }
00817 
00818 
00819 /*
00820  *  call-seq:
00821  *     str << fixnum        => str
00822  *     str.concat(fixnum)   => str
00823  *     str << obj           => str
00824  *     str.concat(obj)      => str
00825  *  
00826  *  Append---Concatenates the given object to <i>str</i>. If the object is a
00827  *  <code>Fixnum</code> between 0 and 255, it is converted to a character before
00828  *  concatenation.
00829  *     
00830  *     a = "hello "
00831  *     a << "world"   #=> "hello world"
00832  *     a.concat(33)   #=> "hello world!"
00833  */
00834 
00835 VALUE
00836 rb_str_concat(str1, str2)
00837     VALUE str1, str2;
00838 {
00839     if (FIXNUM_P(str2)) {
00840         int i = FIX2INT(str2);
00841         if (0 <= i && i <= 0xff) { /* byte */
00842             char c = i;
00843             return rb_str_cat(str1, &c, 1);
00844         }
00845     }
00846     str1 = rb_str_append(str1, str2);
00847 
00848     return str1;
00849 }
00850 
00851 int
00852 rb_str_hash(str)
00853     VALUE str;
00854 {
00855     register long len = RSTRING(str)->len;
00856     register char *p = RSTRING(str)->ptr;
00857     register int key = 0;
00858 
00859 #ifdef HASH_ELFHASH
00860     register unsigned int g;
00861 
00862     while (len--) {
00863         key = (key << 4) + *p++;
00864         if (g = key & 0xF0000000)
00865             key ^= g >> 24;
00866         key &= ~g;
00867     }
00868 #elif HASH_PERL
00869     while (len--) {
00870         key += *p++;
00871         key += (key << 10);
00872         key ^= (key >> 6);
00873     }
00874     key += (key << 3);
00875     key ^= (key >> 11);
00876     key += (key << 15);
00877 #else
00878     while (len--) {
00879         key = key*65599 + *p;
00880         p++;
00881     }
00882     key = key + (key>>5);
00883 #endif
00884     return key;
00885 }
00886 
00887 /*
00888  * call-seq:
00889  *    str.hash   => fixnum
00890  *
00891  * Return a hash based on the string's length and content.
00892  */
00893 
00894 static VALUE
00895 rb_str_hash_m(str)
00896     VALUE str;
00897 {
00898     int key = rb_str_hash(str);
00899     return INT2FIX(key);
00900 }
00901 
00902 #define lesser(a,b) (((a)>(b))?(b):(a))
00903 
00904 int
00905 rb_str_cmp(str1, str2)
00906     VALUE str1, str2;
00907 {
00908     long len;
00909     int retval;
00910 
00911     len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
00912     retval = rb_memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
00913     if (retval == 0) {
00914         if (RSTRING(str1)->len == RSTRING(str2)->len) return 0;
00915         if (RSTRING(str1)->len > RSTRING(str2)->len) return 1;
00916         return -1;
00917     }
00918     if (retval > 0) return 1;
00919     return -1;
00920 }
00921 
00922 
00923 /*
00924  *  call-seq:
00925  *     str == obj   => true or false
00926  *  
00927  *  Equality---If <i>obj</i> is not a <code>String</code>, returns
00928  *  <code>false</code>. Otherwise, returns <code>true</code> if <i>str</i>
00929  *  <code><=></code> <i>obj</i> returns zero.
00930  */
00931 
00932 static VALUE
00933 rb_str_equal(str1, str2)
00934     VALUE str1, str2;
00935 {
00936     if (str1 == str2) return Qtrue;
00937     if (TYPE(str2) != T_STRING) {
00938         if (!rb_respond_to(str2, rb_intern("to_str"))) {
00939             return Qfalse;
00940         }
00941         return rb_equal(str2, str1);
00942     }
00943     if (RSTRING(str1)->len == RSTRING(str2)->len &&
00944         rb_str_cmp(str1, str2) == 0) {
00945         return Qtrue;
00946     }
00947     return Qfalse;
00948 }
00949 
00950 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
00951 
00952 /*
00953  * call-seq:
00954  *   str.eql?(other)   => true or false
00955  *
00956  * Two strings are equal if they have the same length and content.
00957  */
00958 
00959 static VALUE
00960 rb_str_eql(str1, str2)
00961     VALUE str1, str2;
00962 {
00963     if (TYPE(str2) != T_STRING || RSTRING(str1)->len != RSTRING(str2)->len)
00964         return Qfalse;
00965 
00966     if (memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr,
00967                lesser(RSTRING(str1)->len, RSTRING(str2)->len)) == 0)
00968         return Qtrue;
00969 
00970     return Qfalse;
00971 }
00972 
00973 /*
00974  *  call-seq:
00975  *     str <=> other_str   => -1, 0, +1
00976  *  
00977  *  Comparison---Returns -1 if <i>other_str</i> is greater than, 0 if
00978  *  <i>other_str</i> is equal to, and +1 if <i>other_str</i> is less than
00979  *  <i>str</i>. If the strings are of different lengths, and the strings are
00980  *  equal when compared up to the shortest length, then the longer string is
00981  *  considered greater than the shorter one. If the variable <code>$=</code> is
00982  *  <code>false</code>, the comparison is based on comparing the binary values
00983  *  of each character in the string. In older versions of Ruby, setting
00984  *  <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
00985  *  in favor of using <code>String#casecmp</code>.
00986  *
00987  *  <code><=></code> is the basis for the methods <code><</code>,
00988  *  <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
00989  *  included from module <code>Comparable</code>.  The method
00990  *  <code>String#==</code> does not use <code>Comparable#==</code>.
00991  *     
00992  *     "abcdef" <=> "abcde"     #=> 1
00993  *     "abcdef" <=> "abcdef"    #=> 0
00994  *     "abcdef" <=> "abcdefg"   #=> -1
00995  *     "abcdef" <=> "ABCDEF"    #=> 1
00996  */
00997 
00998 static VALUE
00999 rb_str_cmp_m(str1, str2)
01000     VALUE str1, str2;
01001 {
01002     long result;
01003 
01004     if (TYPE(str2) != T_STRING) {
01005         if (!rb_respond_to(str2, rb_intern("to_str"))) {
01006             return Qnil;
01007         }
01008         else if (!rb_respond_to(str2, rb_intern("<=>"))) {
01009             return Qnil;
01010         }
01011         else {
01012             VALUE tmp = rb_funcall(str2, rb_intern("<=>"), 1, str1);
01013 
01014             if (NIL_P(tmp)) return Qnil;
01015             if (!FIXNUM_P(tmp)) {
01016                 return rb_funcall(LONG2FIX(0), '-', 1, tmp);
01017             }
01018             result = -FIX2LONG(tmp);
01019         }
01020     }
01021     else {
01022         result = rb_str_cmp(str1, str2);
01023     }
01024     return LONG2NUM(result);
01025 }
01026 
01027 /*
01028  *  call-seq:
01029  *     str.casecmp(other_str)   => -1, 0, +1
01030  *  
01031  *  Case-insensitive version of <code>String#<=></code>.
01032  *     
01033  *     "abcdef".casecmp("abcde")     #=> 1
01034  *     "aBcDeF".casecmp("abcdef")    #=> 0
01035  *     "abcdef".casecmp("abcdefg")   #=> -1
01036  *     "abcdef".casecmp("ABCDEF")    #=> 0
01037  */
01038 
01039 static VALUE
01040 rb_str_casecmp(str1, str2)
01041     VALUE str1, str2;
01042 {
01043     long len;
01044     int retval;
01045 
01046     StringValue(str2);
01047     len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
01048     retval = rb_memcicmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
01049     if (retval == 0) {
01050         if (RSTRING(str1)->len == RSTRING(str2)->len) return INT2FIX(0);
01051         if (RSTRING(str1)->len > RSTRING(str2)->len) return INT2FIX(1);
01052         return INT2FIX(-1);
01053     }
01054     if (retval == 0) return INT2FIX(0);
01055     if (retval > 0) return INT2FIX(1);
01056     return INT2FIX(-1);
01057 }
01058 
01059 static long
01060 rb_str_index(str, sub, offset)
01061     VALUE str, sub;
01062     long offset;
01063 {
01064     long pos;
01065 
01066     if (offset < 0) {
01067         offset += RSTRING(str)->len;
01068         if (offset < 0) return -1;
01069     }
01070     if (RSTRING(str)->len - offset < RSTRING(sub)->len) return -1;
01071     if (RSTRING(sub)->len == 0) return offset;
01072     pos = rb_memsearch(RSTRING(sub)->ptr, RSTRING(sub)->len,
01073                        RSTRING(str)->ptr+offset, RSTRING(str)->len-offset);
01074     if (pos < 0) return pos;
01075     return pos + offset;
01076 }
01077 
01078 
01079 /*
01080  *  call-seq:
01081  *     str.index(substring [, offset])   => fixnum or nil
01082  *     str.index(fixnum [, offset])      => fixnum or nil
01083  *     str.index(regexp [, offset])      => fixnum or nil
01084  *  
01085  *  Returns the index of the first occurrence of the given <i>substring</i>,
01086  *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
01087  *  <code>nil</code> if not found. If the second parameter is present, it
01088  *  specifies the position in the string to begin the search.
01089  *     
01090  *     "hello".index('e')             #=> 1
01091  *     "hello".index('lo')            #=> 3
01092  *     "hello".index('a')             #=> nil
01093  *     "hello".index(101)             #=> 1
01094  *     "hello".index(/[aeiou]/, -3)   #=> 4
01095  */
01096 
01097 static VALUE
01098 rb_str_index_m(argc, argv, str)
01099     int argc;
01100     VALUE *argv;
01101     VALUE str;
01102 {
01103     VALUE sub;
01104     VALUE initpos;
01105     long pos;
01106 
01107     if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
01108         pos = NUM2LONG(initpos);
01109     }
01110     else {
01111         pos = 0;
01112     }
01113     if (pos < 0) {
01114         pos += RSTRING(str)->len;
01115         if (pos < 0) {
01116             if (TYPE(sub) == T_REGEXP) {
01117                 rb_backref_set(Qnil);
01118             }
01119             return Qnil;
01120         }
01121     }
01122 
01123     switch (TYPE(sub)) {
01124       case T_REGEXP:
01125         pos = rb_reg_adjust_startpos(sub, str, pos, 0);
01126         pos = rb_reg_search(sub, str, pos, 0);
01127         break;
01128 
01129       case T_FIXNUM:
01130       {
01131           int c = FIX2INT(sub);
01132           long len = RSTRING(str)->len;
01133           unsigned char *p = RSTRING(str)->ptr;
01134 
01135           for (;pos<len;pos++) {
01136               if (p[pos] == c) return LONG2NUM(pos);
01137           }
01138           return Qnil;
01139       }
01140 
01141       default: {
01142           VALUE tmp;
01143 
01144           tmp = rb_check_string_type(sub);
01145           if (NIL_P(tmp)) {
01146               rb_raise(rb_eTypeError, "type mismatch: %s given",
01147                        rb_obj_classname(sub));
01148           }
01149           sub = tmp;
01150       }
01151         /* fall through */
01152       case T_STRING:
01153         pos = rb_str_index(str, sub, pos);
01154         break;
01155     }
01156 
01157     if (pos == -1) return Qnil;
01158     return LONG2NUM(pos);
01159 }
01160 
01161 static long
01162 rb_str_rindex(str, sub, pos)
01163     VALUE str, sub;
01164     long pos;
01165 {
01166     long len = RSTRING(sub)->len;
01167     char *s, *sbeg, *t;
01168 
01169     /* substring longer than string */
01170     if (RSTRING(str)->len < len) return -1;
01171     if (RSTRING(str)->len - pos < len) {
01172         pos = RSTRING(str)->len - len;
01173     }
01174     sbeg = RSTRING(str)->ptr;
01175     s = RSTRING(str)->ptr + pos;
01176     t = RSTRING(sub)->ptr;
01177     if (len) {
01178         while (sbeg <= s) {
01179             if (rb_memcmp(s, t, len) == 0) {
01180                 return s - RSTRING(str)->ptr;
01181             }
01182             s--;
01183         }
01184         return -1;
01185     }
01186     else {
01187         return pos;
01188     }
01189 }
01190 
01191 
01192 /*
01193  *  call-seq:
01194  *     str.rindex(substring [, fixnum])   => fixnum or nil
01195  *     str.rindex(fixnum [, fixnum])   => fixnum or nil
01196  *     str.rindex(regexp [, fixnum])   => fixnum or nil
01197  *  
01198  *  Returns the index of the last occurrence of the given <i>substring</i>,
01199  *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
01200  *  <code>nil</code> if not found. If the second parameter is present, it
01201  *  specifies the position in the string to end the search---characters beyond
01202  *  this point will not be considered.
01203  *     
01204  *     "hello".rindex('e')             #=> 1
01205  *     "hello".rindex('l')             #=> 3
01206  *     "hello".rindex('a')             #=> nil
01207  *     "hello".rindex(101)             #=> 1
01208  *     "hello".rindex(/[aeiou]/, -2)   #=> 1
01209  */
01210 
01211 static VALUE
01212 rb_str_rindex_m(argc, argv, str)
01213     int argc;
01214     VALUE *argv;
01215     VALUE str;
01216 {
01217     VALUE sub;
01218     VALUE position;
01219     long pos;
01220 
01221     if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) {
01222         pos = NUM2LONG(position);
01223         if (pos < 0) {
01224             pos += RSTRING(str)->len;
01225             if (pos < 0) {
01226                 if (TYPE(sub) == T_REGEXP) {
01227                     rb_backref_set(Qnil);
01228                 }
01229                 return Qnil;
01230             }
01231         }
01232         if (pos > RSTRING(str)->len) pos = RSTRING(str)->len;
01233     }
01234     else {
01235         pos = RSTRING(str)->len;
01236     }
01237 
01238     switch (TYPE(sub)) {
01239       case T_REGEXP:
01240         if (RREGEXP(sub)->len) {
01241             pos = rb_reg_adjust_startpos(sub, str, pos, 1);
01242             pos = rb_reg_search(sub, str, pos, 1);
01243         }
01244         if (pos >= 0) return LONG2NUM(pos);
01245         break;
01246 
01247       case T_STRING:
01248         pos = rb_str_rindex(str, sub, pos);
01249         if (pos >= 0) return LONG2NUM(pos);
01250         break;
01251 
01252       case T_FIXNUM:
01253       {
01254           int c = FIX2INT(sub);
01255           unsigned char *p = RSTRING(str)->ptr + pos;
01256           unsigned char *pbeg = RSTRING(str)->ptr;
01257 
01258           if (pos == RSTRING(str)->len) {
01259               if (pos == 0) return Qnil;
01260               --p;
01261           }
01262           while (pbeg <= p) {
01263               if (*p == c) return LONG2NUM((char*)p - RSTRING(str)->ptr);
01264               p--;
01265           }
01266           return Qnil;
01267       }
01268 
01269       default:
01270         rb_raise(rb_eTypeError, "type mismatch: %s given",
01271                  rb_obj_classname(sub));
01272     }
01273     return Qnil;
01274 }
01275 
01276 /*
01277  *  call-seq:
01278  *     str =~ obj   => fixnum or nil
01279  *  
01280  *  Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
01281  *  against <i>str</i> and return the position the match starts, or
01282  *  <code>nil</code> if there is no match. If <i>obj</i> is a
01283  *  <code>String</code>, a <code>TypeError</code> is raised. Otherwise, invokes
01284  *  <i>obj.=~</i>, passing <i>str</i> as an argument. The default
01285  *  <code>=~</code> in <code>Object</code> returns <code>false</code>.
01286  *     
01287  *     "cat o' 9 tails" =~ '\d'   # raises TypeError: type mismatch: String given
01288  *     "cat o' 9 tails" =~ /\d/   #=> 7
01289  *     "cat o' 9 tails" =~ 9      #=> false
01290  */
01291 
01292 static VALUE
01293 rb_str_match(x, y)
01294     VALUE x, y;
01295 {
01296     switch (TYPE(y)) {
01297       case T_STRING:
01298         rb_raise(rb_eTypeError, "type mismatch: String given");
01299 
01300       case T_REGEXP:
01301         return rb_reg_match(y, x);
01302 
01303       default:
01304         return rb_funcall(y, rb_intern("=~"), 1, x);
01305     }
01306 }
01307 
01308 
01309 static VALUE get_pat (VALUE, int);
01310 
01311 
01312 /*
01313  *  call-seq:
01314  *     str.match(pattern)   => matchdata or nil
01315  *  
01316  *  Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
01317  *  then invokes its <code>match</code> method on <i>str</i>.
01318  *     
01319  *     'hello'.match('(.)\1')      #=> #<MatchData:0x401b3d30>
01320  *     'hello'.match('(.)\1')[0]   #=> "ll"
01321  *     'hello'.match(/(.)\1/)[0]   #=> "ll"
01322  *     'hello'.match('xx')         #=> nil
01323  */
01324 
01325 static VALUE
01326 rb_str_match_m(str, re)
01327     VALUE str, re;
01328 {
01329     return rb_funcall(get_pat(re, 0), rb_intern("match"), 1, str);
01330 }
01331 
/*
 *  Advances the single character at *s to its successor within its own
 *  class (digit, lower-case letter, upper-case letter).
 *
 *  Returns 0 when no carry is produced (including when *s belongs to none of
 *  the three classes, in which case *s is left untouched). When the
 *  character wraps around, *s is reset to the first character of its class
 *  and the carry character is returned: '1' for '9', 'a' for 'z', 'A' for 'Z'.
 */
static char
succ_char(s)
    char *s;
{
    const char ch = *s;

    /* Wrap-around cases first: these are the only ones that carry. */
    switch (ch) {
      case '9':
        *s = '0';
        return '1';
      case 'z':
        *s = 'a';
        return 'a';
      case 'Z':
        *s = 'A';
        return 'A';
      default:
        break;
    }

    /* Any other digit or letter simply moves on by one. */
    if (('0' <= ch && ch < '9') ||
        ('a' <= ch && ch < 'z') ||
        ('A' <= ch && ch < 'Z')) {
        *s = ch + 1;
    }
    return 0;
}
01356 
01357 
01358 /*
01359  *  call-seq:
01360  *     str.succ   => new_str
01361  *     str.next   => new_str
01362  *  
01363  *  Returns the successor to <i>str</i>. The successor is calculated by
01364  *  incrementing characters starting from the rightmost alphanumeric (or
01365  *  the rightmost character if there are no alphanumerics) in the
01366  *  string. Incrementing a digit always results in another digit, and
01367  *  incrementing a letter results in another letter of the same case.
01368  *  Incrementing nonalphanumerics uses the underlying character set's
01369  *  collating sequence.
01370  *     
01371  *  If the increment generates a ``carry,'' the character to the left of
01372  *  it is incremented. This process repeats until there is no carry,
01373  *  adding an additional character if necessary.
01374  *     
01375  *     "abcd".succ        #=> "abce"
01376  *     "THX1138".succ     #=> "THX1139"
01377  *     "<<koala>>".succ   #=> "<<koalb>>"
01378  *     "1999zzz".succ     #=> "2000aaa"
01379  *     "ZZZ9999".succ     #=> "AAAA0000"
01380  *     "***".succ         #=> "**+"
01381  */
01382 
01383 static VALUE
01384 rb_str_succ(orig)
01385     VALUE orig;
01386 {
01387     VALUE str;
01388     char *sbeg, *s;
01389     int c = -1;
01390     long n = 0;
01391 
01392     str = rb_str_new5(orig, RSTRING(orig)->ptr, RSTRING(orig)->len);
01393     OBJ_INFECT(str, orig);
01394     if (RSTRING(str)->len == 0) return str;
01395 
01396     sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
01397 
01398     while (sbeg <= s) {
01399         if (ISALNUM(*s)) {
01400             if ((c = succ_char(s)) == 0) break;
01401             n = s - sbeg;
01402         }
01403         s--;
01404     }
01405     if (c == -1) {              /* str contains no alnum */
01406         sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
01407         c = '\001';
01408         while (sbeg <= s) {
01409             if ((*s += 1) != 0) break;
01410             s--;
01411         }
01412     }
01413     if (s < sbeg) {
01414         RESIZE_CAPA(str, RSTRING(str)->len + 1);
01415         s = RSTRING(str)->ptr + n;
01416         memmove(s+1, s, RSTRING(str)->len - n);
01417         *s = c;
01418         RSTRING(str)->len += 1;
01419         RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
01420     }
01421 
01422     return str;
01423 }
01424 
01425 
01426 /*
01427  *  call-seq:
01428  *     str.succ!   => str
01429  *     str.next!   => str
01430  *  
01431  *  Equivalent to <code>String#succ</code>, but modifies the receiver in
01432  *  place.
01433  */
01434 
01435 static VALUE
01436 rb_str_succ_bang(str)
01437     VALUE str;
01438 {
01439     rb_str_shared_replace(str, rb_str_succ(str));
01440 
01441     return str;
01442 }
01443 
01444 VALUE
01445 rb_str_upto(beg, end, excl)
01446     VALUE beg, end;
01447     int excl;
01448 {
01449     VALUE current, after_end;
01450     ID succ = rb_intern("succ");
01451     int n;
01452 
01453     StringValue(end);
01454     n = rb_str_cmp(beg, end);
01455     if (n > 0 || (excl && n == 0)) return beg;
01456     after_end = rb_funcall(end, succ, 0, 0);
01457     current = beg;
01458     while (!rb_str_equal(current, after_end)) {
01459         rb_yield(current);
01460         if (!excl && rb_str_equal(current, end)) break;
01461         current = rb_funcall(current, succ, 0, 0);
01462         StringValue(current);
01463         if (excl && rb_str_equal(current, end)) break;
01464         StringValue(current);
01465         if (RSTRING(current)->len > RSTRING(end)->len)
01466             break;
01467     }
01468 
01469     return beg;
01470 }
01471 
01472 
01473 /*
01474  *  call-seq:
01475  *     str.upto(other_str) {|s| block }   => str
01476  *  
01477  *  Iterates through successive values, starting at <i>str</i> and
01478  *  ending at <i>other_str</i> inclusive, passing each value in turn to
01479  *  the block. The <code>String#succ</code> method is used to generate
01480  *  each value.
01481  *     
01482  *     "a8".upto("b6") {|s| print s, ' ' }
01483  *     for s in "a8".."b6"
01484  *       print s, ' '
01485  *     end
01486  *     
01487  *  <em>produces:</em>
01488  *     
01489  *     a8 a9 b0 b1 b2 b3 b4 b5 b6
01490  *     a8 a9 b0 b1 b2 b3 b4 b5 b6
01491  */
01492 
01493 static VALUE
01494 rb_str_upto_m(beg, end)
01495     VALUE beg, end;
01496 {
01497     return rb_str_upto(beg, end, Qfalse);
01498 }
01499 
01500 static VALUE
01501 rb_str_subpat(str, re, nth)
01502     VALUE str, re;
01503     int nth;
01504 {
01505     if (rb_reg_search(re, str, 0, 0) >= 0) {
01506         return rb_reg_nth_match(nth, rb_backref_get());
01507     }
01508     return Qnil;
01509 }
01510 
01511 static VALUE
01512 rb_str_aref(str, indx)
01513     VALUE str;
01514     VALUE indx;
01515 {
01516     long idx;
01517 
01518     switch (TYPE(indx)) {
01519       case T_FIXNUM:
01520         idx = FIX2LONG(indx);
01521 
01522       num_index:
01523         if (idx < 0) {
01524             idx = RSTRING(str)->len + idx;
01525         }
01526         if (idx < 0 || RSTRING(str)->len <= idx) {
01527             return Qnil;
01528         }
01529         return INT2FIX(RSTRING(str)->ptr[idx] & 0xff);
01530 
01531       case T_REGEXP:
01532         return rb_str_subpat(str, indx, 0);
01533 
01534       case T_STRING:
01535         if (rb_str_index(str, indx, 0) != -1)
01536             return rb_str_dup(indx);
01537         return Qnil;
01538 
01539       default:
01540         /* check if indx is Range */
01541         {
01542             long beg, len;
01543             VALUE tmp;
01544 
01545             switch (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 0)) {
01546               case Qfalse:
01547                 break;
01548               case Qnil:
01549                 return Qnil;
01550               default:
01551                 tmp = rb_str_substr(str, beg, len);
01552                 OBJ_INFECT(tmp, indx);
01553                 return tmp;
01554             }
01555         }
01556         idx = NUM2LONG(indx);
01557         goto num_index;
01558     }
01559     return Qnil;                /* not reached */
01560 }
01561 
01562 
01563 /*
01564  *  call-seq:
01565  *     str[fixnum]                 => fixnum or nil
01566  *     str[fixnum, fixnum]         => new_str or nil
01567  *     str[range]                  => new_str or nil
01568  *     str[regexp]                 => new_str or nil
01569  *     str[regexp, fixnum]         => new_str or nil
01570  *     str[other_str]              => new_str or nil
01571  *     str.slice(fixnum)           => fixnum or nil
01572  *     str.slice(fixnum, fixnum)   => new_str or nil
01573  *     str.slice(range)            => new_str or nil
01574  *     str.slice(regexp)           => new_str or nil
01575  *     str.slice(regexp, fixnum)   => new_str or nil
01576  *     str.slice(other_str)        => new_str or nil
01577  *  
01578  *  Element Reference---If passed a single <code>Fixnum</code>, returns the code
01579  *  of the character at that position. If passed two <code>Fixnum</code>
01580  *  objects, returns a substring starting at the offset given by the first, and
01581  *  a length given by the second. If given a range, a substring containing
01582  *  characters at offsets given by the range is returned. In all three cases, if
01583  *  an offset is negative, it is counted from the end of <i>str</i>. Returns
01584  *  <code>nil</code> if the initial offset falls outside the string, the length
01585  *  is negative, or the beginning of the range is greater than the end.
01586  *     
01587  *  If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
01588  *  returned. If a numeric parameter follows the regular expression, that
01589  *  component of the <code>MatchData</code> is returned instead. If a
01590  *  <code>String</code> is given, that string is returned if it occurs in
01591  *  <i>str</i>. In both cases, <code>nil</code> is returned if there is no
01592  *  match.
01593  *     
01594  *     a = "hello there"
01595  *     a[1]                   #=> 101
01596  *     a[1,3]                 #=> "ell"
01597  *     a[1..3]                #=> "ell"
01598  *     a[-3,2]                #=> "er"
01599  *     a[-4..-2]              #=> "her"
01600  *     a[12..-1]              #=> nil
01601  *     a[-2..-4]              #=> ""
01602  *     a[/[aeiou](.)\1/]      #=> "ell"
01603  *     a[/[aeiou](.)\1/, 0]   #=> "ell"
01604  *     a[/[aeiou](.)\1/, 1]   #=> "l"
01605  *     a[/[aeiou](.)\1/, 2]   #=> nil
01606  *     a["lo"]                #=> "lo"
01607  *     a["bye"]               #=> nil
01608  */
01609 
01610 static VALUE
01611 rb_str_aref_m(argc, argv, str)
01612     int argc;
01613     VALUE *argv;
01614     VALUE str;
01615 {
01616     if (argc == 2) {
01617         if (TYPE(argv[0]) == T_REGEXP) {
01618             return rb_str_subpat(str, argv[0], NUM2INT(argv[1]));
01619         }
01620         return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
01621     }
01622     if (argc != 1) {
01623         rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
01624     }
01625     return rb_str_aref(str, argv[0]);
01626 }
01627 
01628 static void
01629 rb_str_splice(str, beg, len, val)
01630     VALUE str;
01631     long beg, len;
01632     VALUE val;
01633 {
01634     if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
01635 
01636     StringValue(val);
01637     rb_str_modify(str);
01638 
01639     if (RSTRING(str)->len < beg) {
01640       out_of_range:
01641         rb_raise(rb_eIndexError, "index %ld out of string", beg);
01642     }
01643     if (beg < 0) {
01644         if (-beg > RSTRING(str)->len) {
01645             goto out_of_range;
01646         }
01647         beg += RSTRING(str)->len;
01648     }
01649     if (RSTRING(str)->len < beg + len) {
01650         len = RSTRING(str)->len - beg;
01651     }
01652 
01653     if (len < RSTRIN