Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Globals

regex.h

Go to the documentation of this file.
00001 /* Definitions for data structures and routines for the regular
00002    expression library, version 0.12.
00003    Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc.
00004 
00005    This file is part of the GNU C Library.  Its master source is NOT part of
00006    the C library, however.  The master source lives in /gd/gnu/lib.
00007 
00008    The GNU C Library is free software; you can redistribute it and/or
00009    modify it under the terms of the GNU Library General Public License as
00010    published by the Free Software Foundation; either version 2 of the
00011    License, or (at your option) any later version.
00012 
00013    The GNU C Library is distributed in the hope that it will be useful,
00014    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016    Library General Public License for more details.
00017 
00018    You should have received a copy of the GNU Library General Public
00019    License along with the GNU C Library; see the file LGPL.  If not,
00020    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00021    Boston, MA 02111-1307, USA.  */
00022 /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
00023    Last change: May 21, 1993 by t^2  */
00024 /* modified for Ruby by matz@netlab.co.jp */
00025 
00026 #ifndef REGEX_H
00027 #define REGEX_H
00028 
00029 /* Symbol mangling for Ruby: when RUBY is defined, every public name listed below is renamed with a ruby_ prefix.  */
00030 #ifdef RUBY
00031 # define re_adjust_startpos ruby_re_adjust_startpos
00032 # define re_compile_fastmap ruby_re_compile_fastmap
00033 # define re_compile_pattern ruby_re_compile_pattern
00034 # define re_copy_registers ruby_re_copy_registers
00035 # define re_free_pattern ruby_re_free_pattern
00036 # define re_free_registers ruby_re_free_registers
00037 # define re_match ruby_re_match
00038 # define re_mbcinit ruby_re_mbcinit
00039 # define re_search ruby_re_search
00040 # define re_set_casetable ruby_re_set_casetable
00041 # define register_info_type ruby_register_info_type
00042 #endif /* RUBY */
00043 
00044 #include <stddef.h>
00045 
00046 /* Default number of parens for which we record the beginnings and ends;
00047    a definition of RE_NREGS made before this point takes precedence.  */
00048 #ifndef RE_NREGS
00049 #define RE_NREGS 10
00050 #endif /* NOTE(review): the earlier comment said this sizes `struct re_registers', but that struct holds int pointers in this header -- confirm whether RE_NREGS still affects it.  */
00051 
00052 #define BYTEWIDTH 8 /* used below to derive RE_REG_MAX */
00053 
00054 #define RE_REG_MAX ((1<<BYTEWIDTH)-1) /* 255 while BYTEWIDTH is 8 */
00055 
00056 /* Maximum number of duplicates an interval can allow; overridable in the same way as RE_NREGS.  */
00057 #ifndef RE_DUP_MAX
00058 #define RE_DUP_MAX  ((1 << 15) - 1) /* 32767 */
00059 #endif
00060 
00061 
00062 /* If this bit is set, then character classes are supported; they are:
00063      [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
00064      [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
00065    If not set, then character classes are not supported.  */
00066 #define RE_CHAR_CLASSES (1L << 9)
00067 
00068 /* Match will be done case-insensitively.  (1L << 0) */
00069 #define RE_OPTION_IGNORECASE (1L)
00070 /* Perl-style extended pattern available.  (1L << 1) */
00071 #define RE_OPTION_EXTENDED   (RE_OPTION_IGNORECASE<<1)
00072 /* Newline will be included for `.'.  (1L << 2) */
00073 #define RE_OPTION_MULTILINE  (RE_OPTION_EXTENDED<<1)
00074 /* ^ and $ ignore newline.  (1L << 3) */
00075 #define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE<<1)
00076 /* Search for longest match, in accord with POSIX regexp.  (1L << 4) */
00077 #define RE_OPTION_LONGEST    (RE_OPTION_SINGLELINE<<1)
00078 /* The flags below continue the same shift chain, 1L<<5 through 1L<<9.  NOTE(review): RE_OPTIMIZE_BMATCH therefore evaluates to 1L<<9, the same value as RE_CHAR_CLASSES -- confirm the two are never tested against the same flag word.  */
00079 #define RE_MAY_IGNORECASE    (RE_OPTION_LONGEST<<1)
00080 #define RE_OPTIMIZE_ANCHOR   (RE_MAY_IGNORECASE<<1)
00081 #define RE_OPTIMIZE_EXACTN   (RE_OPTIMIZE_ANCHOR<<1)
00082 #define RE_OPTIMIZE_NO_BM    (RE_OPTIMIZE_EXACTN<<1)
00083 #define RE_OPTIMIZE_BMATCH   (RE_OPTIMIZE_NO_BM<<1)
00084 
00085 /* For multi-byte char support: identifiers for the known encodings (presumably the values accepted by re_mbcinit() -- confirm).  */
00086 #define MBCTYPE_ASCII 0
00087 #define MBCTYPE_EUC 1
00088 #define MBCTYPE_SJIS 2
00089 #define MBCTYPE_UTF8 3
00090 
00091 extern /* byte-indexed table read by the ismbchar() and mbclen() macros in this header */
00092 #if defined _WIN32 && !defined __GNUC__ && !defined RUBY_EXPORT
00093 __declspec(dllimport) /* applied only for non-GNU Win32 compilers when RUBY_EXPORT is not defined */
00094 # endif
00095 const unsigned char *re_mbctab;
00096 #if defined(__STDC__)
00097 void re_mbcinit (int); /* ANSI prototype */
00098 #else
00099 void re_mbcinit (); /* pre-ANSI declaration without a parameter list */
00100 #endif
00101 
00102 #undef ismbchar /* discard any definition made before this header */
00103 #define ismbchar(c) re_mbctab[(unsigned char)(c)] /* the re_mbctab entry for byte c */
00104 #define mbclen(c)   (re_mbctab[(unsigned char)(c)]+1) /* that entry plus one; presumably the character length in bytes -- confirm */
00105 
00106 /* Union used in re_match(): the `word' pointer and the `bits' flag pair share the same storage.  */
00107 
00108 typedef union
00109 {
00110   unsigned char *word;
00111   struct {
00112     unsigned is_active : 1;           /* single-bit flag */
00113     unsigned matched_something : 1;   /* single-bit flag */
00114   } bits;
00115 } register_info_type; /* renamed to ruby_register_info_type when RUBY is defined */
00116 
00117 /* This data structure is used to represent a compiled pattern.  */
00118 
00119 struct re_pattern_buffer
00120   {
00121     char *buffer;       /* Space holding the compiled pattern commands.  */
00122     int allocated;      /* Size of space that `buffer' points to.  */
00123     int used;           /* Length of the portion of `buffer' actually occupied.  */
00124     char *fastmap;      /* Pointer to fastmap, if any, or zero if none.  */
00125                         /* re_search uses the fastmap, if there is one,
00126                            to skip over totally implausible characters.  */
00127     char *must;         /* Pointer to an exact pattern that strings must
00128                            contain in order to be matched.  */
00129     int *must_skip;     /* Pointer to the skip table for `must', used by bm_search.  */
00130     long options;       /* Flags for options such as extended_pattern (presumably the RE_OPTION_xxx bits -- confirm).  */
00131     long re_nsub;       /* Number of subexpressions found by the compiler.  */
00132     char fastmap_accurate;
00133                         /* Set to zero when a new pattern is stored,
00134                            set to one when the fastmap is updated from it.  */
00135     char can_be_null;   /* Set to one by compiling fastmap
00136                            if this pattern might match the null string.
00137                            It does not necessarily match the null string
00138                            in that case, but if this is zero, it cannot.
00139                            2 as value means can match null string
00140                            but at end of range or before a character
00141                            listed in the fastmap.  */
00142 
00143     /* Stack & working area for re_match(); reg_info points at register_info_type unions.  */
00144     unsigned char **regstart;
00145     unsigned char **regend;
00146     unsigned char **old_regstart;
00147     unsigned char **old_regend;
00148     register_info_type *reg_info;
00149     unsigned char **best_regstart;
00150     unsigned char **best_regend;
00151   };
00152 
00153 typedef struct re_pattern_buffer regex_t; /* shorthand name for struct re_pattern_buffer */
00154 
00155 /* Structure to store register contents data in.
00156 
00157    Pass the address of such a structure as an argument to re_match, etc.,
00158    if you want this information back.
00159 
00160    For i from 1 to RE_NREGS - 1, beg[i] records the starting index in
00161    the string of where the ith subexpression matched, and end[i] records
00162    one after the ending index.  beg[0] and end[0] are analogous, for
00163    the entire pattern.  NOTE(review): beg and end are pointers here, so the real upper bound is presumably num_regs rather than RE_NREGS -- confirm.  */
00164 
00165 struct re_registers
00166   {
00167     int allocated;      /* presumably the capacity of beg/end -- confirm */
00168     int num_regs;       /* presumably the number of entries in use -- confirm */
00169     int *beg;           /* starting indices, one per register */
00170     int *end;           /* one-past-the-end indices, one per register */
00171   };
00172 
00173 /* Type for byte offsets within the string.  POSIX mandates this.  NOTE(review): POSIX specifies regoff_t as a signed integer type, whereas size_t is unsigned, so a test such as rm_so < 0 can never be true with this typedef -- confirm before relying on negative offsets.  */
00174 typedef size_t regoff_t;
00175 
00176 /* POSIX specification for registers.  Aside from using different names than
00177    `re_registers', POSIX uses an array of structures, instead of a
00178    structure of arrays.  */
00179 typedef struct
00180 {
00181   regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
00182   regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
00183 } regmatch_t;
00184 
00185 #ifdef __STDC__
00186 /* ANSI C: full prototypes for the public entry points.  */
00187 extern char *re_compile_pattern (const char *, int, struct re_pattern_buffer *);
00188 extern void re_free_pattern (struct re_pattern_buffer *);
00189 /* Is this really advertised?  */
00190 extern int re_adjust_startpos (struct re_pattern_buffer *, const char*, int, int, int);
00191 extern void re_compile_fastmap (struct re_pattern_buffer *);
00192 extern int re_search (struct re_pattern_buffer *, const char*, int, int, int,
00193                       struct re_registers *);
00194 extern int re_match (struct re_pattern_buffer *, const char *, int, int,
00195                      struct re_registers *);
00196 extern void re_set_casetable (const char *table);
00197 extern void re_copy_registers (struct re_registers*, struct re_registers*);
00198 extern void re_free_registers (struct re_registers*);
00199 
00200 #ifndef RUBY
00201 /* 4.2 bsd compatibility; declared only when RUBY is not defined.  */
00202 extern char *re_comp (const char *);
00203 extern int re_exec (const char *);
00204 #endif
00205 
00206 #else /* !__STDC__ */
00207 /* Pre-ANSI compilers: the same entry points declared without parameter lists.  Keep the names in step with the prototypes above.  */
00208 extern char *re_compile_pattern ();
00209 extern void re_free_pattern ();  /* was declared as re_free_regexp, a name used nowhere else in this header */
00210 /* Is this really advertised? */
00211 extern int re_adjust_startpos ();
00212 extern void re_compile_fastmap ();
00213 extern int re_search ();
00214 extern int re_match ();
00215 extern void re_set_casetable ();
00216 extern void re_copy_registers ();
00217 extern void re_free_registers ();
00218 
00219 #endif /* __STDC__ */
00220 
00221 #endif /* !REGEX_H */
00222 

Generated on Wed Jan 18 23:32:06 2006 for Ruby by doxygen 1.3.5