regex.hGo to the documentation of this file.00001 #ifndef _REGEX_H_ 00002 #define _REGEX_H_ /* never again */ 00003 /* 00004 * regular expressions 00005 * 00006 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 00007 * 00008 * Development of this software was funded, in part, by Cray Research Inc., 00009 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 00010 * Corporation, none of whom are responsible for the results. The author 00011 * thanks all of them. 00012 * 00013 * Redistribution and use in source and binary forms -- with or without 00014 * modification -- are permitted for any purpose, provided that 00015 * redistributions in source form retain this entire copyright notice and 00016 * indicate the origin and nature of any modifications. 00017 * 00018 * I'd appreciate being given credit for this package in the documentation of 00019 * software which uses it, but that is not a requirement. 00020 * 00021 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 00022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 00023 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 00024 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00025 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00026 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 00027 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00028 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 00029 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 00030 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 * 00032 * 00033 * Prototypes etc. marked with "^" within comments get gathered up (and 00034 * possibly edited) by the regfwd program and inserted near the bottom of this 00035 * file. 00036 * 00037 * We offer the option of declaring one wide-character version of the RE 00038 * functions as well as the char versions. To do that, define __REG_WIDE_T to 00039 * the type of wide characters (unfortunately, there is no consensus that 00040 * wchar_t is suitable) and __REG_WIDE_COMPILE and __REG_WIDE_EXEC to the 00041 * names to be used for the compile and execute functions (suggestion: 00042 * re_Xcomp and re_Xexec, where X is a letter suggestive of the wide type, 00043 * e.g. re_ucomp and re_uexec for Unicode). For cranky old compilers, it may 00044 * be necessary to do something like: 00045 * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) 00046 * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g) 00047 * rather than just #defining the names as parameterless macros. 00048 * 00049 * For some specialized purposes, it may be desirable to suppress the 00050 * declarations of the "front end" functions, regcomp() and regexec(), or of 00051 * the char versions of the compile and execute functions. To suppress the 00052 * front-end functions, define __REG_NOFRONT. To suppress the char versions, 00053 * define __REG_NOCHAR. 00054 * 00055 * The right place to do those defines (and some others you may want, see 00056 * below) would be <sys/types.h>. If you don't have control of that file, the 00057 * right place to add your own defines to this file is marked below. This is 00058 * normally done automatically, by the makefile and regmkhdr, based on the 00059 * contents of regcustom.h. 00060 */ 00061 00062 /* 00063 * voodoo for C++ 00064 */ 00065 #ifdef __cplusplus 00066 extern "C" { 00067 #endif 00068 00069 /* 00070 * Add your own defines, if needed, here. 00071 */ 00072 00073 /* 00074 * Location where a chunk of regcustom.h is automatically spliced into this 00075 * file (working from its prototype, regproto.h). 00076 */ 00077 00078 /* --- begin --- */ 00079 /* ensure certain things don't sneak in from system headers */ 00080 #ifdef __REG_WIDE_T 00081 #undef __REG_WIDE_T 00082 #endif 00083 #ifdef __REG_WIDE_COMPILE 00084 #undef __REG_WIDE_COMPILE 00085 #endif 00086 #ifdef __REG_WIDE_EXEC 00087 #undef __REG_WIDE_EXEC 00088 #endif 00089 #ifdef __REG_REGOFF_T 00090 #undef __REG_REGOFF_T 00091 #endif 00092 #ifdef __REG_VOID_T 00093 #undef __REG_VOID_T 00094 #endif 00095 #ifdef __REG_CONST 00096 #undef __REG_CONST 00097 #endif 00098 #ifdef __REG_NOFRONT 00099 #undef __REG_NOFRONT 00100 #endif 00101 #ifdef __REG_NOCHAR 00102 #undef __REG_NOCHAR 00103 #endif 00104 /* interface types */ 00105 #define __REG_WIDE_T Tcl_UniChar 00106 #define __REG_REGOFF_T long /* not really right, but good enough... */ 00107 #define __REG_VOID_T VOID 00108 #define __REG_CONST CONST 00109 /* names and declarations */ 00110 #define __REG_WIDE_COMPILE TclReComp 00111 #define __REG_WIDE_EXEC TclReExec 00112 #define __REG_NOFRONT /* don't want regcomp() and regexec() */ 00113 #define __REG_NOCHAR /* or the char versions */ 00114 #define regfree TclReFree 00115 #define regerror TclReError 00116 /* --- end --- */ 00117 00118 /* 00119 * interface types etc. 00120 */ 00121 00122 /* 00123 * regoff_t has to be large enough to hold either off_t or ssize_t, and must 00124 * be signed; it's only a guess that long is suitable, so we offer 00125 * <sys/types.h> an override. 00126 */ 00127 #ifdef __REG_REGOFF_T 00128 typedef __REG_REGOFF_T regoff_t; 00129 #else 00130 typedef long regoff_t; 00131 #endif 00132 00133 /* 00134 * For benefit of old compilers, we offer <sys/types.h> the option of 00135 * overriding the `void' type used to declare nonexistent return types. 00136 */ 00137 #ifdef __REG_VOID_T 00138 typedef __REG_VOID_T re_void; 00139 #else 00140 typedef void re_void; 00141 #endif 00142 00143 /* 00144 * Also for benefit of old compilers, <sys/types.h> can supply a macro which 00145 * expands to a substitute for `const'. 00146 */ 00147 #ifndef __REG_CONST 00148 #define __REG_CONST const 00149 #endif 00150 00151 00152 00153 /* 00154 * other interface types 00155 */ 00156 00157 /* the biggie, a compiled RE (or rather, a front end to same) */ 00158 typedef struct { 00159 int re_magic; /* magic number */ 00160 size_t re_nsub; /* number of subexpressions */ 00161 long re_info; /* information about RE */ 00162 #define REG_UBACKREF 000001 00163 #define REG_ULOOKAHEAD 000002 00164 #define REG_UBOUNDS 000004 00165 #define REG_UBRACES 000010 00166 #define REG_UBSALNUM 000020 00167 #define REG_UPBOTCH 000040 00168 #define REG_UBBS 000100 00169 #define REG_UNONPOSIX 000200 00170 #define REG_UUNSPEC 000400 00171 #define REG_UUNPORT 001000 00172 #define REG_ULOCALE 002000 00173 #define REG_UEMPTYMATCH 004000 00174 #define REG_UIMPOSSIBLE 010000 00175 #define REG_USHORTEST 020000 00176 int re_csize; /* sizeof(character) */ 00177 char *re_endp; /* backward compatibility kludge */ 00178 /* the rest is opaque pointers to hidden innards */ 00179 char *re_guts; /* `char *' is more portable than `void *' */ 00180 char *re_fns; 00181 } regex_t; 00182 00183 /* result reporting (may acquire more fields later) */ 00184 typedef struct { 00185 regoff_t rm_so; /* start of substring */ 00186 regoff_t rm_eo; /* end of substring */ 00187 } regmatch_t; 00188 00189 /* supplementary control and reporting */ 00190 typedef struct { 00191 regmatch_t rm_extend; /* see REG_EXPECT */ 00192 } rm_detail_t; 00193 00194 /* 00195 * compilation 00196 ^ #ifndef __REG_NOCHAR 00197 ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); 00198 ^ #endif 00199 ^ #ifndef __REG_NOFRONT 00200 ^ int regcomp(regex_t *, __REG_CONST char *, int); 00201 ^ #endif 00202 ^ #ifdef __REG_WIDE_T 00203 ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); 00204 ^ #endif 00205 */ 00206 #define REG_BASIC 000000 /* BREs (convenience) */ 00207 #define REG_EXTENDED 000001 /* EREs */ 00208 #define REG_ADVF 000002 /* advanced features in EREs */ 00209 #define REG_ADVANCED 000003 /* AREs (which are also EREs) */ 00210 #define REG_QUOTE 000004 /* no special characters, none */ 00211 #define REG_NOSPEC REG_QUOTE /* historical synonym */ 00212 #define REG_ICASE 000010 /* ignore case */ 00213 #define REG_NOSUB 000020 /* don't care about subexpressions */ 00214 #define REG_EXPANDED 000040 /* expanded format, white space & comments */ 00215 #define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ 00216 #define REG_NLANCH 000200 /* ^ matches after \n, $ before */ 00217 #define REG_NEWLINE 000300 /* newlines are line terminators */ 00218 #define REG_PEND 000400 /* ugh -- backward-compatibility hack */ 00219 #define REG_EXPECT 001000 /* report details on partial/limited matches */ 00220 #define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ 00221 #define REG_DUMP 004000 /* none of your business :-) */ 00222 #define REG_FAKE 010000 /* none of your business :-) */ 00223 #define REG_PROGRESS 020000 /* none of your business :-) */ 00224 00225 /* 00226 * execution 00227 ^ #ifndef __REG_NOCHAR 00228 ^ int re_exec(regex_t *, __REG_CONST char *, size_t, 00229 ^ rm_detail_t *, size_t, regmatch_t [], int); 00230 ^ #endif 00231 ^ #ifndef __REG_NOFRONT 00232 ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); 00233 ^ #endif 00234 ^ #ifdef __REG_WIDE_T 00235 ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, 00236 ^ rm_detail_t *, size_t, regmatch_t [], int); 00237 ^ #endif 00238 */ 00239 #define REG_NOTBOL 0001 /* BOS is not BOL */ 00240 #define REG_NOTEOL 0002 /* EOS is not EOL */ 00241 #define REG_STARTEND 0004 /* backward compatibility kludge */ 00242 #define REG_FTRACE 0010 /* none of your business */ 00243 #define REG_MTRACE 0020 /* none of your business */ 00244 #define REG_SMALL 0040 /* none of your business */ 00245 00246 /* 00247 * misc generics (may be more functions here eventually) 00248 ^ re_void regfree(regex_t *); 00249 */ 00250 00251 /* 00252 * error reporting 00253 * Be careful if modifying the list of error codes -- the table used by 00254 * regerror() is generated automatically from this file! 00255 * 00256 * Note that there is no wide-char variant of regerror at this time; what kind 00257 * of character is used for error reports is independent of what kind is used 00258 * in matching. 00259 * 00260 ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); 00261 */ 00262 #define REG_OKAY 0 /* no errors detected */ 00263 #define REG_NOMATCH 1 /* failed to match */ 00264 #define REG_BADPAT 2 /* invalid regexp */ 00265 #define REG_ECOLLATE 3 /* invalid collating element */ 00266 #define REG_ECTYPE 4 /* invalid character class */ 00267 #define REG_EESCAPE 5 /* invalid escape \ sequence */ 00268 #define REG_ESUBREG 6 /* invalid backreference number */ 00269 #define REG_EBRACK 7 /* brackets [] not balanced */ 00270 #define REG_EPAREN 8 /* parentheses () not balanced */ 00271 #define REG_EBRACE 9 /* braces {} not balanced */ 00272 #define REG_BADBR 10 /* invalid repetition count(s) */ 00273 #define REG_ERANGE 11 /* invalid character range */ 00274 #define REG_ESPACE 12 /* out of memory */ 00275 #define REG_BADRPT 13 /* quantifier operand invalid */ 00276 #define REG_ASSERT 15 /* "can't happen" -- you found a bug */ 00277 #define REG_INVARG 16 /* invalid argument to regex function */ 00278 #define REG_MIXED 17 /* character widths of regex and string differ */ 00279 #define REG_BADOPT 18 /* invalid embedded option */ 00280 #define REG_ETOOBIG 19 /* nfa has too many states */ 00281 /* two specials for debugging and testing */ 00282 #define REG_ATOI 101 /* convert error-code name to number */ 00283 #define REG_ITOA 102 /* convert error-code number to name */ 00284 00285 /* 00286 * the prototypes, as possibly munched by regfwd 00287 */ 00288 /* =====^!^===== begin forwards =====^!^===== */ 00289 /* automatically gathered by fwd; do not hand-edit */ 00290 /* === regproto.h === */ 00291 #ifndef __REG_NOCHAR 00292 int re_comp(regex_t *, __REG_CONST char *, size_t, int); 00293 #endif 00294 #ifndef __REG_NOFRONT 00295 int regcomp(regex_t *, __REG_CONST char *, int); 00296 #endif 00297 #ifdef __REG_WIDE_T 00298 MODULE_SCOPE int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); 00299 #endif 00300 #ifndef __REG_NOCHAR 00301 int re_exec(regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int); 00302 #endif 00303 #ifndef __REG_NOFRONT 00304 int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); 00305 #endif 00306 #ifdef __REG_WIDE_T 00307 MODULE_SCOPE int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int); 00308 #endif 00309 MODULE_SCOPE re_void regfree(regex_t *); 00310 MODULE_SCOPE size_t regerror(int, __REG_CONST regex_t *, char *, size_t); 00311 /* automatically gathered by fwd; do not hand-edit */ 00312 /* =====^!^===== end forwards =====^!^===== */ 00313 00314 /* 00315 * more C++ voodoo 00316 */ 00317 #ifdef __cplusplus 00318 } 00319 #endif 00320 00321 #endif
Generated on Wed Mar 12 12:18:10 2008 by 1.5.1 |