regguts.hGo to the documentation of this file.00001 /* 00002 * Internal interface definitions, etc., for the reg package 00003 * 00004 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 00005 * 00006 * Development of this software was funded, in part, by Cray Research Inc., 00007 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 00008 * Corporation, none of whom are responsible for the results. The author 00009 * thanks all of them. 00010 * 00011 * Redistribution and use in source and binary forms -- with or without 00012 * modification -- are permitted for any purpose, provided that 00013 * redistributions in source form retain this entire copyright notice and 00014 * indicate the origin and nature of any modifications. 00015 * 00016 * I'd appreciate being given credit for this package in the documentation of 00017 * software which uses it, but that is not a requirement. 00018 * 00019 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 00020 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 00021 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 00022 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 */ 00030 00031 /* 00032 * Environmental customization. It should not (I hope) be necessary to alter 00033 * the file you are now reading -- regcustom.h should handle it all, given 00034 * care here and elsewhere. 00035 */ 00036 #include "regcustom.h" 00037 00038 /* 00039 * Things that regcustom.h might override. 00040 */ 00041 00042 /* standard header files (NULL is a reasonable indicator for them) */ 00043 #ifndef NULL 00044 #include <stdio.h> 00045 #include <stdlib.h> 00046 #include <ctype.h> 00047 #include <limits.h> 00048 #include <string.h> 00049 #endif 00050 00051 /* assertions */ 00052 #ifndef assert 00053 #ifndef REG_DEBUG 00054 #ifndef NDEBUG 00055 #define NDEBUG /* no assertions */ 00056 #endif 00057 #endif /* !REG_DEBUG */ 00058 #include <assert.h> 00059 #endif 00060 00061 /* voids */ 00062 #ifndef VOID 00063 #define VOID void /* for function return values */ 00064 #endif 00065 #ifndef DISCARD 00066 #define DISCARD void /* for throwing values away */ 00067 #endif 00068 #ifndef PVOID 00069 #define PVOID void * /* generic pointer */ 00070 #endif 00071 #ifndef VS 00072 #define VS(x) ((void*)(x)) /* cast something to generic ptr */ 00073 #endif 00074 #ifndef NOPARMS 00075 #define NOPARMS void /* for empty parm lists */ 00076 #endif 00077 00078 /* const */ 00079 #ifndef CONST 00080 #define CONST const /* for old compilers, might be empty */ 00081 #endif 00082 00083 /* function-pointer declarator */ 00084 #ifndef FUNCPTR 00085 #if __STDC__ >= 1 00086 #define FUNCPTR(name, args) (*name)args 00087 #else 00088 #define FUNCPTR(name, args) (*name)() 00089 #endif 00090 #endif 00091 00092 /* memory allocation */ 00093 #ifndef MALLOC 00094 #define MALLOC(n) malloc(n) 00095 #endif 00096 #ifndef REALLOC 00097 #define REALLOC(p, n) realloc(VS(p), n) 00098 #endif 00099 #ifndef FREE 00100 #define FREE(p) free(VS(p)) 00101 #endif 00102 00103 /* want size of a char in bits, and max value in bounded quantifiers */ 00104 #ifndef CHAR_BIT 00105 #include <limits.h> 00106 #endif 00107 #ifndef _POSIX2_RE_DUP_MAX 00108 #define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ 00109 #endif 00110 00111 /* 00112 * misc 00113 */ 00114 00115 #define NOTREACHED 0 00116 #define xxx 1 00117 00118 #define DUPMAX _POSIX2_RE_DUP_MAX 00119 #define INFINITY (DUPMAX+1) 00120 00121 #define REMAGIC 0xfed7 /* magic number for main struct */ 00122 00123 /* 00124 * debugging facilities 00125 */ 00126 #ifdef REG_DEBUG 00127 /* FDEBUG does finite-state tracing */ 00128 #define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } 00129 /* MDEBUG does higher-level tracing */ 00130 #define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } 00131 #else 00132 #define FDEBUG(arglist) {} 00133 #define MDEBUG(arglist) {} 00134 #endif 00135 00136 /* 00137 * bitmap manipulation 00138 */ 00139 #define UBITS (CHAR_BIT * sizeof(unsigned)) 00140 #define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) 00141 #define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) 00142 00143 /* 00144 * We dissect a chr into byts for colormap table indexing. Here we define a 00145 * byt, which will be the same as a byte on most machines... The exact size of 00146 * a byt is not critical, but about 8 bits is good, and extraction of 8-bit 00147 * chunks is sometimes especially fast. 00148 */ 00149 00150 #ifndef BYTBITS 00151 #define BYTBITS 8 /* bits in a byt */ 00152 #endif 00153 #define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */ 00154 #define BYTMASK (BYTTAB-1) /* bit mask for byt */ 00155 #define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS) 00156 /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ 00157 00158 /* 00159 * As soon as possible, we map chrs into equivalence classes -- "colors" -- 00160 * which are of much more manageable number. 00161 */ 00162 00163 typedef short color; /* colors of characters */ 00164 typedef int pcolor; /* what color promotes to */ 00165 #define COLORLESS (-1) /* impossible color */ 00166 #define WHITE 0 /* default color, parent of all others */ 00167 00168 /* 00169 * A colormap is a tree -- more precisely, a DAG -- indexed at each level by a 00170 * byt of the chr, to map the chr to a color efficiently. Because lower 00171 * sections of the tree can be shared, it can exploit the usual sparseness of 00172 * such a mapping table. The tree is always NBYTS levels deep (in the past it 00173 * was shallower during construction but was "filled" to full depth at the end 00174 * of that); areas that are unaltered as yet point to "fill blocks" which are 00175 * entirely WHITE in color. 00176 */ 00177 00178 /* the tree itself */ 00179 struct colors { 00180 color ccolor[BYTTAB]; 00181 }; 00182 struct ptrs { 00183 union tree *pptr[BYTTAB]; 00184 }; 00185 union tree { 00186 struct colors colors; 00187 struct ptrs ptrs; 00188 }; 00189 #define tcolor colors.ccolor 00190 #define tptr ptrs.pptr 00191 00192 /* Internal per-color descriptor structure for the color machinery */ 00193 struct colordesc { 00194 uchr nchrs; /* number of chars of this color */ 00195 color sub; /* open subcolor (if any); free chain ptr */ 00196 #define NOSUB COLORLESS 00197 struct arc *arcs; /* color chain */ 00198 int flags; 00199 #define FREECOL 01 /* currently free */ 00200 #define PSEUDO 02 /* pseudocolor, no real chars */ 00201 #define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) 00202 union tree *block; /* block of solid color, if any */ 00203 }; 00204 00205 /* the color map itself */ 00206 struct colormap { 00207 int magic; 00208 #define CMMAGIC 0x876 00209 struct vars *v; /* for compile error reporting */ 00210 size_t ncds; /* number of colordescs */ 00211 size_t max; /* highest in use */ 00212 color free; /* beginning of free chain (if non-0) */ 00213 struct colordesc *cd; 00214 #define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) 00215 #define NINLINECDS ((size_t)10) 00216 struct colordesc cdspace[NINLINECDS]; 00217 union tree tree[NBYTS]; /* tree top, plus fill blocks */ 00218 }; 00219 00220 /* optimization magic to do fast chr->color mapping */ 00221 #define B0(c) ((c) & BYTMASK) 00222 #define B1(c) (((c)>>BYTBITS) & BYTMASK) 00223 #define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) 00224 #define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) 00225 #if NBYTS == 1 00226 #define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) 00227 #endif 00228 /* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ 00229 #if NBYTS == 2 00230 #define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) 00231 #endif 00232 #if NBYTS == 4 00233 #define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) 00234 #endif 00235 00236 /* 00237 * Interface definitions for locale-interface functions in locale.c. 00238 */ 00239 00240 /* Representation of a set of characters. */ 00241 struct cvec { 00242 int nchrs; /* number of chrs */ 00243 int chrspace; /* number of chrs possible */ 00244 chr *chrs; /* pointer to vector of chrs */ 00245 int nranges; /* number of ranges (chr pairs) */ 00246 int rangespace; /* number of chrs possible */ 00247 chr *ranges; /* pointer to vector of chr pairs */ 00248 }; 00249 00250 /* 00251 * definitions for non-deterministic finite autmaton (NFA) internal 00252 * representation 00253 * 00254 * Having a "from" pointer within each arc may seem redundant, but it saves a 00255 * lot of hassle. 00256 */ 00257 00258 struct state; 00259 00260 struct arc { 00261 int type; 00262 #define ARCFREE '\0' 00263 color co; 00264 struct state *from; /* where it's from (and contained within) */ 00265 struct state *to; /* where it's to */ 00266 struct arc *outchain; /* *from's outs chain or free chain */ 00267 #define freechain outchain 00268 struct arc *inchain; /* *to's ins chain */ 00269 struct arc *colorchain; /* color's arc chain */ 00270 struct arc *colorchainRev; /* back-link in color's arc chain */ 00271 }; 00272 00273 struct arcbatch { /* for bulk allocation of arcs */ 00274 struct arcbatch *next; 00275 #define ABSIZE 10 00276 struct arc a[ABSIZE]; 00277 }; 00278 00279 struct state { 00280 int no; 00281 #define FREESTATE (-1) 00282 char flag; /* marks special states */ 00283 int nins; /* number of inarcs */ 00284 struct arc *ins; /* chain of inarcs */ 00285 int nouts; /* number of outarcs */ 00286 struct arc *outs; /* chain of outarcs */ 00287 struct arc *free; /* chain of free arcs */ 00288 struct state *tmp; /* temporary for traversal algorithms */ 00289 struct state *next; /* chain for traversing all */ 00290 struct state *prev; /* back chain */ 00291 struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ 00292 int noas; /* number of arcs used in first arcbatch */ 00293 }; 00294 00295 struct nfa { 00296 struct state *pre; /* pre-initial state */ 00297 struct state *init; /* initial state */ 00298 struct state *final; /* final state */ 00299 struct state *post; /* post-final state */ 00300 int nstates; /* for numbering states */ 00301 struct state *states; /* state-chain header */ 00302 struct state *slast; /* tail of the chain */ 00303 struct state *free; /* free list */ 00304 struct colormap *cm; /* the color map */ 00305 color bos[2]; /* colors, if any, assigned to BOS and BOL */ 00306 color eos[2]; /* colors, if any, assigned to EOS and EOL */ 00307 size_t size; /* Current NFA size; differs from nstates as 00308 * it also counts the number of states created 00309 * by children of this state. */ 00310 struct vars *v; /* simplifies compile error reporting */ 00311 struct nfa *parent; /* parent NFA, if any */ 00312 }; 00313 00314 /* 00315 * definitions for compacted NFA 00316 */ 00317 00318 struct carc { 00319 color co; /* COLORLESS is list terminator */ 00320 int to; /* state number */ 00321 }; 00322 00323 struct cnfa { 00324 int nstates; /* number of states */ 00325 int ncolors; /* number of colors */ 00326 int flags; 00327 #define HASLACONS 01 /* uses lookahead constraints */ 00328 int pre; /* setup state number */ 00329 int post; /* teardown state number */ 00330 color bos[2]; /* colors, if any, assigned to BOS and BOL */ 00331 color eos[2]; /* colors, if any, assigned to EOS and EOL */ 00332 struct carc **states; /* vector of pointers to outarc lists */ 00333 struct carc *arcs; /* the area for the lists */ 00334 }; 00335 #define ZAPCNFA(cnfa) ((cnfa).nstates = 0) 00336 #define NULLCNFA(cnfa) ((cnfa).nstates == 0) 00337 00338 /* 00339 * Used to limit the maximum NFA size to something sane. [Bug 1810264] 00340 */ 00341 00342 #ifndef REG_MAX_STATES 00343 # define REG_MAX_STATES 100000 00344 #endif 00345 00346 /* 00347 * subexpression tree 00348 */ 00349 00350 struct subre { 00351 char op; /* '|', '.' (concat), 'b' (backref), '(', 00352 * '=' */ 00353 char flags; 00354 #define LONGER 01 /* prefers longer match */ 00355 #define SHORTER 02 /* prefers shorter match */ 00356 #define MIXED 04 /* mixed preference below */ 00357 #define CAP 010 /* capturing parens below */ 00358 #define BACKR 020 /* back reference below */ 00359 #define INUSE 0100 /* in use in final tree */ 00360 #define LOCAL 03 /* bits which may not propagate up */ 00361 #define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ 00362 #define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ 00363 #define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) 00364 #define MESSY(f) ((f)&(MIXED|CAP|BACKR)) 00365 #define PREF(f) ((f)&LOCAL) 00366 #define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) 00367 #define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) 00368 short retry; /* index into retry memory */ 00369 int subno; /* subexpression number (for 'b' and '(') */ 00370 short min; /* min repetitions, for backref only */ 00371 short max; /* max repetitions, for backref only */ 00372 struct subre *left; /* left child, if any (also freelist chain) */ 00373 struct subre *right; /* right child, if any */ 00374 struct state *begin; /* outarcs from here... */ 00375 struct state *end; /* ...ending in inarcs here */ 00376 struct cnfa cnfa; /* compacted NFA, if any */ 00377 struct subre *chain; /* for bookkeeping and error cleanup */ 00378 }; 00379 00380 /* 00381 * table of function pointers for generic manipulation functions. A regex_t's 00382 * re_fns points to one of these. 00383 */ 00384 00385 struct fns { 00386 VOID FUNCPTR(free, (regex_t *)); 00387 }; 00388 00389 /* 00390 * the insides of a regex_t, hidden behind a void * 00391 */ 00392 00393 struct guts { 00394 int magic; 00395 #define GUTSMAGIC 0xfed9 00396 int cflags; /* copy of compile flags */ 00397 long info; /* copy of re_info */ 00398 size_t nsub; /* copy of re_nsub */ 00399 struct subre *tree; 00400 struct cnfa search; /* for fast preliminary search */ 00401 int ntree; 00402 struct colormap cmap; 00403 int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); 00404 struct subre *lacons; /* lookahead-constraint vector */ 00405 int nlacons; /* size of lacons */ 00406 }; 00407 00408 /* 00409 * Magic for allocating a variable workspace. This default version is 00410 * stack-hungry. 00411 */ 00412 00413 #ifndef AllocVars 00414 #define AllocVars(vPtr) \ 00415 struct vars var; \ 00416 register struct vars *vPtr = &var 00417 #endif 00418 #ifndef FreeVars 00419 #define FreeVars(vPtr) ((void) 0) 00420 #endif 00421 00422 /* 00423 * Local Variables: 00424 * mode: c 00425 * c-basic-offset: 4 00426 * fill-column: 78 00427 * End: 00428 */
Generated on Wed Mar 12 12:18:10 2008 by 1.5.1 |