regcomp.cGo to the documentation of this file.00001 /* 00002 * re_*comp and friends - compile REs 00003 * This file #includes several others (see the bottom). 00004 * 00005 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 00006 * 00007 * Development of this software was funded, in part, by Cray Research Inc., 00008 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 00009 * Corporation, none of whom are responsible for the results. The author 00010 * thanks all of them. 00011 * 00012 * Redistribution and use in source and binary forms -- with or without 00013 * modification -- are permitted for any purpose, provided that 00014 * redistributions in source form retain this entire copyright notice and 00015 * indicate the origin and nature of any modifications. 00016 * 00017 * I'd appreciate being given credit for this package in the documentation of 00018 * software which uses it, but that is not a requirement. 00019 * 00020 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 00021 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 00022 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 00023 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00024 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00025 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 00026 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00027 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 00028 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 00029 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 */ 00032 00033 #include "regguts.h" 00034 00035 /* 00036 * forward declarations, up here so forward datatypes etc. are defined early 00037 */ 00038 /* =====^!^===== begin forwards =====^!^===== */ 00039 /* automatically gathered by fwd; do not hand-edit */ 00040 /* === regcomp.c === */ 00041 int compile(regex_t *, const chr *, size_t, int); 00042 static void moresubs(struct vars *, int); 00043 static int freev(struct vars *, int); 00044 static void makesearch(struct vars *, struct nfa *); 00045 static struct subre *parse(struct vars *, int, int, struct state *, struct state *); 00046 static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int); 00047 static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); 00048 static void nonword(struct vars *, int, struct state *, struct state *); 00049 static void word(struct vars *, int, struct state *, struct state *); 00050 static int scannum(struct vars *); 00051 static void repeat(struct vars *, struct state *, struct state *, int, int); 00052 static void bracket(struct vars *, struct state *, struct state *); 00053 static void cbracket(struct vars *, struct state *, struct state *); 00054 static void brackpart(struct vars *, struct state *, struct state *); 00055 static const chr *scanplain(struct vars *); 00056 static void onechr(struct vars *, pchr, struct state *, struct state *); 00057 static void dovec(struct vars *, struct cvec *, struct state *, struct state *); 00058 static void wordchrs(struct vars *); 00059 static struct subre *subre(struct vars *, int, int, struct state *, struct state *); 00060 static void freesubre(struct vars *, struct subre *); 00061 static void freesrnode(struct vars *, struct subre *); 00062 static void optst(struct vars *, struct subre *); 00063 static int numst(struct subre *, int); 00064 static void markst(struct subre *); 00065 static void cleanst(struct vars *); 00066 static long nfatree(struct vars *, struct subre *, FILE *); 00067 static long nfanode(struct vars *, struct subre *, FILE *); 00068 static int newlacon(struct vars *, struct state *, struct state *, int); 00069 static void freelacons(struct subre *, int); 00070 static void rfree(regex_t *); 00071 static void dump(regex_t *, FILE *); 00072 static void dumpst(struct subre *, FILE *, int); 00073 static void stdump(struct subre *, FILE *, int); 00074 static const char *stid(struct subre *, char *, size_t); 00075 /* === regc_lex.c === */ 00076 static void lexstart(struct vars *); 00077 static void prefixes(struct vars *); 00078 static void lexnest(struct vars *, const chr *, const chr *); 00079 static void lexword(struct vars *); 00080 static int next(struct vars *); 00081 static int lexescape(struct vars *); 00082 static chr lexdigits(struct vars *, int, int, int); 00083 static int brenext(struct vars *, pchr); 00084 static void skip(struct vars *); 00085 static chr newline(NOPARMS); 00086 #ifdef REG_DEBUG 00087 static const chr *ch(NOPARMS); 00088 #endif 00089 static chr chrnamed(struct vars *, const chr *, const chr *, pchr); 00090 /* === regc_color.c === */ 00091 static void initcm(struct vars *, struct colormap *); 00092 static void freecm(struct colormap *); 00093 static void cmtreefree(struct colormap *, union tree *, int); 00094 static color setcolor(struct colormap *, pchr, pcolor); 00095 static color maxcolor(struct colormap *); 00096 static color newcolor(struct colormap *); 00097 static void freecolor(struct colormap *, pcolor); 00098 static color pseudocolor(struct colormap *); 00099 static color subcolor(struct colormap *, pchr c); 00100 static color newsub(struct colormap *, pcolor); 00101 static void subrange(struct vars *, pchr, pchr, struct state *, struct state *); 00102 static void subblock(struct vars *, pchr, struct state *, struct state *); 00103 static void okcolors(struct nfa *, struct colormap *); 00104 static void colorchain(struct colormap *, struct arc *); 00105 static void uncolorchain(struct colormap *, struct arc *); 00106 static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); 00107 static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); 00108 #ifdef REG_DEBUG 00109 static void dumpcolors(struct colormap *, FILE *); 00110 static void fillcheck(struct colormap *, union tree *, int, FILE *); 00111 static void dumpchr(pchr, FILE *); 00112 #endif 00113 /* === regc_nfa.c === */ 00114 static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); 00115 static void freenfa(struct nfa *); 00116 static struct state *newstate(struct nfa *); 00117 static struct state *newfstate(struct nfa *, int flag); 00118 static void dropstate(struct nfa *, struct state *); 00119 static void freestate(struct nfa *, struct state *); 00120 static void destroystate(struct nfa *, struct state *); 00121 static void newarc(struct nfa *, int, pcolor, struct state *, struct state *); 00122 static struct arc *allocarc(struct nfa *, struct state *); 00123 static void freearc(struct nfa *, struct arc *); 00124 static struct arc *findarc(struct state *, int, pcolor); 00125 static void cparc(struct nfa *, struct arc *, struct state *, struct state *); 00126 static void moveins(struct nfa *, struct state *, struct state *); 00127 static void copyins(struct nfa *, struct state *, struct state *); 00128 static void moveouts(struct nfa *, struct state *, struct state *); 00129 static void copyouts(struct nfa *, struct state *, struct state *); 00130 static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); 00131 static void delsub(struct nfa *, struct state *, struct state *); 00132 static void deltraverse(struct nfa *, struct state *, struct state *); 00133 static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *); 00134 static void duptraverse(struct nfa *, struct state *, struct state *); 00135 static void cleartraverse(struct nfa *, struct state *); 00136 static void specialcolors(struct nfa *); 00137 static long optimize(struct nfa *, FILE *); 00138 static void pullback(struct nfa *, FILE *); 00139 static int pull(struct nfa *, struct arc *); 00140 static void pushfwd(struct nfa *, FILE *); 00141 static int push(struct nfa *, struct arc *); 00142 #define INCOMPATIBLE 1 /* destroys arc */ 00143 #define SATISFIED 2 /* constraint satisfied */ 00144 #define COMPATIBLE 3 /* compatible but not satisfied yet */ 00145 static int combine(struct arc *, struct arc *); 00146 static void fixempties(struct nfa *, FILE *); 00147 static int unempty(struct nfa *, struct arc *); 00148 static void cleanup(struct nfa *); 00149 static void markreachable(struct nfa *, struct state *, struct state *, struct state *); 00150 static void markcanreach(struct nfa *, struct state *, struct state *, struct state *); 00151 static long analyze(struct nfa *); 00152 static void compact(struct nfa *, struct cnfa *); 00153 static void carcsort(struct carc *, struct carc *); 00154 static void freecnfa(struct cnfa *); 00155 static void dumpnfa(struct nfa *, FILE *); 00156 #ifdef REG_DEBUG 00157 static void dumpstate(struct state *, FILE *); 00158 static void dumparcs(struct state *, FILE *); 00159 static int dumprarcs(struct arc *, struct state *, FILE *, int); 00160 static void dumparc(struct arc *, struct state *, FILE *); 00161 #endif 00162 static void dumpcnfa(struct cnfa *, FILE *); 00163 #ifdef REG_DEBUG 00164 static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); 00165 #endif 00166 /* === regc_cvec.c === */ 00167 static struct cvec *clearcvec(struct cvec *); 00168 static void addchr(struct cvec *, pchr); 00169 static void addrange(struct cvec *, pchr, pchr); 00170 static struct cvec *newcvec(int, int); 00171 static struct cvec *getcvec(struct vars *, int, int); 00172 static void freecvec(struct cvec *); 00173 /* === regc_locale.c === */ 00174 static celt element(struct vars *, const chr *, const chr *); 00175 static struct cvec *range(struct vars *, celt, celt, int); 00176 static int before(celt, celt); 00177 static struct cvec *eclass(struct vars *, celt, int); 00178 static struct cvec *cclass(struct vars *, const chr *, const chr *, int); 00179 static struct cvec *allcases(struct vars *, pchr); 00180 static int cmp(const chr *, const chr *, size_t); 00181 static int casecmp(const chr *, const chr *, size_t); 00182 /* automatically gathered by fwd; do not hand-edit */ 00183 /* =====^!^===== end forwards =====^!^===== */ 00184 00185 /* internal variables, bundled for easy passing around */ 00186 struct vars { 00187 regex_t *re; 00188 const chr *now; /* scan pointer into string */ 00189 const chr *stop; /* end of string */ 00190 const chr *savenow; /* saved now and stop for "subroutine call" */ 00191 const chr *savestop; 00192 int err; /* error code (0 if none) */ 00193 int cflags; /* copy of compile flags */ 00194 int lasttype; /* type of previous token */ 00195 int nexttype; /* type of next token */ 00196 chr nextvalue; /* value (if any) of next token */ 00197 int lexcon; /* lexical context type (see lex.c) */ 00198 int nsubexp; /* subexpression count */ 00199 struct subre **subs; /* subRE pointer vector */ 00200 size_t nsubs; /* length of vector */ 00201 struct subre *sub10[10]; /* initial vector, enough for most */ 00202 struct nfa *nfa; /* the NFA */ 00203 struct colormap *cm; /* character color map */ 00204 color nlcolor; /* color of newline */ 00205 struct state *wordchrs; /* state in nfa holding word-char outarcs */ 00206 struct subre *tree; /* subexpression tree */ 00207 struct subre *treechain; /* all tree nodes allocated */ 00208 struct subre *treefree; /* any free tree nodes */ 00209 int ntree; /* number of tree nodes */ 00210 struct cvec *cv; /* interface cvec */ 00211 struct cvec *cv2; /* utility cvec */ 00212 struct subre *lacons; /* lookahead-constraint vector */ 00213 int nlacons; /* size of lacons */ 00214 }; 00215 00216 /* parsing macros; most know that `v' is the struct vars pointer */ 00217 #define NEXT() (next(v)) /* advance by one token */ 00218 #define SEE(t) (v->nexttype == (t)) /* is next token this? */ 00219 #define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ 00220 #define VISERR(vv) ((vv)->err != 0)/* have we seen an error yet? */ 00221 #define ISERR() VISERR(v) 00222 #define VERR(vv,e) \ 00223 ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err : ((vv)->err = (e))) 00224 #define ERR(e) VERR(v, e) /* record an error */ 00225 #define NOERR() {if (ISERR()) return;} /* if error seen, return */ 00226 #define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ 00227 #define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */ 00228 #define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */ 00229 #define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ 00230 #define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) 00231 00232 /* token type codes, some also used as NFA arc types */ 00233 #define EMPTY 'n' /* no token present */ 00234 #define EOS 'e' /* end of string */ 00235 #define PLAIN 'p' /* ordinary character */ 00236 #define DIGIT 'd' /* digit (in bound) */ 00237 #define BACKREF 'b' /* back reference */ 00238 #define COLLEL 'I' /* start of [. */ 00239 #define ECLASS 'E' /* start of [= */ 00240 #define CCLASS 'C' /* start of [: */ 00241 #define END 'X' /* end of [. [= [: */ 00242 #define RANGE 'R' /* - within [] which might be range delim. */ 00243 #define LACON 'L' /* lookahead constraint subRE */ 00244 #define AHEAD 'a' /* color-lookahead arc */ 00245 #define BEHIND 'r' /* color-lookbehind arc */ 00246 #define WBDRY 'w' /* word boundary constraint */ 00247 #define NWBDRY 'W' /* non-word-boundary constraint */ 00248 #define SBEGIN 'A' /* beginning of string (even if not BOL) */ 00249 #define SEND 'Z' /* end of string (even if not EOL) */ 00250 #define PREFER 'P' /* length preference */ 00251 00252 /* is an arc colored, and hence on a color chain? */ 00253 #define COLORED(a) \ 00254 ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND) 00255 00256 /* static function list */ 00257 static struct fns functions = { 00258 rfree, /* regfree insides */ 00259 }; 00260 00261 /* 00262 - compile - compile regular expression 00263 ^ int compile(regex_t *, const chr *, size_t, int); 00264 */ 00265 int 00266 compile( 00267 regex_t *re, 00268 const chr *string, 00269 size_t len, 00270 int flags) 00271 { 00272 AllocVars(v); 00273 struct guts *g; 00274 int i; 00275 size_t j; 00276 FILE *debug = (flags®_PROGRESS) ? stdout : NULL; 00277 #define CNOERR() { if (ISERR()) return freev(v, v->err); } 00278 00279 /* 00280 * Sanity checks. 00281 */ 00282 00283 if (re == NULL || string == NULL) { 00284 FreeVars(v); 00285 return REG_INVARG; 00286 } 00287 if ((flags®_QUOTE) && (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) { 00288 FreeVars(v); 00289 return REG_INVARG; 00290 } 00291 if (!(flags®_EXTENDED) && (flags®_ADVF)) { 00292 FreeVars(v); 00293 return REG_INVARG; 00294 } 00295 00296 /* 00297 * Initial setup (after which freev() is callable). 00298 */ 00299 00300 v->re = re; 00301 v->now = string; 00302 v->stop = v->now + len; 00303 v->savenow = v->savestop = NULL; 00304 v->err = 0; 00305 v->cflags = flags; 00306 v->nsubexp = 0; 00307 v->subs = v->sub10; 00308 v->nsubs = 10; 00309 for (j = 0; j < v->nsubs; j++) { 00310 v->subs[j] = NULL; 00311 } 00312 v->nfa = NULL; 00313 v->cm = NULL; 00314 v->nlcolor = COLORLESS; 00315 v->wordchrs = NULL; 00316 v->tree = NULL; 00317 v->treechain = NULL; 00318 v->treefree = NULL; 00319 v->cv = NULL; 00320 v->cv2 = NULL; 00321 v->lacons = NULL; 00322 v->nlacons = 0; 00323 re->re_magic = REMAGIC; 00324 re->re_info = 0; /* bits get set during parse */ 00325 re->re_csize = sizeof(chr); 00326 re->re_guts = NULL; 00327 re->re_fns = VS(&functions); 00328 00329 /* 00330 * More complex setup, malloced things. 00331 */ 00332 00333 re->re_guts = VS(MALLOC(sizeof(struct guts))); 00334 if (re->re_guts == NULL) { 00335 return freev(v, REG_ESPACE); 00336 } 00337 g = (struct guts *) re->re_guts; 00338 g->tree = NULL; 00339 initcm(v, &g->cmap); 00340 v->cm = &g->cmap; 00341 g->lacons = NULL; 00342 g->nlacons = 0; 00343 ZAPCNFA(g->search); 00344 v->nfa = newnfa(v, v->cm, NULL); 00345 CNOERR(); 00346 v->cv = newcvec(100, 20); 00347 if (v->cv == NULL) { 00348 return freev(v, REG_ESPACE); 00349 } 00350 00351 /* 00352 * Parsing. 00353 */ 00354 00355 lexstart(v); /* also handles prefixes */ 00356 if ((v->cflags®_NLSTOP) || (v->cflags®_NLANCH)) { 00357 /* 00358 * Assign newline a unique color. 00359 */ 00360 00361 v->nlcolor = subcolor(v->cm, newline()); 00362 okcolors(v->nfa, v->cm); 00363 } 00364 CNOERR(); 00365 v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final); 00366 assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ 00367 CNOERR(); 00368 assert(v->tree != NULL); 00369 00370 /* 00371 * Finish setup of nfa and its subre tree. 00372 */ 00373 00374 specialcolors(v->nfa); 00375 CNOERR(); 00376 if (debug != NULL) { 00377 fprintf(debug, "\n\n\n========= RAW ==========\n"); 00378 dumpnfa(v->nfa, debug); 00379 dumpst(v->tree, debug, 1); 00380 } 00381 optst(v, v->tree); 00382 v->ntree = numst(v->tree, 1); 00383 markst(v->tree); 00384 cleanst(v); 00385 if (debug != NULL) { 00386 fprintf(debug, "\n\n\n========= TREE FIXED ==========\n"); 00387 dumpst(v->tree, debug, 1); 00388 } 00389 00390 /* 00391 * Build compacted NFAs for tree and lacons. 00392 */ 00393 00394 re->re_info |= nfatree(v, v->tree, debug); 00395 CNOERR(); 00396 assert(v->nlacons == 0 || v->lacons != NULL); 00397 for (i = 1; i < v->nlacons; i++) { 00398 if (debug != NULL) { 00399 fprintf(debug, "\n\n\n========= LA%d ==========\n", i); 00400 } 00401 nfanode(v, &v->lacons[i], debug); 00402 } 00403 CNOERR(); 00404 if (v->tree->flags&SHORTER) { 00405 NOTE(REG_USHORTEST); 00406 } 00407 00408 /* 00409 * Build compacted NFAs for tree, lacons, fast search. 00410 */ 00411 00412 if (debug != NULL) { 00413 fprintf(debug, "\n\n\n========= SEARCH ==========\n"); 00414 } 00415 00416 /* 00417 * Can sacrifice main NFA now, so use it as work area. 00418 */ 00419 00420 (DISCARD) optimize(v->nfa, debug); 00421 CNOERR(); 00422 makesearch(v, v->nfa); 00423 CNOERR(); 00424 compact(v->nfa, &g->search); 00425 CNOERR(); 00426 00427 /* 00428 * Looks okay, package it up. 00429 */ 00430 00431 re->re_nsub = v->nsubexp; 00432 v->re = NULL; /* freev no longer frees re */ 00433 g->magic = GUTSMAGIC; 00434 g->cflags = v->cflags; 00435 g->info = re->re_info; 00436 g->nsub = re->re_nsub; 00437 g->tree = v->tree; 00438 v->tree = NULL; 00439 g->ntree = v->ntree; 00440 g->compare = (v->cflags®_ICASE) ? casecmp : cmp; 00441 g->lacons = v->lacons; 00442 v->lacons = NULL; 00443 g->nlacons = v->nlacons; 00444 00445 if (flags®_DUMP) { 00446 dump(re, stdout); 00447 } 00448 00449 assert(v->err == 0); 00450 return freev(v, 0); 00451 } 00452 00453 /* 00454 - moresubs - enlarge subRE vector 00455 ^ static void moresubs(struct vars *, int); 00456 */ 00457 static void 00458 moresubs( 00459 struct vars *v, 00460 int wanted) /* want enough room for this one */ 00461 { 00462 struct subre **p; 00463 size_t n; 00464 00465 assert(wanted > 0 && (size_t)wanted >= v->nsubs); 00466 n = (size_t)wanted * 3 / 2 + 1; 00467 if (v->subs == v->sub10) { 00468 p = (struct subre **) MALLOC(n * sizeof(struct subre *)); 00469 if (p != NULL) { 00470 memcpy(p, v->subs, v->nsubs * sizeof(struct subre *)); 00471 } 00472 } else { 00473 p = (struct subre **) REALLOC(v->subs, n*sizeof(struct subre *)); 00474 } 00475 if (p == NULL) { 00476 ERR(REG_ESPACE); 00477 return; 00478 } 00479 00480 v->subs = p; 00481 for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) { 00482 *p = NULL; 00483 } 00484 assert(v->nsubs == n); 00485 assert((size_t)wanted < v->nsubs); 00486 } 00487 00488 /* 00489 - freev - free vars struct's substructures where necessary 00490 * Optionally does error-number setting, and always returns error code (if 00491 * any), to make error-handling code terser. 00492 ^ static int freev(struct vars *, int); 00493 */ 00494 static int 00495 freev( 00496 struct vars *v, 00497 int err) 00498 { 00499 register int ret; 00500 00501 if (v->re != NULL) { 00502 rfree(v->re); 00503 } 00504 if (v->subs != v->sub10) { 00505 FREE(v->subs); 00506 } 00507 if (v->nfa != NULL) { 00508 freenfa(v->nfa); 00509 } 00510 if (v->tree != NULL) { 00511 freesubre(v, v->tree); 00512 } 00513 if (v->treechain != NULL) { 00514 cleanst(v); 00515 } 00516 if (v->cv != NULL) { 00517 freecvec(v->cv); 00518 } 00519 if (v->cv2 != NULL) { 00520 freecvec(v->cv2); 00521 } 00522 if (v->lacons != NULL) { 00523 freelacons(v->lacons, v->nlacons); 00524 } 00525 ERR(err); /* nop if err==0 */ 00526 00527 ret = v->err; 00528 FreeVars(v); 00529 return ret; 00530 } 00531 00532 /* 00533 - makesearch - turn an NFA into a search NFA (implicit prepend of .*?) 00534 * NFA must have been optimize()d already. 00535 ^ static void makesearch(struct vars *, struct nfa *); 00536 */ 00537 static void 00538 makesearch( 00539 struct vars *v, 00540 struct nfa *nfa) 00541 { 00542 struct arc *a, *b; 00543 struct state *pre = nfa->pre; 00544 struct state *s, *s2, *slist; 00545 00546 /* 00547 * No loops are needed if it's anchored. 00548 */ 00549 00550 for (a = pre->outs; a != NULL; a = a->outchain) { 00551 assert(a->type == PLAIN); 00552 if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) { 00553 break; 00554 } 00555 } 00556 if (a != NULL) { 00557 /* 00558 * Add implicit .* in front. 00559 */ 00560 00561 rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre); 00562 00563 /* 00564 * And ^* and \A* too -- not always necessary, but harmless. 00565 */ 00566 00567 newarc(nfa, PLAIN, nfa->bos[0], pre, pre); 00568 newarc(nfa, PLAIN, nfa->bos[1], pre, pre); 00569 } 00570 00571 /* 00572 * Now here's the subtle part. Because many REs have no lookback 00573 * constraints, often knowing when you were in the pre state tells you 00574 * little; it's the next state(s) that are informative. But some of them 00575 * may have other inarcs, i.e. it may be possible to make actual progress 00576 * and then return to one of them. We must de-optimize such cases, 00577 * splitting each such state into progress and no-progress states. 00578 */ 00579 00580 /* 00581 * First, make a list of the states. 00582 */ 00583 00584 slist = NULL; 00585 for (a=pre->outs ; a!=NULL ; a=a->outchain) { 00586 s = a->to; 00587 for (b=s->ins ; b!=NULL ; b=b->inchain) { 00588 if (b->from != pre) { 00589 break; 00590 } 00591 } 00592 if (b != NULL && s->tmp == NULL) { 00593 /* 00594 * Must be split if not already in the list (fixes bugs 505048, 00595 * 230589, 840258, 504785). 00596 */ 00597 00598 s->tmp = slist; 00599 slist = s; 00600 } 00601 } 00602 00603 /* 00604 * Do the splits. 00605 */ 00606 00607 for (s=slist ; s!=NULL ; s=s2) { 00608 s2 = newstate(nfa); 00609 00610 copyouts(nfa, s, s2); 00611 for (a=s->ins ; a!=NULL ; a=b) { 00612 b = a->inchain; 00613 00614 if (a->from != pre) { 00615 cparc(nfa, a, a->from, s2); 00616 freearc(nfa, a); 00617 } 00618 } 00619 s2 = s->tmp; 00620 s->tmp = NULL; /* clean up while we're at it */ 00621 } 00622 } 00623 00624 /* 00625 - parse - parse an RE 00626 * This is actually just the top level, which parses a bunch of branches tied 00627 * together with '|'. They appear in the tree as the left children of a chain 00628 * of '|' subres. 00629 ^ static struct subre *parse(struct vars *, int, int, struct state *, 00630 ^ struct state *); 00631 */ 00632 static struct subre * 00633 parse( 00634 struct vars *v, 00635 int stopper, /* EOS or ')' */ 00636 int type, /* LACON (lookahead subRE) or PLAIN */ 00637 struct state *init, /* initial state */ 00638 struct state *final) /* final state */ 00639 { 00640 struct state *left, *right; /* scaffolding for branch */ 00641 struct subre *branches; /* top level */ 00642 struct subre *branch; /* current branch */ 00643 struct subre *t; /* temporary */ 00644 int firstbranch; /* is this the first branch? */ 00645 00646 assert(stopper == ')' || stopper == EOS); 00647 00648 branches = subre(v, '|', LONGER, init, final); 00649 NOERRN(); 00650 branch = branches; 00651 firstbranch = 1; 00652 do { /* a branch */ 00653 if (!firstbranch) { 00654 /* 00655 * Need a place to hang the branch. 00656 */ 00657 00658 branch->right = subre(v, '|', LONGER, init, final); 00659 NOERRN(); 00660 branch = branch->right; 00661 } 00662 firstbranch = 0; 00663 left = newstate(v->nfa); 00664 right = newstate(v->nfa); 00665 NOERRN(); 00666 EMPTYARC(init, left); 00667 EMPTYARC(right, final); 00668 NOERRN(); 00669 branch->left = parsebranch(v, stopper, type, left, right, 0); 00670 NOERRN(); 00671 branch->flags |= UP(branch->flags | branch->left->flags); 00672 if ((branch->flags &~ branches->flags) != 0) { /* new flags */ 00673 for (t = branches; t != branch; t = t->right) { 00674 t->flags |= branch->flags; 00675 } 00676 } 00677 } while (EAT('|')); 00678 assert(SEE(stopper) || SEE(EOS)); 00679 00680 if (!SEE(stopper)) { 00681 assert(stopper == ')' && SEE(EOS)); 00682 ERR(REG_EPAREN); 00683 } 00684 00685 /* 00686 * Optimize out simple cases. 00687 */ 00688 00689 if (branch == branches) { /* only one branch */ 00690 assert(branch->right == NULL); 00691 t = branch->left; 00692 branch->left = NULL; 00693 freesubre(v, branches); 00694 branches = t; 00695 } else if (!MESSY(branches->flags)) { /* no interesting innards */ 00696 freesubre(v, branches->left); 00697 branches->left = NULL; 00698 freesubre(v, branches->right); 00699 branches->right = NULL; 00700 branches->op = '='; 00701 } 00702 00703 return branches; 00704 } 00705 00706 /* 00707 - parsebranch - parse one branch of an RE 00708 * This mostly manages concatenation, working closely with parseqatom(). 00709 * Concatenated things are bundled up as much as possible, with separate 00710 * ',' nodes introduced only when necessary due to substructure. 00711 ^ static struct subre *parsebranch(struct vars *, int, int, struct state *, 00712 ^ struct state *, int); 00713 */ 00714 static struct subre * 00715 parsebranch( 00716 struct vars *v, 00717 int stopper, /* EOS or ')' */ 00718 int type, /* LACON (lookahead subRE) or PLAIN */ 00719 struct state *left, /* leftmost state */ 00720 struct state *right, /* rightmost state */ 00721 int partial) /* is this only part of a branch? */ 00722 { 00723 struct state *lp; /* left end of current construct */ 00724 int seencontent; /* is there anything in this branch yet? */ 00725 struct subre *t; 00726 00727 lp = left; 00728 seencontent = 0; 00729 t = subre(v, '=', 0, left, right); /* op '=' is tentative */ 00730 NOERRN(); 00731 while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) { 00732 if (seencontent) { /* implicit concat operator */ 00733 lp = newstate(v->nfa); 00734 NOERRN(); 00735 moveins(v->nfa, right, lp); 00736 } 00737 seencontent = 1; 00738 00739 /* NB, recursion in parseqatom() may swallow rest of branch */ 00740 parseqatom(v, stopper, type, lp, right, t); 00741 } 00742 00743 if (!seencontent) { /* empty branch */ 00744 if (!partial) { 00745 NOTE(REG_UUNSPEC); 00746 } 00747 assert(lp == left); 00748 EMPTYARC(left, right); 00749 } 00750 00751 return t; 00752 } 00753 00754 /* 00755 - parseqatom - parse one quantified atom or constraint of an RE 00756 * The bookkeeping near the end cooperates very closely with parsebranch(); in 00757 * particular, it contains a recursion that can involve parsing the rest of 00758 * the branch, making this function's name somewhat inaccurate. 00759 ^ static void parseqatom(struct vars *, int, int, struct state *, 00760 ^ struct state *, struct subre *); 00761 */ 00762 static void 00763 parseqatom( 00764 struct vars *v, 00765 int stopper, /* EOS or ')' */ 00766 int type, /* LACON (lookahead subRE) or PLAIN */ 00767 struct state *lp, /* left state to hang it on */ 00768 struct state *rp, /* right state to hang it on */ 00769 struct subre *top) /* subtree top */ 00770 { 00771 struct state *s; /* temporaries for new states */ 00772 struct state *s2; 00773 #define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) 00774 int m, n; 00775 struct subre *atom; /* atom's subtree */ 00776 struct subre *t; 00777 int cap; /* capturing parens? */ 00778 int pos; /* positive lookahead? */ 00779 int subno; /* capturing-parens or backref number */ 00780 int atomtype; 00781 int qprefer; /* quantifier short/long preference */ 00782 int f; 00783 struct subre **atomp; /* where the pointer to atom is */ 00784 00785 /* 00786 * Initial bookkeeping. 00787 */ 00788 00789 atom = NULL; 00790 assert(lp->nouts == 0); /* must string new code */ 00791 assert(rp->nins == 0); /* between lp and rp */ 00792 subno = 0; /* just to shut lint up */ 00793 00794 /* 00795 * An atom or constraint... 00796 */ 00797 00798 atomtype = v->nexttype; 00799 switch (atomtype) { 00800 /* first, constraints, which end by returning */ 00801 case '^': 00802 ARCV('^', 1); 00803 if (v->cflags®_NLANCH) { 00804 ARCV(BEHIND, v->nlcolor); 00805 } 00806 NEXT(); 00807 return; 00808 case '$': 00809 ARCV('$', 1); 00810 if (v->cflags®_NLANCH) { 00811 ARCV(AHEAD, v->nlcolor); 00812 } 00813 NEXT(); 00814 return; 00815 case SBEGIN: 00816 ARCV('^', 1); /* BOL */ 00817 ARCV('^', 0); /* or BOS */ 00818 NEXT(); 00819 return; 00820 case SEND: 00821 ARCV('$', 1); /* EOL */ 00822 ARCV('$', 0); /* or EOS */ 00823 NEXT(); 00824 return; 00825 case '<': 00826 wordchrs(v); /* does NEXT() */ 00827 s = newstate(v->nfa); 00828 NOERR(); 00829 nonword(v, BEHIND, lp, s); 00830 word(v, AHEAD, s, rp); 00831 return; 00832 case '>': 00833 wordchrs(v); /* does NEXT() */ 00834 s = newstate(v->nfa); 00835 NOERR(); 00836 word(v, BEHIND, lp, s); 00837 nonword(v, AHEAD, s, rp); 00838 return; 00839 case WBDRY: 00840 wordchrs(v); /* does NEXT() */ 00841 s = newstate(v->nfa); 00842 NOERR(); 00843 nonword(v, BEHIND, lp, s); 00844 word(v, AHEAD, s, rp); 00845 s = newstate(v->nfa); 00846 NOERR(); 00847 word(v, BEHIND, lp, s); 00848 nonword(v, AHEAD, s, rp); 00849 return; 00850 case NWBDRY: 00851 wordchrs(v); /* does NEXT() */ 00852 s = newstate(v->nfa); 00853 NOERR(); 00854 word(v, BEHIND, lp, s); 00855 word(v, AHEAD, s, rp); 00856 s = newstate(v->nfa); 00857 NOERR(); 00858 nonword(v, BEHIND, lp, s); 00859 nonword(v, AHEAD, s, rp); 00860 return; 00861 case LACON: /* lookahead constraint */ 00862 pos = v->nextvalue; 00863 NEXT(); 00864 s = newstate(v->nfa); 00865 s2 = newstate(v->nfa); 00866 NOERR(); 00867 t = parse(v, ')', LACON, s, s2); 00868 freesubre(v, t); /* internal structure irrelevant */ 00869 assert(SEE(')') || ISERR()); 00870 NEXT(); 00871 n = newlacon(v, s, s2, pos); 00872 NOERR(); 00873 ARCV(LACON, n); 00874 return; 00875 00876 /* 00877 * Then errors, to get them out of the way. 00878 */ 00879 00880 case '*': 00881 case '+': 00882 case '?': 00883 case '{': 00884 ERR(REG_BADRPT); 00885 return; 00886 default: 00887 ERR(REG_ASSERT); 00888 return; 00889 00890 /* 00891 * Then plain characters, and minor variants on that theme. 00892 */ 00893 00894 case ')': /* unbalanced paren */ 00895 if ((v->cflags®_ADVANCED) != REG_EXTENDED) { 00896 ERR(REG_EPAREN); 00897 return; 00898 } 00899 00900 /* 00901 * Legal in EREs due to specification botch. 00902 */ 00903 00904 NOTE(REG_UPBOTCH); 00905 /* fallthrough into case PLAIN */ 00906 case PLAIN: 00907 onechr(v, v->nextvalue, lp, rp); 00908 okcolors(v->nfa, v->cm); 00909 NOERR(); 00910 NEXT(); 00911 break; 00912 case '[': 00913 if (v->nextvalue == 1) { 00914 bracket(v, lp, rp); 00915 } else { 00916 cbracket(v, lp, rp); 00917 } 00918 assert(SEE(']') || ISERR()); 00919 NEXT(); 00920 break; 00921 case '.': 00922 rainbow(v->nfa, v->cm, PLAIN, 00923 (v->cflags®_NLSTOP) ? v->nlcolor : COLORLESS, lp, rp); 00924 NEXT(); 00925 break; 00926 00927 /* 00928 * And finally the ugly stuff. 00929 */ 00930 00931 case '(': /* value flags as capturing or non */ 00932 cap = (type == LACON) ? 0 : v->nextvalue; 00933 if (cap) { 00934 v->nsubexp++; 00935 subno = v->nsubexp; 00936 if ((size_t)subno >= v->nsubs) { 00937 moresubs(v, subno); 00938 } 00939 assert((size_t)subno < v->nsubs); 00940 } else { 00941 atomtype = PLAIN; /* something that's not '(' */ 00942 } 00943 NEXT(); 00944 00945 /* 00946 * Need new endpoints because tree will contain pointers. 00947 */ 00948 00949 s = newstate(v->nfa); 00950 s2 = newstate(v->nfa); 00951 NOERR(); 00952 EMPTYARC(lp, s); 00953 EMPTYARC(s2, rp); 00954 NOERR(); 00955 atom = parse(v, ')', PLAIN, s, s2); 00956 assert(SEE(')') || ISERR()); 00957 NEXT(); 00958 NOERR(); 00959 if (cap) { 00960 v->subs[subno] = atom; 00961 t = subre(v, '(', atom->flags|CAP, lp, rp); 00962 NOERR(); 00963 t->subno = subno; 00964 t->left = atom; 00965 atom = t; 00966 } 00967 00968 /* 00969 * Postpone everything else pending possible {0}. 00970 */ 00971 00972 break; 00973 case BACKREF: /* the Feature From The Black Lagoon */ 00974 INSIST(type != LACON, REG_ESUBREG); 00975 INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); 00976 INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); 00977 NOERR(); 00978 assert(v->nextvalue > 0); 00979 atom = subre(v, 'b', BACKR, lp, rp); 00980 subno = v->nextvalue; 00981 atom->subno = subno; 00982 EMPTYARC(lp, rp); /* temporarily, so there's something */ 00983 NEXT(); 00984 break; 00985 } 00986 00987 /* 00988 * ...and an atom may be followed by a quantifier. 00989 */ 00990 00991 switch (v->nexttype) { 00992 case '*': 00993 m = 0; 00994 n = INFINITY; 00995 qprefer = (v->nextvalue) ? LONGER : SHORTER; 00996 NEXT(); 00997 break; 00998 case '+': 00999 m = 1; 01000 n = INFINITY; 01001 qprefer = (v->nextvalue) ? LONGER : SHORTER; 01002 NEXT(); 01003 break; 01004 case '?': 01005 m = 0; 01006 n = 1; 01007 qprefer = (v->nextvalue) ? LONGER : SHORTER; 01008 NEXT(); 01009 break; 01010 case '{': 01011 NEXT(); 01012 m = scannum(v); 01013 if (EAT(',')) { 01014 if (SEE(DIGIT)) { 01015 n = scannum(v); 01016 } else { 01017 n = INFINITY; 01018 } 01019 if (m > n) { 01020 ERR(REG_BADBR); 01021 return; 01022 } 01023 01024 /* 01025 * {m,n} exercises preference, even if it's {m,m} 01026 */ 01027 01028 qprefer = (v->nextvalue) ? LONGER : SHORTER; 01029 } else { 01030 n = m; 01031 /* 01032 * {m} passes operand's preference through. 01033 */ 01034 01035 qprefer = 0; 01036 } 01037 if (!SEE('}')) { /* catches errors too */ 01038 ERR(REG_BADBR); 01039 return; 01040 } 01041 NEXT(); 01042 break; 01043 default: /* no quantifier */ 01044 m = n = 1; 01045 qprefer = 0; 01046 break; 01047 } 01048 01049 /* 01050 * Annoying special case: {0} or {0,0} cancels everything. 01051 */ 01052 01053 if (m == 0 && n == 0) { 01054 if (atom != NULL) { 01055 freesubre(v, atom); 01056 } 01057 if (atomtype == '(') { 01058 v->subs[subno] = NULL; 01059 } 01060 delsub(v->nfa, lp, rp); 01061 EMPTYARC(lp, rp); 01062 return; 01063 } 01064 01065 /* 01066 * If not a messy case, avoid hard part. 01067 */ 01068 01069 assert(!MESSY(top->flags)); 01070 f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0); 01071 if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) { 01072 if (!(m == 1 && n == 1)) { 01073 repeat(v, lp, rp, m, n); 01074 } 01075 if (atom != NULL) { 01076 freesubre(v, atom); 01077 } 01078 top->flags = f; 01079 return; 01080 } 01081 01082 /* 01083 * hard part: something messy 01084 * That is, capturing parens, back reference, short/long clash, or an atom 01085 * with substructure containing one of those. 01086 */ 01087 01088 /* 01089 * Now we'll need a subre for the contents even if they're boring. 01090 */ 01091 01092 if (atom == NULL) { 01093 atom = subre(v, '=', 0, lp, rp); 01094 NOERR(); 01095 } 01096 01097 /* 01098 * Prepare a general-purpose state skeleton. 01099 * 01100 * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] 01101 * / / 01102 * [lp] ----> [s2] ----bypass--------------------- 01103 * 01104 * where bypass is an empty, and prefix is some repetitions of atom 01105 */ 01106 01107 s = newstate(v->nfa); /* first, new endpoints for the atom */ 01108 s2 = newstate(v->nfa); 01109 NOERR(); 01110 moveouts(v->nfa, lp, s); 01111 moveins(v->nfa, rp, s2); 01112 NOERR(); 01113 atom->begin = s; 01114 atom->end = s2; 01115 s = newstate(v->nfa); /* and spots for prefix and bypass */ 01116 s2 = newstate(v->nfa); 01117 NOERR(); 01118 EMPTYARC(lp, s); 01119 EMPTYARC(lp, s2); 01120 NOERR(); 01121 01122 /* 01123 * Break remaining subRE into x{...} and what follows. 01124 */ 01125 01126 t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); 01127 t->left = atom; 01128 atomp = &t->left; 01129 01130 /* 01131 * Here we should recurse... but we must postpone that to the end. 01132 */ 01133 01134 /* 01135 * Split top into prefix and remaining. 01136 */ 01137 01138 assert(top->op == '=' && top->left == NULL && top->right == NULL); 01139 top->left = subre(v, '=', top->flags, top->begin, lp); 01140 top->op = '.'; 01141 top->right = t; 01142 01143 /* 01144 * If it's a backref, now is the time to replicate the subNFA. 01145 */ 01146 01147 if (atomtype == BACKREF) { 01148 assert(atom->begin->nouts == 1); /* just the EMPTY */ 01149 delsub(v->nfa, atom->begin, atom->end); 01150 assert(v->subs[subno] != NULL); 01151 01152 /* 01153 * And here's why the recursion got postponed: it must wait until the 01154 * skeleton is filled in, because it may hit a backref that wants to 01155 * copy the filled-in skeleton. 01156 */ 01157 01158 dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, 01159 atom->begin, atom->end); 01160 NOERR(); 01161 } 01162 01163 /* 01164 * It's quantifier time; first, turn x{0,...} into x{1,...}|empty 01165 */ 01166 01167 if (m == 0) { 01168 EMPTYARC(s2, atom->end);/* the bypass */ 01169 assert(PREF(qprefer) != 0); 01170 f = COMBINE(qprefer, atom->flags); 01171 t = subre(v, '|', f, lp, atom->end); 01172 NOERR(); 01173 t->left = atom; 01174 t->right = subre(v, '|', PREF(f), s2, atom->end); 01175 NOERR(); 01176 t->right->left = subre(v, '=', 0, s2, atom->end); 01177 NOERR(); 01178 *atomp = t; 01179 atomp = &t->left; 01180 m = 1; 01181 } 01182 01183 /* 01184 * Deal with the rest of the quantifier. 01185 */ 01186 01187 if (atomtype == BACKREF) { 01188 /* 01189 * Special case: backrefs have internal quantifiers. 01190 */ 01191 01192 EMPTYARC(s, atom->begin); /* empty prefix */ 01193 01194 /* 01195 * Just stuff everything into atom. 01196 */ 01197 01198 repeat(v, atom->begin, atom->end, m, n); 01199 atom->min = (short) m; 01200 atom->max = (short) n; 01201 atom->flags |= COMBINE(qprefer, atom->flags); 01202 } else if (m == 1 && n == 1) { 01203 /* 01204 * No/vacuous quantifier: done. 01205 */ 01206 01207 EMPTYARC(s, atom->begin); /* empty prefix */ 01208 } else { 01209 /* 01210 * Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second 01211 * x 01212 */ 01213 01214 dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); 01215 assert(m >= 1 && m != INFINITY && n >= 1); 01216 repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1); 01217 f = COMBINE(qprefer, atom->flags); 01218 t = subre(v, '.', f, s, atom->end); /* prefix and atom */ 01219 NOERR(); 01220 t->left = subre(v, '=', PREF(f), s, atom->begin); 01221 NOERR(); 01222 t->right = atom; 01223 *atomp = t; 01224 } 01225 01226 /* 01227 * And finally, look after that postponed recursion. 01228 */ 01229 01230 t = top->right; 01231 if (!(SEE('|') || SEE(stopper) || SEE(EOS))) { 01232 t->right = parsebranch(v, stopper, type, atom->end, rp, 1); 01233 } else { 01234 EMPTYARC(atom->end, rp); 01235 t->right = subre(v, '=', 0, atom->end, rp); 01236 } 01237 assert(SEE('|') || SEE(stopper) || SEE(EOS)); 01238 t->flags |= COMBINE(t->flags, t->right->flags); 01239 top->flags |= COMBINE(top->flags, t->flags); 01240 } 01241 01242 /* 01243 - nonword - generate arcs for non-word-character ahead or behind 01244 ^ static void nonword(struct vars *, int, struct state *, struct state *); 01245 */ 01246 static void 01247 nonword( 01248 struct vars *v, 01249 int dir, /* AHEAD or BEHIND */ 01250 struct state *lp, 01251 struct state *rp) 01252 { 01253 int anchor = (dir == AHEAD) ? '$' : '^'; 01254 01255 assert(dir == AHEAD || dir == BEHIND); 01256 newarc(v->nfa, anchor, 1, lp, rp); 01257 newarc(v->nfa, anchor, 0, lp, rp); 01258 colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp); 01259 /* (no need for special attention to \n) */ 01260 } 01261 01262 /* 01263 - word - generate arcs for word character ahead or behind 01264 ^ static void word(struct vars *, int, struct state *, struct state *); 01265 */ 01266 static void 01267 word( 01268 struct vars *v, 01269 int dir, /* AHEAD or BEHIND */ 01270 struct state *lp, 01271 struct state *rp) 01272 { 01273 assert(dir == AHEAD || dir == BEHIND); 01274 cloneouts(v->nfa, v->wordchrs, lp, rp, dir); 01275 /* (no need for special attention to \n) */ 01276 } 01277 01278 /* 01279 - scannum - scan a number 01280 ^ static int scannum(struct vars *); 01281 */ 01282 static int /* value, <= DUPMAX */ 01283 scannum( 01284 struct vars *v) 01285 { 01286 int n = 0; 01287 01288 while (SEE(DIGIT) && n < DUPMAX) { 01289 n = n*10 + v->nextvalue; 01290 NEXT(); 01291 } 01292 if (SEE(DIGIT) || n > DUPMAX) { 01293 ERR(REG_BADBR); 01294 return 0; 01295 } 01296 return n; 01297 } 01298 01299 /* 01300 - repeat - replicate subNFA for quantifiers 01301 * The duplication sequences used here are chosen carefully so that any 01302 * pointers starting out pointing into the subexpression end up pointing into 01303 * the last occurrence. (Note that it may not be strung between the same left 01304 * and right end states, however!) This used to be important for the subRE 01305 * tree, although the important bits are now handled by the in-line code in 01306 * parse(), and when this is called, it doesn't matter any more. 01307 ^ static void repeat(struct vars *, struct state *, struct state *, int, int); 01308 */ 01309 static void 01310 repeat( 01311 struct vars *v, 01312 struct state *lp, 01313 struct state *rp, 01314 int m, 01315 int n) 01316 { 01317 #define SOME 2 01318 #define INF 3 01319 #define PAIR(x, y) ((x)*4 + (y)) 01320 #define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) 01321 const int rm = REDUCE(m); 01322 const int rn = REDUCE(n); 01323 struct state *s, *s2; 01324 01325 switch (PAIR(rm, rn)) { 01326 case PAIR(0, 0): /* empty string */ 01327 delsub(v->nfa, lp, rp); 01328 EMPTYARC(lp, rp); 01329 break; 01330 case PAIR(0, 1): /* do as x| */ 01331 EMPTYARC(lp, rp); 01332 break; 01333 case PAIR(0, SOME): /* do as x{1,n}| */ 01334 repeat(v, lp, rp, 1, n); 01335 NOERR(); 01336 EMPTYARC(lp, rp); 01337 break; 01338 case PAIR(0, INF): /* loop x around */ 01339 s = newstate(v->nfa); 01340 NOERR(); 01341 moveouts(v->nfa, lp, s); 01342 moveins(v->nfa, rp, s); 01343 EMPTYARC(lp, s); 01344 EMPTYARC(s, rp); 01345 break; 01346 case PAIR(1, 1): /* no action required */ 01347 break; 01348 case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ 01349 s = newstate(v->nfa); 01350 NOERR(); 01351 moveouts(v->nfa, lp, s); 01352 dupnfa(v->nfa, s, rp, lp, s); 01353 NOERR(); 01354 repeat(v, lp, s, 1, n-1); 01355 NOERR(); 01356 EMPTYARC(lp, s); 01357 break; 01358 case PAIR(1, INF): /* add loopback arc */ 01359 s = newstate(v->nfa); 01360 s2 = newstate(v->nfa); 01361 NOERR(); 01362 moveouts(v->nfa, lp, s); 01363 moveins(v->nfa, rp, s2); 01364 EMPTYARC(lp, s); 01365 EMPTYARC(s2, rp); 01366 EMPTYARC(s2, s); 01367 break; 01368 case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ 01369 s = newstate(v->nfa); 01370 NOERR(); 01371 moveouts(v->nfa, lp, s); 01372 dupnfa(v->nfa, s, rp, lp, s); 01373 NOERR(); 01374 repeat(v, lp, s, m-1, n-1); 01375 break; 01376 case PAIR(SOME, INF): /* do as x{m-1,}x */ 01377 s = newstate(v->nfa); 01378 NOERR(); 01379 moveouts(v->nfa, lp, s); 01380 dupnfa(v->nfa, s, rp, lp, s); 01381 NOERR(); 01382 repeat(v, lp, s, m-1, n); 01383 break; 01384 default: 01385 ERR(REG_ASSERT); 01386 break; 01387 } 01388 } 01389 01390 /* 01391 - bracket - handle non-complemented bracket expression 01392 * Also called from cbracket for complemented bracket expressions. 01393 ^ static void bracket(struct vars *, struct state *, struct state *); 01394 */ 01395 static void 01396 bracket( 01397 struct vars *v, 01398 struct state *lp, 01399 struct state *rp) 01400 { 01401 assert(SEE('[')); 01402 NEXT(); 01403 while (!SEE(']') && !SEE(EOS)) { 01404 brackpart(v, lp, rp); 01405 } 01406 assert(SEE(']') || ISERR()); 01407 okcolors(v->nfa, v->cm); 01408 } 01409 01410 /* 01411 - cbracket - handle complemented bracket expression 01412 * We do it by calling bracket() with dummy endpoints, and then complementing 01413 * the result. The alternative would be to invoke rainbow(), and then delete 01414 * arcs as the b.e. is seen... but that gets messy. 01415 ^ static void cbracket(struct vars *, struct state *, struct state *); 01416 */ 01417 static void 01418 cbracket( 01419 struct vars *v, 01420 struct state *lp, 01421 struct state *rp) 01422 { 01423 struct state *left = newstate(v->nfa); 01424 struct state *right = newstate(v->nfa); 01425 01426 NOERR(); 01427 bracket(v, left, right); 01428 if (v->cflags®_NLSTOP) { 01429 newarc(v->nfa, PLAIN, v->nlcolor, left, right); 01430 } 01431 NOERR(); 01432 01433 assert(lp->nouts == 0); /* all outarcs will be ours */ 01434 01435 /* 01436 * Easy part of complementing, and all there is to do since the MCCE code 01437 * was removed. 01438 */ 01439 01440 colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); 01441 NOERR(); 01442 dropstate(v->nfa, left); 01443 assert(right->nins == 0); 01444 freestate(v->nfa, right); 01445 return; 01446 } 01447 01448 /* 01449 - brackpart - handle one item (or range) within a bracket expression 01450 ^ static void brackpart(struct vars *, struct state *, struct state *); 01451 */ 01452 static void 01453 brackpart( 01454 struct vars *v, 01455 struct state *lp, 01456 struct state *rp) 01457 { 01458 celt startc, endc; 01459 struct cvec *cv; 01460 const chr *startp, *endp; 01461 chr c[1]; 01462 01463 /* 01464 * Parse something, get rid of special cases, take shortcuts. 01465 */ 01466 01467 switch (v->nexttype) { 01468 case RANGE: /* a-b-c or other botch */ 01469 ERR(REG_ERANGE); 01470 return; 01471 break; 01472 case PLAIN: 01473 c[0] = v->nextvalue; 01474 NEXT(); 01475 01476 /* 01477 * Shortcut for ordinary chr (not range). 01478 */ 01479 01480 if (!SEE(RANGE)) { 01481 onechr(v, c[0], lp, rp); 01482 return; 01483 } 01484 startc = element(v, c, c+1); 01485 NOERR(); 01486 break; 01487 case COLLEL: 01488 startp = v->now; 01489 endp = scanplain(v); 01490 INSIST(startp < endp, REG_ECOLLATE); 01491 NOERR(); 01492 startc = element(v, startp, endp); 01493 NOERR(); 01494 break; 01495 case ECLASS: 01496 startp = v->now; 01497 endp = scanplain(v); 01498 INSIST(startp < endp, REG_ECOLLATE); 01499 NOERR(); 01500 startc = element(v, startp, endp); 01501 NOERR(); 01502 cv = eclass(v, startc, (v->cflags®_ICASE)); 01503 NOERR(); 01504 dovec(v, cv, lp, rp); 01505 return; 01506 break; 01507 case CCLASS: 01508 startp = v->now; 01509 endp = scanplain(v); 01510 INSIST(startp < endp, REG_ECTYPE); 01511 NOERR(); 01512 cv = cclass(v, startp, endp, (v->cflags®_ICASE)); 01513 NOERR(); 01514 dovec(v, cv, lp, rp); 01515 return; 01516 break; 01517 default: 01518 ERR(REG_ASSERT); 01519 return; 01520 break; 01521 } 01522 01523 if (SEE(RANGE)) { 01524 NEXT(); 01525 switch (v->nexttype) { 01526 case PLAIN: 01527 case RANGE: 01528 c[0] = v->nextvalue; 01529 NEXT(); 01530 endc = element(v, c, c+1); 01531 NOERR(); 01532 break; 01533 case COLLEL: 01534 startp = v->now; 01535 endp = scanplain(v); 01536 INSIST(startp < endp, REG_ECOLLATE); 01537 NOERR(); 01538 endc = element(v, startp, endp); 01539 NOERR(); 01540 break; 01541 default: 01542 ERR(REG_ERANGE); 01543 return; 01544 break; 01545 } 01546 } else { 01547 endc = startc; 01548 } 01549 01550 /* 01551 * Ranges are unportable. Actually, standard C does guarantee that digits 01552 * are contiguous, but making that an exception is just too complicated. 01553 */ 01554 01555 if (startc != endc) { 01556 NOTE(REG_UUNPORT); 01557 } 01558 cv = range(v, startc, endc, (v->cflags®_ICASE)); 01559 NOERR(); 01560 dovec(v, cv, lp, rp); 01561 } 01562 01563 /* 01564 - scanplain - scan PLAIN contents of [. etc. 01565 * Certain bits of trickery in lex.c know that this code does not try to look 01566 * past the final bracket of the [. etc. 01567 ^ static const chr *scanplain(struct vars *); 01568 */ 01569 static const chr * /* just after end of sequence */ 01570 scanplain( 01571 struct vars *v) 01572 { 01573 const chr *endp; 01574 01575 assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); 01576 NEXT(); 01577 01578 endp = v->now; 01579 while (SEE(PLAIN)) { 01580 endp = v->now; 01581 NEXT(); 01582 } 01583 01584 assert(SEE(END) || ISERR()); 01585 NEXT(); 01586 01587 return endp; 01588 } 01589 01590 /* 01591 - onechr - fill in arcs for a plain character, and possible case complements 01592 * This is mostly a shortcut for efficient handling of the common case. 01593 ^ static void onechr(struct vars *, pchr, struct state *, struct state *); 01594 */ 01595 static void 01596 onechr( 01597 struct vars *v, 01598 pchr c, 01599 struct state *lp, 01600 struct state *rp) 01601 { 01602 if (!(v->cflags®_ICASE)) { 01603 newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); 01604 return; 01605 } 01606 01607 /* 01608 * Rats, need general case anyway... 01609 */ 01610 01611 dovec(v, allcases(v, c), lp, rp); 01612 } 01613 01614 /* 01615 - dovec - fill in arcs for each element of a cvec 01616 ^ static void dovec(struct vars *, struct cvec *, struct state *, 01617 ^ struct state *); 01618 */ 01619 static void 01620 dovec( 01621 struct vars *v, 01622 struct cvec *cv, 01623 struct state *lp, 01624 struct state *rp) 01625 { 01626 chr ch, from, to; 01627 const chr *p; 01628 int i; 01629 01630 for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { 01631 ch = *p; 01632 newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); 01633 } 01634 01635 for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { 01636 from = *p; 01637 to = *(p+1); 01638 if (from <= to) { 01639 subrange(v, from, to, lp, rp); 01640 } 01641 } 01642 01643 } 01644 01645 /* 01646 - wordchrs - set up word-chr list for word-boundary stuff, if needed 01647 * The list is kept as a bunch of arcs between two dummy states; it's disposed 01648 * of by the unreachable-states sweep in NFA optimization. Does NEXT(). Must 01649 * not be called from any unusual lexical context. This should be reconciled 01650 * with the \w etc. handling in lex.c, and should be cleaned up to reduce 01651 * dependencies on input scanning. 01652 ^ static void wordchrs(struct vars *); 01653 */ 01654 static void 01655 wordchrs( 01656 struct vars *v) 01657 { 01658 struct state *left, *right; 01659 01660 if (v->wordchrs != NULL) { 01661 NEXT(); /* for consistency */ 01662 return; 01663 } 01664 01665 left = newstate(v->nfa); 01666 right = newstate(v->nfa); 01667 NOERR(); 01668 01669 /* 01670 * Fine point: implemented with [::], and lexer will set REG_ULOCALE. 01671 */ 01672 01673 lexword(v); 01674 NEXT(); 01675 assert(v->savenow != NULL && SEE('[')); 01676 bracket(v, left, right); 01677 assert((v->savenow != NULL && SEE(']')) || ISERR()); 01678 NEXT(); 01679 NOERR(); 01680 v->wordchrs = left; 01681 } 01682 01683 /* 01684 - subre - allocate a subre 01685 ^ static struct subre *subre(struct vars *, int, int, struct state *, 01686 ^ struct state *); 01687 */ 01688 static struct subre * 01689 subre( 01690 struct vars *v, 01691 int op, 01692 int flags, 01693 struct state *begin, 01694 struct state *end) 01695 { 01696 struct subre *ret = v->treefree; 01697 01698 if (ret != NULL) { 01699 v->treefree = ret->left; 01700 } else { 01701 ret = (struct subre *) MALLOC(sizeof(struct subre)); 01702 if (ret == NULL) { 01703 ERR(REG_ESPACE); 01704 return NULL; 01705 } 01706 ret->chain = v->treechain; 01707 v->treechain = ret; 01708 } 01709 01710 assert(strchr("|.b(=", op) != NULL); 01711 01712 ret->op = op; 01713 ret->flags = flags; 01714 ret->retry = 0; 01715 ret->subno = 0; 01716 ret->min = ret->max = 1; 01717 ret->left = NULL; 01718 ret->right = NULL; 01719 ret->begin = begin; 01720 ret->end = end; 01721 ZAPCNFA(ret->cnfa); 01722 01723 return ret; 01724 } 01725 01726 /* 01727 - freesubre - free a subRE subtree 01728 ^ static void freesubre(struct vars *, struct subre *); 01729 */ 01730 static void 01731 freesubre( 01732 struct vars *v, /* might be NULL */ 01733 struct subre *sr) 01734 { 01735 if (sr == NULL) { 01736 return; 01737 } 01738 01739 if (sr->left != NULL) { 01740 freesubre(v, sr->left); 01741 } 01742 if (sr->right != NULL) { 01743 freesubre(v, sr->right); 01744 } 01745 01746 freesrnode(v, sr); 01747 } 01748 01749 /* 01750 - freesrnode - free one node in a subRE subtree 01751 ^ static void freesrnode(struct vars *, struct subre *); 01752 */ 01753 static void 01754 freesrnode( 01755 struct vars *v, /* might be NULL */ 01756 struct subre *sr) 01757 { 01758 if (sr == NULL) { 01759 return; 01760 } 01761 01762 if (!NULLCNFA(sr->cnfa)) { 01763 freecnfa(&sr->cnfa); 01764 } 01765 sr->flags = 0; 01766 01767 if (v != NULL) { 01768 sr->left = v->treefree; 01769 v->treefree = sr; 01770 } else { 01771 FREE(sr); 01772 } 01773 } 01774 01775 /* 01776 - optst - optimize a subRE subtree 01777 ^ static void optst(struct vars *, struct subre *); 01778 */ 01779 static void 01780 optst( 01781 struct vars *v, 01782 struct subre *t) 01783 { 01784 /* 01785 * DGP (2007-11-13): I assume it was the programmer's intent to eventually 01786 * come back and add code to optimize subRE trees, but the routine coded 01787 * just spends effort traversing the tree and doing nothing. We can do 01788 * nothing with less effort. 01789 */ 01790 01791 return; 01792 } 01793 01794 /* 01795 - numst - number tree nodes (assigning retry indexes) 01796 ^ static int numst(struct subre *, int); 01797 */ 01798 static int /* next number */ 01799 numst( 01800 struct subre *t, 01801 int start) /* starting point for subtree numbers */ 01802 { 01803 int i; 01804 01805 assert(t != NULL); 01806 01807 i = start; 01808 t->retry = (short) i++; 01809 if (t->left != NULL) { 01810 i = numst(t->left, i); 01811 } 01812 if (t->right != NULL) { 01813 i = numst(t->right, i); 01814 } 01815 return i; 01816 } 01817 01818 /* 01819 - markst - mark tree nodes as INUSE 01820 ^ static void markst(struct subre *); 01821 */ 01822 static void 01823 markst( 01824 struct subre *t) 01825 { 01826 assert(t != NULL); 01827 01828 t->flags |= INUSE; 01829 if (t->left != NULL) { 01830 markst(t->left); 01831 } 01832 if (t->right != NULL) { 01833 markst(t->right); 01834 } 01835 } 01836 01837 /* 01838 - cleanst - free any tree nodes not marked INUSE 01839 ^ static void cleanst(struct vars *); 01840 */ 01841 static void 01842 cleanst( 01843 struct vars *v) 01844 { 01845 struct subre *t; 01846 struct subre *next; 01847 01848 for (t = v->treechain; t != NULL; t = next) { 01849 next = t->chain; 01850 if (!(t->flags&INUSE)) { 01851 FREE(t); 01852 } 01853 } 01854 v->treechain = NULL; 01855 v->treefree = NULL; /* just on general principles */ 01856 } 01857 01858 /* 01859 - nfatree - turn a subRE subtree into a tree of compacted NFAs 01860 ^ static long nfatree(struct vars *, struct subre *, FILE *); 01861 */ 01862 static long /* optimize results from top node */ 01863 nfatree( 01864 struct vars *v, 01865 struct subre *t, 01866 FILE *f) /* for debug output */ 01867 { 01868 assert(t != NULL && t->begin != NULL); 01869 01870 if (t->left != NULL) { 01871 (DISCARD) nfatree(v, t->left, f); 01872 } 01873 if (t->right != NULL) { 01874 (DISCARD) nfatree(v, t->right, f); 01875 } 01876 01877 return nfanode(v, t, f); 01878 } 01879 01880 /* 01881 - nfanode - do one NFA for nfatree 01882 ^ static long nfanode(struct vars *, struct subre *, FILE *); 01883 */ 01884 static long /* optimize results */ 01885 nfanode( 01886 struct vars *v, 01887 struct subre *t, 01888 FILE *f) /* for debug output */ 01889 { 01890 struct nfa *nfa; 01891 long ret = 0; 01892 char idbuf[50]; 01893 01894 assert(t->begin != NULL); 01895 01896 if (f != NULL) { 01897 fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", 01898 stid(t, idbuf, sizeof(idbuf))); 01899 } 01900 nfa = newnfa(v, v->cm, v->nfa); 01901 NOERRZ(); 01902 dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); 01903 if (!ISERR()) { 01904 specialcolors(nfa); 01905 ret = optimize(nfa, f); 01906 } 01907 if (!ISERR()) { 01908 compact(nfa, &t->cnfa); 01909 } 01910 01911 freenfa(nfa); 01912 return ret; 01913 } 01914 01915 /* 01916 - newlacon - allocate a lookahead-constraint subRE 01917 ^ static int newlacon(struct vars *, struct state *, struct state *, int); 01918 */ 01919 static int /* lacon number */ 01920 newlacon( 01921 struct vars *v, 01922 struct state *begin, 01923 struct state *end, 01924 int pos) 01925 { 01926 struct subre *sub; 01927 int n; 01928 01929 if (v->nlacons == 0) { 01930 v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre)); 01931 n = 1; /* skip 0th */ 01932 v->nlacons = 2; 01933 } else { 01934 v->lacons = (struct subre *) REALLOC(v->lacons, 01935 (v->nlacons+1)*sizeof(struct subre)); 01936 n = v->nlacons++; 01937 } 01938 01939 if (v->lacons == NULL) { 01940 ERR(REG_ESPACE); 01941 return 0; 01942 } 01943 01944 sub = &v->lacons[n]; 01945 sub->begin = begin; 01946 sub->end = end; 01947 sub->subno = pos; 01948 ZAPCNFA(sub->cnfa); 01949 return n; 01950 } 01951 01952 /* 01953 - freelacons - free lookahead-constraint subRE vector 01954 ^ static void freelacons(struct subre *, int); 01955 */ 01956 static void 01957 freelacons( 01958 struct subre *subs, 01959 int n) 01960 { 01961 struct subre *sub; 01962 int i; 01963 01964 assert(n > 0); 01965 for (sub=subs+1, i=n-1; i>0; sub++, i--) { /* no 0th */ 01966 if (!NULLCNFA(sub->cnfa)) { 01967 freecnfa(&sub->cnfa); 01968 } 01969 } 01970 FREE(subs); 01971 } 01972 01973 /* 01974 - rfree - free a whole RE (insides of regfree) 01975 ^ static void rfree(regex_t *); 01976 */ 01977 static void 01978 rfree( 01979 regex_t *re) 01980 { 01981 struct guts *g; 01982 01983 if (re == NULL || re->re_magic != REMAGIC) { 01984 return; 01985 } 01986 01987 re->re_magic = 0; /* invalidate RE */ 01988 g = (struct guts *) re->re_guts; 01989 re->re_guts = NULL; 01990 re->re_fns = NULL; 01991 g->magic = 0; 01992 freecm(&g->cmap); 01993 if (g->tree != NULL) { 01994 freesubre(NULL, g->tree); 01995 } 01996 if (g->lacons != NULL) { 01997 freelacons(g->lacons, g->nlacons); 01998 } 01999 if (!NULLCNFA(g->search)) { 02000 freecnfa(&g->search); 02001 } 02002 FREE(g); 02003 } 02004 02005 /* 02006 - dump - dump an RE in human-readable form 02007 ^ static void dump(regex_t *, FILE *); 02008 */ 02009 static void 02010 dump( 02011 regex_t *re, 02012 FILE *f) 02013 { 02014 #ifdef REG_DEBUG 02015 struct guts *g; 02016 int i; 02017 02018 if (re->re_magic != REMAGIC) { 02019 fprintf(f, "bad magic number (0x%x not 0x%x)\n", 02020 re->re_magic, REMAGIC); 02021 } 02022 if (re->re_guts == NULL) { 02023 fprintf(f, "NULL guts!!!\n"); 02024 return; 02025 } 02026 g = (struct guts *) re->re_guts; 02027 if (g->magic != GUTSMAGIC) { 02028 fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", 02029 g->magic, GUTSMAGIC); 02030 } 02031 02032 fprintf(f, "\n\n\n========= DUMP ==========\n"); 02033 fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", 02034 re->re_nsub, re->re_info, re->re_csize, g->ntree); 02035 02036 dumpcolors(&g->cmap, f); 02037 if (!NULLCNFA(g->search)) { 02038 printf("\nsearch:\n"); 02039 dumpcnfa(&g->search, f); 02040 } 02041 for (i = 1; i < g->nlacons; i++) { 02042 fprintf(f, "\nla%d (%s):\n", i, 02043 (g->lacons[i].subno) ? "positive" : "negative"); 02044 dumpcnfa(&g->lacons[i].cnfa, f); 02045 } 02046 fprintf(f, "\n"); 02047 dumpst(g->tree, f, 0); 02048 #endif 02049 } 02050 02051 /* 02052 - dumpst - dump a subRE tree 02053 ^ static void dumpst(struct subre *, FILE *, int); 02054 */ 02055 static void 02056 dumpst( 02057 struct subre *t, 02058 FILE *f, 02059 int nfapresent) /* is the original NFA still around? */ 02060 { 02061 if (t == NULL) { 02062 fprintf(f, "null tree\n"); 02063 } else { 02064 stdump(t, f, nfapresent); 02065 } 02066 fflush(f); 02067 } 02068 02069 /* 02070 - stdump - recursive guts of dumpst 02071 ^ static void stdump(struct subre *, FILE *, int); 02072 */ 02073 static void 02074 stdump( 02075 struct subre *t, 02076 FILE *f, 02077 int nfapresent) /* is the original NFA still around? */ 02078 { 02079 char idbuf[50]; 02080 02081 fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op); 02082 if (t->flags&LONGER) { 02083 fprintf(f, " longest"); 02084 } 02085 if (t->flags&SHORTER) { 02086 fprintf(f, " shortest"); 02087 } 02088 if (t->flags&MIXED) { 02089 fprintf(f, " hasmixed"); 02090 } 02091 if (t->flags&CAP) { 02092 fprintf(f, " hascapture"); 02093 } 02094 if (t->flags&BACKR) { 02095 fprintf(f, " hasbackref"); 02096 } 02097 if (!(t->flags&INUSE)) { 02098 fprintf(f, " UNUSED"); 02099 } 02100 if (t->subno != 0) { 02101 fprintf(f, " (#%d)", t->subno); 02102 } 02103 if (t->min != 1 || t->max != 1) { 02104 fprintf(f, " {%d,", t->min); 02105 if (t->max != INFINITY) { 02106 fprintf(f, "%d", t->max); 02107 } 02108 fprintf(f, "}"); 02109 } 02110 if (nfapresent) { 02111 fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no); 02112 } 02113 if (t->left != NULL) { 02114 fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); 02115 } 02116 if (t->right != NULL) { 02117 fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); 02118 } 02119 if (!NULLCNFA(t->cnfa)) { 02120 fprintf(f, "\n"); 02121 dumpcnfa(&t->cnfa, f); 02122 } 02123 fprintf(f, "\n"); 02124 if (t->left != NULL) { 02125 stdump(t->left, f, nfapresent); 02126 } 02127 if (t->right != NULL) { 02128 stdump(t->right, f, nfapresent); 02129 } 02130 } 02131 02132 /* 02133 - stid - identify a subtree node for dumping 02134 ^ static char *stid(struct subre *, char *, size_t); 02135 */ 02136 static const char * /* points to buf or constant string */ 02137 stid( 02138 struct subre *t, 02139 char *buf, 02140 size_t bufsize) 02141 { 02142 /* 02143 * Big enough for hex int or decimal t->retry? 02144 */ 02145 02146 if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) { 02147 return "unable"; 02148 } 02149 if (t->retry != 0) { 02150 sprintf(buf, "%d", t->retry); 02151 } else { 02152 sprintf(buf, "%p", t); 02153 } 02154 return buf; 02155 } 02156 02157 #include "regc_lex.c" 02158 #include "regc_color.c" 02159 #include "regc_nfa.c" 02160 #include "regc_cvec.c" 02161 #include "regc_locale.c" 02162 02163 /* 02164 * Local Variables: 02165 * mode: c 02166 * c-basic-offset: 4 02167 * fill-column: 78 02168 * End: 02169 */
Generated on Wed Mar 12 12:18:10 2008 by 1.5.1 |