/home/ntakagi/work/tcl8.5.1/generic/regcomp.c Source File

00001 /*
00002  * re_*comp and friends - compile REs
00003  * This file #includes several others (see the bottom).
00004  *
00005  * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
00006  *
00007  * Development of this software was funded, in part, by Cray Research Inc.,
00008  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
00009  * Corporation, none of whom are responsible for the results. The author
00010  * thanks all of them.
00011  *
00012  * Redistribution and use in source and binary forms -- with or without
00013  * modification -- are permitted for any purpose, provided that
00014  * redistributions in source form retain this entire copyright notice and
00015  * indicate the origin and nature of any modifications.
00016  *
00017  * I'd appreciate being given credit for this package in the documentation of
00018  * software which uses it, but that is not a requirement.
00019  *
00020  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
00021  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
00022  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
00023  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00024  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00025  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
00026  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00027  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
00028  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
00029  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  */
00032 
00033 #include "regguts.h"
00034 
00035 /*
00036  * forward declarations, up here so forward datatypes etc. are defined early
00037  */
00038 /* =====^!^===== begin forwards =====^!^===== */
00039 /* automatically gathered by fwd; do not hand-edit */
00040 /* === regcomp.c === */
00041 int compile(regex_t *, const chr *, size_t, int);
00042 static void moresubs(struct vars *, int);
00043 static int freev(struct vars *, int);
00044 static void makesearch(struct vars *, struct nfa *);
00045 static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
00046 static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
00047 static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
00048 static void nonword(struct vars *, int, struct state *, struct state *);
00049 static void word(struct vars *, int, struct state *, struct state *);
00050 static int scannum(struct vars *);
00051 static void repeat(struct vars *, struct state *, struct state *, int, int);
00052 static void bracket(struct vars *, struct state *, struct state *);
00053 static void cbracket(struct vars *, struct state *, struct state *);
00054 static void brackpart(struct vars *, struct state *, struct state *);
00055 static const chr *scanplain(struct vars *);
00056 static void onechr(struct vars *, pchr, struct state *, struct state *);
00057 static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
00058 static void wordchrs(struct vars *);
00059 static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
00060 static void freesubre(struct vars *, struct subre *);
00061 static void freesrnode(struct vars *, struct subre *);
00062 static void optst(struct vars *, struct subre *);
00063 static int numst(struct subre *, int);
00064 static void markst(struct subre *);
00065 static void cleanst(struct vars *);
00066 static long nfatree(struct vars *, struct subre *, FILE *);
00067 static long nfanode(struct vars *, struct subre *, FILE *);
00068 static int newlacon(struct vars *, struct state *, struct state *, int);
00069 static void freelacons(struct subre *, int);
00070 static void rfree(regex_t *);
00071 static void dump(regex_t *, FILE *);
00072 static void dumpst(struct subre *, FILE *, int);
00073 static void stdump(struct subre *, FILE *, int);
00074 static const char *stid(struct subre *, char *, size_t);
00075 /* === regc_lex.c === */
00076 static void lexstart(struct vars *);
00077 static void prefixes(struct vars *);
00078 static void lexnest(struct vars *, const chr *, const chr *);
00079 static void lexword(struct vars *);
00080 static int next(struct vars *);
00081 static int lexescape(struct vars *);
00082 static chr lexdigits(struct vars *, int, int, int);
00083 static int brenext(struct vars *, pchr);
00084 static void skip(struct vars *);
00085 static chr newline(NOPARMS);
00086 #ifdef REG_DEBUG
00087 static const chr *ch(NOPARMS);
00088 #endif
00089 static chr chrnamed(struct vars *, const chr *, const chr *, pchr);
00090 /* === regc_color.c === */
00091 static void initcm(struct vars *, struct colormap *);
00092 static void freecm(struct colormap *);
00093 static void cmtreefree(struct colormap *, union tree *, int);
00094 static color setcolor(struct colormap *, pchr, pcolor);
00095 static color maxcolor(struct colormap *);
00096 static color newcolor(struct colormap *);
00097 static void freecolor(struct colormap *, pcolor);
00098 static color pseudocolor(struct colormap *);
00099 static color subcolor(struct colormap *, pchr c);
00100 static color newsub(struct colormap *, pcolor);
00101 static void subrange(struct vars *, pchr, pchr, struct state *, struct state *);
00102 static void subblock(struct vars *, pchr, struct state *, struct state *);
00103 static void okcolors(struct nfa *, struct colormap *);
00104 static void colorchain(struct colormap *, struct arc *);
00105 static void uncolorchain(struct colormap *, struct arc *);
00106 static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
00107 static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
00108 #ifdef REG_DEBUG
00109 static void dumpcolors(struct colormap *, FILE *);
00110 static void fillcheck(struct colormap *, union tree *, int, FILE *);
00111 static void dumpchr(pchr, FILE *);
00112 #endif
00113 /* === regc_nfa.c === */
00114 static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
00115 static void freenfa(struct nfa *);
00116 static struct state *newstate(struct nfa *);
00117 static struct state *newfstate(struct nfa *, int flag);
00118 static void dropstate(struct nfa *, struct state *);
00119 static void freestate(struct nfa *, struct state *);
00120 static void destroystate(struct nfa *, struct state *);
00121 static void newarc(struct nfa *, int, pcolor, struct state *, struct state *);
00122 static struct arc *allocarc(struct nfa *, struct state *);
00123 static void freearc(struct nfa *, struct arc *);
00124 static struct arc *findarc(struct state *, int, pcolor);
00125 static void cparc(struct nfa *, struct arc *, struct state *, struct state *);
00126 static void moveins(struct nfa *, struct state *, struct state *);
00127 static void copyins(struct nfa *, struct state *, struct state *);
00128 static void moveouts(struct nfa *, struct state *, struct state *);
00129 static void copyouts(struct nfa *, struct state *, struct state *);
00130 static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
00131 static void delsub(struct nfa *, struct state *, struct state *);
00132 static void deltraverse(struct nfa *, struct state *, struct state *);
00133 static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
00134 static void duptraverse(struct nfa *, struct state *, struct state *);
00135 static void cleartraverse(struct nfa *, struct state *);
00136 static void specialcolors(struct nfa *);
00137 static long optimize(struct nfa *, FILE *);
00138 static void pullback(struct nfa *, FILE *);
00139 static int pull(struct nfa *, struct arc *);
00140 static void pushfwd(struct nfa *, FILE *);
00141 static int push(struct nfa *, struct arc *);
00142 #define INCOMPATIBLE    1       /* destroys arc */
00143 #define SATISFIED       2       /* constraint satisfied */
00144 #define COMPATIBLE      3       /* compatible but not satisfied yet */
00145 static int combine(struct arc *, struct arc *);
00146 static void fixempties(struct nfa *, FILE *);
00147 static int unempty(struct nfa *, struct arc *);
00148 static void cleanup(struct nfa *);
00149 static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
00150 static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);
00151 static long analyze(struct nfa *);
00152 static void compact(struct nfa *, struct cnfa *);
00153 static void carcsort(struct carc *, struct carc *);
00154 static void freecnfa(struct cnfa *);
00155 static void dumpnfa(struct nfa *, FILE *);
00156 #ifdef REG_DEBUG
00157 static void dumpstate(struct state *, FILE *);
00158 static void dumparcs(struct state *, FILE *);
00159 static int dumprarcs(struct arc *, struct state *, FILE *, int);
00160 static void dumparc(struct arc *, struct state *, FILE *);
00161 #endif
00162 static void dumpcnfa(struct cnfa *, FILE *);
00163 #ifdef REG_DEBUG
00164 static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
00165 #endif
00166 /* === regc_cvec.c === */
00167 static struct cvec *clearcvec(struct cvec *);
00168 static void addchr(struct cvec *, pchr);
00169 static void addrange(struct cvec *, pchr, pchr);
00170 static struct cvec *newcvec(int, int);
00171 static struct cvec *getcvec(struct vars *, int, int);
00172 static void freecvec(struct cvec *);
00173 /* === regc_locale.c === */
00174 static celt element(struct vars *, const chr *, const chr *);
00175 static struct cvec *range(struct vars *, celt, celt, int);
00176 static int before(celt, celt);
00177 static struct cvec *eclass(struct vars *, celt, int);
00178 static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
00179 static struct cvec *allcases(struct vars *, pchr);
00180 static int cmp(const chr *, const chr *, size_t);
00181 static int casecmp(const chr *, const chr *, size_t);
00182 /* automatically gathered by fwd; do not hand-edit */
00183 /* =====^!^===== end forwards =====^!^===== */
00184 
00185 /* internal variables, bundled for easy passing around */
00186 struct vars {
00187     regex_t *re;
00188     const chr *now;             /* scan pointer into string */
00189     const chr *stop;            /* end of string */
00190     const chr *savenow;         /* saved now and stop for "subroutine call" */
00191     const chr *savestop;
00192     int err;                    /* error code (0 if none) */
00193     int cflags;                 /* copy of compile flags */
00194     int lasttype;               /* type of previous token */
00195     int nexttype;               /* type of next token */
00196     chr nextvalue;              /* value (if any) of next token */
00197     int lexcon;                 /* lexical context type (see lex.c) */
00198     int nsubexp;                /* subexpression count */
00199     struct subre **subs;        /* subRE pointer vector */
00200     size_t nsubs;               /* length of vector */
00201     struct subre *sub10[10];    /* initial vector, enough for most */
00202     struct nfa *nfa;            /* the NFA */
00203     struct colormap *cm;        /* character color map */
00204     color nlcolor;              /* color of newline */
00205     struct state *wordchrs;     /* state in nfa holding word-char outarcs */
00206     struct subre *tree;         /* subexpression tree */
00207     struct subre *treechain;    /* all tree nodes allocated */
00208     struct subre *treefree;     /* any free tree nodes */
00209     int ntree;                  /* number of tree nodes */
00210     struct cvec *cv;            /* interface cvec */
00211     struct cvec *cv2;           /* utility cvec */
00212     struct subre *lacons;       /* lookahead-constraint vector */
00213     int nlacons;                /* size of lacons */
00214 };
00215 
00216 /* parsing macros; most know that `v' is the struct vars pointer */
00217 #define NEXT()  (next(v))               /* advance by one token */
00218 #define SEE(t)  (v->nexttype == (t))    /* is next token this? */
00219 #define EAT(t)  (SEE(t) && next(v))     /* if next is this, swallow it */
00220 #define VISERR(vv)      ((vv)->err != 0)/* have we seen an error yet? */
00221 #define ISERR() VISERR(v)
00222 #define VERR(vv,e) \
00223         ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err : ((vv)->err = (e)))
00224 #define ERR(e)  VERR(v, e)              /* record an error */
00225 #define NOERR() {if (ISERR()) return;}  /* if error seen, return */
00226 #define NOERRN()        {if (ISERR()) return NULL;}     /* NOERR with retval */
00227 #define NOERRZ()        {if (ISERR()) return 0;}        /* NOERR with retval */
00228 #define INSIST(c, e)    ((c) ? 0 : ERR(e))      /* if condition false, error */
00229 #define NOTE(b) (v->re->re_info |= (b))         /* note visible condition */
00230 #define EMPTYARC(x, y)  newarc(v->nfa, EMPTY, 0, x, y)
00231 
00232 /* token type codes, some also used as NFA arc types */
00233 #define EMPTY   'n'             /* no token present */
00234 #define EOS     'e'             /* end of string */
00235 #define PLAIN   'p'             /* ordinary character */
00236 #define DIGIT   'd'             /* digit (in bound) */
00237 #define BACKREF 'b'             /* back reference */
00238 #define COLLEL  'I'             /* start of [. */
00239 #define ECLASS  'E'             /* start of [= */
00240 #define CCLASS  'C'             /* start of [: */
00241 #define END     'X'             /* end of [. [= [: */
00242 #define RANGE   'R'             /* - within [] which might be range delim. */
00243 #define LACON   'L'             /* lookahead constraint subRE */
00244 #define AHEAD   'a'             /* color-lookahead arc */
00245 #define BEHIND  'r'             /* color-lookbehind arc */
00246 #define WBDRY   'w'             /* word boundary constraint */
00247 #define NWBDRY  'W'             /* non-word-boundary constraint */
00248 #define SBEGIN  'A'             /* beginning of string (even if not BOL) */
00249 #define SEND    'Z'             /* end of string (even if not EOL) */
00250 #define PREFER  'P'             /* length preference */
00251 
00252 /* is an arc colored, and hence on a color chain? */
00253 #define COLORED(a) \
00254         ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
00255 
00256 /* static function list */
00257 static struct fns functions = {
00258     rfree,                      /* regfree insides */
00259 };
00260 
00261 /*
00262  - compile - compile regular expression
00263  ^ int compile(regex_t *, const chr *, size_t, int);
00264  */
00265 int
00266 compile(
00267     regex_t *re,
00268     const chr *string,
00269     size_t len,
00270     int flags)
00271 {
00272     AllocVars(v);
00273     struct guts *g;
00274     int i;
00275     size_t j;
00276     FILE *debug = (flags&REG_PROGRESS) ? stdout : NULL;
00277 #define CNOERR()        { if (ISERR()) return freev(v, v->err); }
00278 
00279     /*
00280      * Sanity checks.
00281      */
00282 
00283     if (re == NULL || string == NULL) {
00284         FreeVars(v);
00285         return REG_INVARG;
00286     }
00287     if ((flags&REG_QUOTE) && (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) {
00288         FreeVars(v);
00289         return REG_INVARG;
00290     }
00291     if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) {
00292         FreeVars(v);
00293         return REG_INVARG;
00294     }
00295 
00296     /*
00297      * Initial setup (after which freev() is callable).
00298      */
00299 
00300     v->re = re;
00301     v->now = string;
00302     v->stop = v->now + len;
00303     v->savenow = v->savestop = NULL;
00304     v->err = 0;
00305     v->cflags = flags;
00306     v->nsubexp = 0;
00307     v->subs = v->sub10;
00308     v->nsubs = 10;
00309     for (j = 0; j < v->nsubs; j++) {
00310         v->subs[j] = NULL;
00311     }
00312     v->nfa = NULL;
00313     v->cm = NULL;
00314     v->nlcolor = COLORLESS;
00315     v->wordchrs = NULL;
00316     v->tree = NULL;
00317     v->treechain = NULL;
00318     v->treefree = NULL;
00319     v->cv = NULL;
00320     v->cv2 = NULL;
00321     v->lacons = NULL;
00322     v->nlacons = 0;
00323     re->re_magic = REMAGIC;
00324     re->re_info = 0;            /* bits get set during parse */
00325     re->re_csize = sizeof(chr);
00326     re->re_guts = NULL;
00327     re->re_fns = VS(&functions);
00328 
00329     /*
00330      * More complex setup, malloced things.
00331      */
00332 
00333     re->re_guts = VS(MALLOC(sizeof(struct guts)));
00334     if (re->re_guts == NULL) {
00335         return freev(v, REG_ESPACE);
00336     }
00337     g = (struct guts *) re->re_guts;
00338     g->tree = NULL;
00339     initcm(v, &g->cmap);
00340     v->cm = &g->cmap;
00341     g->lacons = NULL;
00342     g->nlacons = 0;
00343     ZAPCNFA(g->search);
00344     v->nfa = newnfa(v, v->cm, NULL);
00345     CNOERR();
00346     v->cv = newcvec(100, 20);
00347     if (v->cv == NULL) {
00348         return freev(v, REG_ESPACE);
00349     }
00350 
00351     /*
00352      * Parsing.
00353      */
00354 
00355     lexstart(v);                /* also handles prefixes */
00356     if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) {
00357         /*
00358          * Assign newline a unique color.
00359          */
00360 
00361         v->nlcolor = subcolor(v->cm, newline());
00362         okcolors(v->nfa, v->cm);
00363     }
00364     CNOERR();
00365     v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
00366     assert(SEE(EOS));           /* even if error; ISERR() => SEE(EOS) */
00367     CNOERR();
00368     assert(v->tree != NULL);
00369 
00370     /*
00371      * Finish setup of nfa and its subre tree.
00372      */
00373 
00374     specialcolors(v->nfa);
00375     CNOERR();
00376     if (debug != NULL) {
00377         fprintf(debug, "\n\n\n========= RAW ==========\n");
00378         dumpnfa(v->nfa, debug);
00379         dumpst(v->tree, debug, 1);
00380     }
00381     optst(v, v->tree);
00382     v->ntree = numst(v->tree, 1);
00383     markst(v->tree);
00384     cleanst(v);
00385     if (debug != NULL) {
00386         fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");
00387         dumpst(v->tree, debug, 1);
00388     }
00389 
00390     /*
00391      * Build compacted NFAs for tree and lacons.
00392      */
00393 
00394     re->re_info |= nfatree(v, v->tree, debug);
00395     CNOERR();
00396     assert(v->nlacons == 0 || v->lacons != NULL);
00397     for (i = 1; i < v->nlacons; i++) {
00398         if (debug != NULL) {
00399             fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
00400         }
00401         nfanode(v, &v->lacons[i], debug);
00402     }
00403     CNOERR();
00404     if (v->tree->flags&SHORTER) {
00405         NOTE(REG_USHORTEST);
00406     }
00407 
00408     /*
00409      * Build compacted NFAs for tree, lacons, fast search.
00410      */
00411 
00412     if (debug != NULL) {
00413         fprintf(debug, "\n\n\n========= SEARCH ==========\n");
00414     }
00415 
00416     /*
00417      * Can sacrifice main NFA now, so use it as work area.
00418      */
00419 
00420     (DISCARD) optimize(v->nfa, debug);
00421     CNOERR();
00422     makesearch(v, v->nfa);
00423     CNOERR();
00424     compact(v->nfa, &g->search);
00425     CNOERR();
00426 
00427     /*
00428      * Looks okay, package it up.
00429      */
00430 
00431     re->re_nsub = v->nsubexp;
00432     v->re = NULL;               /* freev no longer frees re */
00433     g->magic = GUTSMAGIC;
00434     g->cflags = v->cflags;
00435     g->info = re->re_info;
00436     g->nsub = re->re_nsub;
00437     g->tree = v->tree;
00438     v->tree = NULL;
00439     g->ntree = v->ntree;
00440     g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;
00441     g->lacons = v->lacons;
00442     v->lacons = NULL;
00443     g->nlacons = v->nlacons;
00444 
00445     if (flags&REG_DUMP) {
00446         dump(re, stdout);
00447     }
00448 
00449     assert(v->err == 0);
00450     return freev(v, 0);
00451 }
00452 
00453 /*
00454  - moresubs - enlarge subRE vector
00455  ^ static void moresubs(struct vars *, int);
00456  */
00457 static void
00458 moresubs(
00459     struct vars *v,
00460     int wanted)                 /* want enough room for this one */
00461 {
00462     struct subre **p;
00463     size_t n;
00464 
00465     assert(wanted > 0 && (size_t)wanted >= v->nsubs);
00466     n = (size_t)wanted * 3 / 2 + 1;
00467     if (v->subs == v->sub10) {
00468         p = (struct subre **) MALLOC(n * sizeof(struct subre *));
00469         if (p != NULL) {
00470             memcpy(p, v->subs, v->nsubs * sizeof(struct subre *));
00471         }
00472     } else {
00473         p = (struct subre **) REALLOC(v->subs, n*sizeof(struct subre *));
00474     }
00475     if (p == NULL) {
00476         ERR(REG_ESPACE);
00477         return;
00478     }
00479 
00480     v->subs = p;
00481     for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) {
00482         *p = NULL;
00483     }
00484     assert(v->nsubs == n);
00485     assert((size_t)wanted < v->nsubs);
00486 }
00487 
00488 /*
00489  - freev - free vars struct's substructures where necessary
00490  * Optionally does error-number setting, and always returns error code (if
00491  * any), to make error-handling code terser.
00492  ^ static int freev(struct vars *, int);
00493  */
00494 static int
00495 freev(
00496     struct vars *v,
00497     int err)
00498 {
00499     register int ret;
00500 
00501     if (v->re != NULL) {
00502         rfree(v->re);
00503     }
00504     if (v->subs != v->sub10) {
00505         FREE(v->subs);
00506     }
00507     if (v->nfa != NULL) {
00508         freenfa(v->nfa);
00509     }
00510     if (v->tree != NULL) {
00511         freesubre(v, v->tree);
00512     }
00513     if (v->treechain != NULL) {
00514         cleanst(v);
00515     }
00516     if (v->cv != NULL) {
00517         freecvec(v->cv);
00518     }
00519     if (v->cv2 != NULL) {
00520         freecvec(v->cv2);
00521     }
00522     if (v->lacons != NULL) {
00523         freelacons(v->lacons, v->nlacons);
00524     }
00525     ERR(err);                   /* nop if err==0 */
00526 
00527     ret = v->err;
00528     FreeVars(v);
00529     return ret;
00530 }
00531 
00532 /*
00533  - makesearch - turn an NFA into a search NFA (implicit prepend of .*?)
00534  * NFA must have been optimize()d already.
00535  ^ static void makesearch(struct vars *, struct nfa *);
00536  */
00537 static void
00538 makesearch(
00539     struct vars *v,
00540     struct nfa *nfa)
00541 {
00542     struct arc *a, *b;
00543     struct state *pre = nfa->pre;
00544     struct state *s, *s2, *slist;
00545 
00546     /*
00547      * No loops are needed if it's anchored.
00548      */
00549 
00550     for (a = pre->outs; a != NULL; a = a->outchain) {
00551         assert(a->type == PLAIN);
00552         if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) {
00553             break;
00554         }
00555     }
00556     if (a != NULL) {
00557         /*
00558          * Add implicit .* in front.
00559          */
00560 
00561         rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
00562 
00563         /*
00564          * And ^* and \A* too -- not always necessary, but harmless.
00565          */
00566 
00567         newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
00568         newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
00569     }
00570 
00571     /*
00572      * Now here's the subtle part. Because many REs have no lookback
00573      * constraints, often knowing when you were in the pre state tells you
00574      * little; it's the next state(s) that are informative. But some of them
00575      * may have other inarcs, i.e. it may be possible to make actual progress
00576      * and then return to one of them. We must de-optimize such cases,
00577      * splitting each such state into progress and no-progress states.
00578      */
00579 
00580     /*
00581      * First, make a list of the states.
00582      */
00583 
00584     slist = NULL;
00585     for (a=pre->outs ; a!=NULL ; a=a->outchain) {
00586         s = a->to;
00587         for (b=s->ins ; b!=NULL ; b=b->inchain) {
00588             if (b->from != pre) {
00589                 break;
00590             }
00591         }
00592         if (b != NULL && s->tmp == NULL) {
00593             /*
00594              * Must be split if not already in the list (fixes bugs 505048,
00595              * 230589, 840258, 504785).
00596              */
00597 
00598             s->tmp = slist;
00599             slist = s;
00600         }
00601     }
00602 
00603     /*
00604      * Do the splits.
00605      */
00606 
00607     for (s=slist ; s!=NULL ; s=s2) {
00608         s2 = newstate(nfa);
00609 
00610         copyouts(nfa, s, s2);
00611         for (a=s->ins ; a!=NULL ; a=b) {
00612             b = a->inchain;
00613 
00614             if (a->from != pre) {
00615                 cparc(nfa, a, a->from, s2);
00616                 freearc(nfa, a);
00617             }
00618         }
00619         s2 = s->tmp;
00620         s->tmp = NULL;          /* clean up while we're at it */
00621     }
00622 }
00623 
00624 /*
00625  - parse - parse an RE
00626  * This is actually just the top level, which parses a bunch of branches tied
00627  * together with '|'. They appear in the tree as the left children of a chain
00628  * of '|' subres.
00629  ^ static struct subre *parse(struct vars *, int, int, struct state *,
00630  ^      struct state *);
00631  */
00632 static struct subre *
00633 parse(
00634     struct vars *v,
00635     int stopper,                /* EOS or ')' */
00636     int type,                   /* LACON (lookahead subRE) or PLAIN */
00637     struct state *init,         /* initial state */
00638     struct state *final)        /* final state */
00639 {
00640     struct state *left, *right; /* scaffolding for branch */
00641     struct subre *branches;     /* top level */
00642     struct subre *branch;       /* current branch */
00643     struct subre *t;            /* temporary */
00644     int firstbranch;            /* is this the first branch? */
00645 
00646     assert(stopper == ')' || stopper == EOS);
00647 
00648     branches = subre(v, '|', LONGER, init, final);
00649     NOERRN();
00650     branch = branches;
00651     firstbranch = 1;
00652     do {        /* a branch */
00653         if (!firstbranch) {
00654             /*
00655              * Need a place to hang the branch.
00656              */
00657 
00658             branch->right = subre(v, '|', LONGER, init, final);
00659             NOERRN();
00660             branch = branch->right;
00661         }
00662         firstbranch = 0;
00663         left = newstate(v->nfa);
00664         right = newstate(v->nfa);
00665         NOERRN();
00666         EMPTYARC(init, left);
00667         EMPTYARC(right, final);
00668         NOERRN();
00669         branch->left = parsebranch(v, stopper, type, left, right, 0);
00670         NOERRN();
00671         branch->flags |= UP(branch->flags | branch->left->flags);
00672         if ((branch->flags &~ branches->flags) != 0) {  /* new flags */
00673             for (t = branches; t != branch; t = t->right) {
00674                 t->flags |= branch->flags;
00675             }
00676         }
00677     } while (EAT('|'));
00678     assert(SEE(stopper) || SEE(EOS));
00679 
00680     if (!SEE(stopper)) {
00681         assert(stopper == ')' && SEE(EOS));
00682         ERR(REG_EPAREN);
00683     }
00684 
00685     /*
00686      * Optimize out simple cases.
00687      */
00688 
00689     if (branch == branches) {   /* only one branch */
00690         assert(branch->right == NULL);
00691         t = branch->left;
00692         branch->left = NULL;
00693         freesubre(v, branches);
00694         branches = t;
00695     } else if (!MESSY(branches->flags)) {       /* no interesting innards */
00696         freesubre(v, branches->left);
00697         branches->left = NULL;
00698         freesubre(v, branches->right);
00699         branches->right = NULL;
00700         branches->op = '=';
00701     }
00702 
00703     return branches;
00704 }
00705 
00706 /*
00707  - parsebranch - parse one branch of an RE
00708  * This mostly manages concatenation, working closely with parseqatom().
00709  * Concatenated things are bundled up as much as possible, with separate
00710  * ',' nodes introduced only when necessary due to substructure.
00711  ^ static struct subre *parsebranch(struct vars *, int, int, struct state *,
00712  ^      struct state *, int);
00713  */
00714 static struct subre *
00715 parsebranch(
00716     struct vars *v,
00717     int stopper,                /* EOS or ')' */
00718     int type,                   /* LACON (lookahead subRE) or PLAIN */
00719     struct state *left,         /* leftmost state */
00720     struct state *right,        /* rightmost state */
00721     int partial)                /* is this only part of a branch? */
00722 {
00723     struct state *lp;           /* left end of current construct */
00724     int seencontent;            /* is there anything in this branch yet? */
00725     struct subre *t;
00726 
00727     lp = left;
00728     seencontent = 0;
00729     t = subre(v, '=', 0, left, right);  /* op '=' is tentative */
00730     NOERRN();
00731     while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
00732         if (seencontent) {      /* implicit concat operator */
00733             lp = newstate(v->nfa);
00734             NOERRN();
00735             moveins(v->nfa, right, lp);
00736         }
00737         seencontent = 1;
00738 
00739         /* NB, recursion in parseqatom() may swallow rest of branch */
00740         parseqatom(v, stopper, type, lp, right, t);
00741     }
00742 
00743     if (!seencontent) {         /* empty branch */
00744         if (!partial) {
00745             NOTE(REG_UUNSPEC);
00746         }
00747         assert(lp == left);
00748         EMPTYARC(left, right);
00749     }
00750 
00751     return t;
00752 }
00753 
00754 /*
00755  - parseqatom - parse one quantified atom or constraint of an RE
00756  * The bookkeeping near the end cooperates very closely with parsebranch(); in
00757  * particular, it contains a recursion that can involve parsing the rest of
00758  * the branch, making this function's name somewhat inaccurate.
00759  ^ static void parseqatom(struct vars *, int, int, struct state *,
00760  ^      struct state *, struct subre *);
00761  */
00762 static void
00763 parseqatom(
00764     struct vars *v,
00765     int stopper,                /* EOS or ')' */
00766     int type,                   /* LACON (lookahead subRE) or PLAIN */
00767     struct state *lp,           /* left state to hang it on */
00768     struct state *rp,           /* right state to hang it on */
00769     struct subre *top)          /* subtree top */
00770 {
00771     struct state *s;            /* temporaries for new states */
00772     struct state *s2;
00773 #define ARCV(t, val)    newarc(v->nfa, t, val, lp, rp)
00774     int m, n;
00775     struct subre *atom;         /* atom's subtree */
00776     struct subre *t;
00777     int cap;                    /* capturing parens? */
00778     int pos;                    /* positive lookahead? */
00779     int subno;                  /* capturing-parens or backref number */
00780     int atomtype;
00781     int qprefer;                /* quantifier short/long preference */
00782     int f;
00783     struct subre **atomp;       /* where the pointer to atom is */
00784 
00785     /*
00786      * Initial bookkeeping.
00787      */
00788 
00789     atom = NULL;
00790     assert(lp->nouts == 0);     /* must string new code */
00791     assert(rp->nins == 0);      /* between lp and rp */
00792     subno = 0;                  /* just to shut lint up */
00793 
00794     /*
00795      * An atom or constraint...
00796      */
00797 
00798     atomtype = v->nexttype;
00799     switch (atomtype) {
00800         /* first, constraints, which end by returning */
00801     case '^':
00802         ARCV('^', 1);
00803         if (v->cflags&REG_NLANCH) {
00804             ARCV(BEHIND, v->nlcolor);
00805         }
00806         NEXT();
00807         return;
00808     case '$':
00809         ARCV('$', 1);
00810         if (v->cflags&REG_NLANCH) {
00811             ARCV(AHEAD, v->nlcolor);
00812         }
00813         NEXT();
00814         return;
00815     case SBEGIN:
00816         ARCV('^', 1);           /* BOL */
00817         ARCV('^', 0);           /* or BOS */
00818         NEXT();
00819         return;
00820     case SEND:
00821         ARCV('$', 1);           /* EOL */
00822         ARCV('$', 0);           /* or EOS */
00823         NEXT();
00824         return;
00825     case '<':
00826         wordchrs(v);            /* does NEXT() */
00827         s = newstate(v->nfa);
00828         NOERR();
00829         nonword(v, BEHIND, lp, s);
00830         word(v, AHEAD, s, rp);
00831         return;
00832     case '>':
00833         wordchrs(v);            /* does NEXT() */
00834         s = newstate(v->nfa);
00835         NOERR();
00836         word(v, BEHIND, lp, s);
00837         nonword(v, AHEAD, s, rp);
00838         return;
00839     case WBDRY:
00840         wordchrs(v);            /* does NEXT() */
00841         s = newstate(v->nfa);
00842         NOERR();
00843         nonword(v, BEHIND, lp, s);
00844         word(v, AHEAD, s, rp);
00845         s = newstate(v->nfa);
00846         NOERR();
00847         word(v, BEHIND, lp, s);
00848         nonword(v, AHEAD, s, rp);
00849         return;
00850     case NWBDRY:
00851         wordchrs(v);            /* does NEXT() */
00852         s = newstate(v->nfa);
00853         NOERR();
00854         word(v, BEHIND, lp, s);
00855         word(v, AHEAD, s, rp);
00856         s = newstate(v->nfa);
00857         NOERR();
00858         nonword(v, BEHIND, lp, s);
00859         nonword(v, AHEAD, s, rp);
00860         return;
00861     case LACON:                 /* lookahead constraint */
00862         pos = v->nextvalue;
00863         NEXT();
00864         s = newstate(v->nfa);
00865         s2 = newstate(v->nfa);
00866         NOERR();
00867         t = parse(v, ')', LACON, s, s2);
00868         freesubre(v, t);        /* internal structure irrelevant */
00869         assert(SEE(')') || ISERR());
00870         NEXT();
00871         n = newlacon(v, s, s2, pos);
00872         NOERR();
00873         ARCV(LACON, n);
00874         return;
00875 
00876         /*
00877          * Then errors, to get them out of the way.
00878          */
00879 
00880     case '*':
00881     case '+':
00882     case '?':
00883     case '{':
00884         ERR(REG_BADRPT);
00885         return;
00886     default:
00887         ERR(REG_ASSERT);
00888         return;
00889 
00890         /*
00891          * Then plain characters, and minor variants on that theme.
00892          */
00893 
00894     case ')':                   /* unbalanced paren */
00895         if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {
00896             ERR(REG_EPAREN);
00897             return;
00898         }
00899 
00900         /*
00901          * Legal in EREs due to specification botch.
00902          */
00903 
00904         NOTE(REG_UPBOTCH);
00905         /* fallthrough into case PLAIN */
00906     case PLAIN:
00907         onechr(v, v->nextvalue, lp, rp);
00908         okcolors(v->nfa, v->cm);
00909         NOERR();
00910         NEXT();
00911         break;
00912     case '[':
00913         if (v->nextvalue == 1) {
00914             bracket(v, lp, rp);
00915         } else {
00916             cbracket(v, lp, rp);
00917         }
00918         assert(SEE(']') || ISERR());
00919         NEXT();
00920         break;
00921     case '.':
00922         rainbow(v->nfa, v->cm, PLAIN,
00923                 (v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS, lp, rp);
00924         NEXT();
00925         break;
00926 
00927         /*
00928          * And finally the ugly stuff.
00929          */
00930 
00931     case '(':                   /* value flags as capturing or non */
00932         cap = (type == LACON) ? 0 : v->nextvalue;
00933         if (cap) {
00934             v->nsubexp++;
00935             subno = v->nsubexp;
00936             if ((size_t)subno >= v->nsubs) {
00937                 moresubs(v, subno);
00938             }
00939             assert((size_t)subno < v->nsubs);
00940         } else {
00941             atomtype = PLAIN;   /* something that's not '(' */
00942         }
00943         NEXT();
00944 
00945         /*
00946          * Need new endpoints because tree will contain pointers.
00947          */
00948 
00949         s = newstate(v->nfa);
00950         s2 = newstate(v->nfa);
00951         NOERR();
00952         EMPTYARC(lp, s);
00953         EMPTYARC(s2, rp);
00954         NOERR();
00955         atom = parse(v, ')', PLAIN, s, s2);
00956         assert(SEE(')') || ISERR());
00957         NEXT();
00958         NOERR();
00959         if (cap) {
00960             v->subs[subno] = atom;
00961             t = subre(v, '(', atom->flags|CAP, lp, rp);
00962             NOERR();
00963             t->subno = subno;
00964             t->left = atom;
00965             atom = t;
00966         }
00967 
00968         /*
00969          * Postpone everything else pending possible {0}.
00970          */
00971 
00972         break;
00973     case BACKREF:               /* the Feature From The Black Lagoon */
00974         INSIST(type != LACON, REG_ESUBREG);
00975         INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
00976         INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
00977         NOERR();
00978         assert(v->nextvalue > 0);
00979         atom = subre(v, 'b', BACKR, lp, rp);
00980         subno = v->nextvalue;
00981         atom->subno = subno;
00982         EMPTYARC(lp, rp);       /* temporarily, so there's something */
00983         NEXT();
00984         break;
00985     }
00986 
00987     /*
00988      * ...and an atom may be followed by a quantifier.
00989      */
00990 
00991     switch (v->nexttype) {
00992     case '*':
00993         m = 0;
00994         n = INFINITY;
00995         qprefer = (v->nextvalue) ? LONGER : SHORTER;
00996         NEXT();
00997         break;
00998     case '+':
00999         m = 1;
01000         n = INFINITY;
01001         qprefer = (v->nextvalue) ? LONGER : SHORTER;
01002         NEXT();
01003         break;
01004     case '?':
01005         m = 0;
01006         n = 1;
01007         qprefer = (v->nextvalue) ? LONGER : SHORTER;
01008         NEXT();
01009         break;
01010     case '{':
01011         NEXT();
01012         m = scannum(v);
01013         if (EAT(',')) {
01014             if (SEE(DIGIT)) {
01015                 n = scannum(v);
01016             } else {
01017                 n = INFINITY;
01018             }
01019             if (m > n) {
01020                 ERR(REG_BADBR);
01021                 return;
01022             }
01023 
01024             /*
01025              * {m,n} exercises preference, even if it's {m,m}
01026              */
01027 
01028             qprefer = (v->nextvalue) ? LONGER : SHORTER;
01029         } else {
01030             n = m;
01031             /*
01032              * {m} passes operand's preference through.
01033              */
01034 
01035             qprefer = 0;
01036         }
01037         if (!SEE('}')) {        /* catches errors too */
01038             ERR(REG_BADBR);
01039             return;
01040         }
01041         NEXT();
01042         break;
01043     default:                    /* no quantifier */
01044         m = n = 1;
01045         qprefer = 0;
01046         break;
01047     }
01048 
01049     /*
01050      * Annoying special case: {0} or {0,0} cancels everything.
01051      */
01052 
01053     if (m == 0 && n == 0) {
01054         if (atom != NULL) {
01055             freesubre(v, atom);
01056         }
01057         if (atomtype == '(') {
01058             v->subs[subno] = NULL;
01059         }
01060         delsub(v->nfa, lp, rp);
01061         EMPTYARC(lp, rp);
01062         return;
01063     }
01064 
01065     /*
01066      * If not a messy case, avoid hard part.
01067      */
01068 
01069     assert(!MESSY(top->flags));
01070     f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
01071     if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {
01072         if (!(m == 1 && n == 1)) {
01073             repeat(v, lp, rp, m, n);
01074         }
01075         if (atom != NULL) {
01076             freesubre(v, atom);
01077         }
01078         top->flags = f;
01079         return;
01080     }
01081 
01082     /*
01083      * hard part: something messy
01084      * That is, capturing parens, back reference, short/long clash, or an atom
01085      * with substructure containing one of those.
01086      */
01087 
01088     /*
01089      * Now we'll need a subre for the contents even if they're boring.
01090      */
01091 
01092     if (atom == NULL) {
01093         atom = subre(v, '=', 0, lp, rp);
01094         NOERR();
01095     }
01096 
01097     /*
01098      * Prepare a general-purpose state skeleton.
01099      *
01100      *    ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
01101      *   /                                            /
01102      * [lp] ----> [s2] ----bypass---------------------
01103      *
01104      * where bypass is an empty, and prefix is some repetitions of atom
01105      */
01106 
01107     s = newstate(v->nfa);       /* first, new endpoints for the atom */
01108     s2 = newstate(v->nfa);
01109     NOERR();
01110     moveouts(v->nfa, lp, s);
01111     moveins(v->nfa, rp, s2);
01112     NOERR();
01113     atom->begin = s;
01114     atom->end = s2;
01115     s = newstate(v->nfa);       /* and spots for prefix and bypass */
01116     s2 = newstate(v->nfa);
01117     NOERR();
01118     EMPTYARC(lp, s);
01119     EMPTYARC(lp, s2);
01120     NOERR();
01121 
01122     /*
01123      * Break remaining subRE into x{...} and what follows.
01124      */
01125 
01126     t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
01127     t->left = atom;
01128     atomp = &t->left;
01129 
01130     /*
01131      * Here we should recurse... but we must postpone that to the end.
01132      */
01133 
01134     /*
01135      * Split top into prefix and remaining.
01136      */
01137 
01138     assert(top->op == '=' && top->left == NULL && top->right == NULL);
01139     top->left = subre(v, '=', top->flags, top->begin, lp);
01140     top->op = '.';
01141     top->right = t;
01142 
01143     /*
01144      * If it's a backref, now is the time to replicate the subNFA.
01145      */
01146 
01147     if (atomtype == BACKREF) {
01148         assert(atom->begin->nouts == 1);        /* just the EMPTY */
01149         delsub(v->nfa, atom->begin, atom->end);
01150         assert(v->subs[subno] != NULL);
01151 
01152         /*
01153          * And here's why the recursion got postponed: it must wait until the
01154          * skeleton is filled in, because it may hit a backref that wants to
01155          * copy the filled-in skeleton.
01156          */
01157 
01158         dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
01159                 atom->begin, atom->end);
01160         NOERR();
01161     }
01162 
01163     /*
01164      * It's quantifier time; first, turn x{0,...} into x{1,...}|empty
01165      */
01166 
01167     if (m == 0) {
01168         EMPTYARC(s2, atom->end);/* the bypass */
01169         assert(PREF(qprefer) != 0);
01170         f = COMBINE(qprefer, atom->flags);
01171         t = subre(v, '|', f, lp, atom->end);
01172         NOERR();
01173         t->left = atom;
01174         t->right = subre(v, '|', PREF(f), s2, atom->end);
01175         NOERR();
01176         t->right->left = subre(v, '=', 0, s2, atom->end);
01177         NOERR();
01178         *atomp = t;
01179         atomp = &t->left;
01180         m = 1;
01181     }
01182 
01183     /*
01184      * Deal with the rest of the quantifier.
01185      */
01186 
01187     if (atomtype == BACKREF) {
01188         /*
01189          * Special case: backrefs have internal quantifiers.
01190          */
01191 
01192         EMPTYARC(s, atom->begin);       /* empty prefix */
01193 
01194         /*
01195          * Just stuff everything into atom.
01196          */
01197 
01198         repeat(v, atom->begin, atom->end, m, n);
01199         atom->min = (short) m;
01200         atom->max = (short) n;
01201         atom->flags |= COMBINE(qprefer, atom->flags);
01202     } else if (m == 1 && n == 1) {
01203         /*
01204          * No/vacuous quantifier: done.
01205          */
01206 
01207         EMPTYARC(s, atom->begin);       /* empty prefix */
01208     } else {
01209         /*
01210          * Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second
01211          * x
01212          */
01213 
01214         dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
01215         assert(m >= 1 && m != INFINITY && n >= 1);
01216         repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);
01217         f = COMBINE(qprefer, atom->flags);
01218         t = subre(v, '.', f, s, atom->end);     /* prefix and atom */
01219         NOERR();
01220         t->left = subre(v, '=', PREF(f), s, atom->begin);
01221         NOERR();
01222         t->right = atom;
01223         *atomp = t;
01224     }
01225 
01226     /*
01227      * And finally, look after that postponed recursion.
01228      */
01229 
01230     t = top->right;
01231     if (!(SEE('|') || SEE(stopper) || SEE(EOS))) {
01232         t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
01233     } else {
01234         EMPTYARC(atom->end, rp);
01235         t->right = subre(v, '=', 0, atom->end, rp);
01236     }
01237     assert(SEE('|') || SEE(stopper) || SEE(EOS));
01238     t->flags |= COMBINE(t->flags, t->right->flags);
01239     top->flags |= COMBINE(top->flags, t->flags);
01240 }
01241 
01242 /*
01243  - nonword - generate arcs for non-word-character ahead or behind
01244  ^ static void nonword(struct vars *, int, struct state *, struct state *);
01245  */
01246 static void
01247 nonword(
01248     struct vars *v,
01249     int dir,                    /* AHEAD or BEHIND */
01250     struct state *lp,
01251     struct state *rp)
01252 {
01253     int anchor = (dir == AHEAD) ? '$' : '^';
01254 
01255     assert(dir == AHEAD || dir == BEHIND);
01256     newarc(v->nfa, anchor, 1, lp, rp);
01257     newarc(v->nfa, anchor, 0, lp, rp);
01258     colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
01259     /* (no need for special attention to \n) */
01260 }
01261 
01262 /*
01263  - word - generate arcs for word character ahead or behind
01264  ^ static void word(struct vars *, int, struct state *, struct state *);
01265  */
01266 static void
01267 word(
01268     struct vars *v,
01269     int dir,                    /* AHEAD or BEHIND */
01270     struct state *lp,
01271     struct state *rp)
01272 {
01273     assert(dir == AHEAD || dir == BEHIND);
01274     cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
01275     /* (no need for special attention to \n) */
01276 }
01277 
01278 /*
01279  - scannum - scan a number
01280  ^ static int scannum(struct vars *);
01281  */
01282 static int                      /* value, <= DUPMAX */
01283 scannum(
01284     struct vars *v)
01285 {
01286     int n = 0;
01287 
01288     while (SEE(DIGIT) && n < DUPMAX) {
01289         n = n*10 + v->nextvalue;
01290         NEXT();
01291     }
01292     if (SEE(DIGIT) || n > DUPMAX) {
01293         ERR(REG_BADBR);
01294         return 0;
01295     }
01296     return n;
01297 }
01298 
01299 /*
01300  - repeat - replicate subNFA for quantifiers
01301  * The duplication sequences used here are chosen carefully so that any
01302  * pointers starting out pointing into the subexpression end up pointing into
01303  * the last occurrence. (Note that it may not be strung between the same left
01304  * and right end states, however!) This used to be important for the subRE
01305  * tree, although the important bits are now handled by the in-line code in
01306  * parse(), and when this is called, it doesn't matter any more.
01307  ^ static void repeat(struct vars *, struct state *, struct state *, int, int);
01308  */
01309 static void
01310 repeat(
01311     struct vars *v,
01312     struct state *lp,
01313     struct state *rp,
01314     int m,
01315     int n)
01316 {
01317 #define SOME            2
01318 #define INF             3
01319 #define PAIR(x, y)      ((x)*4 + (y))
01320 #define REDUCE(x)       ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
01321     const int rm = REDUCE(m);
01322     const int rn = REDUCE(n);
01323     struct state *s, *s2;
01324 
01325     switch (PAIR(rm, rn)) {
01326     case PAIR(0, 0):            /* empty string */
01327         delsub(v->nfa, lp, rp);
01328         EMPTYARC(lp, rp);
01329         break;
01330     case PAIR(0, 1):            /* do as x| */
01331         EMPTYARC(lp, rp);
01332         break;
01333     case PAIR(0, SOME):         /* do as x{1,n}| */
01334         repeat(v, lp, rp, 1, n);
01335         NOERR();
01336         EMPTYARC(lp, rp);
01337         break;
01338     case PAIR(0, INF):          /* loop x around */
01339         s = newstate(v->nfa);
01340         NOERR();
01341         moveouts(v->nfa, lp, s);
01342         moveins(v->nfa, rp, s);
01343         EMPTYARC(lp, s);
01344         EMPTYARC(s, rp);
01345         break;
01346     case PAIR(1, 1):            /* no action required */
01347         break;
01348     case PAIR(1, SOME):         /* do as x{0,n-1}x = (x{1,n-1}|)x */
01349         s = newstate(v->nfa);
01350         NOERR();
01351         moveouts(v->nfa, lp, s);
01352         dupnfa(v->nfa, s, rp, lp, s);
01353         NOERR();
01354         repeat(v, lp, s, 1, n-1);
01355         NOERR();
01356         EMPTYARC(lp, s);
01357         break;
01358     case PAIR(1, INF):          /* add loopback arc */
01359         s = newstate(v->nfa);
01360         s2 = newstate(v->nfa);
01361         NOERR();
01362         moveouts(v->nfa, lp, s);
01363         moveins(v->nfa, rp, s2);
01364         EMPTYARC(lp, s);
01365         EMPTYARC(s2, rp);
01366         EMPTYARC(s2, s);
01367         break;
01368     case PAIR(SOME, SOME):              /* do as x{m-1,n-1}x */
01369         s = newstate(v->nfa);
01370         NOERR();
01371         moveouts(v->nfa, lp, s);
01372         dupnfa(v->nfa, s, rp, lp, s);
01373         NOERR();
01374         repeat(v, lp, s, m-1, n-1);
01375         break;
01376     case PAIR(SOME, INF):               /* do as x{m-1,}x */
01377         s = newstate(v->nfa);
01378         NOERR();
01379         moveouts(v->nfa, lp, s);
01380         dupnfa(v->nfa, s, rp, lp, s);
01381         NOERR();
01382         repeat(v, lp, s, m-1, n);
01383         break;
01384     default:
01385         ERR(REG_ASSERT);
01386         break;
01387     }
01388 }
01389 
01390 /*
01391  - bracket - handle non-complemented bracket expression
01392  * Also called from cbracket for complemented bracket expressions.
01393  ^ static void bracket(struct vars *, struct state *, struct state *);
01394  */
01395 static void
01396 bracket(
01397     struct vars *v,
01398     struct state *lp,
01399     struct state *rp)
01400 {
01401     assert(SEE('['));
01402     NEXT();
01403     while (!SEE(']') && !SEE(EOS)) {
01404         brackpart(v, lp, rp);
01405     }
01406     assert(SEE(']') || ISERR());
01407     okcolors(v->nfa, v->cm);
01408 }
01409 
01410 /*
01411  - cbracket - handle complemented bracket expression
01412  * We do it by calling bracket() with dummy endpoints, and then complementing
01413  * the result. The alternative would be to invoke rainbow(), and then delete
01414  * arcs as the b.e. is seen... but that gets messy.
01415  ^ static void cbracket(struct vars *, struct state *, struct state *);
01416  */
01417 static void
01418 cbracket(
01419     struct vars *v,
01420     struct state *lp,
01421     struct state *rp)
01422 {
01423     struct state *left = newstate(v->nfa);
01424     struct state *right = newstate(v->nfa);
01425 
01426     NOERR();
01427     bracket(v, left, right);
01428     if (v->cflags&REG_NLSTOP) {
01429         newarc(v->nfa, PLAIN, v->nlcolor, left, right);
01430     }
01431     NOERR();
01432 
01433     assert(lp->nouts == 0);     /* all outarcs will be ours */
01434 
01435     /*
01436      * Easy part of complementing, and all there is to do since the MCCE code
01437      * was removed.
01438      */
01439 
01440     colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
01441     NOERR();
01442     dropstate(v->nfa, left);
01443     assert(right->nins == 0);
01444     freestate(v->nfa, right);
01445     return;
01446 }
01447 
01448 /*
01449  - brackpart - handle one item (or range) within a bracket expression
01450  ^ static void brackpart(struct vars *, struct state *, struct state *);
01451  */
01452 static void
01453 brackpart(
01454     struct vars *v,
01455     struct state *lp,
01456     struct state *rp)
01457 {
01458     celt startc, endc;
01459     struct cvec *cv;
01460     const chr *startp, *endp;
01461     chr c[1];
01462 
01463     /*
01464      * Parse something, get rid of special cases, take shortcuts.
01465      */
01466 
01467     switch (v->nexttype) {
01468     case RANGE:                 /* a-b-c or other botch */
01469         ERR(REG_ERANGE);
01470         return;
01471         break;
01472     case PLAIN:
01473         c[0] = v->nextvalue;
01474         NEXT();
01475 
01476         /*
01477          * Shortcut for ordinary chr (not range).
01478          */
01479 
01480         if (!SEE(RANGE)) {
01481             onechr(v, c[0], lp, rp);
01482             return;
01483         }
01484         startc = element(v, c, c+1);
01485         NOERR();
01486         break;
01487     case COLLEL:
01488         startp = v->now;
01489         endp = scanplain(v);
01490         INSIST(startp < endp, REG_ECOLLATE);
01491         NOERR();
01492         startc = element(v, startp, endp);
01493         NOERR();
01494         break;
01495     case ECLASS:
01496         startp = v->now;
01497         endp = scanplain(v);
01498         INSIST(startp < endp, REG_ECOLLATE);
01499         NOERR();
01500         startc = element(v, startp, endp);
01501         NOERR();
01502         cv = eclass(v, startc, (v->cflags&REG_ICASE));
01503         NOERR();
01504         dovec(v, cv, lp, rp);
01505         return;
01506         break;
01507     case CCLASS:
01508         startp = v->now;
01509         endp = scanplain(v);
01510         INSIST(startp < endp, REG_ECTYPE);
01511         NOERR();
01512         cv = cclass(v, startp, endp, (v->cflags&REG_ICASE));
01513         NOERR();
01514         dovec(v, cv, lp, rp);
01515         return;
01516         break;
01517     default:
01518         ERR(REG_ASSERT);
01519         return;
01520         break;
01521     }
01522 
01523     if (SEE(RANGE)) {
01524         NEXT();
01525         switch (v->nexttype) {
01526         case PLAIN:
01527         case RANGE:
01528             c[0] = v->nextvalue;
01529             NEXT();
01530             endc = element(v, c, c+1);
01531             NOERR();
01532             break;
01533         case COLLEL:
01534             startp = v->now;
01535             endp = scanplain(v);
01536             INSIST(startp < endp, REG_ECOLLATE);
01537             NOERR();
01538             endc = element(v, startp, endp);
01539             NOERR();
01540             break;
01541         default:
01542             ERR(REG_ERANGE);
01543             return;
01544             break;
01545         }
01546     } else {
01547         endc = startc;
01548     }
01549 
01550     /*
01551      * Ranges are unportable. Actually, standard C does guarantee that digits
01552      * are contiguous, but making that an exception is just too complicated.
01553      */
01554 
01555     if (startc != endc) {
01556         NOTE(REG_UUNPORT);
01557     }
01558     cv = range(v, startc, endc, (v->cflags&REG_ICASE));
01559     NOERR();
01560     dovec(v, cv, lp, rp);
01561 }
01562 
01563 /*
01564  - scanplain - scan PLAIN contents of [. etc.
01565  * Certain bits of trickery in lex.c know that this code does not try to look
01566  * past the final bracket of the [. etc.
01567  ^ static const chr *scanplain(struct vars *);
01568  */
01569 static const chr *              /* just after end of sequence */
01570 scanplain(
01571     struct vars *v)
01572 {
01573     const chr *endp;
01574 
01575     assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
01576     NEXT();
01577 
01578     endp = v->now;
01579     while (SEE(PLAIN)) {
01580         endp = v->now;
01581         NEXT();
01582     }
01583 
01584     assert(SEE(END) || ISERR());
01585     NEXT();
01586 
01587     return endp;
01588 }
01589 
01590 /*
01591  - onechr - fill in arcs for a plain character, and possible case complements
01592  * This is mostly a shortcut for efficient handling of the common case.
01593  ^ static void onechr(struct vars *, pchr, struct state *, struct state *);
01594  */
01595 static void
01596 onechr(
01597     struct vars *v,
01598     pchr c,
01599     struct state *lp,
01600     struct state *rp)
01601 {
01602     if (!(v->cflags&REG_ICASE)) {
01603         newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
01604         return;
01605     }
01606 
01607     /*
01608      * Rats, need general case anyway...
01609      */
01610 
01611     dovec(v, allcases(v, c), lp, rp);
01612 }
01613 
01614 /*
01615  - dovec - fill in arcs for each element of a cvec
01616  ^ static void dovec(struct vars *, struct cvec *, struct state *,
01617  ^      struct state *);
01618  */
01619 static void
01620 dovec(
01621     struct vars *v,
01622     struct cvec *cv,
01623     struct state *lp,
01624     struct state *rp)
01625 {
01626     chr ch, from, to;
01627     const chr *p;
01628     int i;
01629 
01630     for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
01631         ch = *p;
01632         newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
01633     }
01634 
01635     for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
01636         from = *p;
01637         to = *(p+1);
01638         if (from <= to) {
01639             subrange(v, from, to, lp, rp);
01640         }
01641     }
01642 
01643 }
01644 
01645 /*
01646  - wordchrs - set up word-chr list for word-boundary stuff, if needed
01647  * The list is kept as a bunch of arcs between two dummy states; it's disposed
01648  * of by the unreachable-states sweep in NFA optimization. Does NEXT(). Must
01649  * not be called from any unusual lexical context. This should be reconciled
01650  * with the \w etc. handling in lex.c, and should be cleaned up to reduce
01651  * dependencies on input scanning.
01652  ^ static void wordchrs(struct vars *);
01653  */
01654 static void
01655 wordchrs(
01656     struct vars *v)
01657 {
01658     struct state *left, *right;
01659 
01660     if (v->wordchrs != NULL) {
01661         NEXT();         /* for consistency */
01662         return;
01663     }
01664 
01665     left = newstate(v->nfa);
01666     right = newstate(v->nfa);
01667     NOERR();
01668 
01669     /*
01670      * Fine point: implemented with [::], and lexer will set REG_ULOCALE.
01671      */
01672 
01673     lexword(v);
01674     NEXT();
01675     assert(v->savenow != NULL && SEE('['));
01676     bracket(v, left, right);
01677     assert((v->savenow != NULL && SEE(']')) || ISERR());
01678     NEXT();
01679     NOERR();
01680     v->wordchrs = left;
01681 }
01682 
01683 /*
01684  - subre - allocate a subre
01685  ^ static struct subre *subre(struct vars *, int, int, struct state *,
01686  ^      struct state *);
01687  */
01688 static struct subre *
01689 subre(
01690     struct vars *v,
01691     int op,
01692     int flags,
01693     struct state *begin,
01694     struct state *end)
01695 {
01696     struct subre *ret = v->treefree;
01697 
01698     if (ret != NULL) {
01699         v->treefree = ret->left;
01700     } else {
01701         ret = (struct subre *) MALLOC(sizeof(struct subre));
01702         if (ret == NULL) {
01703             ERR(REG_ESPACE);
01704             return NULL;
01705         }
01706         ret->chain = v->treechain;
01707         v->treechain = ret;
01708     }
01709 
01710     assert(strchr("|.b(=", op) != NULL);
01711 
01712     ret->op = op;
01713     ret->flags = flags;
01714     ret->retry = 0;
01715     ret->subno = 0;
01716     ret->min = ret->max = 1;
01717     ret->left = NULL;
01718     ret->right = NULL;
01719     ret->begin = begin;
01720     ret->end = end;
01721     ZAPCNFA(ret->cnfa);
01722 
01723     return ret;
01724 }
01725 
01726 /*
01727  - freesubre - free a subRE subtree
01728  ^ static void freesubre(struct vars *, struct subre *);
01729  */
01730 static void
01731 freesubre(
01732     struct vars *v,             /* might be NULL */
01733     struct subre *sr)
01734 {
01735     if (sr == NULL) {
01736         return;
01737     }
01738 
01739     if (sr->left != NULL) {
01740         freesubre(v, sr->left);
01741     }
01742     if (sr->right != NULL) {
01743         freesubre(v, sr->right);
01744     }
01745 
01746     freesrnode(v, sr);
01747 }
01748 
01749 /*
01750  - freesrnode - free one node in a subRE subtree
01751  ^ static void freesrnode(struct vars *, struct subre *);
01752  */
01753 static void
01754 freesrnode(
01755     struct vars *v,             /* might be NULL */
01756     struct subre *sr)
01757 {
01758     if (sr == NULL) {
01759         return;
01760     }
01761 
01762     if (!NULLCNFA(sr->cnfa)) {
01763         freecnfa(&sr->cnfa);
01764     }
01765     sr->flags = 0;
01766 
01767     if (v != NULL) {
01768         sr->left = v->treefree;
01769         v->treefree = sr;
01770     } else {
01771         FREE(sr);
01772     }
01773 }
01774 
01775 /*
01776  - optst - optimize a subRE subtree
01777  ^ static void optst(struct vars *, struct subre *);
01778  */
01779 static void
01780 optst(
01781     struct vars *v,
01782     struct subre *t)
01783 {
01784     /*
01785      * DGP (2007-11-13): I assume it was the programmer's intent to eventually
01786      * come back and add code to optimize subRE trees, but the routine coded
01787      * just spends effort traversing the tree and doing nothing. We can do
01788      * nothing with less effort.
01789      */
01790 
01791     return;
01792 }
01793 
01794 /*
01795  - numst - number tree nodes (assigning retry indexes)
01796  ^ static int numst(struct subre *, int);
01797  */
01798 static int                      /* next number */
01799 numst(
01800     struct subre *t,
01801     int start)                  /* starting point for subtree numbers */
01802 {
01803     int i;
01804 
01805     assert(t != NULL);
01806 
01807     i = start;
01808     t->retry = (short) i++;
01809     if (t->left != NULL) {
01810         i = numst(t->left, i);
01811     }
01812     if (t->right != NULL) {
01813         i = numst(t->right, i);
01814     }
01815     return i;
01816 }
01817 
01818 /*
01819  - markst - mark tree nodes as INUSE
01820  ^ static void markst(struct subre *);
01821  */
01822 static void
01823 markst(
01824     struct subre *t)
01825 {
01826     assert(t != NULL);
01827 
01828     t->flags |= INUSE;
01829     if (t->left != NULL) {
01830         markst(t->left);
01831     }
01832     if (t->right != NULL) {
01833         markst(t->right);
01834     }
01835 }
01836 
01837 /*
01838  - cleanst - free any tree nodes not marked INUSE
01839  ^ static void cleanst(struct vars *);
01840  */
01841 static void
01842 cleanst(
01843     struct vars *v)
01844 {
01845     struct subre *t;
01846     struct subre *next;
01847 
01848     for (t = v->treechain; t != NULL; t = next) {
01849         next = t->chain;
01850         if (!(t->flags&INUSE)) {
01851             FREE(t);
01852         }
01853     }
01854     v->treechain = NULL;
01855     v->treefree = NULL;         /* just on general principles */
01856 }
01857 
01858 /*
01859  - nfatree - turn a subRE subtree into a tree of compacted NFAs
01860  ^ static long nfatree(struct vars *, struct subre *, FILE *);
01861  */
01862 static long                     /* optimize results from top node */
01863 nfatree(
01864     struct vars *v,
01865     struct subre *t,
01866     FILE *f)                    /* for debug output */
01867 {
01868     assert(t != NULL && t->begin != NULL);
01869 
01870     if (t->left != NULL) {
01871         (DISCARD) nfatree(v, t->left, f);
01872     }
01873     if (t->right != NULL) {
01874         (DISCARD) nfatree(v, t->right, f);
01875     }
01876 
01877     return nfanode(v, t, f);
01878 }
01879 
01880 /*
01881  - nfanode - do one NFA for nfatree
01882  ^ static long nfanode(struct vars *, struct subre *, FILE *);
01883  */
01884 static long                     /* optimize results */
01885 nfanode(
01886     struct vars *v,
01887     struct subre *t,
01888     FILE *f)                    /* for debug output */
01889 {
01890     struct nfa *nfa;
01891     long ret = 0;
01892     char idbuf[50];
01893 
01894     assert(t->begin != NULL);
01895 
01896     if (f != NULL) {
01897         fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",
01898                 stid(t, idbuf, sizeof(idbuf)));
01899     }
01900     nfa = newnfa(v, v->cm, v->nfa);
01901     NOERRZ();
01902     dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
01903     if (!ISERR()) {
01904         specialcolors(nfa);
01905         ret = optimize(nfa, f);
01906     }
01907     if (!ISERR()) {
01908         compact(nfa, &t->cnfa);
01909     }
01910 
01911     freenfa(nfa);
01912     return ret;
01913 }
01914 
01915 /*
01916  - newlacon - allocate a lookahead-constraint subRE
01917  ^ static int newlacon(struct vars *, struct state *, struct state *, int);
01918  */
01919 static int                      /* lacon number */
01920 newlacon(
01921     struct vars *v,
01922     struct state *begin,
01923     struct state *end,
01924     int pos)
01925 {
01926     struct subre *sub;
01927     int n;
01928 
01929     if (v->nlacons == 0) {
01930         v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre));
01931         n = 1;          /* skip 0th */
01932         v->nlacons = 2;
01933     } else {
01934         v->lacons = (struct subre *) REALLOC(v->lacons,
01935                 (v->nlacons+1)*sizeof(struct subre));
01936         n = v->nlacons++;
01937     }
01938 
01939     if (v->lacons == NULL) {
01940         ERR(REG_ESPACE);
01941         return 0;
01942     }
01943 
01944     sub = &v->lacons[n];
01945     sub->begin = begin;
01946     sub->end = end;
01947     sub->subno = pos;
01948     ZAPCNFA(sub->cnfa);
01949     return n;
01950 }
01951 
01952 /*
01953  - freelacons - free lookahead-constraint subRE vector
01954  ^ static void freelacons(struct subre *, int);
01955  */
01956 static void
01957 freelacons(
01958     struct subre *subs,
01959     int n)
01960 {
01961     struct subre *sub;
01962     int i;
01963 
01964     assert(n > 0);
01965     for (sub=subs+1, i=n-1; i>0; sub++, i--) {  /* no 0th */
01966         if (!NULLCNFA(sub->cnfa)) {
01967             freecnfa(&sub->cnfa);
01968         }
01969     }
01970     FREE(subs);
01971 }
01972 
01973 /*
01974  - rfree - free a whole RE (insides of regfree)
01975  ^ static void rfree(regex_t *);
01976  */
01977 static void
01978 rfree(
01979     regex_t *re)
01980 {
01981     struct guts *g;
01982 
01983     if (re == NULL || re->re_magic != REMAGIC) {
01984         return;
01985     }
01986 
01987     re->re_magic = 0;   /* invalidate RE */
01988     g = (struct guts *) re->re_guts;
01989     re->re_guts = NULL;
01990     re->re_fns = NULL;
01991     g->magic = 0;
01992     freecm(&g->cmap);
01993     if (g->tree != NULL) {
01994         freesubre(NULL, g->tree);
01995     }
01996     if (g->lacons != NULL) {
01997         freelacons(g->lacons, g->nlacons);
01998     }
01999     if (!NULLCNFA(g->search)) {
02000         freecnfa(&g->search);
02001     }
02002     FREE(g);
02003 }
02004 
02005 /*
02006  - dump - dump an RE in human-readable form
02007  ^ static void dump(regex_t *, FILE *);
02008  */
02009 static void
02010 dump(
02011     regex_t *re,
02012     FILE *f)
02013 {
02014 #ifdef REG_DEBUG
02015     struct guts *g;
02016     int i;
02017 
02018     if (re->re_magic != REMAGIC) {
02019         fprintf(f, "bad magic number (0x%x not 0x%x)\n",
02020                 re->re_magic, REMAGIC);
02021     }
02022     if (re->re_guts == NULL) {
02023         fprintf(f, "NULL guts!!!\n");
02024         return;
02025     }
02026     g = (struct guts *) re->re_guts;
02027     if (g->magic != GUTSMAGIC) {
02028         fprintf(f, "bad guts magic number (0x%x not 0x%x)\n",
02029                 g->magic, GUTSMAGIC);
02030     }
02031 
02032     fprintf(f, "\n\n\n========= DUMP ==========\n");
02033     fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",
02034             re->re_nsub, re->re_info, re->re_csize, g->ntree);
02035 
02036     dumpcolors(&g->cmap, f);
02037     if (!NULLCNFA(g->search)) {
02038         printf("\nsearch:\n");
02039         dumpcnfa(&g->search, f);
02040     }
02041     for (i = 1; i < g->nlacons; i++) {
02042         fprintf(f, "\nla%d (%s):\n", i,
02043                 (g->lacons[i].subno) ? "positive" : "negative");
02044         dumpcnfa(&g->lacons[i].cnfa, f);
02045     }
02046     fprintf(f, "\n");
02047     dumpst(g->tree, f, 0);
02048 #endif
02049 }
02050 
02051 /*
02052  - dumpst - dump a subRE tree
02053  ^ static void dumpst(struct subre *, FILE *, int);
02054  */
02055 static void
02056 dumpst(
02057     struct subre *t,
02058     FILE *f,
02059     int nfapresent)             /* is the original NFA still around? */
02060 {
02061     if (t == NULL) {
02062         fprintf(f, "null tree\n");
02063     } else {
02064         stdump(t, f, nfapresent);
02065     }
02066     fflush(f);
02067 }
02068 
02069 /*
02070  - stdump - recursive guts of dumpst
02071  ^ static void stdump(struct subre *, FILE *, int);
02072  */
02073 static void
02074 stdump(
02075     struct subre *t,
02076     FILE *f,
02077     int nfapresent)             /* is the original NFA still around? */
02078 {
02079     char idbuf[50];
02080 
02081     fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
02082     if (t->flags&LONGER) {
02083         fprintf(f, " longest");
02084     }
02085     if (t->flags&SHORTER) {
02086         fprintf(f, " shortest");
02087     }
02088     if (t->flags&MIXED) {
02089         fprintf(f, " hasmixed");
02090     }
02091     if (t->flags&CAP) {
02092         fprintf(f, " hascapture");
02093     }
02094     if (t->flags&BACKR) {
02095         fprintf(f, " hasbackref");
02096     }
02097     if (!(t->flags&INUSE)) {
02098         fprintf(f, " UNUSED");
02099     }
02100     if (t->subno != 0) {
02101         fprintf(f, " (#%d)", t->subno);
02102     }
02103     if (t->min != 1 || t->max != 1) {
02104         fprintf(f, " {%d,", t->min);
02105         if (t->max != INFINITY) {
02106             fprintf(f, "%d", t->max);
02107         }
02108         fprintf(f, "}");
02109     }
02110     if (nfapresent) {
02111         fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);
02112     }
02113     if (t->left != NULL) {
02114         fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
02115     }
02116     if (t->right != NULL) {
02117         fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
02118     }
02119     if (!NULLCNFA(t->cnfa)) {
02120         fprintf(f, "\n");
02121         dumpcnfa(&t->cnfa, f);
02122     }
02123     fprintf(f, "\n");
02124     if (t->left != NULL) {
02125         stdump(t->left, f, nfapresent);
02126     }
02127     if (t->right != NULL) {
02128         stdump(t->right, f, nfapresent);
02129     }
02130 }
02131 
02132 /*
02133  - stid - identify a subtree node for dumping
02134  ^ static char *stid(struct subre *, char *, size_t);
02135  */
02136 static const char *                     /* points to buf or constant string */
02137 stid(
02138     struct subre *t,
02139     char *buf,
02140     size_t bufsize)
02141 {
02142     /*
02143      * Big enough for hex int or decimal t->retry?
02144      */
02145 
02146     if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) {
02147         return "unable";
02148     }
02149     if (t->retry != 0) {
02150         sprintf(buf, "%d", t->retry);
02151     } else {
02152         sprintf(buf, "%p", t);
02153     }
02154     return buf;
02155 }
02156 
02157 #include "regc_lex.c"
02158 #include "regc_color.c"
02159 #include "regc_nfa.c"
02160 #include "regc_cvec.c"
02161 #include "regc_locale.c"
02162 
02163 /*
02164  * Local Variables:
02165  * mode: c
02166  * c-basic-offset: 4
02167  * fill-column: 78
02168  * End:
02169  */