regc_nfa.c

Go to the documentation of this file.
00001 /*
00002  * NFA utilities.
00003  * This file is #included by regcomp.c.
00004  *
00005  * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
00006  *
00007  * Development of this software was funded, in part, by Cray Research Inc.,
00008  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
00009  * Corporation, none of whom are responsible for the results. The author
00010  * thanks all of them.
00011  *
00012  * Redistribution and use in source and binary forms -- with or without
00013  * modification -- are permitted for any purpose, provided that
00014  * redistributions in source form retain this entire copyright notice and
00015  * indicate the origin and nature of any modifications.
00016  *
00017  * I'd appreciate being given credit for this package in the documentation of
00018  * software which uses it, but that is not a requirement.
00019  *
00020  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
00021  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
00022  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
00023  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00024  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00025  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
00026  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00027  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
00028  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
00029  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * One or two things that technically ought to be in here are actually in
00032  * color.c, thanks to some incestuous relationships in the color chains.
00033  */
00034 
00035 #define NISERR()        VISERR(nfa->v)
00036 #define NERR(e)         VERR(nfa->v, (e))
00037 
00038 /*
00039  - newnfa - set up an NFA
00040  ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
00041  */
00042 static struct nfa *             /* the NFA, or NULL */
00043 newnfa(
00044     struct vars *v,
00045     struct colormap *cm,
00046     struct nfa *parent)         /* NULL if primary NFA */
00047 {
00048     struct nfa *nfa;
00049 
00050     nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
00051     if (nfa == NULL) {
00052         return NULL;
00053     }
00054 
00055     nfa->states = NULL;
00056     nfa->slast = NULL;
00057     nfa->free = NULL;
00058     nfa->nstates = 0;
00059     nfa->cm = cm;
00060     nfa->v = v;
00061     nfa->size = 0;
00062     nfa->bos[0] = nfa->bos[1] = COLORLESS;
00063     nfa->eos[0] = nfa->eos[1] = COLORLESS;
00064     nfa->parent = parent;       /* Precedes newfstate so parent is valid. */
00065     nfa->post = newfstate(nfa, '@');    /* number 0 */
00066     nfa->pre = newfstate(nfa, '>');     /* number 1 */
00067 
00068     nfa->init = newstate(nfa);  /* May become invalid later. */
00069     nfa->final = newstate(nfa);
00070     if (ISERR()) {
00071         freenfa(nfa);
00072         return NULL;
00073     }
00074     rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
00075     newarc(nfa, '^', 1, nfa->pre, nfa->init);
00076     newarc(nfa, '^', 0, nfa->pre, nfa->init);
00077     rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
00078     newarc(nfa, '$', 1, nfa->final, nfa->post);
00079     newarc(nfa, '$', 0, nfa->final, nfa->post);
00080 
00081     if (ISERR()) {
00082         freenfa(nfa);
00083         return NULL;
00084     }
00085     return nfa;
00086 }
00087 
00088 /*
00089  - TooManyStates - checks if the max states exceeds the compile-time value
00090  ^ static int TooManyStates(struct nfa *);
00091  */
00092 static int
00093 TooManyStates(
00094     struct nfa *nfa)
00095 {
00096     struct nfa *parent = nfa->parent;
00097     size_t sz = nfa->size;
00098 
00099     while (parent != NULL) {
00100         sz = parent->size;
00101         parent = parent->parent;
00102     }
00103     if (sz > REG_MAX_STATES) {
00104         return 1;
00105     }
00106     return 0;
00107 }
00108 
00109 /*
00110  - IncrementSize - increases the tracked size of the NFA and its parents.
00111  ^ static void IncrementSize(struct nfa *);
00112  */
00113 static void
00114 IncrementSize(
00115     struct nfa *nfa)
00116 {
00117     struct nfa *parent = nfa->parent;
00118 
00119     nfa->size++;
00120     while (parent != NULL) {
00121         parent->size++;
00122         parent = parent->parent;
00123     }
00124 }
00125 
00126 /*
00127  - DecrementSize - increases the tracked size of the NFA and its parents.
00128  ^ static void DecrementSize(struct nfa *);
00129  */
00130 static void
00131 DecrementSize(
00132     struct nfa *nfa)
00133 {
00134     struct nfa *parent = nfa->parent;
00135 
00136     nfa->size--;
00137     while (parent != NULL) {
00138         parent->size--;
00139         parent = parent->parent;
00140     }
00141 }
00142 
00143 /*
00144  - freenfa - free an entire NFA
00145  ^ static VOID freenfa(struct nfa *);
00146  */
00147 static void
00148 freenfa(
00149     struct nfa *nfa)
00150 {
00151     struct state *s;
00152 
00153     while ((s = nfa->states) != NULL) {
00154         s->nins = s->nouts = 0; /* don't worry about arcs */
00155         freestate(nfa, s);
00156     }
00157     while ((s = nfa->free) != NULL) {
00158         nfa->free = s->next;
00159         destroystate(nfa, s);
00160     }
00161 
00162     nfa->slast = NULL;
00163     nfa->nstates = -1;
00164     nfa->pre = NULL;
00165     nfa->post = NULL;
00166     FREE(nfa);
00167 }
00168 
00169 /*
00170  - newstate - allocate an NFA state, with zero flag value
00171  ^ static struct state *newstate(struct nfa *);
00172  */
00173 static struct state *           /* NULL on error */
00174 newstate(
00175     struct nfa *nfa)
00176 {
00177     struct state *s;
00178 
00179     if (TooManyStates(nfa)) {
00180         /* XXX: add specific error for this */
00181         NERR(REG_ETOOBIG);
00182         return NULL;
00183     }
00184     if (nfa->free != NULL) {
00185         s = nfa->free;
00186         nfa->free = s->next;
00187     } else {
00188         s = (struct state *) MALLOC(sizeof(struct state));
00189         if (s == NULL) {
00190             NERR(REG_ESPACE);
00191             return NULL;
00192         }
00193         s->oas.next = NULL;
00194         s->free = NULL;
00195         s->noas = 0;
00196     }
00197 
00198     assert(nfa->nstates >= 0);
00199     s->no = nfa->nstates++;
00200     s->flag = 0;
00201     if (nfa->states == NULL) {
00202         nfa->states = s;
00203     }
00204     s->nins = 0;
00205     s->ins = NULL;
00206     s->nouts = 0;
00207     s->outs = NULL;
00208     s->tmp = NULL;
00209     s->next = NULL;
00210     if (nfa->slast != NULL) {
00211         assert(nfa->slast->next == NULL);
00212         nfa->slast->next = s;
00213     }
00214     s->prev = nfa->slast;
00215     nfa->slast = s;
00216 
00217     /*
00218      * Track the current size and the parent size.
00219      */
00220 
00221     IncrementSize(nfa);
00222     return s;
00223 }
00224 
00225 /*
00226  - newfstate - allocate an NFA state with a specified flag value
00227  ^ static struct state *newfstate(struct nfa *, int flag);
00228  */
00229 static struct state *           /* NULL on error */
00230 newfstate(
00231     struct nfa *nfa,
00232     int flag)
00233 {
00234     struct state *s;
00235 
00236     s = newstate(nfa);
00237     if (s != NULL) {
00238         s->flag = (char) flag;
00239     }
00240     return s;
00241 }
00242 
00243 /*
00244  - dropstate - delete a state's inarcs and outarcs and free it
00245  ^ static VOID dropstate(struct nfa *, struct state *);
00246  */
00247 static void
00248 dropstate(
00249     struct nfa *nfa,
00250     struct state *s)
00251 {
00252     struct arc *a;
00253 
00254     while ((a = s->ins) != NULL) {
00255         freearc(nfa, a);
00256     }
00257     while ((a = s->outs) != NULL) {
00258         freearc(nfa, a);
00259     }
00260     freestate(nfa, s);
00261 }
00262 
00263 /*
00264  - freestate - free a state, which has no in-arcs or out-arcs
00265  ^ static VOID freestate(struct nfa *, struct state *);
00266  */
00267 static void
00268 freestate(
00269     struct nfa *nfa,
00270     struct state *s)
00271 {
00272     assert(s != NULL);
00273     assert(s->nins == 0 && s->nouts == 0);
00274 
00275     s->no = FREESTATE;
00276     s->flag = 0;
00277     if (s->next != NULL) {
00278         s->next->prev = s->prev;
00279     } else {
00280         assert(s == nfa->slast);
00281         nfa->slast = s->prev;
00282     }
00283     if (s->prev != NULL) {
00284         s->prev->next = s->next;
00285     } else {
00286         assert(s == nfa->states);
00287         nfa->states = s->next;
00288     }
00289     s->prev = NULL;
00290     s->next = nfa->free;        /* don't delete it, put it on the free list */
00291     nfa->free = s;
00292     DecrementSize(nfa);
00293 }
00294 
00295 /*
00296  - destroystate - really get rid of an already-freed state
00297  ^ static VOID destroystate(struct nfa *, struct state *);
00298  */
00299 static void
00300 destroystate(
00301     struct nfa *nfa,
00302     struct state *s)
00303 {
00304     struct arcbatch *ab;
00305     struct arcbatch *abnext;
00306 
00307     assert(s->no == FREESTATE);
00308     for (ab=s->oas.next ; ab!=NULL ; ab=abnext) {
00309         abnext = ab->next;
00310         FREE(ab);
00311     }
00312     s->ins = NULL;
00313     s->outs = NULL;
00314     s->next = NULL;
00315     FREE(s);
00316 }
00317 
00318 /*
00319  - newarc - set up a new arc within an NFA
00320  ^ static VOID newarc(struct nfa *, int, pcolor, struct state *,
00321  ^      struct state *);
00322  */
00323 static void
00324 newarc(
00325     struct nfa *nfa,
00326     int t,
00327     pcolor co,
00328     struct state *from,
00329     struct state *to)
00330 {
00331     struct arc *a;
00332 
00333     assert(from != NULL && to != NULL);
00334 
00335     /*
00336      * Check for duplicates.
00337      */
00338 
00339     for (a=from->outs ; a!=NULL ; a=a->outchain) {
00340         if (a->to == to && a->co == co && a->type == t) {
00341             return;
00342         }
00343     }
00344 
00345     a = allocarc(nfa, from);
00346     if (NISERR()) {
00347         return;
00348     }
00349     assert(a != NULL);
00350 
00351     a->type = t;
00352     a->co = (color) co;
00353     a->to = to;
00354     a->from = from;
00355 
00356     /*
00357      * Put the new arc on the beginning, not the end, of the chains. Not only
00358      * is this easier, it has the very useful side effect that deleting the
00359      * most-recently-added arc is the cheapest case rather than the most
00360      * expensive one.
00361      */
00362 
00363     a->inchain = to->ins;
00364     to->ins = a;
00365     a->outchain = from->outs;
00366     from->outs = a;
00367 
00368     from->nouts++;
00369     to->nins++;
00370 
00371     if (COLORED(a) && nfa->parent == NULL) {
00372         colorchain(nfa->cm, a);
00373     }
00374 }
00375 
00376 /*
00377  - allocarc - allocate a new out-arc within a state
00378  ^ static struct arc *allocarc(struct nfa *, struct state *);
00379  */
00380 static struct arc *             /* NULL for failure */
00381 allocarc(
00382     struct nfa *nfa,
00383     struct state *s)
00384 {
00385     struct arc *a;
00386 
00387     /*
00388      * Shortcut
00389      */
00390 
00391     if (s->free == NULL && s->noas < ABSIZE) {
00392         a = &s->oas.a[s->noas];
00393         s->noas++;
00394         return a;
00395     }
00396 
00397     /*
00398      * if none at hand, get more
00399      */
00400 
00401     if (s->free == NULL) {
00402         struct arcbatch *newAb = (struct arcbatch *)
00403                 MALLOC(sizeof(struct arcbatch));
00404         int i;
00405 
00406         if (newAb == NULL) {
00407             NERR(REG_ESPACE);
00408             return NULL;
00409         }
00410         newAb->next = s->oas.next;
00411         s->oas.next = newAb;
00412 
00413         for (i=0 ; i<ABSIZE ; i++) {
00414             newAb->a[i].type = 0;
00415             newAb->a[i].freechain = &newAb->a[i+1];
00416         }
00417         newAb->a[ABSIZE-1].freechain = NULL;
00418         s->free = &newAb->a[0];
00419     }
00420     assert(s->free != NULL);
00421 
00422     a = s->free;
00423     s->free = a->freechain;
00424     return a;
00425 }
00426 
00427 /*
00428  - freearc - free an arc
00429  ^ static VOID freearc(struct nfa *, struct arc *);
00430  */
00431 static void
00432 freearc(
00433     struct nfa *nfa,
00434     struct arc *victim)
00435 {
00436     struct state *from = victim->from;
00437     struct state *to = victim->to;
00438     struct arc *a;
00439 
00440     assert(victim->type != 0);
00441 
00442     /*
00443      * Take it off color chain if necessary.
00444      */
00445 
00446     if (COLORED(victim) && nfa->parent == NULL) {
00447         uncolorchain(nfa->cm, victim);
00448     }
00449 
00450     /*
00451      * Take it off source's out-chain.
00452      */
00453 
00454     assert(from != NULL);
00455     assert(from->outs != NULL);
00456     a = from->outs;
00457     if (a == victim) {          /* simple case: first in chain */
00458         from->outs = victim->outchain;
00459     } else {
00460         for (; a!=NULL && a->outchain!=victim ; a=a->outchain) {
00461             continue;
00462         }
00463         assert(a != NULL);
00464         a->outchain = victim->outchain;
00465     }
00466     from->nouts--;
00467 
00468     /*
00469      * Take it off target's in-chain.
00470      */
00471 
00472     assert(to != NULL);
00473     assert(to->ins != NULL);
00474     a = to->ins;
00475     if (a == victim) {          /* simple case: first in chain */
00476         to->ins = victim->inchain;
00477     } else {
00478         for (; a->inchain!=victim ; a=a->inchain) {
00479             assert(a->inchain != NULL);
00480             continue;
00481         }
00482         a->inchain = victim->inchain;
00483     }
00484     to->nins--;
00485 
00486     /*
00487      * Clean up and place on free list.
00488      */
00489 
00490     victim->type = 0;
00491     victim->from = NULL;        /* precautions... */
00492     victim->to = NULL;
00493     victim->inchain = NULL;
00494     victim->outchain = NULL;
00495     victim->freechain = from->free;
00496     from->free = victim;
00497 }
00498 
00499 /*
00500  - findarc - find arc, if any, from given source with given type and color
00501  * If there is more than one such arc, the result is random.
00502  ^ static struct arc *findarc(struct state *, int, pcolor);
00503  */
00504 static struct arc *
00505 findarc(
00506     struct state *s,
00507     int type,
00508     pcolor co)
00509 {
00510     struct arc *a;
00511 
00512     for (a=s->outs ; a!=NULL ; a=a->outchain) {
00513         if (a->type == type && a->co == co) {
00514             return a;
00515         }
00516     }
00517     return NULL;
00518 }
00519 
00520 /*
00521  - cparc - allocate a new arc within an NFA, copying details from old one
00522  ^ static VOID cparc(struct nfa *, struct arc *, struct state *,
00523  ^      struct state *);
00524  */
00525 static void
00526 cparc(
00527     struct nfa *nfa,
00528     struct arc *oa,
00529     struct state *from,
00530     struct state *to)
00531 {
00532     newarc(nfa, oa->type, oa->co, from, to);
00533 }
00534 
00535 /*
00536  - moveins - move all in arcs of a state to another state
00537  * You might think this could be done better by just updating the
00538  * existing arcs, and you would be right if it weren't for the desire
00539  * for duplicate suppression, which makes it easier to just make new
00540  * ones to exploit the suppression built into newarc.
00541  ^ static VOID moveins(struct nfa *, struct state *, struct state *);
00542  */
00543 static void
00544 moveins(
00545     struct nfa *nfa,
00546     struct state *oldState,
00547     struct state *newState)
00548 {
00549     struct arc *a;
00550 
00551     assert(oldState != newState);
00552 
00553     while ((a = oldState->ins) != NULL) {
00554         cparc(nfa, a, a->from, newState);
00555         freearc(nfa, a);
00556     }
00557     assert(oldState->nins == 0);
00558     assert(oldState->ins == NULL);
00559 }
00560 
00561 /*
00562  - copyins - copy all in arcs of a state to another state
00563  ^ static VOID copyins(struct nfa *, struct state *, struct state *);
00564  */
00565 static void
00566 copyins(
00567     struct nfa *nfa,
00568     struct state *oldState,
00569     struct state *newState)
00570 {
00571     struct arc *a;
00572 
00573     assert(oldState != newState);
00574 
00575     for (a=oldState->ins ; a!=NULL ; a=a->inchain) {
00576         cparc(nfa, a, a->from, newState);
00577     }
00578 }
00579 
00580 /*
00581  - moveouts - move all out arcs of a state to another state
00582  ^ static VOID moveouts(struct nfa *, struct state *, struct state *);
00583  */
00584 static void
00585 moveouts(
00586     struct nfa *nfa,
00587     struct state *oldState,
00588     struct state *newState)
00589 {
00590     struct arc *a;
00591 
00592     assert(oldState != newState);
00593 
00594     while ((a = oldState->outs) != NULL) {
00595         cparc(nfa, a, newState, a->to);
00596         freearc(nfa, a);
00597     }
00598 }
00599 
00600 /*
00601  - copyouts - copy all out arcs of a state to another state
00602  ^ static VOID copyouts(struct nfa *, struct state *, struct state *);
00603  */
00604 static void
00605 copyouts(
00606     struct nfa *nfa,
00607     struct state *oldState,
00608     struct state *newState)
00609 {
00610     struct arc *a;
00611 
00612     assert(oldState != newState);
00613 
00614     for (a=oldState->outs ; a!=NULL ; a=a->outchain) {
00615         cparc(nfa, a, newState, a->to);
00616     }
00617 }
00618 
00619 /*
00620  - cloneouts - copy out arcs of a state to another state pair, modifying type
00621  ^ static VOID cloneouts(struct nfa *, struct state *, struct state *,
00622  ^      struct state *, int);
00623  */
00624 static void
00625 cloneouts(
00626     struct nfa *nfa,
00627     struct state *old,
00628     struct state *from,
00629     struct state *to,
00630     int type)
00631 {
00632     struct arc *a;
00633 
00634     assert(old != from);
00635 
00636     for (a=old->outs ; a!=NULL ; a=a->outchain) {
00637         newarc(nfa, type, a->co, from, to);
00638     }
00639 }
00640 
00641 /*
00642  - delsub - delete a sub-NFA, updating subre pointers if necessary
00643  * This uses a recursive traversal of the sub-NFA, marking already-seen
00644  * states using their tmp pointer.
00645  ^ static VOID delsub(struct nfa *, struct state *, struct state *);
00646  */
00647 static void
00648 delsub(
00649     struct nfa *nfa,
00650     struct state *lp,           /* the sub-NFA goes from here... */
00651     struct state *rp)           /* ...to here, *not* inclusive */
00652 {
00653     assert(lp != rp);
00654 
00655     rp->tmp = rp;               /* mark end */
00656 
00657     deltraverse(nfa, lp, lp);
00658     assert(lp->nouts == 0 && rp->nins == 0);    /* did the job */
00659     assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
00660 
00661     rp->tmp = NULL;             /* unmark end */
00662     lp->tmp = NULL;             /* and begin, marked by deltraverse */
00663 }
00664 
00665 /*
00666  - deltraverse - the recursive heart of delsub
00667  * This routine's basic job is to destroy all out-arcs of the state.
00668  ^ static VOID deltraverse(struct nfa *, struct state *, struct state *);
00669  */
00670 static void
00671 deltraverse(
00672     struct nfa *nfa,
00673     struct state *leftend,
00674     struct state *s)
00675 {
00676     struct arc *a;
00677     struct state *to;
00678 
00679     if (s->nouts == 0) {
00680         return;                 /* nothing to do */
00681     }
00682     if (s->tmp != NULL) {
00683         return;                 /* already in progress */
00684     }
00685 
00686     s->tmp = s;                 /* mark as in progress */
00687 
00688     while ((a = s->outs) != NULL) {
00689         to = a->to;
00690         deltraverse(nfa, leftend, to);
00691         assert(to->nouts == 0 || to->tmp != NULL);
00692         freearc(nfa, a);
00693         if (to->nins == 0 && to->tmp == NULL) {
00694             assert(to->nouts == 0);
00695             freestate(nfa, to);
00696         }
00697     }
00698 
00699     assert(s->no != FREESTATE); /* we're still here */
00700     assert(s == leftend || s->nins != 0);       /* and still reachable */
00701     assert(s->nouts == 0);      /* but have no outarcs */
00702 
00703     s->tmp = NULL;              /* we're done here */
00704 }
00705 
00706 /*
00707  - dupnfa - duplicate sub-NFA
00708  * Another recursive traversal, this time using tmp to point to duplicates as
00709  * well as mark already-seen states. (You knew there was a reason why it's a
00710  * state pointer, didn't you? :-))
00711  ^ static VOID dupnfa(struct nfa *, struct state *, struct state *,
00712  ^      struct state *, struct state *);
00713  */
00714 static void
00715 dupnfa(
00716     struct nfa *nfa,
00717     struct state *start,        /* duplicate of subNFA starting here */
00718     struct state *stop,         /* and stopping here */
00719     struct state *from,         /* stringing duplicate from here */
00720     struct state *to)           /* to here */
00721 {
00722     if (start == stop) {
00723         newarc(nfa, EMPTY, 0, from, to);
00724         return;
00725     }
00726 
00727     stop->tmp = to;
00728     duptraverse(nfa, start, from);
00729     /* done, except for clearing out the tmp pointers */
00730 
00731     stop->tmp = NULL;
00732     cleartraverse(nfa, start);
00733 }
00734 
00735 /*
00736  - duptraverse - recursive heart of dupnfa
00737  ^ static VOID duptraverse(struct nfa *, struct state *, struct state *);
00738  */
00739 static void
00740 duptraverse(
00741     struct nfa *nfa,
00742     struct state *s,
00743     struct state *stmp)         /* s's duplicate, or NULL */
00744 {
00745     struct arc *a;
00746 
00747     if (s->tmp != NULL) {
00748         return;                 /* already done */
00749     }
00750 
00751     s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
00752     if (s->tmp == NULL) {
00753         assert(NISERR());
00754         return;
00755     }
00756 
00757     for (a=s->outs ; a!=NULL && !NISERR() ; a=a->outchain) {
00758         duptraverse(nfa, a->to, NULL);
00759         if (NISERR()) {
00760             break;
00761         }
00762         assert(a->to->tmp != NULL);
00763         cparc(nfa, a, s->tmp, a->to->tmp);
00764     }
00765 }
00766 
00767 /*
00768  - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
00769  ^ static VOID cleartraverse(struct nfa *, struct state *);
00770  */
00771 static void
00772 cleartraverse(
00773     struct nfa *nfa,
00774     struct state *s)
00775 {
00776     struct arc *a;
00777 
00778     if (s->tmp == NULL) {
00779         return;
00780     }
00781     s->tmp = NULL;
00782 
00783     for (a=s->outs ; a!=NULL ; a=a->outchain) {
00784         cleartraverse(nfa, a->to);
00785     }
00786 }
00787 
00788 /*
00789  - specialcolors - fill in special colors for an NFA
00790  ^ static VOID specialcolors(struct nfa *);
00791  */
00792 static void
00793 specialcolors(
00794     struct nfa *nfa)
00795 {
00796     /*
00797      * False colors for BOS, BOL, EOS, EOL
00798      */
00799 
00800     if (nfa->parent == NULL) {
00801         nfa->bos[0] = pseudocolor(nfa->cm);
00802         nfa->bos[1] = pseudocolor(nfa->cm);
00803         nfa->eos[0] = pseudocolor(nfa->cm);
00804         nfa->eos[1] = pseudocolor(nfa->cm);
00805     } else {
00806         assert(nfa->parent->bos[0] != COLORLESS);
00807         nfa->bos[0] = nfa->parent->bos[0];
00808         assert(nfa->parent->bos[1] != COLORLESS);
00809         nfa->bos[1] = nfa->parent->bos[1];
00810         assert(nfa->parent->eos[0] != COLORLESS);
00811         nfa->eos[0] = nfa->parent->eos[0];
00812         assert(nfa->parent->eos[1] != COLORLESS);
00813         nfa->eos[1] = nfa->parent->eos[1];
00814     }
00815 }
00816 
00817 /*
00818  - optimize - optimize an NFA
00819  ^ static long optimize(struct nfa *, FILE *);
00820  */
00821 static long                     /* re_info bits */
00822 optimize(
00823     struct nfa *nfa,
00824     FILE *f)                    /* for debug output; NULL none */
00825 {
00826     int verbose = (f != NULL) ? 1 : 0;
00827 
00828     if (verbose) {
00829         fprintf(f, "\ninitial cleanup:\n");
00830     }
00831     cleanup(nfa);               /* may simplify situation */
00832     if (verbose) {
00833         dumpnfa(nfa, f);
00834     }
00835     if (verbose) {
00836         fprintf(f, "\nempties:\n");
00837     }
00838     fixempties(nfa, f);         /* get rid of EMPTY arcs */
00839     if (verbose) {
00840         fprintf(f, "\nconstraints:\n");
00841     }
00842     pullback(nfa, f);           /* pull back constraints backward */
00843     pushfwd(nfa, f);            /* push fwd constraints forward */
00844     if (verbose) {
00845         fprintf(f, "\nfinal cleanup:\n");
00846     }
00847     cleanup(nfa);               /* final tidying */
00848     return analyze(nfa);        /* and analysis */
00849 }
00850 
00851 /*
00852  - pullback - pull back constraints backward to (with luck) eliminate them
00853  ^ static VOID pullback(struct nfa *, FILE *);
00854  */
00855 static void
00856 pullback(
00857     struct nfa *nfa,
00858     FILE *f)                    /* for debug output; NULL none */
00859 {
00860     struct state *s;
00861     struct state *nexts;
00862     struct arc *a;
00863     struct arc *nexta;
00864     int progress;
00865 
00866     /*
00867      * Find and pull until there are no more.
00868      */
00869 
00870     do {
00871         progress = 0;
00872         for (s=nfa->states ; s!=NULL && !NISERR() ; s=nexts) {
00873             nexts = s->next;
00874             for (a=s->outs ; a!=NULL && !NISERR() ; a=nexta) {
00875                 nexta = a->outchain;
00876                 if (a->type == '^' || a->type == BEHIND) {
00877                     if (pull(nfa, a)) {
00878                         progress = 1;
00879                     }
00880                 }
00881                 assert(nexta == NULL || s->no != FREESTATE);
00882             }
00883         }
00884         if (progress && f != NULL) {
00885             dumpnfa(nfa, f);
00886         }
00887     } while (progress && !NISERR());
00888     if (NISERR()) {
00889         return;
00890     }
00891 
00892     for (a=nfa->pre->outs ; a!=NULL ; a=nexta) {
00893         nexta = a->outchain;
00894         if (a->type == '^') {
00895             assert(a->co == 0 || a->co == 1);
00896             newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
00897             freearc(nfa, a);
00898         }
00899     }
00900 }
00901 
00902 /*
00903  - pull - pull a back constraint backward past its source state
00904  * A significant property of this function is that it deletes at most
00905  * one state -- the constraint's from state -- and only if the constraint
00906  * was that state's last outarc.
00907  ^ static int pull(struct nfa *, struct arc *);
00908  */
00909 static int                      /* 0 couldn't, 1 could */
00910 pull(
00911     struct nfa *nfa,
00912     struct arc *con)
00913 {
00914     struct state *from = con->from;
00915     struct state *to = con->to;
00916     struct arc *a;
00917     struct arc *nexta;
00918     struct state *s;
00919 
00920     if (from == to) {           /* circular constraint is pointless */
00921         freearc(nfa, con);
00922         return 1;
00923     }
00924     if (from->flag) {           /* can't pull back beyond start */
00925         return 0;
00926     }
00927     if (from->nins == 0) {      /* unreachable */
00928         freearc(nfa, con);
00929         return 1;
00930     }
00931 
00932     /*
00933      * DGP 2007-11-15: Cloning a state with a circular constraint on its list
00934      * of outs can lead to trouble [Bug 1810038], so get rid of them first.
00935      */
00936 
00937     for (a = from->outs; a != NULL; a = nexta) {
00938         nexta = a->outchain;
00939         switch (a->type) {
00940         case '^':
00941         case '$':
00942         case BEHIND:
00943         case AHEAD:
00944             if (from == a->to) {
00945                 freearc(nfa, a);
00946             }
00947             break;
00948         }
00949     }
00950 
00951     /*
00952      * First, clone from state if necessary to avoid other outarcs.
00953      */
00954 
00955     if (from->nouts > 1) {
00956         s = newstate(nfa);
00957         if (NISERR()) {
00958             return 0;
00959         }
00960         assert(to != from);     /* con is not an inarc */
00961         copyins(nfa, from, s);  /* duplicate inarcs */
00962         cparc(nfa, con, s, to); /* move constraint arc */
00963         freearc(nfa, con);
00964         from = s;
00965         con = from->outs;
00966     }
00967     assert(from->nouts == 1);
00968 
00969     /*
00970      * Propagate the constraint into the from state's inarcs.
00971      */
00972 
00973     for (a=from->ins ; a!=NULL ; a=nexta) {
00974         nexta = a->inchain;
00975         switch (combine(con, a)) {
00976         case INCOMPATIBLE:      /* destroy the arc */
00977             freearc(nfa, a);
00978             break;
00979         case SATISFIED:         /* no action needed */
00980             break;
00981         case COMPATIBLE:        /* swap the two arcs, more or less */
00982             s = newstate(nfa);
00983             if (NISERR()) {
00984                 return 0;
00985             }
00986             cparc(nfa, a, s, to);       /* anticipate move */
00987             cparc(nfa, con, a->from, s);
00988             if (NISERR()) {
00989                 return 0;
00990             }
00991             freearc(nfa, a);
00992             break;
00993         default:
00994             assert(NOTREACHED);
00995             break;
00996         }
00997     }
00998 
00999     /*
01000      * Remaining inarcs, if any, incorporate the constraint.
01001      */
01002 
01003     moveins(nfa, from, to);
01004     dropstate(nfa, from);       /* will free the constraint */
01005     return 1;
01006 }
01007 
01008 /*
01009  - pushfwd - push forward constraints forward to (with luck) eliminate them
01010  ^ static VOID pushfwd(struct nfa *, FILE *);
01011  */
01012 static void
01013 pushfwd(
01014     struct nfa *nfa,
01015     FILE *f)                    /* for debug output; NULL none */
01016 {
01017     struct state *s;
01018     struct state *nexts;
01019     struct arc *a;
01020     struct arc *nexta;
01021     int progress;
01022 
01023     /*
01024      * Find and push until there are no more.
01025      */
01026 
01027     do {
01028         progress = 0;
01029         for (s=nfa->states ; s!=NULL && !NISERR() ; s=nexts) {
01030             nexts = s->next;
01031             for (a = s->ins; a != NULL && !NISERR(); a = nexta) {
01032                 nexta = a->inchain;
01033                 if (a->type == '$' || a->type == AHEAD) {
01034                     if (push(nfa, a)) {
01035                         progress = 1;
01036                     }
01037                 }
01038                 assert(nexta == NULL || s->no != FREESTATE);
01039             }
01040         }
01041         if (progress && f != NULL) {
01042             dumpnfa(nfa, f);
01043         }
01044     } while (progress && !NISERR());
01045     if (NISERR()) {
01046         return;
01047     }
01048 
01049     for (a = nfa->post->ins; a != NULL; a = nexta) {
01050         nexta = a->inchain;
01051         if (a->type == '$') {
01052             assert(a->co == 0 || a->co == 1);
01053             newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
01054             freearc(nfa, a);
01055         }
01056     }
01057 }
01058 
01059 /*
01060  - push - push a forward constraint forward past its destination state
01061  * A significant property of this function is that it deletes at most
01062  * one state -- the constraint's to state -- and only if the constraint
01063  * was that state's last inarc.
01064  ^ static int push(struct nfa *, struct arc *);
01065  */
01066 static int                      /* 0 couldn't, 1 could */
01067 push(
01068     struct nfa *nfa,
01069     struct arc *con)
01070 {
01071     struct state *from = con->from;
01072     struct state *to = con->to;
01073     struct arc *a;
01074     struct arc *nexta;
01075     struct state *s;
01076 
01077     if (to == from) {           /* circular constraint is pointless */
01078         freearc(nfa, con);
01079         return 1;
01080     }
01081     if (to->flag) {             /* can't push forward beyond end */
01082         return 0;
01083     }
01084     if (to->nouts == 0) {       /* dead end */
01085         freearc(nfa, con);
01086         return 1;
01087     }
01088 
01089     /*
01090      * DGP 2007-11-15: Here we duplicate the same protections as appear
01091      * in pull() above to avoid troubles with cloning a state with a
01092      * circular constraint on its list of ins.  It is not clear whether
01093      * this is necessary, or is protecting against a "can't happen".
01094      * Any test case that actually leads to a freearc() call here would
01095      * be a welcome addition to the test suite.
01096      */
01097 
01098     for (a = to->ins; a != NULL; a = nexta) {
01099         nexta = a->inchain;
01100         switch (a->type) {
01101         case '^':
01102         case '$':
01103         case BEHIND:
01104         case AHEAD:
01105             if (a->from == to) {
01106                 freearc(nfa, a);
01107             }
01108             break;
01109         }
01110     }
01111     /*
01112      * First, clone to state if necessary to avoid other inarcs.
01113      */
01114 
01115     if (to->nins > 1) {
01116         s = newstate(nfa);
01117         if (NISERR()) {
01118             return 0;
01119         }
01120         copyouts(nfa, to, s);   /* duplicate outarcs */
01121         cparc(nfa, con, from, s);       /* move constraint */
01122         freearc(nfa, con);
01123         to = s;
01124         con = to->ins;
01125     }
01126     assert(to->nins == 1);
01127 
01128     /*
01129      * Propagate the constraint into the to state's outarcs.
01130      */
01131 
01132     for (a = to->outs; a != NULL; a = nexta) {
01133         nexta = a->outchain;
01134         switch (combine(con, a)) {
01135         case INCOMPATIBLE:      /* destroy the arc */
01136             freearc(nfa, a);
01137             break;
01138         case SATISFIED:         /* no action needed */
01139             break;
01140         case COMPATIBLE:        /* swap the two arcs, more or less */
01141             s = newstate(nfa);
01142             if (NISERR()) {
01143                 return 0;
01144             }
01145             cparc(nfa, con, s, a->to);  /* anticipate move */
01146             cparc(nfa, a, from, s);
01147             if (NISERR()) {
01148                 return 0;
01149             }
01150             freearc(nfa, a);
01151             break;
01152         default:
01153             assert(NOTREACHED);
01154             break;
01155         }
01156     }
01157 
01158     /*
01159      * Remaining outarcs, if any, incorporate the constraint.
01160      */
01161 
01162     moveouts(nfa, to, from);
01163     dropstate(nfa, to);         /* will free the constraint */
01164     return 1;
01165 }
01166 
01167 /*
01168  - combine - constraint lands on an arc, what happens?
01169  ^ #def INCOMPATIBLE    1       // destroys arc
01170  ^ #def SATISFIED       2       // constraint satisfied
01171  ^ #def COMPATIBLE      3       // compatible but not satisfied yet
01172  ^ static int combine(struct arc *, struct arc *);
01173  */
01174 static int
01175 combine(
01176     struct arc *con,
01177     struct arc *a)
01178 {
01179 #define CA(ct,at)       (((ct)<<CHAR_BIT) | (at))
01180 
01181     switch (CA(con->type, a->type)) {
01182     case CA('^', PLAIN):        /* newlines are handled separately */
01183     case CA('$', PLAIN):
01184         return INCOMPATIBLE;
01185         break;
01186     case CA(AHEAD, PLAIN):      /* color constraints meet colors */
01187     case CA(BEHIND, PLAIN):
01188         if (con->co == a->co) {
01189             return SATISFIED;
01190         }
01191         return INCOMPATIBLE;
01192         break;
01193     case CA('^', '^'):          /* collision, similar constraints */
01194     case CA('$', '$'):
01195     case CA(AHEAD, AHEAD):
01196     case CA(BEHIND, BEHIND):
01197         if (con->co == a->co) { /* true duplication */
01198             return SATISFIED;
01199         }
01200         return INCOMPATIBLE;
01201         break;
01202     case CA('^', BEHIND):       /* collision, dissimilar constraints */
01203     case CA(BEHIND, '^'):
01204     case CA('$', AHEAD):
01205     case CA(AHEAD, '$'):
01206         return INCOMPATIBLE;
01207         break;
01208     case CA('^', '$'):          /* constraints passing each other */
01209     case CA('^', AHEAD):
01210     case CA(BEHIND, '$'):
01211     case CA(BEHIND, AHEAD):
01212     case CA('$', '^'):
01213     case CA('$', BEHIND):
01214     case CA(AHEAD, '^'):
01215     case CA(AHEAD, BEHIND):
01216     case CA('^', LACON):
01217     case CA(BEHIND, LACON):
01218     case CA('$', LACON):
01219     case CA(AHEAD, LACON):
01220         return COMPATIBLE;
01221         break;
01222     }
01223     assert(NOTREACHED);
01224     return INCOMPATIBLE;        /* for benefit of blind compilers */
01225 }
01226 
01227 /*
01228  - fixempties - get rid of EMPTY arcs
01229  ^ static VOID fixempties(struct nfa *, FILE *);
01230  */
01231 static void
01232 fixempties(
01233     struct nfa *nfa,
01234     FILE *f)                    /* for debug output; NULL none */
01235 {
01236     struct state *s;
01237     struct state *nexts;
01238     struct arc *a;
01239     struct arc *nexta;
01240     int progress;
01241 
01242     /*
01243      * Find and eliminate empties until there are no more.
01244      */
01245 
01246     do {
01247         progress = 0;
01248         for (s = nfa->states; s != NULL && !NISERR()
01249                 && s->no != FREESTATE; s = nexts) {
01250             nexts = s->next;
01251             for (a = s->outs; a != NULL && !NISERR(); a = nexta) {
01252                 nexta = a->outchain;
01253                 if (a->type == EMPTY && unempty(nfa, a)) {
01254                     progress = 1;
01255                 }
01256                 assert(nexta == NULL || s->no != FREESTATE);
01257             }
01258         }
01259         if (progress && f != NULL) {
01260             dumpnfa(nfa, f);
01261         }
01262     } while (progress && !NISERR());
01263 }
01264 
01265 /*
01266  - unempty - optimize out an EMPTY arc, if possible
01267  * Actually, as it stands this function always succeeds, but the return value
01268  * is kept with an eye on possible future changes.
01269  ^ static int unempty(struct nfa *, struct arc *);
01270  */
01271 static int                      /* 0 couldn't, 1 could */
01272 unempty(
01273     struct nfa *nfa,
01274     struct arc *a)
01275 {
01276     struct state *from = a->from;
01277     struct state *to = a->to;
01278     int usefrom;                /* work on from, as opposed to to? */
01279 
01280     assert(a->type == EMPTY);
01281     assert(from != nfa->pre && to != nfa->post);
01282 
01283     if (from == to) {           /* vacuous loop */
01284         freearc(nfa, a);
01285         return 1;
01286     }
01287 
01288     /*
01289      * Decide which end to work on.
01290      */
01291 
01292     usefrom = 1;                /* default: attack from */
01293     if (from->nouts > to->nins) {
01294         usefrom = 0;
01295     } else if (from->nouts == to->nins) {
01296         /*
01297          * Decide on secondary issue: move/copy fewest arcs.
01298          */
01299 
01300         if (from->nins > to->nouts) {
01301             usefrom = 0;
01302         }
01303     }
01304 
01305     freearc(nfa, a);
01306     if (usefrom) {
01307         if (from->nouts == 0) {
01308             /*
01309              * Was the state's only outarc.
01310              */
01311 
01312             moveins(nfa, from, to);
01313             freestate(nfa, from);
01314         } else {
01315             copyins(nfa, from, to);
01316         }
01317     } else {
01318         if (to->nins == 0) {
01319             /*
01320              * Was the state's only inarc.
01321              */
01322 
01323             moveouts(nfa, to, from);
01324             freestate(nfa, to);
01325         } else {
01326             copyouts(nfa, to, from);
01327         }
01328     }
01329 
01330     return 1;
01331 }
01332 
01333 /*
01334  - cleanup - clean up NFA after optimizations
01335  ^ static VOID cleanup(struct nfa *);
01336  */
01337 static void
01338 cleanup(
01339     struct nfa *nfa)
01340 {
01341     struct state *s;
01342     struct state *nexts;
01343     int n;
01344 
01345     /*
01346      * Clear out unreachable or dead-end states. Use pre to mark reachable,
01347      * then post to mark can-reach-post.
01348      */
01349 
01350     markreachable(nfa, nfa->pre, NULL, nfa->pre);
01351     markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
01352     for (s = nfa->states; s != NULL; s = nexts) {
01353         nexts = s->next;
01354         if (s->tmp != nfa->post && !s->flag) {
01355             dropstate(nfa, s);
01356         }
01357     }
01358     assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
01359     cleartraverse(nfa, nfa->pre);
01360     assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
01361     /* the nins==0 (final unreachable) case will be caught later */
01362 
01363     /*
01364      * Renumber surviving states.
01365      */
01366 
01367     n = 0;
01368     for (s = nfa->states; s != NULL; s = s->next) {
01369         s->no = n++;
01370     }
01371     nfa->nstates = n;
01372 }
01373 
01374 /*
01375  - markreachable - recursive marking of reachable states
01376  ^ static VOID markreachable(struct nfa *, struct state *, struct state *,
01377  ^      struct state *);
01378  */
01379 static void
01380 markreachable(
01381     struct nfa *nfa,
01382     struct state *s,
01383     struct state *okay,         /* consider only states with this mark */
01384     struct state *mark)         /* the value to mark with */
01385 {
01386     struct arc *a;
01387 
01388     if (s->tmp != okay) {
01389         return;
01390     }
01391     s->tmp = mark;
01392 
01393     for (a = s->outs; a != NULL; a = a->outchain) {
01394         markreachable(nfa, a->to, okay, mark);
01395     }
01396 }
01397 
01398 /*
01399  - markcanreach - recursive marking of states which can reach here
01400  ^ static VOID markcanreach(struct nfa *, struct state *, struct state *,
01401  ^      struct state *);
01402  */
01403 static void
01404 markcanreach(
01405     struct nfa *nfa,
01406     struct state *s,
01407     struct state *okay,         /* consider only states with this mark */
01408     struct state *mark)         /* the value to mark with */
01409 {
01410     struct arc *a;
01411 
01412     if (s->tmp != okay) {
01413         return;
01414     }
01415     s->tmp = mark;
01416 
01417     for (a = s->ins; a != NULL; a = a->inchain) {
01418         markcanreach(nfa, a->from, okay, mark);
01419     }
01420 }
01421 
01422 /*
01423  - analyze - ascertain potentially-useful facts about an optimized NFA
01424  ^ static long analyze(struct nfa *);
01425  */
01426 static long                     /* re_info bits to be ORed in */
01427 analyze(
01428     struct nfa *nfa)
01429 {
01430     struct arc *a;
01431     struct arc *aa;
01432 
01433     if (nfa->pre->outs == NULL) {
01434         return REG_UIMPOSSIBLE;
01435     }
01436     for (a = nfa->pre->outs; a != NULL; a = a->outchain) {
01437         for (aa = a->to->outs; aa != NULL; aa = aa->outchain) {
01438             if (aa->to == nfa->post) {
01439                 return REG_UEMPTYMATCH;
01440             }
01441         }
01442     }
01443     return 0;
01444 }
01445 
01446 /*
01447  - compact - compact an NFA
01448  ^ static VOID compact(struct nfa *, struct cnfa *);
01449  */
01450 static void
01451 compact(
01452     struct nfa *nfa,
01453     struct cnfa *cnfa)
01454 {
01455     struct state *s;
01456     struct arc *a;
01457     size_t nstates;
01458     size_t narcs;
01459     struct carc *ca;
01460     struct carc *first;
01461 
01462     assert(!NISERR());
01463 
01464     nstates = 0;
01465     narcs = 0;
01466     for (s = nfa->states; s != NULL; s = s->next) {
01467         nstates++;
01468         narcs += 1 + s->nouts + 1;
01469         /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
01470     }
01471 
01472     cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
01473     cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
01474     if (cnfa->states == NULL || cnfa->arcs == NULL) {
01475         if (cnfa->states != NULL) {
01476             FREE(cnfa->states);
01477         }
01478         if (cnfa->arcs != NULL) {
01479             FREE(cnfa->arcs);
01480         }
01481         NERR(REG_ESPACE);
01482         return;
01483     }
01484     cnfa->nstates = nstates;
01485     cnfa->pre = nfa->pre->no;
01486     cnfa->post = nfa->post->no;
01487     cnfa->bos[0] = nfa->bos[0];
01488     cnfa->bos[1] = nfa->bos[1];
01489     cnfa->eos[0] = nfa->eos[0];
01490     cnfa->eos[1] = nfa->eos[1];
01491     cnfa->ncolors = maxcolor(nfa->cm) + 1;
01492     cnfa->flags = 0;
01493 
01494     ca = cnfa->arcs;
01495     for (s = nfa->states; s != NULL; s = s->next) {
01496         assert((size_t) s->no < nstates);
01497         cnfa->states[s->no] = ca;
01498         ca->co = 0;             /* clear and skip flags "arc" */
01499         ca++;
01500         first = ca;
01501         for (a = s->outs; a != NULL; a = a->outchain) {
01502             switch (a->type) {
01503             case PLAIN:
01504                 ca->co = a->co;
01505                 ca->to = a->to->no;
01506                 ca++;
01507                 break;
01508             case LACON:
01509                 assert(s->no != cnfa->pre);
01510                 ca->co = (color) (cnfa->ncolors + a->co);
01511                 ca->to = a->to->no;
01512                 ca++;
01513                 cnfa->flags |= HASLACONS;
01514                 break;
01515             default:
01516                 assert(NOTREACHED);
01517                 break;
01518             }
01519         }
01520         carcsort(first, ca-1);
01521         ca->co = COLORLESS;
01522         ca->to = 0;
01523         ca++;
01524     }
01525     assert(ca == &cnfa->arcs[narcs]);
01526     assert(cnfa->nstates != 0);
01527 
01528     /*
01529      * Mark no-progress states.
01530      */
01531 
01532     for (a = nfa->pre->outs; a != NULL; a = a->outchain) {
01533         cnfa->states[a->to->no]->co = 1;
01534     }
01535     cnfa->states[nfa->pre->no]->co = 1;
01536 }
01537 
01538 /*
01539  - carcsort - sort compacted-NFA arcs by color
01540  * Really dumb algorithm, but if the list is long enough for that to matter,
01541  * you're in real trouble anyway.
01542  ^ static VOID carcsort(struct carc *, struct carc *);
01543  */
01544 static void
01545 carcsort(
01546     struct carc *first,
01547     struct carc *last)
01548 {
01549     struct carc *p;
01550     struct carc *q;
01551     struct carc tmp;
01552 
01553     if (last - first <= 1) {
01554         return;
01555     }
01556 
01557     for (p = first; p <= last; p++) {
01558         for (q = p; q <= last; q++) {
01559             if (p->co > q->co || (p->co == q->co && p->to > q->to)) {
01560                 assert(p != q);
01561                 tmp = *p;
01562                 *p = *q;
01563                 *q = tmp;
01564             }
01565         }
01566     }
01567 }
01568 
01569 /*
01570  - freecnfa - free a compacted NFA
01571  ^ static VOID freecnfa(struct cnfa *);
01572  */
01573 static void
01574 freecnfa(
01575     struct cnfa *cnfa)
01576 {
01577     assert(cnfa->nstates != 0); /* not empty already */
01578     cnfa->nstates = 0;
01579     FREE(cnfa->states);
01580     FREE(cnfa->arcs);
01581 }
01582 
01583 /*
01584  - dumpnfa - dump an NFA in human-readable form
01585  ^ static VOID dumpnfa(struct nfa *, FILE *);
01586  */
01587 static void
01588 dumpnfa(
01589     struct nfa *nfa,
01590     FILE *f)
01591 {
01592 #ifdef REG_DEBUG
01593     struct state *s;
01594 
01595     fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
01596     if (nfa->bos[0] != COLORLESS) {
01597         fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
01598     }
01599     if (nfa->bos[1] != COLORLESS) {
01600         fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
01601     }
01602     if (nfa->eos[0] != COLORLESS) {
01603         fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
01604     }
01605     if (nfa->eos[1] != COLORLESS) {
01606         fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
01607     }
01608     fprintf(f, "\n");
01609     for (s = nfa->states; s != NULL; s = s->next) {
01610         dumpstate(s, f);
01611     }
01612     if (nfa->parent == NULL) {
01613         dumpcolors(nfa->cm, f);
01614     }
01615     fflush(f);
01616 #endif
01617 }
01618 
01619 #ifdef REG_DEBUG                /* subordinates of dumpnfa */
01620 /*
01621  ^ #ifdef REG_DEBUG
01622  */
01623 
01624 /*
01625  - dumpstate - dump an NFA state in human-readable form
01626  ^ static VOID dumpstate(struct state *, FILE *);
01627  */
01628 static void
01629 dumpstate(
01630     struct state *s,
01631     FILE *f)
01632 {
01633     struct arc *a;
01634 
01635     fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
01636             (s->flag) ? s->flag : '.');
01637     if (s->prev != NULL && s->prev->next != s) {
01638         fprintf(f, "\tstate chain bad\n");
01639     }
01640     if (s->nouts == 0) {
01641         fprintf(f, "\tno out arcs\n");
01642     } else {
01643         dumparcs(s, f);
01644     }
01645     fflush(f);
01646     for (a = s->ins; a != NULL; a = a->inchain) {
01647         if (a->to != s) {
01648             fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
01649                     a->from->no, a->to->no, s->no);
01650         }
01651     }
01652 }
01653 
01654 /*
01655  - dumparcs - dump out-arcs in human-readable form
01656  ^ static VOID dumparcs(struct state *, FILE *);
01657  */
01658 static void
01659 dumparcs(
01660     struct state *s,
01661     FILE *f)
01662 {
01663     int pos;
01664 
01665     assert(s->nouts > 0);
01666     /* printing arcs in reverse order is usually clearer */
01667     pos = dumprarcs(s->outs, s, f, 1);
01668     if (pos != 1) {
01669         fprintf(f, "\n");
01670     }
01671 }
01672 
01673 /*
01674  - dumprarcs - dump remaining outarcs, recursively, in reverse order
01675  ^ static int dumprarcs(struct arc *, struct state *, FILE *, int);
01676  */
01677 static int                      /* resulting print position */
01678 dumprarcs(
01679     struct arc *a,
01680     struct state *s,
01681     FILE *f,
01682     int pos)                    /* initial print position */
01683 {
01684     if (a->outchain != NULL) {
01685         pos = dumprarcs(a->outchain, s, f, pos);
01686     }
01687     dumparc(a, s, f);
01688     if (pos == 5) {
01689         fprintf(f, "\n");
01690         pos = 1;
01691     } else {
01692         pos++;
01693     }
01694     return pos;
01695 }
01696 
01697 /*
01698  - dumparc - dump one outarc in readable form, including prefixing tab
01699  ^ static VOID dumparc(struct arc *, struct state *, FILE *);
01700  */
01701 static void
01702 dumparc(
01703     struct arc *a,
01704     struct state *s,
01705     FILE *f)
01706 {
01707     struct arc *aa;
01708     struct arcbatch *ab;
01709 
01710     fprintf(f, "\t");
01711     switch (a->type) {
01712     case PLAIN:
01713         fprintf(f, "[%ld]", (long) a->co);
01714         break;
01715     case AHEAD:
01716         fprintf(f, ">%ld>", (long) a->co);
01717         break;
01718     case BEHIND:
01719         fprintf(f, "<%ld<", (long) a->co);
01720         break;
01721     case LACON:
01722         fprintf(f, ":%ld:", (long) a->co);
01723         break;
01724     case '^':
01725     case '$':
01726         fprintf(f, "%c%d", a->type, (int) a->co);
01727         break;
01728     case EMPTY:
01729         break;
01730     default:
01731         fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
01732         break;
01733     }
01734     if (a->from != s) {
01735         fprintf(f, "?%d?", a->from->no);
01736     }
01737     for (ab = &a->from->oas; ab != NULL; ab = ab->next) {
01738         for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) {
01739             if (aa == a) {
01740                 break;          /* NOTE BREAK OUT */
01741             }
01742         }
01743         if (aa < &ab->a[ABSIZE]) {      /* propagate break */
01744             break;              /* NOTE BREAK OUT */
01745         }
01746     }
01747     if (ab == NULL) {
01748         fprintf(f, "?!?");      /* not in allocated space */
01749     }
01750     fprintf(f, "->");
01751     if (a->to == NULL) {
01752         fprintf(f, "NULL");
01753         return;
01754     }
01755     fprintf(f, "%d", a->to->no);
01756     for (aa = a->to->ins; aa != NULL; aa = aa->inchain) {
01757         if (aa == a) {
01758             break;              /* NOTE BREAK OUT */
01759         }
01760     }
01761     if (aa == NULL) {
01762         fprintf(f, "?!?");      /* missing from in-chain */
01763     }
01764 }
01765 
01766 /*
01767  ^ #endif
01768  */
01769 #endif                          /* ifdef REG_DEBUG */
01770 
01771 /*
01772  - dumpcnfa - dump a compacted NFA in human-readable form
01773  ^ static VOID dumpcnfa(struct cnfa *, FILE *);
01774  */
01775 static void
01776 dumpcnfa(
01777     struct cnfa *cnfa,
01778     FILE *f)
01779 {
01780 #ifdef REG_DEBUG
01781     int st;
01782 
01783     fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
01784     if (cnfa->bos[0] != COLORLESS) {
01785         fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
01786     }
01787     if (cnfa->bos[1] != COLORLESS) {
01788         fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
01789     }
01790     if (cnfa->eos[0] != COLORLESS) {
01791         fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
01792     }
01793     if (cnfa->eos[1] != COLORLESS) {
01794         fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
01795     }
01796     if (cnfa->flags&HASLACONS) {
01797         fprintf(f, ", haslacons");
01798     }
01799     fprintf(f, "\n");
01800     for (st = 0; st < cnfa->nstates; st++) {
01801         dumpcstate(st, cnfa->states[st], cnfa, f);
01802     }
01803     fflush(f);
01804 #endif
01805 }
01806 
01807 #ifdef REG_DEBUG                /* subordinates of dumpcnfa */
01808 /*
01809  ^ #ifdef REG_DEBUG
01810  */
01811 
01812 /*
01813  - dumpcstate - dump a compacted-NFA state in human-readable form
01814  ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *);
01815  */
01816 static void
01817 dumpcstate(
01818     int st,
01819     struct carc *ca,
01820     struct cnfa *cnfa,
01821     FILE *f)
01822 {
01823     int i;
01824     int pos;
01825 
01826     fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
01827     pos = 1;
01828     for (i = 1; ca[i].co != COLORLESS; i++) {
01829         if (ca[i].co < cnfa->ncolors) {
01830             fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
01831         } else {
01832             fprintf(f, "\t:%ld:->%d", (long) ca[i].co-cnfa->ncolors,ca[i].to);
01833         }
01834         if (pos == 5) {
01835             fprintf(f, "\n");
01836             pos = 1;
01837         } else {
01838             pos++;
01839         }
01840     }
01841     if (i == 1 || pos != 1) {
01842         fprintf(f, "\n");
01843     }
01844     fflush(f);
01845 }
01846 
01847 /*
01848  ^ #endif
01849  */
01850 #endif                          /* ifdef REG_DEBUG */
01851 
01852 /*
01853  * Local Variables:
01854  * mode: c
01855  * c-basic-offset: 4
01856  * fill-column: 78
01857  * End:
01858  */



Generated on Wed Mar 12 12:18:10 2008 by  doxygen 1.5.1