regc_nfa.cGo to the documentation of this file.00001 /* 00002 * NFA utilities. 00003 * This file is #included by regcomp.c. 00004 * 00005 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 00006 * 00007 * Development of this software was funded, in part, by Cray Research Inc., 00008 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 00009 * Corporation, none of whom are responsible for the results. The author 00010 * thanks all of them. 00011 * 00012 * Redistribution and use in source and binary forms -- with or without 00013 * modification -- are permitted for any purpose, provided that 00014 * redistributions in source form retain this entire copyright notice and 00015 * indicate the origin and nature of any modifications. 00016 * 00017 * I'd appreciate being given credit for this package in the documentation of 00018 * software which uses it, but that is not a requirement. 00019 * 00020 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 00021 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 00022 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 00023 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00024 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00025 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 00026 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00027 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 00028 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 00029 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 * One or two things that technically ought to be in here are actually in 00032 * color.c, thanks to some incestuous relationships in the color chains. 00033 */ 00034 00035 #define NISERR() VISERR(nfa->v) 00036 #define NERR(e) VERR(nfa->v, (e)) 00037 00038 /* 00039 - newnfa - set up an NFA 00040 ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); 00041 */ 00042 static struct nfa * /* the NFA, or NULL */ 00043 newnfa( 00044 struct vars *v, 00045 struct colormap *cm, 00046 struct nfa *parent) /* NULL if primary NFA */ 00047 { 00048 struct nfa *nfa; 00049 00050 nfa = (struct nfa *) MALLOC(sizeof(struct nfa)); 00051 if (nfa == NULL) { 00052 return NULL; 00053 } 00054 00055 nfa->states = NULL; 00056 nfa->slast = NULL; 00057 nfa->free = NULL; 00058 nfa->nstates = 0; 00059 nfa->cm = cm; 00060 nfa->v = v; 00061 nfa->size = 0; 00062 nfa->bos[0] = nfa->bos[1] = COLORLESS; 00063 nfa->eos[0] = nfa->eos[1] = COLORLESS; 00064 nfa->parent = parent; /* Precedes newfstate so parent is valid. */ 00065 nfa->post = newfstate(nfa, '@'); /* number 0 */ 00066 nfa->pre = newfstate(nfa, '>'); /* number 1 */ 00067 00068 nfa->init = newstate(nfa); /* May become invalid later. */ 00069 nfa->final = newstate(nfa); 00070 if (ISERR()) { 00071 freenfa(nfa); 00072 return NULL; 00073 } 00074 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); 00075 newarc(nfa, '^', 1, nfa->pre, nfa->init); 00076 newarc(nfa, '^', 0, nfa->pre, nfa->init); 00077 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post); 00078 newarc(nfa, '$', 1, nfa->final, nfa->post); 00079 newarc(nfa, '$', 0, nfa->final, nfa->post); 00080 00081 if (ISERR()) { 00082 freenfa(nfa); 00083 return NULL; 00084 } 00085 return nfa; 00086 } 00087 00088 /* 00089 - TooManyStates - checks if the max states exceeds the compile-time value 00090 ^ static int TooManyStates(struct nfa *); 00091 */ 00092 static int 00093 TooManyStates( 00094 struct nfa *nfa) 00095 { 00096 struct nfa *parent = nfa->parent; 00097 size_t sz = nfa->size; 00098 00099 while (parent != NULL) { 00100 sz = parent->size; 00101 parent = parent->parent; 00102 } 00103 if (sz > REG_MAX_STATES) { 00104 return 1; 00105 } 00106 return 0; 00107 } 00108 00109 /* 00110 - IncrementSize - increases the tracked size of the NFA and its parents. 00111 ^ static void IncrementSize(struct nfa *); 00112 */ 00113 static void 00114 IncrementSize( 00115 struct nfa *nfa) 00116 { 00117 struct nfa *parent = nfa->parent; 00118 00119 nfa->size++; 00120 while (parent != NULL) { 00121 parent->size++; 00122 parent = parent->parent; 00123 } 00124 } 00125 00126 /* 00127 - DecrementSize - increases the tracked size of the NFA and its parents. 00128 ^ static void DecrementSize(struct nfa *); 00129 */ 00130 static void 00131 DecrementSize( 00132 struct nfa *nfa) 00133 { 00134 struct nfa *parent = nfa->parent; 00135 00136 nfa->size--; 00137 while (parent != NULL) { 00138 parent->size--; 00139 parent = parent->parent; 00140 } 00141 } 00142 00143 /* 00144 - freenfa - free an entire NFA 00145 ^ static VOID freenfa(struct nfa *); 00146 */ 00147 static void 00148 freenfa( 00149 struct nfa *nfa) 00150 { 00151 struct state *s; 00152 00153 while ((s = nfa->states) != NULL) { 00154 s->nins = s->nouts = 0; /* don't worry about arcs */ 00155 freestate(nfa, s); 00156 } 00157 while ((s = nfa->free) != NULL) { 00158 nfa->free = s->next; 00159 destroystate(nfa, s); 00160 } 00161 00162 nfa->slast = NULL; 00163 nfa->nstates = -1; 00164 nfa->pre = NULL; 00165 nfa->post = NULL; 00166 FREE(nfa); 00167 } 00168 00169 /* 00170 - newstate - allocate an NFA state, with zero flag value 00171 ^ static struct state *newstate(struct nfa *); 00172 */ 00173 static struct state * /* NULL on error */ 00174 newstate( 00175 struct nfa *nfa) 00176 { 00177 struct state *s; 00178 00179 if (TooManyStates(nfa)) { 00180 /* XXX: add specific error for this */ 00181 NERR(REG_ETOOBIG); 00182 return NULL; 00183 } 00184 if (nfa->free != NULL) { 00185 s = nfa->free; 00186 nfa->free = s->next; 00187 } else { 00188 s = (struct state *) MALLOC(sizeof(struct state)); 00189 if (s == NULL) { 00190 NERR(REG_ESPACE); 00191 return NULL; 00192 } 00193 s->oas.next = NULL; 00194 s->free = NULL; 00195 s->noas = 0; 00196 } 00197 00198 assert(nfa->nstates >= 0); 00199 s->no = nfa->nstates++; 00200 s->flag = 0; 00201 if (nfa->states == NULL) { 00202 nfa->states = s; 00203 } 00204 s->nins = 0; 00205 s->ins = NULL; 00206 s->nouts = 0; 00207 s->outs = NULL; 00208 s->tmp = NULL; 00209 s->next = NULL; 00210 if (nfa->slast != NULL) { 00211 assert(nfa->slast->next == NULL); 00212 nfa->slast->next = s; 00213 } 00214 s->prev = nfa->slast; 00215 nfa->slast = s; 00216 00217 /* 00218 * Track the current size and the parent size. 00219 */ 00220 00221 IncrementSize(nfa); 00222 return s; 00223 } 00224 00225 /* 00226 - newfstate - allocate an NFA state with a specified flag value 00227 ^ static struct state *newfstate(struct nfa *, int flag); 00228 */ 00229 static struct state * /* NULL on error */ 00230 newfstate( 00231 struct nfa *nfa, 00232 int flag) 00233 { 00234 struct state *s; 00235 00236 s = newstate(nfa); 00237 if (s != NULL) { 00238 s->flag = (char) flag; 00239 } 00240 return s; 00241 } 00242 00243 /* 00244 - dropstate - delete a state's inarcs and outarcs and free it 00245 ^ static VOID dropstate(struct nfa *, struct state *); 00246 */ 00247 static void 00248 dropstate( 00249 struct nfa *nfa, 00250 struct state *s) 00251 { 00252 struct arc *a; 00253 00254 while ((a = s->ins) != NULL) { 00255 freearc(nfa, a); 00256 } 00257 while ((a = s->outs) != NULL) { 00258 freearc(nfa, a); 00259 } 00260 freestate(nfa, s); 00261 } 00262 00263 /* 00264 - freestate - free a state, which has no in-arcs or out-arcs 00265 ^ static VOID freestate(struct nfa *, struct state *); 00266 */ 00267 static void 00268 freestate( 00269 struct nfa *nfa, 00270 struct state *s) 00271 { 00272 assert(s != NULL); 00273 assert(s->nins == 0 && s->nouts == 0); 00274 00275 s->no = FREESTATE; 00276 s->flag = 0; 00277 if (s->next != NULL) { 00278 s->next->prev = s->prev; 00279 } else { 00280 assert(s == nfa->slast); 00281 nfa->slast = s->prev; 00282 } 00283 if (s->prev != NULL) { 00284 s->prev->next = s->next; 00285 } else { 00286 assert(s == nfa->states); 00287 nfa->states = s->next; 00288 } 00289 s->prev = NULL; 00290 s->next = nfa->free; /* don't delete it, put it on the free list */ 00291 nfa->free = s; 00292 DecrementSize(nfa); 00293 } 00294 00295 /* 00296 - destroystate - really get rid of an already-freed state 00297 ^ static VOID destroystate(struct nfa *, struct state *); 00298 */ 00299 static void 00300 destroystate( 00301 struct nfa *nfa, 00302 struct state *s) 00303 { 00304 struct arcbatch *ab; 00305 struct arcbatch *abnext; 00306 00307 assert(s->no == FREESTATE); 00308 for (ab=s->oas.next ; ab!=NULL ; ab=abnext) { 00309 abnext = ab->next; 00310 FREE(ab); 00311 } 00312 s->ins = NULL; 00313 s->outs = NULL; 00314 s->next = NULL; 00315 FREE(s); 00316 } 00317 00318 /* 00319 - newarc - set up a new arc within an NFA 00320 ^ static VOID newarc(struct nfa *, int, pcolor, struct state *, 00321 ^ struct state *); 00322 */ 00323 static void 00324 newarc( 00325 struct nfa *nfa, 00326 int t, 00327 pcolor co, 00328 struct state *from, 00329 struct state *to) 00330 { 00331 struct arc *a; 00332 00333 assert(from != NULL && to != NULL); 00334 00335 /* 00336 * Check for duplicates. 00337 */ 00338 00339 for (a=from->outs ; a!=NULL ; a=a->outchain) { 00340 if (a->to == to && a->co == co && a->type == t) { 00341 return; 00342 } 00343 } 00344 00345 a = allocarc(nfa, from); 00346 if (NISERR()) { 00347 return; 00348 } 00349 assert(a != NULL); 00350 00351 a->type = t; 00352 a->co = (color) co; 00353 a->to = to; 00354 a->from = from; 00355 00356 /* 00357 * Put the new arc on the beginning, not the end, of the chains. Not only 00358 * is this easier, it has the very useful side effect that deleting the 00359 * most-recently-added arc is the cheapest case rather than the most 00360 * expensive one. 00361 */ 00362 00363 a->inchain = to->ins; 00364 to->ins = a; 00365 a->outchain = from->outs; 00366 from->outs = a; 00367 00368 from->nouts++; 00369 to->nins++; 00370 00371 if (COLORED(a) && nfa->parent == NULL) { 00372 colorchain(nfa->cm, a); 00373 } 00374 } 00375 00376 /* 00377 - allocarc - allocate a new out-arc within a state 00378 ^ static struct arc *allocarc(struct nfa *, struct state *); 00379 */ 00380 static struct arc * /* NULL for failure */ 00381 allocarc( 00382 struct nfa *nfa, 00383 struct state *s) 00384 { 00385 struct arc *a; 00386 00387 /* 00388 * Shortcut 00389 */ 00390 00391 if (s->free == NULL && s->noas < ABSIZE) { 00392 a = &s->oas.a[s->noas]; 00393 s->noas++; 00394 return a; 00395 } 00396 00397 /* 00398 * if none at hand, get more 00399 */ 00400 00401 if (s->free == NULL) { 00402 struct arcbatch *newAb = (struct arcbatch *) 00403 MALLOC(sizeof(struct arcbatch)); 00404 int i; 00405 00406 if (newAb == NULL) { 00407 NERR(REG_ESPACE); 00408 return NULL; 00409 } 00410 newAb->next = s->oas.next; 00411 s->oas.next = newAb; 00412 00413 for (i=0 ; i<ABSIZE ; i++) { 00414 newAb->a[i].type = 0; 00415 newAb->a[i].freechain = &newAb->a[i+1]; 00416 } 00417 newAb->a[ABSIZE-1].freechain = NULL; 00418 s->free = &newAb->a[0]; 00419 } 00420 assert(s->free != NULL); 00421 00422 a = s->free; 00423 s->free = a->freechain; 00424 return a; 00425 } 00426 00427 /* 00428 - freearc - free an arc 00429 ^ static VOID freearc(struct nfa *, struct arc *); 00430 */ 00431 static void 00432 freearc( 00433 struct nfa *nfa, 00434 struct arc *victim) 00435 { 00436 struct state *from = victim->from; 00437 struct state *to = victim->to; 00438 struct arc *a; 00439 00440 assert(victim->type != 0); 00441 00442 /* 00443 * Take it off color chain if necessary. 00444 */ 00445 00446 if (COLORED(victim) && nfa->parent == NULL) { 00447 uncolorchain(nfa->cm, victim); 00448 } 00449 00450 /* 00451 * Take it off source's out-chain. 00452 */ 00453 00454 assert(from != NULL); 00455 assert(from->outs != NULL); 00456 a = from->outs; 00457 if (a == victim) { /* simple case: first in chain */ 00458 from->outs = victim->outchain; 00459 } else { 00460 for (; a!=NULL && a->outchain!=victim ; a=a->outchain) { 00461 continue; 00462 } 00463 assert(a != NULL); 00464 a->outchain = victim->outchain; 00465 } 00466 from->nouts--; 00467 00468 /* 00469 * Take it off target's in-chain. 00470 */ 00471 00472 assert(to != NULL); 00473 assert(to->ins != NULL); 00474 a = to->ins; 00475 if (a == victim) { /* simple case: first in chain */ 00476 to->ins = victim->inchain; 00477 } else { 00478 for (; a->inchain!=victim ; a=a->inchain) { 00479 assert(a->inchain != NULL); 00480 continue; 00481 } 00482 a->inchain = victim->inchain; 00483 } 00484 to->nins--; 00485 00486 /* 00487 * Clean up and place on free list. 00488 */ 00489 00490 victim->type = 0; 00491 victim->from = NULL; /* precautions... */ 00492 victim->to = NULL; 00493 victim->inchain = NULL; 00494 victim->outchain = NULL; 00495 victim->freechain = from->free; 00496 from->free = victim; 00497 } 00498 00499 /* 00500 - findarc - find arc, if any, from given source with given type and color 00501 * If there is more than one such arc, the result is random. 00502 ^ static struct arc *findarc(struct state *, int, pcolor); 00503 */ 00504 static struct arc * 00505 findarc( 00506 struct state *s, 00507 int type, 00508 pcolor co) 00509 { 00510 struct arc *a; 00511 00512 for (a=s->outs ; a!=NULL ; a=a->outchain) { 00513 if (a->type == type && a->co == co) { 00514 return a; 00515 } 00516 } 00517 return NULL; 00518 } 00519 00520 /* 00521 - cparc - allocate a new arc within an NFA, copying details from old one 00522 ^ static VOID cparc(struct nfa *, struct arc *, struct state *, 00523 ^ struct state *); 00524 */ 00525 static void 00526 cparc( 00527 struct nfa *nfa, 00528 struct arc *oa, 00529 struct state *from, 00530 struct state *to) 00531 { 00532 newarc(nfa, oa->type, oa->co, from, to); 00533 } 00534 00535 /* 00536 - moveins - move all in arcs of a state to another state 00537 * You might think this could be done better by just updating the 00538 * existing arcs, and you would be right if it weren't for the desire 00539 * for duplicate suppression, which makes it easier to just make new 00540 * ones to exploit the suppression built into newarc. 00541 ^ static VOID moveins(struct nfa *, struct state *, struct state *); 00542 */ 00543 static void 00544 moveins( 00545 struct nfa *nfa, 00546 struct state *oldState, 00547 struct state *newState) 00548 { 00549 struct arc *a; 00550 00551 assert(oldState != newState); 00552 00553 while ((a = oldState->ins) != NULL) { 00554 cparc(nfa, a, a->from, newState); 00555 freearc(nfa, a); 00556 } 00557 assert(oldState->nins == 0); 00558 assert(oldState->ins == NULL); 00559 } 00560 00561 /* 00562 - copyins - copy all in arcs of a state to another state 00563 ^ static VOID copyins(struct nfa *, struct state *, struct state *); 00564 */ 00565 static void 00566 copyins( 00567 struct nfa *nfa, 00568 struct state *oldState, 00569 struct state *newState) 00570 { 00571 struct arc *a; 00572 00573 assert(oldState != newState); 00574 00575 for (a=oldState->ins ; a!=NULL ; a=a->inchain) { 00576 cparc(nfa, a, a->from, newState); 00577 } 00578 } 00579 00580 /* 00581 - moveouts - move all out arcs of a state to another state 00582 ^ static VOID moveouts(struct nfa *, struct state *, struct state *); 00583 */ 00584 static void 00585 moveouts( 00586 struct nfa *nfa, 00587 struct state *oldState, 00588 struct state *newState) 00589 { 00590 struct arc *a; 00591 00592 assert(oldState != newState); 00593 00594 while ((a = oldState->outs) != NULL) { 00595 cparc(nfa, a, newState, a->to); 00596 freearc(nfa, a); 00597 } 00598 } 00599 00600 /* 00601 - copyouts - copy all out arcs of a state to another state 00602 ^ static VOID copyouts(struct nfa *, struct state *, struct state *); 00603 */ 00604 static void 00605 copyouts( 00606 struct nfa *nfa, 00607 struct state *oldState, 00608 struct state *newState) 00609 { 00610 struct arc *a; 00611 00612 assert(oldState != newState); 00613 00614 for (a=oldState->outs ; a!=NULL ; a=a->outchain) { 00615 cparc(nfa, a, newState, a->to); 00616 } 00617 } 00618 00619 /* 00620 - cloneouts - copy out arcs of a state to another state pair, modifying type 00621 ^ static VOID cloneouts(struct nfa *, struct state *, struct state *, 00622 ^ struct state *, int); 00623 */ 00624 static void 00625 cloneouts( 00626 struct nfa *nfa, 00627 struct state *old, 00628 struct state *from, 00629 struct state *to, 00630 int type) 00631 { 00632 struct arc *a; 00633 00634 assert(old != from); 00635 00636 for (a=old->outs ; a!=NULL ; a=a->outchain) { 00637 newarc(nfa, type, a->co, from, to); 00638 } 00639 } 00640 00641 /* 00642 - delsub - delete a sub-NFA, updating subre pointers if necessary 00643 * This uses a recursive traversal of the sub-NFA, marking already-seen 00644 * states using their tmp pointer. 00645 ^ static VOID delsub(struct nfa *, struct state *, struct state *); 00646 */ 00647 static void 00648 delsub( 00649 struct nfa *nfa, 00650 struct state *lp, /* the sub-NFA goes from here... */ 00651 struct state *rp) /* ...to here, *not* inclusive */ 00652 { 00653 assert(lp != rp); 00654 00655 rp->tmp = rp; /* mark end */ 00656 00657 deltraverse(nfa, lp, lp); 00658 assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ 00659 assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ 00660 00661 rp->tmp = NULL; /* unmark end */ 00662 lp->tmp = NULL; /* and begin, marked by deltraverse */ 00663 } 00664 00665 /* 00666 - deltraverse - the recursive heart of delsub 00667 * This routine's basic job is to destroy all out-arcs of the state. 00668 ^ static VOID deltraverse(struct nfa *, struct state *, struct state *); 00669 */ 00670 static void 00671 deltraverse( 00672 struct nfa *nfa, 00673 struct state *leftend, 00674 struct state *s) 00675 { 00676 struct arc *a; 00677 struct state *to; 00678 00679 if (s->nouts == 0) { 00680 return; /* nothing to do */ 00681 } 00682 if (s->tmp != NULL) { 00683 return; /* already in progress */ 00684 } 00685 00686 s->tmp = s; /* mark as in progress */ 00687 00688 while ((a = s->outs) != NULL) { 00689 to = a->to; 00690 deltraverse(nfa, leftend, to); 00691 assert(to->nouts == 0 || to->tmp != NULL); 00692 freearc(nfa, a); 00693 if (to->nins == 0 && to->tmp == NULL) { 00694 assert(to->nouts == 0); 00695 freestate(nfa, to); 00696 } 00697 } 00698 00699 assert(s->no != FREESTATE); /* we're still here */ 00700 assert(s == leftend || s->nins != 0); /* and still reachable */ 00701 assert(s->nouts == 0); /* but have no outarcs */ 00702 00703 s->tmp = NULL; /* we're done here */ 00704 } 00705 00706 /* 00707 - dupnfa - duplicate sub-NFA 00708 * Another recursive traversal, this time using tmp to point to duplicates as 00709 * well as mark already-seen states. (You knew there was a reason why it's a 00710 * state pointer, didn't you? :-)) 00711 ^ static VOID dupnfa(struct nfa *, struct state *, struct state *, 00712 ^ struct state *, struct state *); 00713 */ 00714 static void 00715 dupnfa( 00716 struct nfa *nfa, 00717 struct state *start, /* duplicate of subNFA starting here */ 00718 struct state *stop, /* and stopping here */ 00719 struct state *from, /* stringing duplicate from here */ 00720 struct state *to) /* to here */ 00721 { 00722 if (start == stop) { 00723 newarc(nfa, EMPTY, 0, from, to); 00724 return; 00725 } 00726 00727 stop->tmp = to; 00728 duptraverse(nfa, start, from); 00729 /* done, except for clearing out the tmp pointers */ 00730 00731 stop->tmp = NULL; 00732 cleartraverse(nfa, start); 00733 } 00734 00735 /* 00736 - duptraverse - recursive heart of dupnfa 00737 ^ static VOID duptraverse(struct nfa *, struct state *, struct state *); 00738 */ 00739 static void 00740 duptraverse( 00741 struct nfa *nfa, 00742 struct state *s, 00743 struct state *stmp) /* s's duplicate, or NULL */ 00744 { 00745 struct arc *a; 00746 00747 if (s->tmp != NULL) { 00748 return; /* already done */ 00749 } 00750 00751 s->tmp = (stmp == NULL) ? newstate(nfa) : stmp; 00752 if (s->tmp == NULL) { 00753 assert(NISERR()); 00754 return; 00755 } 00756 00757 for (a=s->outs ; a!=NULL && !NISERR() ; a=a->outchain) { 00758 duptraverse(nfa, a->to, NULL); 00759 if (NISERR()) { 00760 break; 00761 } 00762 assert(a->to->tmp != NULL); 00763 cparc(nfa, a, s->tmp, a->to->tmp); 00764 } 00765 } 00766 00767 /* 00768 - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set 00769 ^ static VOID cleartraverse(struct nfa *, struct state *); 00770 */ 00771 static void 00772 cleartraverse( 00773 struct nfa *nfa, 00774 struct state *s) 00775 { 00776 struct arc *a; 00777 00778 if (s->tmp == NULL) { 00779 return; 00780 } 00781 s->tmp = NULL; 00782 00783 for (a=s->outs ; a!=NULL ; a=a->outchain) { 00784 cleartraverse(nfa, a->to); 00785 } 00786 } 00787 00788 /* 00789 - specialcolors - fill in special colors for an NFA 00790 ^ static VOID specialcolors(struct nfa *); 00791 */ 00792 static void 00793 specialcolors( 00794 struct nfa *nfa) 00795 { 00796 /* 00797 * False colors for BOS, BOL, EOS, EOL 00798 */ 00799 00800 if (nfa->parent == NULL) { 00801 nfa->bos[0] = pseudocolor(nfa->cm); 00802 nfa->bos[1] = pseudocolor(nfa->cm); 00803 nfa->eos[0] = pseudocolor(nfa->cm); 00804 nfa->eos[1] = pseudocolor(nfa->cm); 00805 } else { 00806 assert(nfa->parent->bos[0] != COLORLESS); 00807 nfa->bos[0] = nfa->parent->bos[0]; 00808 assert(nfa->parent->bos[1] != COLORLESS); 00809 nfa->bos[1] = nfa->parent->bos[1]; 00810 assert(nfa->parent->eos[0] != COLORLESS); 00811 nfa->eos[0] = nfa->parent->eos[0]; 00812 assert(nfa->parent->eos[1] != COLORLESS); 00813 nfa->eos[1] = nfa->parent->eos[1]; 00814 } 00815 } 00816 00817 /* 00818 - optimize - optimize an NFA 00819 ^ static long optimize(struct nfa *, FILE *); 00820 */ 00821 static long /* re_info bits */ 00822 optimize( 00823 struct nfa *nfa, 00824 FILE *f) /* for debug output; NULL none */ 00825 { 00826 int verbose = (f != NULL) ? 1 : 0; 00827 00828 if (verbose) { 00829 fprintf(f, "\ninitial cleanup:\n"); 00830 } 00831 cleanup(nfa); /* may simplify situation */ 00832 if (verbose) { 00833 dumpnfa(nfa, f); 00834 } 00835 if (verbose) { 00836 fprintf(f, "\nempties:\n"); 00837 } 00838 fixempties(nfa, f); /* get rid of EMPTY arcs */ 00839 if (verbose) { 00840 fprintf(f, "\nconstraints:\n"); 00841 } 00842 pullback(nfa, f); /* pull back constraints backward */ 00843 pushfwd(nfa, f); /* push fwd constraints forward */ 00844 if (verbose) { 00845 fprintf(f, "\nfinal cleanup:\n"); 00846 } 00847 cleanup(nfa); /* final tidying */ 00848 return analyze(nfa); /* and analysis */ 00849 } 00850 00851 /* 00852 - pullback - pull back constraints backward to (with luck) eliminate them 00853 ^ static VOID pullback(struct nfa *, FILE *); 00854 */ 00855 static void 00856 pullback( 00857 struct nfa *nfa, 00858 FILE *f) /* for debug output; NULL none */ 00859 { 00860 struct state *s; 00861 struct state *nexts; 00862 struct arc *a; 00863 struct arc *nexta; 00864 int progress; 00865 00866 /* 00867 * Find and pull until there are no more. 00868 */ 00869 00870 do { 00871 progress = 0; 00872 for (s=nfa->states ; s!=NULL && !NISERR() ; s=nexts) { 00873 nexts = s->next; 00874 for (a=s->outs ; a!=NULL && !NISERR() ; a=nexta) { 00875 nexta = a->outchain; 00876 if (a->type == '^' || a->type == BEHIND) { 00877 if (pull(nfa, a)) { 00878 progress = 1; 00879 } 00880 } 00881 assert(nexta == NULL || s->no != FREESTATE); 00882 } 00883 } 00884 if (progress && f != NULL) { 00885 dumpnfa(nfa, f); 00886 } 00887 } while (progress && !NISERR()); 00888 if (NISERR()) { 00889 return; 00890 } 00891 00892 for (a=nfa->pre->outs ; a!=NULL ; a=nexta) { 00893 nexta = a->outchain; 00894 if (a->type == '^') { 00895 assert(a->co == 0 || a->co == 1); 00896 newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); 00897 freearc(nfa, a); 00898 } 00899 } 00900 } 00901 00902 /* 00903 - pull - pull a back constraint backward past its source state 00904 * A significant property of this function is that it deletes at most 00905 * one state -- the constraint's from state -- and only if the constraint 00906 * was that state's last outarc. 00907 ^ static int pull(struct nfa *, struct arc *); 00908 */ 00909 static int /* 0 couldn't, 1 could */ 00910 pull( 00911 struct nfa *nfa, 00912 struct arc *con) 00913 { 00914 struct state *from = con->from; 00915 struct state *to = con->to; 00916 struct arc *a; 00917 struct arc *nexta; 00918 struct state *s; 00919 00920 if (from == to) { /* circular constraint is pointless */ 00921 freearc(nfa, con); 00922 return 1; 00923 } 00924 if (from->flag) { /* can't pull back beyond start */ 00925 return 0; 00926 } 00927 if (from->nins == 0) { /* unreachable */ 00928 freearc(nfa, con); 00929 return 1; 00930 } 00931 00932 /* 00933 * DGP 2007-11-15: Cloning a state with a circular constraint on its list 00934 * of outs can lead to trouble [Bug 1810038], so get rid of them first. 00935 */ 00936 00937 for (a = from->outs; a != NULL; a = nexta) { 00938 nexta = a->outchain; 00939 switch (a->type) { 00940 case '^': 00941 case '$': 00942 case BEHIND: 00943 case AHEAD: 00944 if (from == a->to) { 00945 freearc(nfa, a); 00946 } 00947 break; 00948 } 00949 } 00950 00951 /* 00952 * First, clone from state if necessary to avoid other outarcs. 00953 */ 00954 00955 if (from->nouts > 1) { 00956 s = newstate(nfa); 00957 if (NISERR()) { 00958 return 0; 00959 } 00960 assert(to != from); /* con is not an inarc */ 00961 copyins(nfa, from, s); /* duplicate inarcs */ 00962 cparc(nfa, con, s, to); /* move constraint arc */ 00963 freearc(nfa, con); 00964 from = s; 00965 con = from->outs; 00966 } 00967 assert(from->nouts == 1); 00968 00969 /* 00970 * Propagate the constraint into the from state's inarcs. 00971 */ 00972 00973 for (a=from->ins ; a!=NULL ; a=nexta) { 00974 nexta = a->inchain; 00975 switch (combine(con, a)) { 00976 case INCOMPATIBLE: /* destroy the arc */ 00977 freearc(nfa, a); 00978 break; 00979 case SATISFIED: /* no action needed */ 00980 break; 00981 case COMPATIBLE: /* swap the two arcs, more or less */ 00982 s = newstate(nfa); 00983 if (NISERR()) { 00984 return 0; 00985 } 00986 cparc(nfa, a, s, to); /* anticipate move */ 00987 cparc(nfa, con, a->from, s); 00988 if (NISERR()) { 00989 return 0; 00990 } 00991 freearc(nfa, a); 00992 break; 00993 default: 00994 assert(NOTREACHED); 00995 break; 00996 } 00997 } 00998 00999 /* 01000 * Remaining inarcs, if any, incorporate the constraint. 01001 */ 01002 01003 moveins(nfa, from, to); 01004 dropstate(nfa, from); /* will free the constraint */ 01005 return 1; 01006 } 01007 01008 /* 01009 - pushfwd - push forward constraints forward to (with luck) eliminate them 01010 ^ static VOID pushfwd(struct nfa *, FILE *); 01011 */ 01012 static void 01013 pushfwd( 01014 struct nfa *nfa, 01015 FILE *f) /* for debug output; NULL none */ 01016 { 01017 struct state *s; 01018 struct state *nexts; 01019 struct arc *a; 01020 struct arc *nexta; 01021 int progress; 01022 01023 /* 01024 * Find and push until there are no more. 01025 */ 01026 01027 do { 01028 progress = 0; 01029 for (s=nfa->states ; s!=NULL && !NISERR() ; s=nexts) { 01030 nexts = s->next; 01031 for (a = s->ins; a != NULL && !NISERR(); a = nexta) { 01032 nexta = a->inchain; 01033 if (a->type == '$' || a->type == AHEAD) { 01034 if (push(nfa, a)) { 01035 progress = 1; 01036 } 01037 } 01038 assert(nexta == NULL || s->no != FREESTATE); 01039 } 01040 } 01041 if (progress && f != NULL) { 01042 dumpnfa(nfa, f); 01043 } 01044 } while (progress && !NISERR()); 01045 if (NISERR()) { 01046 return; 01047 } 01048 01049 for (a = nfa->post->ins; a != NULL; a = nexta) { 01050 nexta = a->inchain; 01051 if (a->type == '$') { 01052 assert(a->co == 0 || a->co == 1); 01053 newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); 01054 freearc(nfa, a); 01055 } 01056 } 01057 } 01058 01059 /* 01060 - push - push a forward constraint forward past its destination state 01061 * A significant property of this function is that it deletes at most 01062 * one state -- the constraint's to state -- and only if the constraint 01063 * was that state's last inarc. 01064 ^ static int push(struct nfa *, struct arc *); 01065 */ 01066 static int /* 0 couldn't, 1 could */ 01067 push( 01068 struct nfa *nfa, 01069 struct arc *con) 01070 { 01071 struct state *from = con->from; 01072 struct state *to = con->to; 01073 struct arc *a; 01074 struct arc *nexta; 01075 struct state *s; 01076 01077 if (to == from) { /* circular constraint is pointless */ 01078 freearc(nfa, con); 01079 return 1; 01080 } 01081 if (to->flag) { /* can't push forward beyond end */ 01082 return 0; 01083 } 01084 if (to->nouts == 0) { /* dead end */ 01085 freearc(nfa, con); 01086 return 1; 01087 } 01088 01089 /* 01090 * DGP 2007-11-15: Here we duplicate the same protections as appear 01091 * in pull() above to avoid troubles with cloning a state with a 01092 * circular constraint on its list of ins. It is not clear whether 01093 * this is necessary, or is protecting against a "can't happen". 01094 * Any test case that actually leads to a freearc() call here would 01095 * be a welcome addition to the test suite. 01096 */ 01097 01098 for (a = to->ins; a != NULL; a = nexta) { 01099 nexta = a->inchain; 01100 switch (a->type) { 01101 case '^': 01102 case '$': 01103 case BEHIND: 01104 case AHEAD: 01105 if (a->from == to) { 01106 freearc(nfa, a); 01107 } 01108 break; 01109 } 01110 } 01111 /* 01112 * First, clone to state if necessary to avoid other inarcs. 01113 */ 01114 01115 if (to->nins > 1) { 01116 s = newstate(nfa); 01117 if (NISERR()) { 01118 return 0; 01119 } 01120 copyouts(nfa, to, s); /* duplicate outarcs */ 01121 cparc(nfa, con, from, s); /* move constraint */ 01122 freearc(nfa, con); 01123 to = s; 01124 con = to->ins; 01125 } 01126 assert(to->nins == 1); 01127 01128 /* 01129 * Propagate the constraint into the to state's outarcs. 01130 */ 01131 01132 for (a = to->outs; a != NULL; a = nexta) { 01133 nexta = a->outchain; 01134 switch (combine(con, a)) { 01135 case INCOMPATIBLE: /* destroy the arc */ 01136 freearc(nfa, a); 01137 break; 01138 case SATISFIED: /* no action needed */ 01139 break; 01140 case COMPATIBLE: /* swap the two arcs, more or less */ 01141 s = newstate(nfa); 01142 if (NISERR()) { 01143 return 0; 01144 } 01145 cparc(nfa, con, s, a->to); /* anticipate move */ 01146 cparc(nfa, a, from, s); 01147 if (NISERR()) { 01148 return 0; 01149 } 01150 freearc(nfa, a); 01151 break; 01152 default: 01153 assert(NOTREACHED); 01154 break; 01155 } 01156 } 01157 01158 /* 01159 * Remaining outarcs, if any, incorporate the constraint. 01160 */ 01161 01162 moveouts(nfa, to, from); 01163 dropstate(nfa, to); /* will free the constraint */ 01164 return 1; 01165 } 01166 01167 /* 01168 - combine - constraint lands on an arc, what happens? 01169 ^ #def INCOMPATIBLE 1 // destroys arc 01170 ^ #def SATISFIED 2 // constraint satisfied 01171 ^ #def COMPATIBLE 3 // compatible but not satisfied yet 01172 ^ static int combine(struct arc *, struct arc *); 01173 */ 01174 static int 01175 combine( 01176 struct arc *con, 01177 struct arc *a) 01178 { 01179 #define CA(ct,at) (((ct)<<CHAR_BIT) | (at)) 01180 01181 switch (CA(con->type, a->type)) { 01182 case CA('^', PLAIN): /* newlines are handled separately */ 01183 case CA('$', PLAIN): 01184 return INCOMPATIBLE; 01185 break; 01186 case CA(AHEAD, PLAIN): /* color constraints meet colors */ 01187 case CA(BEHIND, PLAIN): 01188 if (con->co == a->co) { 01189 return SATISFIED; 01190 } 01191 return INCOMPATIBLE; 01192 break; 01193 case CA('^', '^'): /* collision, similar constraints */ 01194 case CA('$', '$'): 01195 case CA(AHEAD, AHEAD): 01196 case CA(BEHIND, BEHIND): 01197 if (con->co == a->co) { /* true duplication */ 01198 return SATISFIED; 01199 } 01200 return INCOMPATIBLE; 01201 break; 01202 case CA('^', BEHIND): /* collision, dissimilar constraints */ 01203 case CA(BEHIND, '^'): 01204 case CA('$', AHEAD): 01205 case CA(AHEAD, '$'): 01206 return INCOMPATIBLE; 01207 break; 01208 case CA('^', '$'): /* constraints passing each other */ 01209 case CA('^', AHEAD): 01210 case CA(BEHIND, '$'): 01211 case CA(BEHIND, AHEAD): 01212 case CA('$', '^'): 01213 case CA('$', BEHIND): 01214 case CA(AHEAD, '^'): 01215 case CA(AHEAD, BEHIND): 01216 case CA('^', LACON): 01217 case CA(BEHIND, LACON): 01218 case CA('$', LACON): 01219 case CA(AHEAD, LACON): 01220 return COMPATIBLE; 01221 break; 01222 } 01223 assert(NOTREACHED); 01224 return INCOMPATIBLE; /* for benefit of blind compilers */ 01225 } 01226 01227 /* 01228 - fixempties - get rid of EMPTY arcs 01229 ^ static VOID fixempties(struct nfa *, FILE *); 01230 */ 01231 static void 01232 fixempties( 01233 struct nfa *nfa, 01234 FILE *f) /* for debug output; NULL none */ 01235 { 01236 struct state *s; 01237 struct state *nexts; 01238 struct arc *a; 01239 struct arc *nexta; 01240 int progress; 01241 01242 /* 01243 * Find and eliminate empties until there are no more. 01244 */ 01245 01246 do { 01247 progress = 0; 01248 for (s = nfa->states; s != NULL && !NISERR() 01249 && s->no != FREESTATE; s = nexts) { 01250 nexts = s->next; 01251 for (a = s->outs; a != NULL && !NISERR(); a = nexta) { 01252 nexta = a->outchain; 01253 if (a->type == EMPTY && unempty(nfa, a)) { 01254 progress = 1; 01255 } 01256 assert(nexta == NULL || s->no != FREESTATE); 01257 } 01258 } 01259 if (progress && f != NULL) { 01260 dumpnfa(nfa, f); 01261 } 01262 } while (progress && !NISERR()); 01263 } 01264 01265 /* 01266 - unempty - optimize out an EMPTY arc, if possible 01267 * Actually, as it stands this function always succeeds, but the return value 01268 * is kept with an eye on possible future changes. 01269 ^ static int unempty(struct nfa *, struct arc *); 01270 */ 01271 static int /* 0 couldn't, 1 could */ 01272 unempty( 01273 struct nfa *nfa, 01274 struct arc *a) 01275 { 01276 struct state *from = a->from; 01277 struct state *to = a->to; 01278 int usefrom; /* work on from, as opposed to to? */ 01279 01280 assert(a->type == EMPTY); 01281 assert(from != nfa->pre && to != nfa->post); 01282 01283 if (from == to) { /* vacuous loop */ 01284 freearc(nfa, a); 01285 return 1; 01286 } 01287 01288 /* 01289 * Decide which end to work on. 01290 */ 01291 01292 usefrom = 1; /* default: attack from */ 01293 if (from->nouts > to->nins) { 01294 usefrom = 0; 01295 } else if (from->nouts == to->nins) { 01296 /* 01297 * Decide on secondary issue: move/copy fewest arcs. 01298 */ 01299 01300 if (from->nins > to->nouts) { 01301 usefrom = 0; 01302 } 01303 } 01304 01305 freearc(nfa, a); 01306 if (usefrom) { 01307 if (from->nouts == 0) { 01308 /* 01309 * Was the state's only outarc. 01310 */ 01311 01312 moveins(nfa, from, to); 01313 freestate(nfa, from); 01314 } else { 01315 copyins(nfa, from, to); 01316 } 01317 } else { 01318 if (to->nins == 0) { 01319 /* 01320 * Was the state's only inarc. 01321 */ 01322 01323 moveouts(nfa, to, from); 01324 freestate(nfa, to); 01325 } else { 01326 copyouts(nfa, to, from); 01327 } 01328 } 01329 01330 return 1; 01331 } 01332 01333 /* 01334 - cleanup - clean up NFA after optimizations 01335 ^ static VOID cleanup(struct nfa *); 01336 */ 01337 static void 01338 cleanup( 01339 struct nfa *nfa) 01340 { 01341 struct state *s; 01342 struct state *nexts; 01343 int n; 01344 01345 /* 01346 * Clear out unreachable or dead-end states. Use pre to mark reachable, 01347 * then post to mark can-reach-post. 01348 */ 01349 01350 markreachable(nfa, nfa->pre, NULL, nfa->pre); 01351 markcanreach(nfa, nfa->post, nfa->pre, nfa->post); 01352 for (s = nfa->states; s != NULL; s = nexts) { 01353 nexts = s->next; 01354 if (s->tmp != nfa->post && !s->flag) { 01355 dropstate(nfa, s); 01356 } 01357 } 01358 assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); 01359 cleartraverse(nfa, nfa->pre); 01360 assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); 01361 /* the nins==0 (final unreachable) case will be caught later */ 01362 01363 /* 01364 * Renumber surviving states. 01365 */ 01366 01367 n = 0; 01368 for (s = nfa->states; s != NULL; s = s->next) { 01369 s->no = n++; 01370 } 01371 nfa->nstates = n; 01372 } 01373 01374 /* 01375 - markreachable - recursive marking of reachable states 01376 ^ static VOID markreachable(struct nfa *, struct state *, struct state *, 01377 ^ struct state *); 01378 */ 01379 static void 01380 markreachable( 01381 struct nfa *nfa, 01382 struct state *s, 01383 struct state *okay, /* consider only states with this mark */ 01384 struct state *mark) /* the value to mark with */ 01385 { 01386 struct arc *a; 01387 01388 if (s->tmp != okay) { 01389 return; 01390 } 01391 s->tmp = mark; 01392 01393 for (a = s->outs; a != NULL; a = a->outchain) { 01394 markreachable(nfa, a->to, okay, mark); 01395 } 01396 } 01397 01398 /* 01399 - markcanreach - recursive marking of states which can reach here 01400 ^ static VOID markcanreach(struct nfa *, struct state *, struct state *, 01401 ^ struct state *); 01402 */ 01403 static void 01404 markcanreach( 01405 struct nfa *nfa, 01406 struct state *s, 01407 struct state *okay, /* consider only states with this mark */ 01408 struct state *mark) /* the value to mark with */ 01409 { 01410 struct arc *a; 01411 01412 if (s->tmp != okay) { 01413 return; 01414 } 01415 s->tmp = mark; 01416 01417 for (a = s->ins; a != NULL; a = a->inchain) { 01418 markcanreach(nfa, a->from, okay, mark); 01419 } 01420 } 01421 01422 /* 01423 - analyze - ascertain potentially-useful facts about an optimized NFA 01424 ^ static long analyze(struct nfa *); 01425 */ 01426 static long /* re_info bits to be ORed in */ 01427 analyze( 01428 struct nfa *nfa) 01429 { 01430 struct arc *a; 01431 struct arc *aa; 01432 01433 if (nfa->pre->outs == NULL) { 01434 return REG_UIMPOSSIBLE; 01435 } 01436 for (a = nfa->pre->outs; a != NULL; a = a->outchain) { 01437 for (aa = a->to->outs; aa != NULL; aa = aa->outchain) { 01438 if (aa->to == nfa->post) { 01439 return REG_UEMPTYMATCH; 01440 } 01441 } 01442 } 01443 return 0; 01444 } 01445 01446 /* 01447 - compact - compact an NFA 01448 ^ static VOID compact(struct nfa *, struct cnfa *); 01449 */ 01450 static void 01451 compact( 01452 struct nfa *nfa, 01453 struct cnfa *cnfa) 01454 { 01455 struct state *s; 01456 struct arc *a; 01457 size_t nstates; 01458 size_t narcs; 01459 struct carc *ca; 01460 struct carc *first; 01461 01462 assert(!NISERR()); 01463 01464 nstates = 0; 01465 narcs = 0; 01466 for (s = nfa->states; s != NULL; s = s->next) { 01467 nstates++; 01468 narcs += 1 + s->nouts + 1; 01469 /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ 01470 } 01471 01472 cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *)); 01473 cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc)); 01474 if (cnfa->states == NULL || cnfa->arcs == NULL) { 01475 if (cnfa->states != NULL) { 01476 FREE(cnfa->states); 01477 } 01478 if (cnfa->arcs != NULL) { 01479 FREE(cnfa->arcs); 01480 } 01481 NERR(REG_ESPACE); 01482 return; 01483 } 01484 cnfa->nstates = nstates; 01485 cnfa->pre = nfa->pre->no; 01486 cnfa->post = nfa->post->no; 01487 cnfa->bos[0] = nfa->bos[0]; 01488 cnfa->bos[1] = nfa->bos[1]; 01489 cnfa->eos[0] = nfa->eos[0]; 01490 cnfa->eos[1] = nfa->eos[1]; 01491 cnfa->ncolors = maxcolor(nfa->cm) + 1; 01492 cnfa->flags = 0; 01493 01494 ca = cnfa->arcs; 01495 for (s = nfa->states; s != NULL; s = s->next) { 01496 assert((size_t) s->no < nstates); 01497 cnfa->states[s->no] = ca; 01498 ca->co = 0; /* clear and skip flags "arc" */ 01499 ca++; 01500 first = ca; 01501 for (a = s->outs; a != NULL; a = a->outchain) { 01502 switch (a->type) { 01503 case PLAIN: 01504 ca->co = a->co; 01505 ca->to = a->to->no; 01506 ca++; 01507 break; 01508 case LACON: 01509 assert(s->no != cnfa->pre); 01510 ca->co = (color) (cnfa->ncolors + a->co); 01511 ca->to = a->to->no; 01512 ca++; 01513 cnfa->flags |= HASLACONS; 01514 break; 01515 default: 01516 assert(NOTREACHED); 01517 break; 01518 } 01519 } 01520 carcsort(first, ca-1); 01521 ca->co = COLORLESS; 01522 ca->to = 0; 01523 ca++; 01524 } 01525 assert(ca == &cnfa->arcs[narcs]); 01526 assert(cnfa->nstates != 0); 01527 01528 /* 01529 * Mark no-progress states. 01530 */ 01531 01532 for (a = nfa->pre->outs; a != NULL; a = a->outchain) { 01533 cnfa->states[a->to->no]->co = 1; 01534 } 01535 cnfa->states[nfa->pre->no]->co = 1; 01536 } 01537 01538 /* 01539 - carcsort - sort compacted-NFA arcs by color 01540 * Really dumb algorithm, but if the list is long enough for that to matter, 01541 * you're in real trouble anyway. 01542 ^ static VOID carcsort(struct carc *, struct carc *); 01543 */ 01544 static void 01545 carcsort( 01546 struct carc *first, 01547 struct carc *last) 01548 { 01549 struct carc *p; 01550 struct carc *q; 01551 struct carc tmp; 01552 01553 if (last - first <= 1) { 01554 return; 01555 } 01556 01557 for (p = first; p <= last; p++) { 01558 for (q = p; q <= last; q++) { 01559 if (p->co > q->co || (p->co == q->co && p->to > q->to)) { 01560 assert(p != q); 01561 tmp = *p; 01562 *p = *q; 01563 *q = tmp; 01564 } 01565 } 01566 } 01567 } 01568 01569 /* 01570 - freecnfa - free a compacted NFA 01571 ^ static VOID freecnfa(struct cnfa *); 01572 */ 01573 static void 01574 freecnfa( 01575 struct cnfa *cnfa) 01576 { 01577 assert(cnfa->nstates != 0); /* not empty already */ 01578 cnfa->nstates = 0; 01579 FREE(cnfa->states); 01580 FREE(cnfa->arcs); 01581 } 01582 01583 /* 01584 - dumpnfa - dump an NFA in human-readable form 01585 ^ static VOID dumpnfa(struct nfa *, FILE *); 01586 */ 01587 static void 01588 dumpnfa( 01589 struct nfa *nfa, 01590 FILE *f) 01591 { 01592 #ifdef REG_DEBUG 01593 struct state *s; 01594 01595 fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); 01596 if (nfa->bos[0] != COLORLESS) { 01597 fprintf(f, ", bos [%ld]", (long) nfa->bos[0]); 01598 } 01599 if (nfa->bos[1] != COLORLESS) { 01600 fprintf(f, ", bol [%ld]", (long) nfa->bos[1]); 01601 } 01602 if (nfa->eos[0] != COLORLESS) { 01603 fprintf(f, ", eos [%ld]", (long) nfa->eos[0]); 01604 } 01605 if (nfa->eos[1] != COLORLESS) { 01606 fprintf(f, ", eol [%ld]", (long) nfa->eos[1]); 01607 } 01608 fprintf(f, "\n"); 01609 for (s = nfa->states; s != NULL; s = s->next) { 01610 dumpstate(s, f); 01611 } 01612 if (nfa->parent == NULL) { 01613 dumpcolors(nfa->cm, f); 01614 } 01615 fflush(f); 01616 #endif 01617 } 01618 01619 #ifdef REG_DEBUG /* subordinates of dumpnfa */ 01620 /* 01621 ^ #ifdef REG_DEBUG 01622 */ 01623 01624 /* 01625 - dumpstate - dump an NFA state in human-readable form 01626 ^ static VOID dumpstate(struct state *, FILE *); 01627 */ 01628 static void 01629 dumpstate( 01630 struct state *s, 01631 FILE *f) 01632 { 01633 struct arc *a; 01634 01635 fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", 01636 (s->flag) ? s->flag : '.'); 01637 if (s->prev != NULL && s->prev->next != s) { 01638 fprintf(f, "\tstate chain bad\n"); 01639 } 01640 if (s->nouts == 0) { 01641 fprintf(f, "\tno out arcs\n"); 01642 } else { 01643 dumparcs(s, f); 01644 } 01645 fflush(f); 01646 for (a = s->ins; a != NULL; a = a->inchain) { 01647 if (a->to != s) { 01648 fprintf(f, "\tlink from %d to %d on %d's in-chain\n", 01649 a->from->no, a->to->no, s->no); 01650 } 01651 } 01652 } 01653 01654 /* 01655 - dumparcs - dump out-arcs in human-readable form 01656 ^ static VOID dumparcs(struct state *, FILE *); 01657 */ 01658 static void 01659 dumparcs( 01660 struct state *s, 01661 FILE *f) 01662 { 01663 int pos; 01664 01665 assert(s->nouts > 0); 01666 /* printing arcs in reverse order is usually clearer */ 01667 pos = dumprarcs(s->outs, s, f, 1); 01668 if (pos != 1) { 01669 fprintf(f, "\n"); 01670 } 01671 } 01672 01673 /* 01674 - dumprarcs - dump remaining outarcs, recursively, in reverse order 01675 ^ static int dumprarcs(struct arc *, struct state *, FILE *, int); 01676 */ 01677 static int /* resulting print position */ 01678 dumprarcs( 01679 struct arc *a, 01680 struct state *s, 01681 FILE *f, 01682 int pos) /* initial print position */ 01683 { 01684 if (a->outchain != NULL) { 01685 pos = dumprarcs(a->outchain, s, f, pos); 01686 } 01687 dumparc(a, s, f); 01688 if (pos == 5) { 01689 fprintf(f, "\n"); 01690 pos = 1; 01691 } else { 01692 pos++; 01693 } 01694 return pos; 01695 } 01696 01697 /* 01698 - dumparc - dump one outarc in readable form, including prefixing tab 01699 ^ static VOID dumparc(struct arc *, struct state *, FILE *); 01700 */ 01701 static void 01702 dumparc( 01703 struct arc *a, 01704 struct state *s, 01705 FILE *f) 01706 { 01707 struct arc *aa; 01708 struct arcbatch *ab; 01709 01710 fprintf(f, "\t"); 01711 switch (a->type) { 01712 case PLAIN: 01713 fprintf(f, "[%ld]", (long) a->co); 01714 break; 01715 case AHEAD: 01716 fprintf(f, ">%ld>", (long) a->co); 01717 break; 01718 case BEHIND: 01719 fprintf(f, "<%ld<", (long) a->co); 01720 break; 01721 case LACON: 01722 fprintf(f, ":%ld:", (long) a->co); 01723 break; 01724 case '^': 01725 case '$': 01726 fprintf(f, "%c%d", a->type, (int) a->co); 01727 break; 01728 case EMPTY: 01729 break; 01730 default: 01731 fprintf(f, "0x%x/0%lo", a->type, (long) a->co); 01732 break; 01733 } 01734 if (a->from != s) { 01735 fprintf(f, "?%d?", a->from->no); 01736 } 01737 for (ab = &a->from->oas; ab != NULL; ab = ab->next) { 01738 for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) { 01739 if (aa == a) { 01740 break; /* NOTE BREAK OUT */ 01741 } 01742 } 01743 if (aa < &ab->a[ABSIZE]) { /* propagate break */ 01744 break; /* NOTE BREAK OUT */ 01745 } 01746 } 01747 if (ab == NULL) { 01748 fprintf(f, "?!?"); /* not in allocated space */ 01749 } 01750 fprintf(f, "->"); 01751 if (a->to == NULL) { 01752 fprintf(f, "NULL"); 01753 return; 01754 } 01755 fprintf(f, "%d", a->to->no); 01756 for (aa = a->to->ins; aa != NULL; aa = aa->inchain) { 01757 if (aa == a) { 01758 break; /* NOTE BREAK OUT */ 01759 } 01760 } 01761 if (aa == NULL) { 01762 fprintf(f, "?!?"); /* missing from in-chain */ 01763 } 01764 } 01765 01766 /* 01767 ^ #endif 01768 */ 01769 #endif /* ifdef REG_DEBUG */ 01770 01771 /* 01772 - dumpcnfa - dump a compacted NFA in human-readable form 01773 ^ static VOID dumpcnfa(struct cnfa *, FILE *); 01774 */ 01775 static void 01776 dumpcnfa( 01777 struct cnfa *cnfa, 01778 FILE *f) 01779 { 01780 #ifdef REG_DEBUG 01781 int st; 01782 01783 fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); 01784 if (cnfa->bos[0] != COLORLESS) { 01785 fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]); 01786 } 01787 if (cnfa->bos[1] != COLORLESS) { 01788 fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]); 01789 } 01790 if (cnfa->eos[0] != COLORLESS) { 01791 fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]); 01792 } 01793 if (cnfa->eos[1] != COLORLESS) { 01794 fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]); 01795 } 01796 if (cnfa->flags&HASLACONS) { 01797 fprintf(f, ", haslacons"); 01798 } 01799 fprintf(f, "\n"); 01800 for (st = 0; st < cnfa->nstates; st++) { 01801 dumpcstate(st, cnfa->states[st], cnfa, f); 01802 } 01803 fflush(f); 01804 #endif 01805 } 01806 01807 #ifdef REG_DEBUG /* subordinates of dumpcnfa */ 01808 /* 01809 ^ #ifdef REG_DEBUG 01810 */ 01811 01812 /* 01813 - dumpcstate - dump a compacted-NFA state in human-readable form 01814 ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); 01815 */ 01816 static void 01817 dumpcstate( 01818 int st, 01819 struct carc *ca, 01820 struct cnfa *cnfa, 01821 FILE *f) 01822 { 01823 int i; 01824 int pos; 01825 01826 fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); 01827 pos = 1; 01828 for (i = 1; ca[i].co != COLORLESS; i++) { 01829 if (ca[i].co < cnfa->ncolors) { 01830 fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to); 01831 } else { 01832 fprintf(f, "\t:%ld:->%d", (long) ca[i].co-cnfa->ncolors,ca[i].to); 01833 } 01834 if (pos == 5) { 01835 fprintf(f, "\n"); 01836 pos = 1; 01837 } else { 01838 pos++; 01839 } 01840 } 01841 if (i == 1 || pos != 1) { 01842 fprintf(f, "\n"); 01843 } 01844 fflush(f); 01845 } 01846 01847 /* 01848 ^ #endif 01849 */ 01850 #endif /* ifdef REG_DEBUG */ 01851 01852 /* 01853 * Local Variables: 01854 * mode: c 01855 * c-basic-offset: 4 01856 * fill-column: 78 01857 * End: 01858 */
Generated on Wed Mar 12 12:18:10 2008 by 1.5.1 |