tclParse.c

Go to the documentation of this file.
00001 /*
00002  * tclParse.c --
00003  *
00004  *      This file contains functions that parse Tcl scripts. They do so in a
00005  *      general-purpose fashion that can be used for many different purposes,
00006  *      including compilation, direct execution, code analysis, etc.
00007  *
00008  * Copyright (c) 1997 Sun Microsystems, Inc.
00009  * Copyright (c) 1998-2000 Ajuba Solutions.
00010  * Contributions from Don Porter, NIST, 2002. (not subject to US copyright)
00011  *
00012  * See the file "license.terms" for information on usage and redistribution of
00013  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
00014  *
00015  * RCS: @(#) $Id: tclParse.c,v 1.62 2008/01/23 21:58:36 dgp Exp $
00016  */
00017 
00018 #include "tclInt.h"
00019 
00020 /*
00021  * The following table provides parsing information about each possible 8-bit
00022  * character. The table is designed to be referenced with either signed or
00023  * unsigned characters, so it has 384 entries. The first 128 entries
00024  * correspond to negative character values, the next 256 correspond to
00025  * positive character values. The last 128 entries are identical to the first
00026  * 128. The table is always indexed with a 128-byte offset (the 128th entry
00027  * corresponds to a character value of 0).
00028  *
00029  * The macro CHAR_TYPE is used to index into the table and return information
00030  * about its character argument. The following return values are defined.
00031  *
00032  * TYPE_NORMAL -        All characters that don't have special significance to
00033  *                      the Tcl parser.
00034  * TYPE_SPACE -         The character is a whitespace character other than
00035  *                      newline.
00036  * TYPE_COMMAND_END -   Character is newline or semicolon.
00037  * TYPE_SUBS -          Character begins a substitution or has other special
00038  *                      meaning in ParseTokens: backslash, dollar sign, or
00039  *                      open bracket.
00040  * TYPE_QUOTE -         Character is a double quote.
00041  * TYPE_CLOSE_PAREN -   Character is a right parenthesis.
00042  * TYPE_CLOSE_BRACK -   Character is a right square bracket.
00043  * TYPE_BRACE -         Character is a curly brace (either left or right).
00044  */
00045 
00046 #define TYPE_NORMAL             0
00047 #define TYPE_SPACE              0x1
00048 #define TYPE_COMMAND_END        0x2
00049 #define TYPE_SUBS               0x4
00050 #define TYPE_QUOTE              0x8
00051 #define TYPE_CLOSE_PAREN        0x10
00052 #define TYPE_CLOSE_BRACK        0x20
00053 #define TYPE_BRACE              0x40
00054 
00055 #define CHAR_TYPE(c) (charTypeTable+128)[(int)(c)]
00056 
00057 static const char charTypeTable[] = {
00058     /*
00059      * Negative character values, from -128 to -1:
00060      */
00061 
00062     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00063     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00064     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00065     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00066     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00067     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00068     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00069     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00070     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00071     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00072     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00073     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00074     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00075     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00076     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00077     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00078     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00079     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00080     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00081     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00082     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00083     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00084     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00085     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00086     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00087     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00088     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00089     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00090     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00091     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00092     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00093     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00094 
00095     /*
00096      * Positive character values, from 0-127:
00097      */
00098 
00099     TYPE_SUBS,        TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00100     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00101     TYPE_NORMAL,      TYPE_SPACE,       TYPE_COMMAND_END, TYPE_SPACE,
00102     TYPE_SPACE,       TYPE_SPACE,       TYPE_NORMAL,      TYPE_NORMAL,
00103     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00104     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00105     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00106     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00107     TYPE_SPACE,       TYPE_NORMAL,      TYPE_QUOTE,       TYPE_NORMAL,
00108     TYPE_SUBS,        TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00109     TYPE_NORMAL,      TYPE_CLOSE_PAREN, TYPE_NORMAL,      TYPE_NORMAL,
00110     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00111     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00112     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00113     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_COMMAND_END,
00114     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00115     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00116     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00117     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00118     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00119     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00120     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00121     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_SUBS,
00122     TYPE_SUBS,        TYPE_CLOSE_BRACK, TYPE_NORMAL,      TYPE_NORMAL,
00123     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00124     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00125     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00126     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00127     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00128     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00129     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_BRACE,
00130     TYPE_NORMAL,      TYPE_BRACE,       TYPE_NORMAL,      TYPE_NORMAL,
00131 
00132     /*
00133      * Large unsigned character values, from 128-255:
00134      */
00135 
00136     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00137     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00138     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00139     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00140     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00141     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00142     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00143     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00144     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00145     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00146     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00147     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00148     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00149     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00150     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00151     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00152     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00153     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00154     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00155     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00156     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00157     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00158     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00159     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00160     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00161     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00162     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00163     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00164     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00165     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00166     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00167     TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
00168 };
00169 
00170 /*
00171  * Prototypes for local functions defined in this file:
00172  */
00173 
00174 static inline int       CommandComplete(const char *script, int numBytes);
00175 static int              ParseComment(const char *src, int numBytes,
00176                             Tcl_Parse *parsePtr);
00177 static int              ParseTokens(const char *src, int numBytes, int mask,
00178                             int flags, Tcl_Parse *parsePtr);
00179 static int              ParseWhiteSpace(const char *src, int numBytes,
00180                             int *incompletePtr, char *typePtr);
00181 
00182 /*
00183  *----------------------------------------------------------------------
00184  *
00185  * TclParseInit --
00186  *
00187  *      Initialize the fields of a Tcl_Parse struct.
00188  *
00189  * Results:
00190  *      None.
00191  *
00192  * Side effects:
00193  *      The Tcl_Parse struct pointed to by parsePtr gets initialized.
00194  *
00195  *----------------------------------------------------------------------
00196  */
00197 
00198 void
00199 TclParseInit(
00200     Tcl_Interp *interp,         /* Interpreter to use for error reporting */
00201     const char *start,          /* Start of string to be parsed. */
00202     int numBytes,               /* Total number of bytes in string. If < 0,
00203                                  * the script consists of all bytes up to the
00204                                  * first null character. */
00205     Tcl_Parse *parsePtr)        /* Points to struct to initialize */
00206 {
00207     parsePtr->numWords = 0;
00208     parsePtr->tokenPtr = parsePtr->staticTokens;
00209     parsePtr->numTokens = 0;
00210     parsePtr->tokensAvailable = NUM_STATIC_TOKENS;
00211     parsePtr->string = start;
00212     parsePtr->end = start + numBytes;
00213     parsePtr->term = parsePtr->end;
00214     parsePtr->interp = interp;
00215     parsePtr->incomplete = 0;
00216     parsePtr->errorType = TCL_PARSE_SUCCESS;
00217 }
00218 
00219 /*
00220  *----------------------------------------------------------------------
00221  *
00222  * Tcl_ParseCommand --
00223  *
00224  *      Given a string, this function parses the first Tcl command in the
00225  *      string and returns information about the structure of the command.
00226  *
00227  * Results:
00228  *      The return value is TCL_OK if the command was parsed successfully and
00229  *      TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an
00230  *      error message is left in its result. On a successful return, parsePtr
00231  *      is filled in with information about the command that was parsed.
00232  *
00233  * Side effects:
00234  *      If there is insufficient space in parsePtr to hold all the information
00235  *      about the command, then additional space is malloc-ed. If the function
00236  *      returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to
00237  *      release any additional space that was allocated.
00238  *
00239  *----------------------------------------------------------------------
00240  */
00241 
00242 int
00243 Tcl_ParseCommand(
00244     Tcl_Interp *interp,         /* Interpreter to use for error reporting; if
00245                                  * NULL, then no error message is provided. */
00246     const char *start,          /* First character of string containing one or
00247                                  * more Tcl commands. */
00248     register int numBytes,      /* Total number of bytes in string. If < 0,
00249                                  * the script consists of all bytes up to the
00250                                  * first null character. */
00251     int nested,                 /* Non-zero means this is a nested command:
00252                                  * close bracket should be considered a
00253                                  * command terminator. If zero, then close
00254                                  * bracket has no special meaning. */
00255     register Tcl_Parse *parsePtr)
00256                                 /* Structure to fill in with information about
00257                                  * the parsed command; any previous
00258                                  * information in the structure is ignored. */
00259 {
00260     register const char *src;   /* Points to current character in the
00261                                  * command. */
00262     char type;                  /* Result returned by CHAR_TYPE(*src). */
00263     Tcl_Token *tokenPtr;        /* Pointer to token being filled in. */
00264     int wordIndex;              /* Index of word token for current word. */
00265     int terminators;            /* CHAR_TYPE bits that indicate the end of a
00266                                  * command. */
00267     const char *termPtr;        /* Set by Tcl_ParseBraces/QuotedString to
00268                                  * point to char after terminating one. */
00269     int scanned;
00270 
00271     if ((start == NULL) && (numBytes != 0)) {
00272         if (interp != NULL) {
00273             Tcl_SetResult(interp, "can't parse a NULL pointer", TCL_STATIC);
00274         }
00275         return TCL_ERROR;
00276     }
00277     if (numBytes < 0) {
00278         numBytes = strlen(start);
00279     }
00280     TclParseInit(interp, start, numBytes, parsePtr);
00281     parsePtr->commentStart = NULL;
00282     parsePtr->commentSize = 0;
00283     parsePtr->commandStart = NULL;
00284     parsePtr->commandSize = 0;
00285     if (nested != 0) {
00286         terminators = TYPE_COMMAND_END | TYPE_CLOSE_BRACK;
00287     } else {
00288         terminators = TYPE_COMMAND_END;
00289     }
00290 
00291     /*
00292      * Parse any leading space and comments before the first word of the
00293      * command.
00294      */
00295 
00296     scanned = ParseComment(start, numBytes, parsePtr);
00297     src = (start + scanned);
00298     numBytes -= scanned;
00299     if (numBytes == 0) {
00300         if (nested) {
00301             parsePtr->incomplete = nested;
00302         }
00303     }
00304 
00305     /*
00306      * The following loop parses the words of the command, one word in each
00307      * iteration through the loop.
00308      */
00309 
00310     parsePtr->commandStart = src;
00311     while (1) {
00312         int expandWord = 0;
00313 
00314         /*
00315          * Create the token for the word.
00316          */
00317 
00318         TclGrowParseTokenArray(parsePtr, 1);
00319         wordIndex = parsePtr->numTokens;
00320         tokenPtr = &parsePtr->tokenPtr[wordIndex];
00321         tokenPtr->type = TCL_TOKEN_WORD;
00322 
00323         /*
00324          * Skip white space before the word. Also skip a backslash-newline
00325          * sequence: it should be treated just like white space.
00326          */
00327 
00328         scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type);
00329         src += scanned;
00330         numBytes -= scanned;
00331         if (numBytes == 0) {
00332             parsePtr->term = src;
00333             break;
00334         }
00335         if ((type & terminators) != 0) {
00336             parsePtr->term = src;
00337             src++;
00338             break;
00339         }
00340         tokenPtr->start = src;
00341         parsePtr->numTokens++;
00342         parsePtr->numWords++;
00343 
00344         /*
00345          * At this point the word can have one of four forms: something
00346          * enclosed in quotes, something enclosed in braces, and expanding
00347          * word, or an unquoted word (anything else).
00348          */
00349 
00350     parseWord:
00351         if (*src == '"') {
00352             if (Tcl_ParseQuotedString(interp, src, numBytes, parsePtr, 1,
00353                     &termPtr) != TCL_OK) {
00354                 goto error;
00355             }
00356             src = termPtr;
00357             numBytes = parsePtr->end - src;
00358         } else if (*src == '{') {
00359             int expIdx = wordIndex + 1;
00360             Tcl_Token *expPtr;
00361 
00362             if (Tcl_ParseBraces(interp, src, numBytes, parsePtr, 1,
00363                     &termPtr) != TCL_OK) {
00364                 goto error;
00365             }
00366             src = termPtr;
00367             numBytes = parsePtr->end - src;
00368 
00369             /*
00370              * Check whether the braces contained the word expansion prefix
00371              * {*}
00372              */
00373 
00374             expPtr = &parsePtr->tokenPtr[expIdx];
00375             if ((0 == expandWord)
00376                     /* Haven't seen prefix already */
00377                     && (1 == parsePtr->numTokens - expIdx)
00378                     /* Only one token */
00379                     && (((1 == (size_t) expPtr->size)
00380                             /* Same length as prefix */
00381                             && (expPtr->start[0] == '*')))
00382                             /* Is the prefix */
00383                     && (numBytes > 0) && (0 == ParseWhiteSpace(termPtr,
00384                             numBytes, &parsePtr->incomplete, &type))
00385                     && (type != TYPE_COMMAND_END)
00386                     /* Non-whitespace follows */) {
00387                 expandWord = 1;
00388                 parsePtr->numTokens--;
00389                 goto parseWord;
00390             }
00391         } else {
00392             /*
00393              * This is an unquoted word. Call ParseTokens and let it do all of
00394              * the work.
00395              */
00396 
00397             if (ParseTokens(src, numBytes, TYPE_SPACE|terminators,
00398                     TCL_SUBST_ALL, parsePtr) != TCL_OK) {
00399                 goto error;
00400             }
00401             src = parsePtr->term;
00402             numBytes = parsePtr->end - src;
00403         }
00404 
00405         /*
00406          * Finish filling in the token for the word and check for the special
00407          * case of a word consisting of a single range of literal text.
00408          */
00409 
00410         tokenPtr = &parsePtr->tokenPtr[wordIndex];
00411         tokenPtr->size = src - tokenPtr->start;
00412         tokenPtr->numComponents = parsePtr->numTokens - (wordIndex + 1);
00413         if (expandWord) {
00414             int i, isLiteral = 1;
00415 
00416             /*
00417              * When a command includes a word that is an expanded literal; for
00418              * example, {*}{1 2 3}, the parser performs that expansion
00419              * immediately, generating several TCL_TOKEN_SIMPLE_WORDs instead
00420              * of a single TCL_TOKEN_EXPAND_WORD that the Tcl_ParseCommand()
00421              * caller might have to expand. This notably makes it simpler for
00422              * those callers that wish to track line endings, such as those
00423              * that implement key parts of TIP 280.
00424              *
00425              * First check whether the thing to be expanded is a literal,
00426              * in the sense of being composed entirely of TCL_TOKEN_TEXT
00427              * tokens.
00428              */
00429 
00430             for (i = 1; i <= tokenPtr->numComponents; i++) {
00431                 if (tokenPtr[i].type != TCL_TOKEN_TEXT) {
00432                     isLiteral = 0;
00433                     break;
00434                 }
00435             }
00436 
00437             if (isLiteral) {
00438                 int elemCount = 0, code = TCL_OK;
00439                 const char *nextElem, *listEnd, *elemStart;
00440 
00441                 /*
00442                  * The word to be expanded is a literal, so determine the
00443                  * boundaries of the literal string to be treated as a list
00444                  * and expanded. That literal string starts at
00445                  * tokenPtr[1].start, and includes all bytes up to, but not
00446                  * including (tokenPtr[tokenPtr->numComponents].start +
00447                  * tokenPtr[tokenPtr->numComponents].size)
00448                  */
00449 
00450                 listEnd = (tokenPtr[tokenPtr->numComponents].start +
00451                         tokenPtr[tokenPtr->numComponents].size);
00452                 nextElem = tokenPtr[1].start;
00453 
00454                 /*
00455                  * Step through the literal string, parsing and counting list
00456                  * elements.
00457                  */
00458 
00459                 while (nextElem < listEnd) {
00460                     code = TclFindElement(NULL, nextElem, listEnd - nextElem,
00461                             &elemStart, &nextElem, NULL, NULL);
00462                     if (code != TCL_OK) break;
00463                     if (elemStart < listEnd) {
00464                         elemCount++;
00465                     }
00466                 }
00467 
00468                 if (code != TCL_OK) {
00469                     /*
00470                      * Some list element could not be parsed. This means the
00471                      * literal string was not in fact a valid list. Defer the
00472                      * handling of this to compile/eval time, where code is
00473                      * already in place to report the "attempt to expand a
00474                      * non-list" error.
00475                      */
00476 
00477                     tokenPtr->type = TCL_TOKEN_EXPAND_WORD;
00478                 } else if (elemCount == 0) {
00479                     /*
00480                      * We are expanding a literal empty list. This means that
00481                      * the expanding word completely disappears, leaving no
00482                      * word generated this pass through the loop. Adjust
00483                      * accounting appropriately.
00484                      */
00485 
00486                     parsePtr->numWords--;
00487                     parsePtr->numTokens = wordIndex;
00488                 } else {
00489                     /*
00490                      * Recalculate the number of Tcl_Tokens needed to store
00491                      * tokens representing the expanded list.
00492                      */
00493 
00494                     int growthNeeded = wordIndex + 2*elemCount
00495                             - parsePtr->numTokens;
00496                     parsePtr->numWords += elemCount - 1;
00497                     if (growthNeeded > 0) {
00498                         TclGrowParseTokenArray(parsePtr, growthNeeded);
00499                         tokenPtr = &parsePtr->tokenPtr[wordIndex];
00500                     }
00501                     parsePtr->numTokens = wordIndex + 2*elemCount;
00502 
00503                     /*
00504                      * Generate a TCL_TOKEN_SIMPLE_WORD token sequence for
00505                      * each element of the literal list we are expanding in
00506                      * place. Take care with the start and size fields of each
00507                      * token so they point to the right literal characters in
00508                      * the original script to represent the right expanded
00509                      * word value.
00510                      */
00511 
00512                     nextElem = tokenPtr[1].start;
00513                     while (isspace(UCHAR(*nextElem))) {
00514                         nextElem++;
00515                     }
00516                     while (nextElem < listEnd) {
00517                         tokenPtr->type = TCL_TOKEN_SIMPLE_WORD;
00518                         tokenPtr->numComponents = 1;
00519                         tokenPtr->start = nextElem;
00520 
00521                         tokenPtr++;
00522                         tokenPtr->type = TCL_TOKEN_TEXT;
00523                         tokenPtr->numComponents = 0;
00524                         TclFindElement(NULL, nextElem, listEnd - nextElem,
00525                                 &(tokenPtr->start), &nextElem,
00526                                 &(tokenPtr->size), NULL);
00527                         if (tokenPtr->start + tokenPtr->size == listEnd) {
00528                             tokenPtr[-1].size = listEnd - tokenPtr[-1].start;
00529                         } else {
00530                             tokenPtr[-1].size = tokenPtr->start
00531                                     + tokenPtr->size - tokenPtr[-1].start;
00532                             tokenPtr[-1].size += (isspace(UCHAR(
00533                                 tokenPtr->start[tokenPtr->size])) == 0);
00534                         }
00535 
00536                         tokenPtr++;
00537                     }
00538                 }
00539             } else {
00540                 /*
00541                  * The word to be expanded is not a literal, so defer
00542                  * expansion to compile/eval time by marking with a
00543                  * TCL_TOKEN_EXPAND_WORD token.
00544                  */
00545 
00546                 tokenPtr->type = TCL_TOKEN_EXPAND_WORD;
00547             }
00548         } else if ((tokenPtr->numComponents == 1)
00549                 && (tokenPtr[1].type == TCL_TOKEN_TEXT)) {
00550             tokenPtr->type = TCL_TOKEN_SIMPLE_WORD;
00551         }
00552 
00553         /*
00554          * Do two additional checks: (a) make sure we're really at the end of
00555          * a word (there might have been garbage left after a quoted or braced
00556          * word), and (b) check for the end of the command.
00557          */
00558 
00559         scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type);
00560         if (scanned) {
00561             src += scanned;
00562             numBytes -= scanned;
00563             continue;
00564         }
00565 
00566         if (numBytes == 0) {
00567             parsePtr->term = src;
00568             break;
00569         }
00570         if ((type & terminators) != 0) {
00571             parsePtr->term = src;
00572             src++;
00573             break;
00574         }
00575         if (src[-1] == '"') {
00576             if (interp != NULL) {
00577                 Tcl_SetResult(interp, "extra characters after close-quote",
00578                         TCL_STATIC);
00579             }
00580             parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA;
00581         } else {
00582             if (interp != NULL) {
00583                 Tcl_SetResult(interp, "extra characters after close-brace",
00584                         TCL_STATIC);
00585             }
00586             parsePtr->errorType = TCL_PARSE_BRACE_EXTRA;
00587         }
00588         parsePtr->term = src;
00589         goto error;
00590     }
00591 
00592     parsePtr->commandSize = src - parsePtr->commandStart;
00593     return TCL_OK;
00594 
00595   error:
00596     Tcl_FreeParse(parsePtr);
00597     parsePtr->commandSize = parsePtr->end - parsePtr->commandStart;
00598     return TCL_ERROR;
00599 }
00600 
00601 /*
00602  *----------------------------------------------------------------------
00603  *
00604  * ParseWhiteSpace --
00605  *
00606  *      Scans up to numBytes bytes starting at src, consuming white space
00607  *      between words as defined by Tcl's parsing rules.
00608  *
00609  * Results:
00610  *      Returns the number of bytes recognized as white space. Records at
00611  *      parsePtr, information about the parse. Records at typePtr the
00612  *      character type of the non-whitespace character that terminated the
00613  *      scan.
00614  *
00615  * Side effects:
00616  *      None.
00617  *
00618  *----------------------------------------------------------------------
00619  */
00620 
00621 static int
00622 ParseWhiteSpace(
00623     const char *src,            /* First character to parse. */
00624     register int numBytes,      /* Max number of bytes to scan. */
00625     int *incompletePtr,         /* Set this boolean memory to true if parsing
00626                                  * indicates an incomplete command. */
00627     char *typePtr)              /* Points to location to store character type
00628                                  * of character that ends run of whitespace */
00629 {
00630     register char type = TYPE_NORMAL;
00631     register const char *p = src;
00632 
00633     while (1) {
00634         while (numBytes && ((type = CHAR_TYPE(*p)) & TYPE_SPACE)) {
00635             numBytes--;
00636             p++;
00637         }
00638         if (numBytes && (type & TYPE_SUBS)) {
00639             if (*p != '\\') {
00640                 break;
00641             }
00642             if (--numBytes == 0) {
00643                 break;
00644             }
00645             if (p[1] != '\n') {
00646                 break;
00647             }
00648             p+=2;
00649             if (--numBytes == 0) {
00650                 *incompletePtr = 1;
00651                 break;
00652             }
00653             continue;
00654         }
00655         break;
00656     }
00657     *typePtr = type;
00658     return (p - src);
00659 }
00660 
00661 /*
00662  *----------------------------------------------------------------------
00663  *
00664  * TclParseAllWhiteSpace --
00665  *
00666  *      Scans up to numBytes bytes starting at src, consuming all white space
00667  *      including the command-terminating newline characters.
00668  *
00669  * Results:
00670  *      Returns the number of bytes recognized as white space.
00671  *
00672  *----------------------------------------------------------------------
00673  */
00674 
00675 int
00676 TclParseAllWhiteSpace(
00677     const char *src,            /* First character to parse. */
00678     int numBytes)               /* Max number of byes to scan */
00679 {
00680     int dummy;
00681     char type;
00682     const char *p = src;
00683 
00684     do {
00685         int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type);
00686 
00687         p += scanned;
00688         numBytes -= scanned;
00689     } while (numBytes && (*p == '\n') && (p++, --numBytes));
00690     return (p-src);
00691 }
00692 
00693 /*
00694  *----------------------------------------------------------------------
00695  *
00696  * TclParseHex --
00697  *
00698  *      Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing
00699  *      \x and \u escape sequences). At most numBytes bytes are scanned.
00700  *
00701  * Results:
00702  *      The numeric value is stored in *resultPtr. Returns the number of bytes
00703  *      consumed.
00704  *
00705  * Notes:
00706  *      Relies on the following properties of the ASCII character set, with
00707  *      which UTF-8 is compatible:
00708  *
00709  *      The digits '0' .. '9' and the letters 'A' .. 'Z' and 'a' .. 'z' occupy
00710  *      consecutive code points, and '0' < 'A' < 'a'.
00711  *
00712  *----------------------------------------------------------------------
00713  */
00714 
00715 int
00716 TclParseHex(
00717     const char *src,            /* First character to parse. */
00718     int numBytes,               /* Max number of byes to scan */
00719     Tcl_UniChar *resultPtr)     /* Points to storage provided by caller where
00720                                  * the Tcl_UniChar resulting from the
00721                                  * conversion is to be written. */
00722 {
00723     Tcl_UniChar result = 0;
00724     register const char *p = src;
00725 
00726     while (numBytes--) {
00727         unsigned char digit = UCHAR(*p);
00728 
00729         if (!isxdigit(digit)) {
00730             break;
00731         }
00732 
00733         ++p;
00734         result <<= 4;
00735 
00736         if (digit >= 'a') {
00737             result |= (10 + digit - 'a');
00738         } else if (digit >= 'A') {
00739             result |= (10 + digit - 'A');
00740         } else {
00741             result |= (digit - '0');
00742         }
00743     }
00744 
00745     *resultPtr = result;
00746     return (p - src);
00747 }
00748 
00749 /*
00750  *----------------------------------------------------------------------
00751  *
00752  * TclParseBackslash --
00753  *
00754  *      Scans up to numBytes bytes starting at src, consuming a backslash
00755  *      sequence as defined by Tcl's parsing rules.
00756  *
00757  * Results:
00758  *      Records at readPtr the number of bytes making up the backslash
00759  *      sequence. Records at dst the UTF-8 encoded equivalent of that
00760  *      backslash sequence. Returns the number of bytes written to dst, at
00761  *      most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results
00762  *      are not needed, but the return value is the same either way.
00763  *
00764  * Side effects:
00765  *      None.
00766  *
00767  *----------------------------------------------------------------------
00768  */
00769 
00770 int
00771 TclParseBackslash(
00772     const char *src,            /* Points to the backslash character of a a
00773                                  * backslash sequence. */
00774     int numBytes,               /* Max number of bytes to scan. */
00775     int *readPtr,               /* NULL, or points to storage where the number
00776                                  * of bytes scanned should be written. */
00777     char *dst)                  /* NULL, or points to buffer where the UTF-8
00778                                  * encoding of the backslash sequence is to be
00779                                  * written. At most TCL_UTF_MAX bytes will be
00780                                  * written there. */
00781 {
00782     register const char *p = src+1;
00783     Tcl_UniChar result;
00784     int count;
00785     char buf[TCL_UTF_MAX];
00786 
00787     if (numBytes == 0) {
00788         if (readPtr != NULL) {
00789             *readPtr = 0;
00790         }
00791         return 0;
00792     }
00793 
00794     if (dst == NULL) {
00795         dst = buf;
00796     }
00797 
00798     if (numBytes == 1) {
00799         /*
00800          * Can only scan the backslash, so return it.
00801          */
00802 
00803         result = '\\';
00804         count = 1;
00805         goto done;
00806     }
00807 
00808     count = 2;
00809     switch (*p) {
00810         /*
00811          * Note: in the conversions below, use absolute values (e.g., 0xa)
00812          * rather than symbolic values (e.g. \n) that get converted by the
00813          * compiler. It's possible that compilers on some platforms will do
00814          * the symbolic conversions differently, which could result in
00815          * non-portable Tcl scripts.
00816          */
00817 
00818     case 'a':
00819         result = 0x7;
00820         break;
00821     case 'b':
00822         result = 0x8;
00823         break;
00824     case 'f':
00825         result = 0xc;
00826         break;
00827     case 'n':
00828         result = 0xa;
00829         break;
00830     case 'r':
00831         result = 0xd;
00832         break;
00833     case 't':
00834         result = 0x9;
00835         break;
00836     case 'v':
00837         result = 0xb;
00838         break;
00839     case 'x':
00840         count += TclParseHex(p+1, numBytes-1, &result);
00841         if (count == 2) {
00842             /*
00843              * No hexadigits -> This is just "x".
00844              */
00845 
00846             result = 'x';
00847         } else {
00848             /*
00849              * Keep only the last byte (2 hex digits).
00850              */
00851             result = (unsigned char) result;
00852         }
00853         break;
00854     case 'u':
00855         count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-1, &result);
00856         if (count == 2) {
00857             /*
00858              * No hexadigits -> This is just "u".
00859              */
00860             result = 'u';
00861         }
00862         break;
00863     case '\n':
00864         count--;
00865         do {
00866             p++;
00867             count++;
00868         } while ((count < numBytes) && ((*p == ' ') || (*p == '\t')));
00869         result = ' ';
00870         break;
00871     case 0:
00872         result = '\\';
00873         count = 1;
00874         break;
00875     default:
00876         /*
00877          * Check for an octal number \oo?o?
00878          */
00879 
00880         if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) {  /* INTL: digit */
00881             result = (unsigned char)(*p - '0');
00882             p++;
00883             if ((numBytes == 2) || !isdigit(UCHAR(*p))  /* INTL: digit */
00884                     || (UCHAR(*p) >= '8')) {
00885                 break;
00886             }
00887             count = 3;
00888             result = (unsigned char)((result << 3) + (*p - '0'));
00889             p++;
00890             if ((numBytes == 3) || !isdigit(UCHAR(*p))  /* INTL: digit */
00891                     || (UCHAR(*p) >= '8')) {
00892                 break;
00893             }
00894             count = 4;
00895             result = (unsigned char)((result << 3) + (*p - '0'));
00896             break;
00897         }
00898 
00899         /*
00900          * We have to convert here in case the user has put a backslash in
00901          * front of a multi-byte utf-8 character. While this means nothing
00902          * special, we shouldn't break up a correct utf-8 character. [Bug
00903          * #217987] test subst-3.2
00904          */
00905 
00906         if (Tcl_UtfCharComplete(p, numBytes - 1)) {
00907             count = Tcl_UtfToUniChar(p, &result) + 1;   /* +1 for '\' */
00908         } else {
00909             char utfBytes[TCL_UTF_MAX];
00910 
00911             memcpy(utfBytes, p, (size_t) (numBytes - 1));
00912             utfBytes[numBytes - 1] = '\0';
00913             count = Tcl_UtfToUniChar(utfBytes, &result) + 1;
00914         }
00915         break;
00916     }
00917 
00918   done:
00919     if (readPtr != NULL) {
00920         *readPtr = count;
00921     }
00922     return Tcl_UniCharToUtf((int) result, dst);
00923 }
00924 
00925 /*
00926  *----------------------------------------------------------------------
00927  *
00928  * ParseComment --
00929  *
00930  *      Scans up to numBytes bytes starting at src, consuming a Tcl comment as
00931  *      defined by Tcl's parsing rules.
00932  *
00933  * Results:
00934  *      Records in parsePtr information about the parse. Returns the number of
00935  *      bytes consumed.
00936  *
00937  * Side effects:
00938  *      None.
00939  *
00940  *----------------------------------------------------------------------
00941  */
00942 
00943 static int
00944 ParseComment(
00945     const char *src,            /* First character to parse. */
00946     register int numBytes,      /* Max number of bytes to scan. */
00947     Tcl_Parse *parsePtr)        /* Information about parse in progress.
00948                                  * Updated if parsing indicates an incomplete
00949                                  * command. */
00950 {
00951     register const char *p = src;
00952 
00953     while (numBytes) {
00954         char type;
00955         int scanned;
00956 
00957         scanned = TclParseAllWhiteSpace(p, numBytes);
00958         p += scanned;
00959         numBytes -= scanned;
00960 
00961         if ((numBytes == 0) || (*p != '#')) {
00962             break;
00963         }
00964         if (parsePtr->commentStart == NULL) {
00965             parsePtr->commentStart = p;
00966         }
00967 
00968         while (numBytes) {
00969             if (*p == '\\') {
00970                 scanned = ParseWhiteSpace(p, numBytes, &parsePtr->incomplete,
00971                         &type);
00972                 if (scanned) {
00973                     p += scanned;
00974                     numBytes -= scanned;
00975                 } else {
00976                     /*
00977                      * General backslash substitution in comments isn't part
00978                      * of the formal spec, but test parse-15.47 and history
00979                      * indicate that it has been the de facto rule. Don't
00980                      * change it now.
00981                      */
00982 
00983                     TclParseBackslash(p, numBytes, &scanned, NULL);
00984                     p += scanned;
00985                     numBytes -= scanned;
00986                 }
00987             } else {
00988                 p++;
00989                 numBytes--;
00990                 if (p[-1] == '\n') {
00991                     break;
00992                 }
00993             }
00994         }
00995         parsePtr->commentSize = p - parsePtr->commentStart;
00996     }
00997     return (p - src);
00998 }
00999 
01000 /*
01001  *----------------------------------------------------------------------
01002  *
01003  * ParseTokens --
01004  *
01005  *      This function forms the heart of the Tcl parser. It parses one or more
01006  *      tokens from a string, up to a termination point specified by the
01007  *      caller. This function is used to parse unquoted command words (those
01008  *      not in quotes or braces), words in quotes, and array indices for
01009  *      variables. No more than numBytes bytes will be scanned.
01010  *
01011  * Results:
01012  *      Tokens are added to parsePtr and parsePtr->term is filled in with the
01013  *      address of the character that terminated the parse (the first one
01014  *      whose CHAR_TYPE matched mask or the character at parsePtr->end). The
01015  *      return value is TCL_OK if the parse completed successfully and
01016  *      TCL_ERROR otherwise. If a parse error occurs and parsePtr->interp is
01017  *      not NULL, then an error message is left in the interpreter's result.
01018  *
01019  * Side effects:
01020  *      None.
01021  *
01022  *----------------------------------------------------------------------
01023  */
01024 
01025 static int
01026 ParseTokens(
01027     register const char *src,   /* First character to parse. */
01028     register int numBytes,      /* Max number of bytes to scan. */
01029     int mask,                   /* Specifies when to stop parsing. The parse
01030                                  * stops at the first unquoted character whose
01031                                  * CHAR_TYPE contains any of the bits in
01032                                  * mask. */
01033     int flags,                  /* OR-ed bits indicating what substitutions to
01034                                  * perform: TCL_SUBST_COMMANDS,
01035                                  * TCL_SUBST_VARIABLES, and
01036                                  * TCL_SUBST_BACKSLASHES */
01037     Tcl_Parse *parsePtr)        /* Information about parse in progress.
01038                                  * Updated with additional tokens and
01039                                  * termination information. */
01040 {
01041     char type;
01042     int originalTokens;
01043     int noSubstCmds = !(flags & TCL_SUBST_COMMANDS);
01044     int noSubstVars = !(flags & TCL_SUBST_VARIABLES);
01045     int noSubstBS = !(flags & TCL_SUBST_BACKSLASHES);
01046     Tcl_Token *tokenPtr;
01047 
01048     /*
01049      * Each iteration through the following loop adds one token of type
01050      * TCL_TOKEN_TEXT, TCL_TOKEN_BS, TCL_TOKEN_COMMAND, or TCL_TOKEN_VARIABLE
01051      * to parsePtr. For TCL_TOKEN_VARIABLE tokens, additional tokens are added
01052      * for the parsed variable name.
01053      */
01054 
01055     originalTokens = parsePtr->numTokens;
01056     while (numBytes && !((type = CHAR_TYPE(*src)) & mask)) {
01057         TclGrowParseTokenArray(parsePtr, 1);
01058         tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];
01059         tokenPtr->start = src;
01060         tokenPtr->numComponents = 0;
01061 
01062         if ((type & TYPE_SUBS) == 0) {
01063             /*
01064              * This is a simple range of characters. Scan to find the end of
01065              * the range.
01066              */
01067 
01068             while ((++src, --numBytes)
01069                     && !(CHAR_TYPE(*src) & (mask | TYPE_SUBS))) {
01070                 /* empty loop */
01071             }
01072             tokenPtr->type = TCL_TOKEN_TEXT;
01073             tokenPtr->size = src - tokenPtr->start;
01074             parsePtr->numTokens++;
01075         } else if (*src == '$') {
01076             int varToken;
01077 
01078             if (noSubstVars) {
01079                 tokenPtr->type = TCL_TOKEN_TEXT;
01080                 tokenPtr->size = 1;
01081                 parsePtr->numTokens++;
01082                 src++;
01083                 numBytes--;
01084                 continue;
01085             }
01086 
01087             /*
01088              * This is a variable reference.  Call Tcl_ParseVarName to do all
01089              * the dirty work of parsing the name.
01090              */
01091 
01092             varToken = parsePtr->numTokens;
01093             if (Tcl_ParseVarName(parsePtr->interp, src, numBytes, parsePtr,
01094                     1) != TCL_OK) {
01095                 return TCL_ERROR;
01096             }
01097             src += parsePtr->tokenPtr[varToken].size;
01098             numBytes -= parsePtr->tokenPtr[varToken].size;
01099         } else if (*src == '[') {
01100             Tcl_Parse *nestedPtr;
01101 
01102             if (noSubstCmds) {
01103                 tokenPtr->type = TCL_TOKEN_TEXT;
01104                 tokenPtr->size = 1;
01105                 parsePtr->numTokens++;
01106                 src++;
01107                 numBytes--;
01108                 continue;
01109             }
01110 
01111             /*
01112              * Command substitution.  Call Tcl_ParseCommand recursively (and
01113              * repeatedly) to parse the nested command(s), then throw away the
01114              * parse information.
01115              */
01116 
01117             src++;
01118             numBytes--;
01119             nestedPtr = (Tcl_Parse *)
01120                     TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));
01121             while (1) {
01122                 if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1,
01123                         nestedPtr) != TCL_OK) {
01124                     parsePtr->errorType = nestedPtr->errorType;
01125                     parsePtr->term = nestedPtr->term;
01126                     parsePtr->incomplete = nestedPtr->incomplete;
01127                     TclStackFree(parsePtr->interp, nestedPtr);
01128                     return TCL_ERROR;
01129                 }
01130                 src = nestedPtr->commandStart + nestedPtr->commandSize;
01131                 numBytes = parsePtr->end - src;
01132                 Tcl_FreeParse(nestedPtr);
01133 
01134                 /*
01135                  * Check for the closing ']' that ends the command
01136                  * substitution. It must have been the last character of the
01137                  * parsed command.
01138                  */
01139 
01140                 if ((nestedPtr->term < parsePtr->end)
01141                         && (*(nestedPtr->term) == ']')
01142                         && !(nestedPtr->incomplete)) {
01143                     break;
01144                 }
01145                 if (numBytes == 0) {
01146                     if (parsePtr->interp != NULL) {
01147                         Tcl_SetResult(parsePtr->interp,
01148                                 "missing close-bracket", TCL_STATIC);
01149                     }
01150                     parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;
01151                     parsePtr->term = tokenPtr->start;
01152                     parsePtr->incomplete = 1;
01153                     TclStackFree(parsePtr->interp, nestedPtr);
01154                     return TCL_ERROR;
01155                 }
01156             }
01157             TclStackFree(parsePtr->interp, nestedPtr);
01158             tokenPtr->type = TCL_TOKEN_COMMAND;
01159             tokenPtr->size = src - tokenPtr->start;
01160             parsePtr->numTokens++;
01161         } else if (*src == '\\') {
01162             if (noSubstBS) {
01163                 tokenPtr->type = TCL_TOKEN_TEXT;
01164                 tokenPtr->size = 1;
01165                 parsePtr->numTokens++;
01166                 src++;
01167                 numBytes--;
01168                 continue;
01169             }
01170 
01171             /*
01172              * Backslash substitution.
01173              */
01174 
01175             TclParseBackslash(src, numBytes, &tokenPtr->size, NULL);
01176 
01177             if (tokenPtr->size == 1) {
01178                 /*
01179                  * Just a backslash, due to end of string.
01180                  */
01181 
01182                 tokenPtr->type = TCL_TOKEN_TEXT;
01183                 parsePtr->numTokens++;
01184                 src++;
01185                 numBytes--;
01186                 continue;
01187             }
01188 
01189             if (src[1] == '\n') {
01190                 if (numBytes == 2) {
01191                     parsePtr->incomplete = 1;
01192                 }
01193 
01194                 /*
01195                  * Note: backslash-newline is special in that it is treated
01196                  * the same as a space character would be. This means that it
01197                  * could terminate the token.
01198                  */
01199 
01200                 if (mask & TYPE_SPACE) {
01201                     if (parsePtr->numTokens == originalTokens) {
01202                         goto finishToken;
01203                     }
01204                     break;
01205                 }
01206             }
01207 
01208             tokenPtr->type = TCL_TOKEN_BS;
01209             parsePtr->numTokens++;
01210             src += tokenPtr->size;
01211             numBytes -= tokenPtr->size;
01212         } else if (*src == 0) {
01213             tokenPtr->type = TCL_TOKEN_TEXT;
01214             tokenPtr->size = 1;
01215             parsePtr->numTokens++;
01216             src++;
01217             numBytes--;
01218         } else {
01219             Tcl_Panic("ParseTokens encountered unknown character");
01220         }
01221     }
01222     if (parsePtr->numTokens == originalTokens) {
01223         /*
01224          * There was nothing in this range of text. Add an empty token for the
01225          * empty range, so that there is always at least one token added.
01226          */
01227 
01228         TclGrowParseTokenArray(parsePtr, 1);
01229         tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];
01230         tokenPtr->start = src;
01231         tokenPtr->numComponents = 0;
01232 
01233     finishToken:
01234         tokenPtr->type = TCL_TOKEN_TEXT;
01235         tokenPtr->size = 0;
01236         parsePtr->numTokens++;
01237     }
01238     parsePtr->term = src;
01239     return TCL_OK;
01240 }
01241 
01242 /*
01243  *----------------------------------------------------------------------
01244  *
01245  * Tcl_FreeParse --
01246  *
01247  *      This function is invoked to free any dynamic storage that may have
01248  *      been allocated by a previous call to Tcl_ParseCommand.
01249  *
01250  * Results:
01251  *      None.
01252  *
01253  * Side effects:
01254  *      If there is any dynamically allocated memory in *parsePtr, it is
01255  *      freed.
01256  *
01257  *----------------------------------------------------------------------
01258  */
01259 
01260 void
01261 Tcl_FreeParse(
01262     Tcl_Parse *parsePtr)        /* Structure that was filled in by a previous
01263                                  * call to Tcl_ParseCommand. */
01264 {
01265     if (parsePtr->tokenPtr != parsePtr->staticTokens) {
01266         ckfree((char *) parsePtr->tokenPtr);
01267         parsePtr->tokenPtr = parsePtr->staticTokens;
01268     }
01269 }
01270 
01271 /*
01272  *----------------------------------------------------------------------
01273  *
01274  * Tcl_ParseVarName --
01275  *
01276  *      Given a string starting with a $ sign, parse off a variable name and
01277  *      return information about the parse. No more than numBytes bytes will
01278  *      be scanned.
01279  *
01280  * Results:
01281  *      The return value is TCL_OK if the command was parsed successfully and
01282  *      TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an
01283  *      error message is left in its result. On a successful return, tokenPtr
01284  *      and numTokens fields of parsePtr are filled in with information about
01285  *      the variable name that was parsed. The "size" field of the first new
01286  *      token gives the total number of bytes in the variable name. Other
01287  *      fields in parsePtr are undefined.
01288  *
01289  * Side effects:
01290  *      If there is insufficient space in parsePtr to hold all the information
01291  *      about the command, then additional space is malloc-ed. If the function
01292  *      returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to
01293  *      release any additional space that was allocated.
01294  *
01295  *----------------------------------------------------------------------
01296  */
01297 
01298 int
01299 Tcl_ParseVarName(
01300     Tcl_Interp *interp,         /* Interpreter to use for error reporting; if
01301                                  * NULL, then no error message is provided. */
01302     const char *start,          /* Start of variable substitution string.
01303                                  * First character must be "$". */
01304     register int numBytes,      /* Total number of bytes in string. If < 0,
01305                                  * the string consists of all bytes up to the
01306                                  * first null character. */
01307     Tcl_Parse *parsePtr,        /* Structure to fill in with information about
01308                                  * the variable name. */
01309     int append)                 /* Non-zero means append tokens to existing
01310                                  * information in parsePtr; zero means ignore
01311                                  * existing tokens in parsePtr and
01312                                  * reinitialize it. */
01313 {
01314     Tcl_Token *tokenPtr;
01315     register const char *src;
01316     unsigned char c;
01317     int varIndex, offset;
01318     Tcl_UniChar ch;
01319     unsigned array;
01320 
01321     if ((numBytes == 0) || (start == NULL)) {
01322         return TCL_ERROR;
01323     }
01324     if (numBytes < 0) {
01325         numBytes = strlen(start);
01326     }
01327 
01328     if (!append) {
01329         TclParseInit(interp, start, numBytes, parsePtr);
01330     }
01331 
01332     /*
01333      * Generate one token for the variable, an additional token for the name,
01334      * plus any number of additional tokens for the index, if there is one.
01335      */
01336 
01337     src = start;
01338     TclGrowParseTokenArray(parsePtr, 2);
01339     tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];
01340     tokenPtr->type = TCL_TOKEN_VARIABLE;
01341     tokenPtr->start = src;
01342     varIndex = parsePtr->numTokens;
01343     parsePtr->numTokens++;
01344     tokenPtr++;
01345     src++;
01346     numBytes--;
01347     if (numBytes == 0) {
01348         goto justADollarSign;
01349     }
01350     tokenPtr->type = TCL_TOKEN_TEXT;
01351     tokenPtr->start = src;
01352     tokenPtr->numComponents = 0;
01353 
01354     /*
01355      * The name of the variable can have three forms:
01356      * 1. The $ sign is followed by an open curly brace. Then the variable
01357      *    name is everything up to the next close curly brace, and the
01358      *    variable is a scalar variable.
01359      * 2. The $ sign is not followed by an open curly brace. Then the variable
01360      *    name is everything up to the next character that isn't a letter,
01361      *    digit, or underscore. :: sequences are also considered part of the
01362      *    variable name, in order to support namespaces. If the following
01363      *    character is an open parenthesis, then the information between
01364      *    parentheses is the array element name.
01365      * 3. The $ sign is followed by something that isn't a letter, digit, or
01366      *    underscore: in this case, there is no variable name and the token is
01367      *    just "$".
01368      */
01369 
01370     if (*src == '{') {
01371         src++;
01372         numBytes--;
01373         tokenPtr->type = TCL_TOKEN_TEXT;
01374         tokenPtr->start = src;
01375         tokenPtr->numComponents = 0;
01376 
01377         while (numBytes && (*src != '}')) {
01378             numBytes--;
01379             src++;
01380         }
01381         if (numBytes == 0) {
01382             if (parsePtr->interp != NULL) {
01383                 Tcl_SetResult(parsePtr->interp,
01384                         "missing close-brace for variable name", TCL_STATIC);
01385             }
01386             parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE;
01387             parsePtr->term = tokenPtr->start-1;
01388             parsePtr->incomplete = 1;
01389             goto error;
01390         }
01391         tokenPtr->size = src - tokenPtr->start;
01392         tokenPtr[-1].size = src - tokenPtr[-1].start;
01393         parsePtr->numTokens++;
01394         src++;
01395     } else {
01396         tokenPtr->type = TCL_TOKEN_TEXT;
01397         tokenPtr->start = src;
01398         tokenPtr->numComponents = 0;
01399 
01400         while (numBytes) {
01401             if (Tcl_UtfCharComplete(src, numBytes)) {
01402                 offset = Tcl_UtfToUniChar(src, &ch);
01403             } else {
01404                 char utfBytes[TCL_UTF_MAX];
01405 
01406                 memcpy(utfBytes, src, (size_t) numBytes);
01407                 utfBytes[numBytes] = '\0';
01408                 offset = Tcl_UtfToUniChar(utfBytes, &ch);
01409             }
01410             c = UCHAR(ch);
01411             if (isalnum(c) || (c == '_')) {     /* INTL: ISO only, UCHAR. */
01412                 src += offset;
01413                 numBytes -= offset;
01414                 continue;
01415             }
01416             if ((c == ':') && (numBytes != 1) && (src[1] == ':')) {
01417                 src += 2;
01418                 numBytes -= 2;
01419                 while (numBytes && (*src == ':')) {
01420                     src++;
01421                     numBytes--;
01422                 }
01423                 continue;
01424             }
01425             break;
01426         }
01427 
01428         /*
01429          * Support for empty array names here.
01430          */
01431 
01432         array = (numBytes && (*src == '('));
01433         tokenPtr->size = src - tokenPtr->start;
01434         if ((tokenPtr->size == 0) && !array) {
01435             goto justADollarSign;
01436         }
01437         parsePtr->numTokens++;
01438         if (array) {
01439             /*
01440              * This is a reference to an array element. Call ParseTokens
01441              * recursively to parse the element name, since it could contain
01442              * any number of substitutions.
01443              */
01444 
01445             if (TCL_OK != ParseTokens(src+1, numBytes-1, TYPE_CLOSE_PAREN,
01446                     TCL_SUBST_ALL, parsePtr)) {
01447                 goto error;
01448             }
01449             if ((parsePtr->term == src+numBytes) || (*parsePtr->term != ')')){
01450                 if (parsePtr->interp != NULL) {
01451                     Tcl_SetResult(parsePtr->interp, "missing )",
01452                             TCL_STATIC);
01453                 }
01454                 parsePtr->errorType = TCL_PARSE_MISSING_PAREN;
01455                 parsePtr->term = src;
01456                 parsePtr->incomplete = 1;
01457                 goto error;
01458             }
01459             src = parsePtr->term + 1;
01460         }
01461     }
01462     tokenPtr = &parsePtr->tokenPtr[varIndex];
01463     tokenPtr->size = src - tokenPtr->start;
01464     tokenPtr->numComponents = parsePtr->numTokens - (varIndex + 1);
01465     return TCL_OK;
01466 
01467     /*
01468      * The dollar sign isn't followed by a variable name. Replace the
01469      * TCL_TOKEN_VARIABLE token with a TCL_TOKEN_TEXT token for the dollar
01470      * sign.
01471      */
01472 
01473   justADollarSign:
01474     tokenPtr = &parsePtr->tokenPtr[varIndex];
01475     tokenPtr->type = TCL_TOKEN_TEXT;
01476     tokenPtr->size = 1;
01477     tokenPtr->numComponents = 0;
01478     return TCL_OK;
01479 
01480   error:
01481     Tcl_FreeParse(parsePtr);
01482     return TCL_ERROR;
01483 }
01484 
01485 /*
01486  *----------------------------------------------------------------------
01487  *
01488  * Tcl_ParseVar --
01489  *
01490  *      Given a string starting with a $ sign, parse off a variable name and
01491  *      return its value.
01492  *
01493  * Results:
01494  *      The return value is the contents of the variable given by the leading
01495  *      characters of string. If termPtr isn't NULL, *termPtr gets filled in
01496  *      with the address of the character just after the last one in the
01497  *      variable specifier. If the variable doesn't exist, then the return
01498  *      value is NULL and an error message will be left in interp's result.
01499  *
01500  * Side effects:
01501  *      None.
01502  *
01503  *----------------------------------------------------------------------
01504  */
01505 
01506 const char *
01507 Tcl_ParseVar(
01508     Tcl_Interp *interp,         /* Context for looking up variable. */
01509     register const char *start, /* Start of variable substitution. First
01510                                  * character must be "$". */
01511     const char **termPtr)       /* If non-NULL, points to word to fill in with
01512                                  * character just after last one in the
01513                                  * variable specifier. */
01514 {
01515     register Tcl_Obj *objPtr;
01516     int code;
01517     Tcl_Parse *parsePtr = (Tcl_Parse *)
01518             TclStackAlloc(interp, sizeof(Tcl_Parse));
01519 
01520     if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) {
01521         TclStackFree(interp, parsePtr);
01522         return NULL;
01523     }
01524 
01525     if (termPtr != NULL) {
01526         *termPtr = start + parsePtr->tokenPtr->size;
01527     }
01528     if (parsePtr->numTokens == 1) {
01529         /*
01530          * There isn't a variable name after all: the $ is just a $.
01531          */
01532 
01533         TclStackFree(interp, parsePtr);
01534         return "$";
01535     }
01536 
01537     code = TclSubstTokens(interp, parsePtr->tokenPtr, parsePtr->numTokens,
01538             NULL, 1);
01539     TclStackFree(interp, parsePtr);
01540     if (code != TCL_OK) {
01541         return NULL;
01542     }
01543     objPtr = Tcl_GetObjResult(interp);
01544 
01545     /*
01546      * At this point we should have an object containing the value of a
01547      * variable. Just return the string from that object.
01548      *
01549      * This should have returned the object for the user to manage, but
01550      * instead we have some weak reference to the string value in the object,
01551      * which is why we make sure the object exists after resetting the result.
01552      * This isn't ideal, but it's the best we can do with the current
01553      * documented interface. -- hobbs
01554      */
01555 
01556     if (!Tcl_IsShared(objPtr)) {
01557         Tcl_IncrRefCount(objPtr);
01558     }
01559     Tcl_ResetResult(interp);
01560     return TclGetString(objPtr);
01561 }
01562 
01563 /*
01564  *----------------------------------------------------------------------
01565  *
01566  * Tcl_ParseBraces --
01567  *
01568  *      Given a string in braces such as a Tcl command argument or a string
01569  *      value in a Tcl expression, this function parses the string and returns
01570  *      information about the parse. No more than numBytes bytes will be
01571  *      scanned.
01572  *
01573  * Results:
01574  *      The return value is TCL_OK if the string was parsed successfully and
01575  *      TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an
01576  *      error message is left in its result. On a successful return, tokenPtr
01577  *      and numTokens fields of parsePtr are filled in with information about
01578  *      the string that was parsed. Other fields in parsePtr are undefined.
01579  *      termPtr is set to point to the character just after the last one in
01580  *      the braced string.
01581  *
01582  * Side effects:
01583  *      If there is insufficient space in parsePtr to hold all the information
01584  *      about the command, then additional space is malloc-ed. If the function
01585  *      returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to
01586  *      release any additional space that was allocated.
01587  *
01588  *----------------------------------------------------------------------
01589  */
01590 
01591 int
01592 Tcl_ParseBraces(
01593     Tcl_Interp *interp,         /* Interpreter to use for error reporting; if
01594                                  * NULL, then no error message is provided. */
01595     const char *start,          /* Start of string enclosed in braces. The
01596                                  * first character must be {'. */
01597     register int numBytes,      /* Total number of bytes in string. If < 0,
01598                                  * the string consists of all bytes up to the
01599                                  * first null character. */
01600     register Tcl_Parse *parsePtr,
01601                                 /* Structure to fill in with information about
01602                                  * the string. */
01603     int append,                 /* Non-zero means append tokens to existing
01604                                  * information in parsePtr; zero means ignore
01605                                  * existing tokens in parsePtr and
01606                                  * reinitialize it. */
01607     const char **termPtr)       /* If non-NULL, points to word in which to
01608                                  * store a pointer to the character just after
01609                                  * the terminating '}' if the parse was
01610                                  * successful. */
01611 {
01612     Tcl_Token *tokenPtr;
01613     register const char *src;
01614     int startIndex, level, length;
01615 
01616     if ((numBytes == 0) || (start == NULL)) {
01617         return TCL_ERROR;
01618     }
01619     if (numBytes < 0) {
01620         numBytes = strlen(start);
01621     }
01622 
01623     if (!append) {
01624         TclParseInit(interp, start, numBytes, parsePtr);
01625     }
01626 
01627     src = start;
01628     startIndex = parsePtr->numTokens;
01629 
01630     TclGrowParseTokenArray(parsePtr, 1);
01631     tokenPtr = &parsePtr->tokenPtr[startIndex];
01632     tokenPtr->type = TCL_TOKEN_TEXT;
01633     tokenPtr->start = src+1;
01634     tokenPtr->numComponents = 0;
01635     level = 1;
01636     while (1) {
01637         while (++src, --numBytes) {
01638             if (CHAR_TYPE(*src) != TYPE_NORMAL) {
01639                 break;
01640             }
01641         }
01642         if (numBytes == 0) {
01643             goto missingBraceError;
01644         }
01645 
01646         switch (*src) {
01647         case '{':
01648             level++;
01649             break;
01650         case '}':
01651             if (--level == 0) {
01652                 /*
01653                  * Decide if we need to finish emitting a partially-finished
01654                  * token. There are 3 cases:
01655                  *     {abc \newline xyz} or {xyz}
01656                  *              - finish emitting "xyz" token
01657                  *     {abc \newline}
01658                  *              - don't emit token after \newline
01659                  *     {}       - finish emitting zero-sized token
01660                  *
01661                  * The last case ensures that there is a token (even if empty)
01662                  * that describes the braced string.
01663                  */
01664 
01665                 if ((src != tokenPtr->start)
01666                         || (parsePtr->numTokens == startIndex)) {
01667                     tokenPtr->size = (src - tokenPtr->start);
01668                     parsePtr->numTokens++;
01669                 }
01670                 if (termPtr != NULL) {
01671                     *termPtr = src+1;
01672                 }
01673                 return TCL_OK;
01674             }
01675             break;
01676         case '\\':
01677             TclParseBackslash(src, numBytes, &length, NULL);
01678             if ((length > 1) && (src[1] == '\n')) {
01679                 /*
01680                  * A backslash-newline sequence must be collapsed, even inside
01681                  * braces, so we have to split the word into multiple tokens
01682                  * so that the backslash-newline can be represented
01683                  * explicitly.
01684                  */
01685 
01686                 if (numBytes == 2) {
01687                     parsePtr->incomplete = 1;
01688                 }
01689                 tokenPtr->size = (src - tokenPtr->start);
01690                 if (tokenPtr->size != 0) {
01691                     parsePtr->numTokens++;
01692                 }
01693                 TclGrowParseTokenArray(parsePtr, 2);
01694                 tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];
01695                 tokenPtr->type = TCL_TOKEN_BS;
01696                 tokenPtr->start = src;
01697                 tokenPtr->size = length;
01698                 tokenPtr->numComponents = 0;
01699                 parsePtr->numTokens++;
01700 
01701                 src += length - 1;
01702                 numBytes -= length - 1;
01703                 tokenPtr++;
01704                 tokenPtr->type = TCL_TOKEN_TEXT;
01705                 tokenPtr->start = src + 1;
01706                 tokenPtr->numComponents = 0;
01707             } else {
01708                 src += length - 1;
01709                 numBytes -= length - 1;
01710             }
01711             break;
01712         }
01713     }
01714 
01715   missingBraceError:
01716     parsePtr->errorType = TCL_PARSE_MISSING_BRACE;
01717     parsePtr->term = start;
01718     parsePtr->incomplete = 1;
01719     if (parsePtr->interp == NULL) {
01720         /*
01721          * Skip straight to the exit code since we have no interpreter to put
01722          * error message in.
01723          */
01724 
01725         goto error;
01726     }
01727 
01728     Tcl_SetResult(parsePtr->interp, "missing close-brace", TCL_STATIC);
01729 
01730     /*
01731      * Guess if the problem is due to comments by searching the source string
01732      * for a possible open brace within the context of a comment. Since we
01733      * aren't performing a full Tcl parse, just look for an open brace
01734      * preceded by a '<whitespace>#' on the same line.
01735      */
01736 
01737     {
01738         register int openBrace = 0;
01739 
01740         while (--src > start) {
01741             switch (*src) {
01742             case '{':
01743                 openBrace = 1;
01744                 break;
01745             case '\n':
01746                 openBrace = 0;
01747                 break;
01748             case '#' :
01749                 if (openBrace && isspace(UCHAR(src[-1]))) {
01750                     Tcl_AppendResult(parsePtr->interp,
01751                             ": possible unbalanced brace in comment", NULL);
01752                     goto error;
01753                 }
01754                 break;
01755             }
01756         }
01757     }
01758 
01759   error:
01760     Tcl_FreeParse(parsePtr);
01761     return TCL_ERROR;
01762 }
01763 
01764 /*
01765  *----------------------------------------------------------------------
01766  *
01767  * Tcl_ParseQuotedString --
01768  *
01769  *      Given a double-quoted string such as a quoted Tcl command argument or
01770  *      a quoted value in a Tcl expression, this function parses the string
01771  *      and returns information about the parse. No more than numBytes bytes
01772  *      will be scanned.
01773  *
01774  * Results:
01775  *      The return value is TCL_OK if the string was parsed successfully and
01776  *      TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an
01777  *      error message is left in its result. On a successful return, tokenPtr
01778  *      and numTokens fields of parsePtr are filled in with information about
01779  *      the string that was parsed. Other fields in parsePtr are undefined.
01780  *      termPtr is set to point to the character just after the quoted
01781  *      string's terminating close-quote.
01782  *
01783  * Side effects:
01784  *      If there is insufficient space in parsePtr to hold all the information
01785  *      about the command, then additional space is malloc-ed. If the function
01786  *      returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to
01787  *      release any additional space that was allocated.
01788  *
01789  *----------------------------------------------------------------------
01790  */
01791 
01792 int
01793 Tcl_ParseQuotedString(
01794     Tcl_Interp *interp,         /* Interpreter to use for error reporting; if
01795                                  * NULL, then no error message is provided. */
01796     const char *start,          /* Start of the quoted string. The first
01797                                  * character must be '"'. */
01798     register int numBytes,      /* Total number of bytes in string. If < 0,
01799                                  * the string consists of all bytes up to the
01800                                  * first null character. */
01801     register Tcl_Parse *parsePtr,
01802                                 /* Structure to fill in with information about
01803                                  * the string. */
01804     int append,                 /* Non-zero means append tokens to existing
01805                                  * information in parsePtr; zero means ignore
01806                                  * existing tokens in parsePtr and
01807                                  * reinitialize it. */
01808     const char **termPtr)       /* If non-NULL, points to word in which to
01809                                  * store a pointer to the character just after
01810                                  * the quoted string's terminating close-quote
01811                                  * if the parse succeeds. */
01812 {
01813     if ((numBytes == 0) || (start == NULL)) {
01814         return TCL_ERROR;
01815     }
01816     if (numBytes < 0) {
01817         numBytes = strlen(start);
01818     }
01819 
01820     if (!append) {
01821         TclParseInit(interp, start, numBytes, parsePtr);
01822     }
01823 
01824     if (TCL_OK != ParseTokens(start+1, numBytes-1, TYPE_QUOTE, TCL_SUBST_ALL,
01825             parsePtr)) {
01826         goto error;
01827     }
01828     if (*parsePtr->term != '"') {
01829         if (parsePtr->interp != NULL) {
01830             Tcl_SetResult(parsePtr->interp, "missing \"", TCL_STATIC);
01831         }
01832         parsePtr->errorType = TCL_PARSE_MISSING_QUOTE;
01833         parsePtr->term = start;
01834         parsePtr->incomplete = 1;
01835         goto error;
01836     }
01837     if (termPtr != NULL) {
01838         *termPtr = (parsePtr->term + 1);
01839     }
01840     return TCL_OK;
01841 
01842   error:
01843     Tcl_FreeParse(parsePtr);
01844     return TCL_ERROR;
01845 }
01846 
01847 /*
01848  *----------------------------------------------------------------------
01849  *
01850  * Tcl_SubstObj --
01851  *
01852  *      This function performs the substitutions specified on the given string
01853  *      as described in the user documentation for the "subst" Tcl command.
01854  *
01855  * Results:
01856  *      A Tcl_Obj* containing the substituted string, or NULL to indicate that
01857  *      an error occurred.
01858  *
01859  * Side effects:
01860  *      See the user documentation.
01861  *
01862  *----------------------------------------------------------------------
01863  */
01864 
01865 Tcl_Obj *
01866 Tcl_SubstObj(
01867     Tcl_Interp *interp,         /* Interpreter in which substitution occurs */
01868     Tcl_Obj *objPtr,            /* The value to be substituted. */
01869     int flags)                  /* What substitutions to do. */
01870 {
01871     int length, tokensLeft, code;
01872     Tcl_Token *endTokenPtr;
01873     Tcl_Obj *result, *errMsg = NULL;
01874     CONST char *p = TclGetStringFromObj(objPtr, &length);
01875     Tcl_Parse *parsePtr = (Tcl_Parse *)
01876             TclStackAlloc(interp, sizeof(Tcl_Parse));
01877 
01878     TclParseInit(interp, p, length, parsePtr);
01879 
01880     /*
01881      * First parse the string rep of objPtr, as if it were enclosed as a
01882      * "-quoted word in a normal Tcl command. Honor flags that selectively
01883      * inhibit types of substitution.
01884      */
01885 
01886     if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) {
01887         /*
01888          * There was a parse error. Save the error message for possible
01889          * reporting later.
01890          */
01891 
01892         errMsg = Tcl_GetObjResult(interp);
01893         Tcl_IncrRefCount(errMsg);
01894 
01895         /*
01896          * We need to re-parse to get the portion of the string we can [subst]
01897          * before the parse error. Sadly, all the Tcl_Token's created by the
01898          * first parse attempt are gone, freed according to the public spec
01899          * for the Tcl_Parse* routines. The only clue we have is parse.term,
01900          * which points to either the unmatched opener, or to characters that
01901          * follow a close brace or close quote.
01902          *
01903          * Call ParseTokens again, working on the string up to parse.term.
01904          * Keep repeating until we get a good parse on a prefix.
01905          */
01906 
01907         do {
01908             parsePtr->numTokens = 0;
01909             parsePtr->tokensAvailable = NUM_STATIC_TOKENS;
01910             parsePtr->end = parsePtr->term;
01911             parsePtr->incomplete = 0;
01912             parsePtr->errorType = TCL_PARSE_SUCCESS;
01913         } while (TCL_OK !=
01914                 ParseTokens(p, parsePtr->end - p, 0, flags, parsePtr));
01915 
01916         /*
01917          * The good parse will have to be followed by {, (, or [.
01918          */
01919 
01920         switch (*(parsePtr->term)) {
01921         case '{':
01922             /*
01923              * Parse error was a missing } in a ${varname} variable
01924              * substitution at the toplevel. We will subst everything up to
01925              * that broken variable substitution before reporting the parse
01926              * error. Substituting the leftover '$' will have no side-effects,
01927              * so the current token stream is fine.
01928              */
01929             break;
01930 
01931         case '(':
01932             /*
01933              * Parse error was during the parsing of the index part of an
01934              * array variable substitution at the toplevel.
01935              */
01936 
01937             if (*(parsePtr->term - 1) == '$') {
01938                 /*
01939                  * Special case where removing the array index left us with
01940                  * just a dollar sign (array variable with name the empty
01941                  * string as its name), instead of with a scalar variable
01942                  * reference.
01943                  *
01944                  * As in the previous case, existing token stream is OK.
01945                  */
01946             } else {
01947                 /*
01948                  * The current parse includes a successful parse of a scalar
01949                  * variable substitution where there should have been an array
01950                  * variable substitution. We remove that mistaken part of the
01951                  * parse before moving on. A scalar variable substitution is
01952                  * two tokens.
01953                  */
01954 
01955                 Tcl_Token *varTokenPtr =
01956                         parsePtr->tokenPtr + parsePtr->numTokens - 2;
01957 
01958                 if (varTokenPtr->type != TCL_TOKEN_VARIABLE) {
01959                     Tcl_Panic("Tcl_SubstObj: programming error");
01960                 }
01961                 if (varTokenPtr[1].type != TCL_TOKEN_TEXT) {
01962                     Tcl_Panic("Tcl_SubstObj: programming error");
01963                 }
01964                 parsePtr->numTokens -= 2;
01965             }
01966             break;
01967         case '[':
01968             /*
01969              * Parse error occurred during parsing of a toplevel command
01970              * substitution.
01971              */
01972 
01973             parsePtr->end = p + length;
01974             p = parsePtr->term + 1;
01975             length = parsePtr->end - p;
01976             if (length == 0) {
01977                 /*
01978                  * No commands, just an unmatched [. As in previous cases,
01979                  * existing token stream is OK.
01980                  */
01981             } else {
01982                 /*
01983                  * We want to add the parsing of as many commands as we can
01984                  * within that substitution until we reach the actual parse
01985                  * error. We'll do additional parsing to determine what length
01986                  * to claim for the final TCL_TOKEN_COMMAND token.
01987                  */
01988 
01989                 Tcl_Token *tokenPtr;
01990                 const char *lastTerm = parsePtr->term;
01991                 Tcl_Parse *nestedPtr = (Tcl_Parse *)
01992                         TclStackAlloc(interp, sizeof(Tcl_Parse));
01993 
01994                 while (TCL_OK ==
01995                         Tcl_ParseCommand(NULL, p, length, 0, nestedPtr)) {
01996                     Tcl_FreeParse(nestedPtr);
01997                     p = nestedPtr->term + (nestedPtr->term < nestedPtr->end);
01998                     length = nestedPtr->end - p;
01999                     if ((length == 0) && (nestedPtr->term == nestedPtr->end)) {
02000                         /*
02001                          * If we run out of string, blame the missing close
02002                          * bracket on the last command, and do not evaluate it
02003                          * during substitution.
02004                          */
02005 
02006                         break;
02007                     }
02008                     lastTerm = nestedPtr->term;
02009                 }
02010                 TclStackFree(interp, nestedPtr);
02011 
02012                 if (lastTerm == parsePtr->term) {
02013                     /*
02014                      * Parse error in first command. No commands to subst, add
02015                      * no more tokens.
02016                      */
02017                     break;
02018                 }
02019 
02020                 /*
02021                  * Create a command substitution token for whatever commands
02022                  * got parsed.
02023                  */
02024 
02025                 TclGrowParseTokenArray(parsePtr, 1);
02026                 tokenPtr = &(parsePtr->tokenPtr[parsePtr->numTokens]);
02027                 tokenPtr->start = parsePtr->term;
02028                 tokenPtr->numComponents = 0;
02029                 tokenPtr->type = TCL_TOKEN_COMMAND;
02030                 tokenPtr->size = lastTerm - tokenPtr->start + 1;
02031                 parsePtr->numTokens++;
02032             }
02033             break;
02034 
02035         default:
02036             Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]);
02037         }
02038     }
02039 
02040     /*
02041      * Next, substitute the parsed tokens just as in normal Tcl evaluation.
02042      */
02043 
02044     endTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens;
02045     tokensLeft = parsePtr->numTokens;
02046     code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft,
02047             &tokensLeft, 1);
02048     if (code == TCL_OK) {
02049         Tcl_FreeParse(parsePtr);
02050         TclStackFree(interp, parsePtr);
02051         if (errMsg != NULL) {
02052             Tcl_SetObjResult(interp, errMsg);
02053             Tcl_DecrRefCount(errMsg);
02054             return NULL;
02055         }
02056         return Tcl_GetObjResult(interp);
02057     }
02058 
02059     result = Tcl_NewObj();
02060     while (1) {
02061         switch (code) {
02062         case TCL_ERROR:
02063             Tcl_FreeParse(parsePtr);
02064             TclStackFree(interp, parsePtr);
02065             Tcl_DecrRefCount(result);
02066             if (errMsg != NULL) {
02067                 Tcl_DecrRefCount(errMsg);
02068             }
02069             return NULL;
02070         case TCL_BREAK:
02071             tokensLeft = 0;             /* Halt substitution */
02072         default:
02073             Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp));
02074         }
02075 
02076         if (tokensLeft == 0) {
02077             Tcl_FreeParse(parsePtr);
02078             TclStackFree(interp, parsePtr);
02079             if (errMsg != NULL) {
02080                 if (code != TCL_BREAK) {
02081                     Tcl_DecrRefCount(result);
02082                     Tcl_SetObjResult(interp, errMsg);
02083                     Tcl_DecrRefCount(errMsg);
02084                     return NULL;
02085                 }
02086                 Tcl_DecrRefCount(errMsg);
02087             }
02088             return result;
02089         }
02090 
02091         code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft,
02092                 &tokensLeft, 1);
02093     }
02094 }
02095 
02096 /*
02097  *----------------------------------------------------------------------
02098  *
02099  * TclSubstTokens --
02100  *
02101  *      Accepts an array of count Tcl_Token's, and creates a result value in
02102  *      the interp from concatenating the results of performing Tcl
02103  *      substitution on each Tcl_Token. Substitution is interrupted if any
02104  *      non-TCL_OK completion code arises.
02105  *
02106  * Results:
02107  *      The return value is a standard Tcl completion code. The result in
02108  *      interp is the substituted value, or an error message if TCL_ERROR is
02109  *      returned. If tokensLeftPtr is not NULL, then it points to an int where
02110  *      the number of tokens remaining to be processed is written.
02111  *
02112  * Side effects:
02113  *      Can be anything, depending on the types of substitution done.
02114  *
02115  *----------------------------------------------------------------------
02116  */
02117 
02118 int
02119 TclSubstTokens(
02120     Tcl_Interp *interp,         /* Interpreter in which to lookup variables,
02121                                  * execute nested commands, and report
02122                                  * errors. */
02123     Tcl_Token *tokenPtr,        /* Pointer to first in an array of tokens to
02124                                  * evaluate and concatenate. */
02125     int count,                  /* Number of tokens to consider at tokenPtr.
02126                                  * Must be at least 1. */
02127     int *tokensLeftPtr,         /* If not NULL, points to memory where an
02128                                  * integer representing the number of tokens
02129                                  * left to be substituted will be written */
02130     int line)                   /* The line the script starts on. */
02131 {
02132     Tcl_Obj *result;
02133     int code = TCL_OK;
02134 
02135     /*
02136      * Each pass through this loop will substitute one token, and its
02137      * components, if any. The only thing tricky here is that we go to some
02138      * effort to pass Tcl_Obj's through untouched, to avoid string copying and
02139      * Tcl_Obj creation if possible, to aid performance and limit shimmering.
02140      *
02141      * Further optimization opportunities might be to check for the equivalent
02142      * of Tcl_SetObjResult(interp, Tcl_GetObjResult(interp)) and omit them.
02143      */
02144 
02145     result = NULL;
02146     for (; count>0 && code==TCL_OK ; count--, tokenPtr++) {
02147         Tcl_Obj *appendObj = NULL;
02148         const char *append = NULL;
02149         int appendByteLength = 0;
02150         char utfCharBytes[TCL_UTF_MAX];
02151 
02152         switch (tokenPtr->type) {
02153         case TCL_TOKEN_TEXT:
02154             append = tokenPtr->start;
02155             appendByteLength = tokenPtr->size;
02156             break;
02157 
02158         case TCL_TOKEN_BS:
02159             appendByteLength = Tcl_UtfBackslash(tokenPtr->start, NULL,
02160                     utfCharBytes);
02161             append = utfCharBytes;
02162             break;
02163 
02164         case TCL_TOKEN_COMMAND: {
02165             Interp *iPtr = (Interp *) interp;
02166 
02167             iPtr->numLevels++;
02168             code = TclInterpReady(interp);
02169             if (code == TCL_OK) {
02170                 /* TIP #280: Transfer line information to nested command */
02171                 code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2,
02172                         0, line);
02173             }
02174             iPtr->numLevels--;
02175             appendObj = Tcl_GetObjResult(interp);
02176             break;
02177         }
02178 
02179         case TCL_TOKEN_VARIABLE: {
02180             Tcl_Obj *arrayIndex = NULL;
02181             Tcl_Obj *varName = NULL;
02182 
02183             if (tokenPtr->numComponents > 1) {
02184                 /*
02185                  * Subst the index part of an array variable reference.
02186                  */
02187 
02188                 code = TclSubstTokens(interp, tokenPtr+2,
02189                         tokenPtr->numComponents - 1, NULL, line);
02190                 arrayIndex = Tcl_GetObjResult(interp);
02191                 Tcl_IncrRefCount(arrayIndex);
02192             }
02193 
02194             if (code == TCL_OK) {
02195                 varName = Tcl_NewStringObj(tokenPtr[1].start,
02196                         tokenPtr[1].size);
02197                 appendObj = Tcl_ObjGetVar2(interp, varName, arrayIndex,
02198                         TCL_LEAVE_ERR_MSG);
02199                 Tcl_DecrRefCount(varName);
02200                 if (appendObj == NULL) {
02201                     code = TCL_ERROR;
02202                 }
02203             }
02204 
02205             switch (code) {
02206             case TCL_OK:        /* Got value */
02207             case TCL_ERROR:     /* Already have error message */
02208             case TCL_BREAK:     /* Will not substitute anyway */
02209             case TCL_CONTINUE:  /* Will not substitute anyway */
02210                 break;
02211             default:
02212                 /*
02213                  * All other return codes, we will subst the result from the
02214                  * code-throwing evaluation.
02215                  */
02216 
02217                 appendObj = Tcl_GetObjResult(interp);
02218             }
02219 
02220             if (arrayIndex != NULL) {
02221                 Tcl_DecrRefCount(arrayIndex);
02222             }
02223             count -= tokenPtr->numComponents;
02224             tokenPtr += tokenPtr->numComponents;
02225             break;
02226         }
02227 
02228         default:
02229             Tcl_Panic("unexpected token type in TclSubstTokens: %d",
02230                     tokenPtr->type);
02231         }
02232 
02233         if ((code == TCL_BREAK) || (code == TCL_CONTINUE)) {
02234             /*
02235              * Inhibit substitution.
02236              */
02237             continue;
02238         }
02239 
02240         if (result == NULL) {
02241             /*
02242              * First pass through. If we have a Tcl_Obj, just use it. If not,
02243              * create one from our string.
02244              */
02245 
02246             if (appendObj != NULL) {
02247                 result = appendObj;
02248             } else {
02249                 result = Tcl_NewStringObj(append, appendByteLength);
02250             }
02251             Tcl_IncrRefCount(result);
02252         } else {
02253             /*
02254              * Subsequent passes. Append to result.
02255              */
02256 
02257             if (Tcl_IsShared(result)) {
02258                 Tcl_DecrRefCount(result);
02259                 result = Tcl_DuplicateObj(result);
02260                 Tcl_IncrRefCount(result);
02261             }
02262             if (appendObj != NULL) {
02263                 Tcl_AppendObjToObj(result, appendObj);
02264             } else {
02265                 Tcl_AppendToObj(result, append, appendByteLength);
02266             }
02267         }
02268     }
02269 
02270     if (code != TCL_ERROR) {            /* Keep error message in result! */
02271         if (result != NULL) {
02272             Tcl_SetObjResult(interp, result);
02273         } else {
02274             Tcl_ResetResult(interp);
02275         }
02276     }
02277     if (tokensLeftPtr != NULL) {
02278         *tokensLeftPtr = count;
02279     }
02280     if (result != NULL) {
02281         Tcl_DecrRefCount(result);
02282     }
02283     return code;
02284 }
02285 
02286 /*
02287  *----------------------------------------------------------------------
02288  *
02289  * CommandComplete --
02290  *
02291  *      This function is shared by TclCommandComplete and
02292  *      Tcl_ObjCommandComplete; it does all the real work of seeing whether a
02293  *      script is complete
02294  *
02295  * Results:
02296  *      1 is returned if the script is complete, 0 if there are open
02297  *      delimiters such as " or (. 1 is also returned if there is a parse
02298  *      error in the script other than unmatched delimiters.
02299  *
02300  * Side effects:
02301  *      None.
02302  *
02303  *----------------------------------------------------------------------
02304  */
02305 
02306 static inline int
02307 CommandComplete(
02308     const char *script,         /* Script to check. */
02309     int numBytes)               /* Number of bytes in script. */
02310 {
02311     Tcl_Parse parse;
02312     const char *p, *end;
02313     int result;
02314 
02315     p = script;
02316     end = p + numBytes;
02317     while (Tcl_ParseCommand(NULL, p, end - p, 0, &parse) == TCL_OK) {
02318         p = parse.commandStart + parse.commandSize;
02319         if (p >= end) {
02320             break;
02321         }
02322         Tcl_FreeParse(&parse);
02323     }
02324     if (parse.incomplete) {
02325         result = 0;
02326     } else {
02327         result = 1;
02328     }
02329     Tcl_FreeParse(&parse);
02330     return result;
02331 }
02332 
02333 /*
02334  *----------------------------------------------------------------------
02335  *
02336  * Tcl_CommandComplete --
02337  *
02338  *      Given a partial or complete Tcl script, this function determines
02339  *      whether the script is complete in the sense of having matched braces
02340  *      and quotes and brackets.
02341  *
02342  * Results:
02343  *      1 is returned if the script is complete, 0 otherwise. 1 is also
02344  *      returned if there is a parse error in the script other than unmatched
02345  *      delimiters.
02346  *
02347  * Side effects:
02348  *      None.
02349  *
02350  *----------------------------------------------------------------------
02351  */
02352 
02353 int
02354 Tcl_CommandComplete(
02355     const char *script)         /* Script to check. */
02356 {
02357     return CommandComplete(script, (int) strlen(script));
02358 }
02359 
02360 /*
02361  *----------------------------------------------------------------------
02362  *
02363  * TclObjCommandComplete --
02364  *
02365  *      Given a partial or complete Tcl command in a Tcl object, this function
02366  *      determines whether the command is complete in the sense of having
02367  *      matched braces and quotes and brackets.
02368  *
02369  * Results:
02370  *      1 is returned if the command is complete, 0 otherwise.
02371  *
02372  * Side effects:
02373  *      None.
02374  *
02375  *----------------------------------------------------------------------
02376  */
02377 
02378 int
02379 TclObjCommandComplete(
02380     Tcl_Obj *objPtr)            /* Points to object holding script to
02381                                  * check. */
02382 {
02383     int length;
02384     const char *script = Tcl_GetStringFromObj(objPtr, &length);
02385 
02386     return CommandComplete(script, length);
02387 }
02388 
02389 /*
02390  *----------------------------------------------------------------------
02391  *
02392  * TclIsLocalScalar --
02393  *
02394  *      Check to see if a given string is a legal scalar variable name with no
02395  *      namespace qualifiers or substitutions.
02396  *
02397  * Results:
02398  *      Returns 1 if the variable is a local scalar.
02399  *
02400  * Side effects:
02401  *      None.
02402  *
02403  *----------------------------------------------------------------------
02404  */
02405 
02406 int
02407 TclIsLocalScalar(
02408     const char *src,
02409     int len)
02410 {
02411     const char *p;
02412     const char *lastChar = src + (len - 1);
02413 
02414     for (p=src ; p<=lastChar ; p++) {
02415         if ((CHAR_TYPE(*p) != TYPE_NORMAL) &&
02416                 (CHAR_TYPE(*p) != TYPE_COMMAND_END)) {
02417             /*
02418              * TCL_COMMAND_END is returned for the last character of the
02419              * string. By this point we know it isn't an array or namespace
02420              * reference.
02421              */
02422 
02423             return 0;
02424         }
02425         if (*p == '(') {
02426             if (*lastChar == ')') {     /* We have an array element */
02427                 return 0;
02428             }
02429         } else if (*p == ':') {
02430             if ((p != lastChar) && *(p+1) == ':') {     /* qualified name */
02431                 return 0;
02432             }
02433         }
02434     }
02435 
02436     return 1;
02437 }
02438 
02439 /*
02440  * Local Variables:
02441  * mode: c
02442  * c-basic-offset: 4
02443  * fill-column: 78
02444  * End:
02445  */



Generated on Wed Mar 12 12:18:20 2008 by  doxygen 1.5.1