tclParse.cGo to the documentation of this file.00001 /* 00002 * tclParse.c -- 00003 * 00004 * This file contains functions that parse Tcl scripts. They do so in a 00005 * general-purpose fashion that can be used for many different purposes, 00006 * including compilation, direct execution, code analysis, etc. 00007 * 00008 * Copyright (c) 1997 Sun Microsystems, Inc. 00009 * Copyright (c) 1998-2000 Ajuba Solutions. 00010 * Contributions from Don Porter, NIST, 2002. (not subject to US copyright) 00011 * 00012 * See the file "license.terms" for information on usage and redistribution of 00013 * this file, and for a DISCLAIMER OF ALL WARRANTIES. 00014 * 00015 * RCS: @(#) $Id: tclParse.c,v 1.62 2008/01/23 21:58:36 dgp Exp $ 00016 */ 00017 00018 #include "tclInt.h" 00019 00020 /* 00021 * The following table provides parsing information about each possible 8-bit 00022 * character. The table is designed to be referenced with either signed or 00023 * unsigned characters, so it has 384 entries. The first 128 entries 00024 * correspond to negative character values, the next 256 correspond to 00025 * positive character values. The last 128 entries are identical to the first 00026 * 128. The table is always indexed with a 128-byte offset (the 128th entry 00027 * corresponds to a character value of 0). 00028 * 00029 * The macro CHAR_TYPE is used to index into the table and return information 00030 * about its character argument. The following return values are defined. 00031 * 00032 * TYPE_NORMAL - All characters that don't have special significance to 00033 * the Tcl parser. 00034 * TYPE_SPACE - The character is a whitespace character other than 00035 * newline. 00036 * TYPE_COMMAND_END - Character is newline or semicolon. 00037 * TYPE_SUBS - Character begins a substitution or has other special 00038 * meaning in ParseTokens: backslash, dollar sign, or 00039 * open bracket. 00040 * TYPE_QUOTE - Character is a double quote. 00041 * TYPE_CLOSE_PAREN - Character is a right parenthesis. 00042 * TYPE_CLOSE_BRACK - Character is a right square bracket. 00043 * TYPE_BRACE - Character is a curly brace (either left or right). 00044 */ 00045 00046 #define TYPE_NORMAL 0 00047 #define TYPE_SPACE 0x1 00048 #define TYPE_COMMAND_END 0x2 00049 #define TYPE_SUBS 0x4 00050 #define TYPE_QUOTE 0x8 00051 #define TYPE_CLOSE_PAREN 0x10 00052 #define TYPE_CLOSE_BRACK 0x20 00053 #define TYPE_BRACE 0x40 00054 00055 #define CHAR_TYPE(c) (charTypeTable+128)[(int)(c)] 00056 00057 static const char charTypeTable[] = { 00058 /* 00059 * Negative character values, from -128 to -1: 00060 */ 00061 00062 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00063 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00064 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00065 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00066 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00067 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00068 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00069 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00070 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00071 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00072 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00073 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00074 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00075 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00076 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00077 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00078 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00079 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00080 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00081 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00082 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00083 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00084 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00085 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00086 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00087 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00088 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00089 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00090 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00091 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00092 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00093 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00094 00095 /* 00096 * Positive character values, from 0-127: 00097 */ 00098 00099 TYPE_SUBS, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00100 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00101 TYPE_NORMAL, TYPE_SPACE, TYPE_COMMAND_END, TYPE_SPACE, 00102 TYPE_SPACE, TYPE_SPACE, TYPE_NORMAL, TYPE_NORMAL, 00103 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00104 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00105 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00106 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00107 TYPE_SPACE, TYPE_NORMAL, TYPE_QUOTE, TYPE_NORMAL, 00108 TYPE_SUBS, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00109 TYPE_NORMAL, TYPE_CLOSE_PAREN, TYPE_NORMAL, TYPE_NORMAL, 00110 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00111 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00112 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00113 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_COMMAND_END, 00114 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00115 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00116 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00117 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00118 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00119 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00120 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00121 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_SUBS, 00122 TYPE_SUBS, TYPE_CLOSE_BRACK, TYPE_NORMAL, TYPE_NORMAL, 00123 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00124 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00125 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00126 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00127 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00128 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00129 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_BRACE, 00130 TYPE_NORMAL, TYPE_BRACE, TYPE_NORMAL, TYPE_NORMAL, 00131 00132 /* 00133 * Large unsigned character values, from 128-255: 00134 */ 00135 00136 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00137 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00138 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00139 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00140 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00141 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00142 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00143 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00144 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00145 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00146 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00147 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00148 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00149 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00150 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00151 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00152 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00153 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00154 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00155 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00156 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00157 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00158 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00159 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00160 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00161 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00162 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00163 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00164 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00165 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00166 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00167 TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, 00168 }; 00169 00170 /* 00171 * Prototypes for local functions defined in this file: 00172 */ 00173 00174 static inline int CommandComplete(const char *script, int numBytes); 00175 static int ParseComment(const char *src, int numBytes, 00176 Tcl_Parse *parsePtr); 00177 static int ParseTokens(const char *src, int numBytes, int mask, 00178 int flags, Tcl_Parse *parsePtr); 00179 static int ParseWhiteSpace(const char *src, int numBytes, 00180 int *incompletePtr, char *typePtr); 00181 00182 /* 00183 *---------------------------------------------------------------------- 00184 * 00185 * TclParseInit -- 00186 * 00187 * Initialize the fields of a Tcl_Parse struct. 00188 * 00189 * Results: 00190 * None. 00191 * 00192 * Side effects: 00193 * The Tcl_Parse struct pointed to by parsePtr gets initialized. 00194 * 00195 *---------------------------------------------------------------------- 00196 */ 00197 00198 void 00199 TclParseInit( 00200 Tcl_Interp *interp, /* Interpreter to use for error reporting */ 00201 const char *start, /* Start of string to be parsed. */ 00202 int numBytes, /* Total number of bytes in string. If < 0, 00203 * the script consists of all bytes up to the 00204 * first null character. */ 00205 Tcl_Parse *parsePtr) /* Points to struct to initialize */ 00206 { 00207 parsePtr->numWords = 0; 00208 parsePtr->tokenPtr = parsePtr->staticTokens; 00209 parsePtr->numTokens = 0; 00210 parsePtr->tokensAvailable = NUM_STATIC_TOKENS; 00211 parsePtr->string = start; 00212 parsePtr->end = start + numBytes; 00213 parsePtr->term = parsePtr->end; 00214 parsePtr->interp = interp; 00215 parsePtr->incomplete = 0; 00216 parsePtr->errorType = TCL_PARSE_SUCCESS; 00217 } 00218 00219 /* 00220 *---------------------------------------------------------------------- 00221 * 00222 * Tcl_ParseCommand -- 00223 * 00224 * Given a string, this function parses the first Tcl command in the 00225 * string and returns information about the structure of the command. 00226 * 00227 * Results: 00228 * The return value is TCL_OK if the command was parsed successfully and 00229 * TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an 00230 * error message is left in its result. On a successful return, parsePtr 00231 * is filled in with information about the command that was parsed. 00232 * 00233 * Side effects: 00234 * If there is insufficient space in parsePtr to hold all the information 00235 * about the command, then additional space is malloc-ed. If the function 00236 * returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to 00237 * release any additional space that was allocated. 00238 * 00239 *---------------------------------------------------------------------- 00240 */ 00241 00242 int 00243 Tcl_ParseCommand( 00244 Tcl_Interp *interp, /* Interpreter to use for error reporting; if 00245 * NULL, then no error message is provided. */ 00246 const char *start, /* First character of string containing one or 00247 * more Tcl commands. */ 00248 register int numBytes, /* Total number of bytes in string. If < 0, 00249 * the script consists of all bytes up to the 00250 * first null character. */ 00251 int nested, /* Non-zero means this is a nested command: 00252 * close bracket should be considered a 00253 * command terminator. If zero, then close 00254 * bracket has no special meaning. */ 00255 register Tcl_Parse *parsePtr) 00256 /* Structure to fill in with information about 00257 * the parsed command; any previous 00258 * information in the structure is ignored. */ 00259 { 00260 register const char *src; /* Points to current character in the 00261 * command. */ 00262 char type; /* Result returned by CHAR_TYPE(*src). */ 00263 Tcl_Token *tokenPtr; /* Pointer to token being filled in. */ 00264 int wordIndex; /* Index of word token for current word. */ 00265 int terminators; /* CHAR_TYPE bits that indicate the end of a 00266 * command. */ 00267 const char *termPtr; /* Set by Tcl_ParseBraces/QuotedString to 00268 * point to char after terminating one. */ 00269 int scanned; 00270 00271 if ((start == NULL) && (numBytes != 0)) { 00272 if (interp != NULL) { 00273 Tcl_SetResult(interp, "can't parse a NULL pointer", TCL_STATIC); 00274 } 00275 return TCL_ERROR; 00276 } 00277 if (numBytes < 0) { 00278 numBytes = strlen(start); 00279 } 00280 TclParseInit(interp, start, numBytes, parsePtr); 00281 parsePtr->commentStart = NULL; 00282 parsePtr->commentSize = 0; 00283 parsePtr->commandStart = NULL; 00284 parsePtr->commandSize = 0; 00285 if (nested != 0) { 00286 terminators = TYPE_COMMAND_END | TYPE_CLOSE_BRACK; 00287 } else { 00288 terminators = TYPE_COMMAND_END; 00289 } 00290 00291 /* 00292 * Parse any leading space and comments before the first word of the 00293 * command. 00294 */ 00295 00296 scanned = ParseComment(start, numBytes, parsePtr); 00297 src = (start + scanned); 00298 numBytes -= scanned; 00299 if (numBytes == 0) { 00300 if (nested) { 00301 parsePtr->incomplete = nested; 00302 } 00303 } 00304 00305 /* 00306 * The following loop parses the words of the command, one word in each 00307 * iteration through the loop. 00308 */ 00309 00310 parsePtr->commandStart = src; 00311 while (1) { 00312 int expandWord = 0; 00313 00314 /* 00315 * Create the token for the word. 00316 */ 00317 00318 TclGrowParseTokenArray(parsePtr, 1); 00319 wordIndex = parsePtr->numTokens; 00320 tokenPtr = &parsePtr->tokenPtr[wordIndex]; 00321 tokenPtr->type = TCL_TOKEN_WORD; 00322 00323 /* 00324 * Skip white space before the word. Also skip a backslash-newline 00325 * sequence: it should be treated just like white space. 00326 */ 00327 00328 scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type); 00329 src += scanned; 00330 numBytes -= scanned; 00331 if (numBytes == 0) { 00332 parsePtr->term = src; 00333 break; 00334 } 00335 if ((type & terminators) != 0) { 00336 parsePtr->term = src; 00337 src++; 00338 break; 00339 } 00340 tokenPtr->start = src; 00341 parsePtr->numTokens++; 00342 parsePtr->numWords++; 00343 00344 /* 00345 * At this point the word can have one of four forms: something 00346 * enclosed in quotes, something enclosed in braces, and expanding 00347 * word, or an unquoted word (anything else). 00348 */ 00349 00350 parseWord: 00351 if (*src == '"') { 00352 if (Tcl_ParseQuotedString(interp, src, numBytes, parsePtr, 1, 00353 &termPtr) != TCL_OK) { 00354 goto error; 00355 } 00356 src = termPtr; 00357 numBytes = parsePtr->end - src; 00358 } else if (*src == '{') { 00359 int expIdx = wordIndex + 1; 00360 Tcl_Token *expPtr; 00361 00362 if (Tcl_ParseBraces(interp, src, numBytes, parsePtr, 1, 00363 &termPtr) != TCL_OK) { 00364 goto error; 00365 } 00366 src = termPtr; 00367 numBytes = parsePtr->end - src; 00368 00369 /* 00370 * Check whether the braces contained the word expansion prefix 00371 * {*} 00372 */ 00373 00374 expPtr = &parsePtr->tokenPtr[expIdx]; 00375 if ((0 == expandWord) 00376 /* Haven't seen prefix already */ 00377 && (1 == parsePtr->numTokens - expIdx) 00378 /* Only one token */ 00379 && (((1 == (size_t) expPtr->size) 00380 /* Same length as prefix */ 00381 && (expPtr->start[0] == '*'))) 00382 /* Is the prefix */ 00383 && (numBytes > 0) && (0 == ParseWhiteSpace(termPtr, 00384 numBytes, &parsePtr->incomplete, &type)) 00385 && (type != TYPE_COMMAND_END) 00386 /* Non-whitespace follows */) { 00387 expandWord = 1; 00388 parsePtr->numTokens--; 00389 goto parseWord; 00390 } 00391 } else { 00392 /* 00393 * This is an unquoted word. Call ParseTokens and let it do all of 00394 * the work. 00395 */ 00396 00397 if (ParseTokens(src, numBytes, TYPE_SPACE|terminators, 00398 TCL_SUBST_ALL, parsePtr) != TCL_OK) { 00399 goto error; 00400 } 00401 src = parsePtr->term; 00402 numBytes = parsePtr->end - src; 00403 } 00404 00405 /* 00406 * Finish filling in the token for the word and check for the special 00407 * case of a word consisting of a single range of literal text. 00408 */ 00409 00410 tokenPtr = &parsePtr->tokenPtr[wordIndex]; 00411 tokenPtr->size = src - tokenPtr->start; 00412 tokenPtr->numComponents = parsePtr->numTokens - (wordIndex + 1); 00413 if (expandWord) { 00414 int i, isLiteral = 1; 00415 00416 /* 00417 * When a command includes a word that is an expanded literal; for 00418 * example, {*}{1 2 3}, the parser performs that expansion 00419 * immediately, generating several TCL_TOKEN_SIMPLE_WORDs instead 00420 * of a single TCL_TOKEN_EXPAND_WORD that the Tcl_ParseCommand() 00421 * caller might have to expand. This notably makes it simpler for 00422 * those callers that wish to track line endings, such as those 00423 * that implement key parts of TIP 280. 00424 * 00425 * First check whether the thing to be expanded is a literal, 00426 * in the sense of being composed entirely of TCL_TOKEN_TEXT 00427 * tokens. 00428 */ 00429 00430 for (i = 1; i <= tokenPtr->numComponents; i++) { 00431 if (tokenPtr[i].type != TCL_TOKEN_TEXT) { 00432 isLiteral = 0; 00433 break; 00434 } 00435 } 00436 00437 if (isLiteral) { 00438 int elemCount = 0, code = TCL_OK; 00439 const char *nextElem, *listEnd, *elemStart; 00440 00441 /* 00442 * The word to be expanded is a literal, so determine the 00443 * boundaries of the literal string to be treated as a list 00444 * and expanded. That literal string starts at 00445 * tokenPtr[1].start, and includes all bytes up to, but not 00446 * including (tokenPtr[tokenPtr->numComponents].start + 00447 * tokenPtr[tokenPtr->numComponents].size) 00448 */ 00449 00450 listEnd = (tokenPtr[tokenPtr->numComponents].start + 00451 tokenPtr[tokenPtr->numComponents].size); 00452 nextElem = tokenPtr[1].start; 00453 00454 /* 00455 * Step through the literal string, parsing and counting list 00456 * elements. 00457 */ 00458 00459 while (nextElem < listEnd) { 00460 code = TclFindElement(NULL, nextElem, listEnd - nextElem, 00461 &elemStart, &nextElem, NULL, NULL); 00462 if (code != TCL_OK) break; 00463 if (elemStart < listEnd) { 00464 elemCount++; 00465 } 00466 } 00467 00468 if (code != TCL_OK) { 00469 /* 00470 * Some list element could not be parsed. This means the 00471 * literal string was not in fact a valid list. Defer the 00472 * handling of this to compile/eval time, where code is 00473 * already in place to report the "attempt to expand a 00474 * non-list" error. 00475 */ 00476 00477 tokenPtr->type = TCL_TOKEN_EXPAND_WORD; 00478 } else if (elemCount == 0) { 00479 /* 00480 * We are expanding a literal empty list. This means that 00481 * the expanding word completely disappears, leaving no 00482 * word generated this pass through the loop. Adjust 00483 * accounting appropriately. 00484 */ 00485 00486 parsePtr->numWords--; 00487 parsePtr->numTokens = wordIndex; 00488 } else { 00489 /* 00490 * Recalculate the number of Tcl_Tokens needed to store 00491 * tokens representing the expanded list. 00492 */ 00493 00494 int growthNeeded = wordIndex + 2*elemCount 00495 - parsePtr->numTokens; 00496 parsePtr->numWords += elemCount - 1; 00497 if (growthNeeded > 0) { 00498 TclGrowParseTokenArray(parsePtr, growthNeeded); 00499 tokenPtr = &parsePtr->tokenPtr[wordIndex]; 00500 } 00501 parsePtr->numTokens = wordIndex + 2*elemCount; 00502 00503 /* 00504 * Generate a TCL_TOKEN_SIMPLE_WORD token sequence for 00505 * each element of the literal list we are expanding in 00506 * place. Take care with the start and size fields of each 00507 * token so they point to the right literal characters in 00508 * the original script to represent the right expanded 00509 * word value. 00510 */ 00511 00512 nextElem = tokenPtr[1].start; 00513 while (isspace(UCHAR(*nextElem))) { 00514 nextElem++; 00515 } 00516 while (nextElem < listEnd) { 00517 tokenPtr->type = TCL_TOKEN_SIMPLE_WORD; 00518 tokenPtr->numComponents = 1; 00519 tokenPtr->start = nextElem; 00520 00521 tokenPtr++; 00522 tokenPtr->type = TCL_TOKEN_TEXT; 00523 tokenPtr->numComponents = 0; 00524 TclFindElement(NULL, nextElem, listEnd - nextElem, 00525 &(tokenPtr->start), &nextElem, 00526 &(tokenPtr->size), NULL); 00527 if (tokenPtr->start + tokenPtr->size == listEnd) { 00528 tokenPtr[-1].size = listEnd - tokenPtr[-1].start; 00529 } else { 00530 tokenPtr[-1].size = tokenPtr->start 00531 + tokenPtr->size - tokenPtr[-1].start; 00532 tokenPtr[-1].size += (isspace(UCHAR( 00533 tokenPtr->start[tokenPtr->size])) == 0); 00534 } 00535 00536 tokenPtr++; 00537 } 00538 } 00539 } else { 00540 /* 00541 * The word to be expanded is not a literal, so defer 00542 * expansion to compile/eval time by marking with a 00543 * TCL_TOKEN_EXPAND_WORD token. 00544 */ 00545 00546 tokenPtr->type = TCL_TOKEN_EXPAND_WORD; 00547 } 00548 } else if ((tokenPtr->numComponents == 1) 00549 && (tokenPtr[1].type == TCL_TOKEN_TEXT)) { 00550 tokenPtr->type = TCL_TOKEN_SIMPLE_WORD; 00551 } 00552 00553 /* 00554 * Do two additional checks: (a) make sure we're really at the end of 00555 * a word (there might have been garbage left after a quoted or braced 00556 * word), and (b) check for the end of the command. 00557 */ 00558 00559 scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type); 00560 if (scanned) { 00561 src += scanned; 00562 numBytes -= scanned; 00563 continue; 00564 } 00565 00566 if (numBytes == 0) { 00567 parsePtr->term = src; 00568 break; 00569 } 00570 if ((type & terminators) != 0) { 00571 parsePtr->term = src; 00572 src++; 00573 break; 00574 } 00575 if (src[-1] == '"') { 00576 if (interp != NULL) { 00577 Tcl_SetResult(interp, "extra characters after close-quote", 00578 TCL_STATIC); 00579 } 00580 parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; 00581 } else { 00582 if (interp != NULL) { 00583 Tcl_SetResult(interp, "extra characters after close-brace", 00584 TCL_STATIC); 00585 } 00586 parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; 00587 } 00588 parsePtr->term = src; 00589 goto error; 00590 } 00591 00592 parsePtr->commandSize = src - parsePtr->commandStart; 00593 return TCL_OK; 00594 00595 error: 00596 Tcl_FreeParse(parsePtr); 00597 parsePtr->commandSize = parsePtr->end - parsePtr->commandStart; 00598 return TCL_ERROR; 00599 } 00600 00601 /* 00602 *---------------------------------------------------------------------- 00603 * 00604 * ParseWhiteSpace -- 00605 * 00606 * Scans up to numBytes bytes starting at src, consuming white space 00607 * between words as defined by Tcl's parsing rules. 00608 * 00609 * Results: 00610 * Returns the number of bytes recognized as white space. Records at 00611 * parsePtr, information about the parse. Records at typePtr the 00612 * character type of the non-whitespace character that terminated the 00613 * scan. 00614 * 00615 * Side effects: 00616 * None. 00617 * 00618 *---------------------------------------------------------------------- 00619 */ 00620 00621 static int 00622 ParseWhiteSpace( 00623 const char *src, /* First character to parse. */ 00624 register int numBytes, /* Max number of bytes to scan. */ 00625 int *incompletePtr, /* Set this boolean memory to true if parsing 00626 * indicates an incomplete command. */ 00627 char *typePtr) /* Points to location to store character type 00628 * of character that ends run of whitespace */ 00629 { 00630 register char type = TYPE_NORMAL; 00631 register const char *p = src; 00632 00633 while (1) { 00634 while (numBytes && ((type = CHAR_TYPE(*p)) & TYPE_SPACE)) { 00635 numBytes--; 00636 p++; 00637 } 00638 if (numBytes && (type & TYPE_SUBS)) { 00639 if (*p != '\\') { 00640 break; 00641 } 00642 if (--numBytes == 0) { 00643 break; 00644 } 00645 if (p[1] != '\n') { 00646 break; 00647 } 00648 p+=2; 00649 if (--numBytes == 0) { 00650 *incompletePtr = 1; 00651 break; 00652 } 00653 continue; 00654 } 00655 break; 00656 } 00657 *typePtr = type; 00658 return (p - src); 00659 } 00660 00661 /* 00662 *---------------------------------------------------------------------- 00663 * 00664 * TclParseAllWhiteSpace -- 00665 * 00666 * Scans up to numBytes bytes starting at src, consuming all white space 00667 * including the command-terminating newline characters. 00668 * 00669 * Results: 00670 * Returns the number of bytes recognized as white space. 00671 * 00672 *---------------------------------------------------------------------- 00673 */ 00674 00675 int 00676 TclParseAllWhiteSpace( 00677 const char *src, /* First character to parse. */ 00678 int numBytes) /* Max number of byes to scan */ 00679 { 00680 int dummy; 00681 char type; 00682 const char *p = src; 00683 00684 do { 00685 int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type); 00686 00687 p += scanned; 00688 numBytes -= scanned; 00689 } while (numBytes && (*p == '\n') && (p++, --numBytes)); 00690 return (p-src); 00691 } 00692 00693 /* 00694 *---------------------------------------------------------------------- 00695 * 00696 * TclParseHex -- 00697 * 00698 * Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing 00699 * \x and \u escape sequences). At most numBytes bytes are scanned. 00700 * 00701 * Results: 00702 * The numeric value is stored in *resultPtr. Returns the number of bytes 00703 * consumed. 00704 * 00705 * Notes: 00706 * Relies on the following properties of the ASCII character set, with 00707 * which UTF-8 is compatible: 00708 * 00709 * The digits '0' .. '9' and the letters 'A' .. 'Z' and 'a' .. 'z' occupy 00710 * consecutive code points, and '0' < 'A' < 'a'. 00711 * 00712 *---------------------------------------------------------------------- 00713 */ 00714 00715 int 00716 TclParseHex( 00717 const char *src, /* First character to parse. */ 00718 int numBytes, /* Max number of byes to scan */ 00719 Tcl_UniChar *resultPtr) /* Points to storage provided by caller where 00720 * the Tcl_UniChar resulting from the 00721 * conversion is to be written. */ 00722 { 00723 Tcl_UniChar result = 0; 00724 register const char *p = src; 00725 00726 while (numBytes--) { 00727 unsigned char digit = UCHAR(*p); 00728 00729 if (!isxdigit(digit)) { 00730 break; 00731 } 00732 00733 ++p; 00734 result <<= 4; 00735 00736 if (digit >= 'a') { 00737 result |= (10 + digit - 'a'); 00738 } else if (digit >= 'A') { 00739 result |= (10 + digit - 'A'); 00740 } else { 00741 result |= (digit - '0'); 00742 } 00743 } 00744 00745 *resultPtr = result; 00746 return (p - src); 00747 } 00748 00749 /* 00750 *---------------------------------------------------------------------- 00751 * 00752 * TclParseBackslash -- 00753 * 00754 * Scans up to numBytes bytes starting at src, consuming a backslash 00755 * sequence as defined by Tcl's parsing rules. 00756 * 00757 * Results: 00758 * Records at readPtr the number of bytes making up the backslash 00759 * sequence. Records at dst the UTF-8 encoded equivalent of that 00760 * backslash sequence. Returns the number of bytes written to dst, at 00761 * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results 00762 * are not needed, but the return value is the same either way. 00763 * 00764 * Side effects: 00765 * None. 00766 * 00767 *---------------------------------------------------------------------- 00768 */ 00769 00770 int 00771 TclParseBackslash( 00772 const char *src, /* Points to the backslash character of a a 00773 * backslash sequence. */ 00774 int numBytes, /* Max number of bytes to scan. */ 00775 int *readPtr, /* NULL, or points to storage where the number 00776 * of bytes scanned should be written. */ 00777 char *dst) /* NULL, or points to buffer where the UTF-8 00778 * encoding of the backslash sequence is to be 00779 * written. At most TCL_UTF_MAX bytes will be 00780 * written there. */ 00781 { 00782 register const char *p = src+1; 00783 Tcl_UniChar result; 00784 int count; 00785 char buf[TCL_UTF_MAX]; 00786 00787 if (numBytes == 0) { 00788 if (readPtr != NULL) { 00789 *readPtr = 0; 00790 } 00791 return 0; 00792 } 00793 00794 if (dst == NULL) { 00795 dst = buf; 00796 } 00797 00798 if (numBytes == 1) { 00799 /* 00800 * Can only scan the backslash, so return it. 00801 */ 00802 00803 result = '\\'; 00804 count = 1; 00805 goto done; 00806 } 00807 00808 count = 2; 00809 switch (*p) { 00810 /* 00811 * Note: in the conversions below, use absolute values (e.g., 0xa) 00812 * rather than symbolic values (e.g. \n) that get converted by the 00813 * compiler. It's possible that compilers on some platforms will do 00814 * the symbolic conversions differently, which could result in 00815 * non-portable Tcl scripts. 00816 */ 00817 00818 case 'a': 00819 result = 0x7; 00820 break; 00821 case 'b': 00822 result = 0x8; 00823 break; 00824 case 'f': 00825 result = 0xc; 00826 break; 00827 case 'n': 00828 result = 0xa; 00829 break; 00830 case 'r': 00831 result = 0xd; 00832 break; 00833 case 't': 00834 result = 0x9; 00835 break; 00836 case 'v': 00837 result = 0xb; 00838 break; 00839 case 'x': 00840 count += TclParseHex(p+1, numBytes-1, &result); 00841 if (count == 2) { 00842 /* 00843 * No hexadigits -> This is just "x". 00844 */ 00845 00846 result = 'x'; 00847 } else { 00848 /* 00849 * Keep only the last byte (2 hex digits). 00850 */ 00851 result = (unsigned char) result; 00852 } 00853 break; 00854 case 'u': 00855 count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-1, &result); 00856 if (count == 2) { 00857 /* 00858 * No hexadigits -> This is just "u". 00859 */ 00860 result = 'u'; 00861 } 00862 break; 00863 case '\n': 00864 count--; 00865 do { 00866 p++; 00867 count++; 00868 } while ((count < numBytes) && ((*p == ' ') || (*p == '\t'))); 00869 result = ' '; 00870 break; 00871 case 0: 00872 result = '\\'; 00873 count = 1; 00874 break; 00875 default: 00876 /* 00877 * Check for an octal number \oo?o? 00878 */ 00879 00880 if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */ 00881 result = (unsigned char)(*p - '0'); 00882 p++; 00883 if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */ 00884 || (UCHAR(*p) >= '8')) { 00885 break; 00886 } 00887 count = 3; 00888 result = (unsigned char)((result << 3) + (*p - '0')); 00889 p++; 00890 if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */ 00891 || (UCHAR(*p) >= '8')) { 00892 break; 00893 } 00894 count = 4; 00895 result = (unsigned char)((result << 3) + (*p - '0')); 00896 break; 00897 } 00898 00899 /* 00900 * We have to convert here in case the user has put a backslash in 00901 * front of a multi-byte utf-8 character. While this means nothing 00902 * special, we shouldn't break up a correct utf-8 character. [Bug 00903 * #217987] test subst-3.2 00904 */ 00905 00906 if (Tcl_UtfCharComplete(p, numBytes - 1)) { 00907 count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ 00908 } else { 00909 char utfBytes[TCL_UTF_MAX]; 00910 00911 memcpy(utfBytes, p, (size_t) (numBytes - 1)); 00912 utfBytes[numBytes - 1] = '\0'; 00913 count = Tcl_UtfToUniChar(utfBytes, &result) + 1; 00914 } 00915 break; 00916 } 00917 00918 done: 00919 if (readPtr != NULL) { 00920 *readPtr = count; 00921 } 00922 return Tcl_UniCharToUtf((int) result, dst); 00923 } 00924 00925 /* 00926 *---------------------------------------------------------------------- 00927 * 00928 * ParseComment -- 00929 * 00930 * Scans up to numBytes bytes starting at src, consuming a Tcl comment as 00931 * defined by Tcl's parsing rules. 00932 * 00933 * Results: 00934 * Records in parsePtr information about the parse. Returns the number of 00935 * bytes consumed. 00936 * 00937 * Side effects: 00938 * None. 00939 * 00940 *---------------------------------------------------------------------- 00941 */ 00942 00943 static int 00944 ParseComment( 00945 const char *src, /* First character to parse. */ 00946 register int numBytes, /* Max number of bytes to scan. */ 00947 Tcl_Parse *parsePtr) /* Information about parse in progress. 00948 * Updated if parsing indicates an incomplete 00949 * command. */ 00950 { 00951 register const char *p = src; 00952 00953 while (numBytes) { 00954 char type; 00955 int scanned; 00956 00957 scanned = TclParseAllWhiteSpace(p, numBytes); 00958 p += scanned; 00959 numBytes -= scanned; 00960 00961 if ((numBytes == 0) || (*p != '#')) { 00962 break; 00963 } 00964 if (parsePtr->commentStart == NULL) { 00965 parsePtr->commentStart = p; 00966 } 00967 00968 while (numBytes) { 00969 if (*p == '\\') { 00970 scanned = ParseWhiteSpace(p, numBytes, &parsePtr->incomplete, 00971 &type); 00972 if (scanned) { 00973 p += scanned; 00974 numBytes -= scanned; 00975 } else { 00976 /* 00977 * General backslash substitution in comments isn't part 00978 * of the formal spec, but test parse-15.47 and history 00979 * indicate that it has been the de facto rule. Don't 00980 * change it now. 00981 */ 00982 00983 TclParseBackslash(p, numBytes, &scanned, NULL); 00984 p += scanned; 00985 numBytes -= scanned; 00986 } 00987 } else { 00988 p++; 00989 numBytes--; 00990 if (p[-1] == '\n') { 00991 break; 00992 } 00993 } 00994 } 00995 parsePtr->commentSize = p - parsePtr->commentStart; 00996 } 00997 return (p - src); 00998 } 00999 01000 /* 01001 *---------------------------------------------------------------------- 01002 * 01003 * ParseTokens -- 01004 * 01005 * This function forms the heart of the Tcl parser. It parses one or more 01006 * tokens from a string, up to a termination point specified by the 01007 * caller. This function is used to parse unquoted command words (those 01008 * not in quotes or braces), words in quotes, and array indices for 01009 * variables. No more than numBytes bytes will be scanned. 01010 * 01011 * Results: 01012 * Tokens are added to parsePtr and parsePtr->term is filled in with the 01013 * address of the character that terminated the parse (the first one 01014 * whose CHAR_TYPE matched mask or the character at parsePtr->end). The 01015 * return value is TCL_OK if the parse completed successfully and 01016 * TCL_ERROR otherwise. If a parse error occurs and parsePtr->interp is 01017 * not NULL, then an error message is left in the interpreter's result. 01018 * 01019 * Side effects: 01020 * None. 01021 * 01022 *---------------------------------------------------------------------- 01023 */ 01024 01025 static int 01026 ParseTokens( 01027 register const char *src, /* First character to parse. */ 01028 register int numBytes, /* Max number of bytes to scan. */ 01029 int mask, /* Specifies when to stop parsing. The parse 01030 * stops at the first unquoted character whose 01031 * CHAR_TYPE contains any of the bits in 01032 * mask. */ 01033 int flags, /* OR-ed bits indicating what substitutions to 01034 * perform: TCL_SUBST_COMMANDS, 01035 * TCL_SUBST_VARIABLES, and 01036 * TCL_SUBST_BACKSLASHES */ 01037 Tcl_Parse *parsePtr) /* Information about parse in progress. 01038 * Updated with additional tokens and 01039 * termination information. */ 01040 { 01041 char type; 01042 int originalTokens; 01043 int noSubstCmds = !(flags & TCL_SUBST_COMMANDS); 01044 int noSubstVars = !(flags & TCL_SUBST_VARIABLES); 01045 int noSubstBS = !(flags & TCL_SUBST_BACKSLASHES); 01046 Tcl_Token *tokenPtr; 01047 01048 /* 01049 * Each iteration through the following loop adds one token of type 01050 * TCL_TOKEN_TEXT, TCL_TOKEN_BS, TCL_TOKEN_COMMAND, or TCL_TOKEN_VARIABLE 01051 * to parsePtr. For TCL_TOKEN_VARIABLE tokens, additional tokens are added 01052 * for the parsed variable name. 01053 */ 01054 01055 originalTokens = parsePtr->numTokens; 01056 while (numBytes && !((type = CHAR_TYPE(*src)) & mask)) { 01057 TclGrowParseTokenArray(parsePtr, 1); 01058 tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; 01059 tokenPtr->start = src; 01060 tokenPtr->numComponents = 0; 01061 01062 if ((type & TYPE_SUBS) == 0) { 01063 /* 01064 * This is a simple range of characters. Scan to find the end of 01065 * the range. 01066 */ 01067 01068 while ((++src, --numBytes) 01069 && !(CHAR_TYPE(*src) & (mask | TYPE_SUBS))) { 01070 /* empty loop */ 01071 } 01072 tokenPtr->type = TCL_TOKEN_TEXT; 01073 tokenPtr->size = src - tokenPtr->start; 01074 parsePtr->numTokens++; 01075 } else if (*src == '$') { 01076 int varToken; 01077 01078 if (noSubstVars) { 01079 tokenPtr->type = TCL_TOKEN_TEXT; 01080 tokenPtr->size = 1; 01081 parsePtr->numTokens++; 01082 src++; 01083 numBytes--; 01084 continue; 01085 } 01086 01087 /* 01088 * This is a variable reference. Call Tcl_ParseVarName to do all 01089 * the dirty work of parsing the name. 01090 */ 01091 01092 varToken = parsePtr->numTokens; 01093 if (Tcl_ParseVarName(parsePtr->interp, src, numBytes, parsePtr, 01094 1) != TCL_OK) { 01095 return TCL_ERROR; 01096 } 01097 src += parsePtr->tokenPtr[varToken].size; 01098 numBytes -= parsePtr->tokenPtr[varToken].size; 01099 } else if (*src == '[') { 01100 Tcl_Parse *nestedPtr; 01101 01102 if (noSubstCmds) { 01103 tokenPtr->type = TCL_TOKEN_TEXT; 01104 tokenPtr->size = 1; 01105 parsePtr->numTokens++; 01106 src++; 01107 numBytes--; 01108 continue; 01109 } 01110 01111 /* 01112 * Command substitution. Call Tcl_ParseCommand recursively (and 01113 * repeatedly) to parse the nested command(s), then throw away the 01114 * parse information. 01115 */ 01116 01117 src++; 01118 numBytes--; 01119 nestedPtr = (Tcl_Parse *) 01120 TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse)); 01121 while (1) { 01122 if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1, 01123 nestedPtr) != TCL_OK) { 01124 parsePtr->errorType = nestedPtr->errorType; 01125 parsePtr->term = nestedPtr->term; 01126 parsePtr->incomplete = nestedPtr->incomplete; 01127 TclStackFree(parsePtr->interp, nestedPtr); 01128 return TCL_ERROR; 01129 } 01130 src = nestedPtr->commandStart + nestedPtr->commandSize; 01131 numBytes = parsePtr->end - src; 01132 Tcl_FreeParse(nestedPtr); 01133 01134 /* 01135 * Check for the closing ']' that ends the command 01136 * substitution. It must have been the last character of the 01137 * parsed command. 01138 */ 01139 01140 if ((nestedPtr->term < parsePtr->end) 01141 && (*(nestedPtr->term) == ']') 01142 && !(nestedPtr->incomplete)) { 01143 break; 01144 } 01145 if (numBytes == 0) { 01146 if (parsePtr->interp != NULL) { 01147 Tcl_SetResult(parsePtr->interp, 01148 "missing close-bracket", TCL_STATIC); 01149 } 01150 parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; 01151 parsePtr->term = tokenPtr->start; 01152 parsePtr->incomplete = 1; 01153 TclStackFree(parsePtr->interp, nestedPtr); 01154 return TCL_ERROR; 01155 } 01156 } 01157 TclStackFree(parsePtr->interp, nestedPtr); 01158 tokenPtr->type = TCL_TOKEN_COMMAND; 01159 tokenPtr->size = src - tokenPtr->start; 01160 parsePtr->numTokens++; 01161 } else if (*src == '\\') { 01162 if (noSubstBS) { 01163 tokenPtr->type = TCL_TOKEN_TEXT; 01164 tokenPtr->size = 1; 01165 parsePtr->numTokens++; 01166 src++; 01167 numBytes--; 01168 continue; 01169 } 01170 01171 /* 01172 * Backslash substitution. 01173 */ 01174 01175 TclParseBackslash(src, numBytes, &tokenPtr->size, NULL); 01176 01177 if (tokenPtr->size == 1) { 01178 /* 01179 * Just a backslash, due to end of string. 01180 */ 01181 01182 tokenPtr->type = TCL_TOKEN_TEXT; 01183 parsePtr->numTokens++; 01184 src++; 01185 numBytes--; 01186 continue; 01187 } 01188 01189 if (src[1] == '\n') { 01190 if (numBytes == 2) { 01191 parsePtr->incomplete = 1; 01192 } 01193 01194 /* 01195 * Note: backslash-newline is special in that it is treated 01196 * the same as a space character would be. This means that it 01197 * could terminate the token. 01198 */ 01199 01200 if (mask & TYPE_SPACE) { 01201 if (parsePtr->numTokens == originalTokens) { 01202 goto finishToken; 01203 } 01204 break; 01205 } 01206 } 01207 01208 tokenPtr->type = TCL_TOKEN_BS; 01209 parsePtr->numTokens++; 01210 src += tokenPtr->size; 01211 numBytes -= tokenPtr->size; 01212 } else if (*src == 0) { 01213 tokenPtr->type = TCL_TOKEN_TEXT; 01214 tokenPtr->size = 1; 01215 parsePtr->numTokens++; 01216 src++; 01217 numBytes--; 01218 } else { 01219 Tcl_Panic("ParseTokens encountered unknown character"); 01220 } 01221 } 01222 if (parsePtr->numTokens == originalTokens) { 01223 /* 01224 * There was nothing in this range of text. Add an empty token for the 01225 * empty range, so that there is always at least one token added. 01226 */ 01227 01228 TclGrowParseTokenArray(parsePtr, 1); 01229 tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; 01230 tokenPtr->start = src; 01231 tokenPtr->numComponents = 0; 01232 01233 finishToken: 01234 tokenPtr->type = TCL_TOKEN_TEXT; 01235 tokenPtr->size = 0; 01236 parsePtr->numTokens++; 01237 } 01238 parsePtr->term = src; 01239 return TCL_OK; 01240 } 01241 01242 /* 01243 *---------------------------------------------------------------------- 01244 * 01245 * Tcl_FreeParse -- 01246 * 01247 * This function is invoked to free any dynamic storage that may have 01248 * been allocated by a previous call to Tcl_ParseCommand. 01249 * 01250 * Results: 01251 * None. 01252 * 01253 * Side effects: 01254 * If there is any dynamically allocated memory in *parsePtr, it is 01255 * freed. 01256 * 01257 *---------------------------------------------------------------------- 01258 */ 01259 01260 void 01261 Tcl_FreeParse( 01262 Tcl_Parse *parsePtr) /* Structure that was filled in by a previous 01263 * call to Tcl_ParseCommand. */ 01264 { 01265 if (parsePtr->tokenPtr != parsePtr->staticTokens) { 01266 ckfree((char *) parsePtr->tokenPtr); 01267 parsePtr->tokenPtr = parsePtr->staticTokens; 01268 } 01269 } 01270 01271 /* 01272 *---------------------------------------------------------------------- 01273 * 01274 * Tcl_ParseVarName -- 01275 * 01276 * Given a string starting with a $ sign, parse off a variable name and 01277 * return information about the parse. No more than numBytes bytes will 01278 * be scanned. 01279 * 01280 * Results: 01281 * The return value is TCL_OK if the command was parsed successfully and 01282 * TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an 01283 * error message is left in its result. On a successful return, tokenPtr 01284 * and numTokens fields of parsePtr are filled in with information about 01285 * the variable name that was parsed. The "size" field of the first new 01286 * token gives the total number of bytes in the variable name. Other 01287 * fields in parsePtr are undefined. 01288 * 01289 * Side effects: 01290 * If there is insufficient space in parsePtr to hold all the information 01291 * about the command, then additional space is malloc-ed. If the function 01292 * returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to 01293 * release any additional space that was allocated. 01294 * 01295 *---------------------------------------------------------------------- 01296 */ 01297 01298 int 01299 Tcl_ParseVarName( 01300 Tcl_Interp *interp, /* Interpreter to use for error reporting; if 01301 * NULL, then no error message is provided. */ 01302 const char *start, /* Start of variable substitution string. 01303 * First character must be "$". */ 01304 register int numBytes, /* Total number of bytes in string. If < 0, 01305 * the string consists of all bytes up to the 01306 * first null character. */ 01307 Tcl_Parse *parsePtr, /* Structure to fill in with information about 01308 * the variable name. */ 01309 int append) /* Non-zero means append tokens to existing 01310 * information in parsePtr; zero means ignore 01311 * existing tokens in parsePtr and 01312 * reinitialize it. */ 01313 { 01314 Tcl_Token *tokenPtr; 01315 register const char *src; 01316 unsigned char c; 01317 int varIndex, offset; 01318 Tcl_UniChar ch; 01319 unsigned array; 01320 01321 if ((numBytes == 0) || (start == NULL)) { 01322 return TCL_ERROR; 01323 } 01324 if (numBytes < 0) { 01325 numBytes = strlen(start); 01326 } 01327 01328 if (!append) { 01329 TclParseInit(interp, start, numBytes, parsePtr); 01330 } 01331 01332 /* 01333 * Generate one token for the variable, an additional token for the name, 01334 * plus any number of additional tokens for the index, if there is one. 01335 */ 01336 01337 src = start; 01338 TclGrowParseTokenArray(parsePtr, 2); 01339 tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; 01340 tokenPtr->type = TCL_TOKEN_VARIABLE; 01341 tokenPtr->start = src; 01342 varIndex = parsePtr->numTokens; 01343 parsePtr->numTokens++; 01344 tokenPtr++; 01345 src++; 01346 numBytes--; 01347 if (numBytes == 0) { 01348 goto justADollarSign; 01349 } 01350 tokenPtr->type = TCL_TOKEN_TEXT; 01351 tokenPtr->start = src; 01352 tokenPtr->numComponents = 0; 01353 01354 /* 01355 * The name of the variable can have three forms: 01356 * 1. The $ sign is followed by an open curly brace. Then the variable 01357 * name is everything up to the next close curly brace, and the 01358 * variable is a scalar variable. 01359 * 2. The $ sign is not followed by an open curly brace. Then the variable 01360 * name is everything up to the next character that isn't a letter, 01361 * digit, or underscore. :: sequences are also considered part of the 01362 * variable name, in order to support namespaces. If the following 01363 * character is an open parenthesis, then the information between 01364 * parentheses is the array element name. 01365 * 3. The $ sign is followed by something that isn't a letter, digit, or 01366 * underscore: in this case, there is no variable name and the token is 01367 * just "$". 01368 */ 01369 01370 if (*src == '{') { 01371 src++; 01372 numBytes--; 01373 tokenPtr->type = TCL_TOKEN_TEXT; 01374 tokenPtr->start = src; 01375 tokenPtr->numComponents = 0; 01376 01377 while (numBytes && (*src != '}')) { 01378 numBytes--; 01379 src++; 01380 } 01381 if (numBytes == 0) { 01382 if (parsePtr->interp != NULL) { 01383 Tcl_SetResult(parsePtr->interp, 01384 "missing close-brace for variable name", TCL_STATIC); 01385 } 01386 parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE; 01387 parsePtr->term = tokenPtr->start-1; 01388 parsePtr->incomplete = 1; 01389 goto error; 01390 } 01391 tokenPtr->size = src - tokenPtr->start; 01392 tokenPtr[-1].size = src - tokenPtr[-1].start; 01393 parsePtr->numTokens++; 01394 src++; 01395 } else { 01396 tokenPtr->type = TCL_TOKEN_TEXT; 01397 tokenPtr->start = src; 01398 tokenPtr->numComponents = 0; 01399 01400 while (numBytes) { 01401 if (Tcl_UtfCharComplete(src, numBytes)) { 01402 offset = Tcl_UtfToUniChar(src, &ch); 01403 } else { 01404 char utfBytes[TCL_UTF_MAX]; 01405 01406 memcpy(utfBytes, src, (size_t) numBytes); 01407 utfBytes[numBytes] = '\0'; 01408 offset = Tcl_UtfToUniChar(utfBytes, &ch); 01409 } 01410 c = UCHAR(ch); 01411 if (isalnum(c) || (c == '_')) { /* INTL: ISO only, UCHAR. */ 01412 src += offset; 01413 numBytes -= offset; 01414 continue; 01415 } 01416 if ((c == ':') && (numBytes != 1) && (src[1] == ':')) { 01417 src += 2; 01418 numBytes -= 2; 01419 while (numBytes && (*src == ':')) { 01420 src++; 01421 numBytes--; 01422 } 01423 continue; 01424 } 01425 break; 01426 } 01427 01428 /* 01429 * Support for empty array names here. 01430 */ 01431 01432 array = (numBytes && (*src == '(')); 01433 tokenPtr->size = src - tokenPtr->start; 01434 if ((tokenPtr->size == 0) && !array) { 01435 goto justADollarSign; 01436 } 01437 parsePtr->numTokens++; 01438 if (array) { 01439 /* 01440 * This is a reference to an array element. Call ParseTokens 01441 * recursively to parse the element name, since it could contain 01442 * any number of substitutions. 01443 */ 01444 01445 if (TCL_OK != ParseTokens(src+1, numBytes-1, TYPE_CLOSE_PAREN, 01446 TCL_SUBST_ALL, parsePtr)) { 01447 goto error; 01448 } 01449 if ((parsePtr->term == src+numBytes) || (*parsePtr->term != ')')){ 01450 if (parsePtr->interp != NULL) { 01451 Tcl_SetResult(parsePtr->interp, "missing )", 01452 TCL_STATIC); 01453 } 01454 parsePtr->errorType = TCL_PARSE_MISSING_PAREN; 01455 parsePtr->term = src; 01456 parsePtr->incomplete = 1; 01457 goto error; 01458 } 01459 src = parsePtr->term + 1; 01460 } 01461 } 01462 tokenPtr = &parsePtr->tokenPtr[varIndex]; 01463 tokenPtr->size = src - tokenPtr->start; 01464 tokenPtr->numComponents = parsePtr->numTokens - (varIndex + 1); 01465 return TCL_OK; 01466 01467 /* 01468 * The dollar sign isn't followed by a variable name. Replace the 01469 * TCL_TOKEN_VARIABLE token with a TCL_TOKEN_TEXT token for the dollar 01470 * sign. 01471 */ 01472 01473 justADollarSign: 01474 tokenPtr = &parsePtr->tokenPtr[varIndex]; 01475 tokenPtr->type = TCL_TOKEN_TEXT; 01476 tokenPtr->size = 1; 01477 tokenPtr->numComponents = 0; 01478 return TCL_OK; 01479 01480 error: 01481 Tcl_FreeParse(parsePtr); 01482 return TCL_ERROR; 01483 } 01484 01485 /* 01486 *---------------------------------------------------------------------- 01487 * 01488 * Tcl_ParseVar -- 01489 * 01490 * Given a string starting with a $ sign, parse off a variable name and 01491 * return its value. 01492 * 01493 * Results: 01494 * The return value is the contents of the variable given by the leading 01495 * characters of string. If termPtr isn't NULL, *termPtr gets filled in 01496 * with the address of the character just after the last one in the 01497 * variable specifier. If the variable doesn't exist, then the return 01498 * value is NULL and an error message will be left in interp's result. 01499 * 01500 * Side effects: 01501 * None. 01502 * 01503 *---------------------------------------------------------------------- 01504 */ 01505 01506 const char * 01507 Tcl_ParseVar( 01508 Tcl_Interp *interp, /* Context for looking up variable. */ 01509 register const char *start, /* Start of variable substitution. First 01510 * character must be "$". */ 01511 const char **termPtr) /* If non-NULL, points to word to fill in with 01512 * character just after last one in the 01513 * variable specifier. */ 01514 { 01515 register Tcl_Obj *objPtr; 01516 int code; 01517 Tcl_Parse *parsePtr = (Tcl_Parse *) 01518 TclStackAlloc(interp, sizeof(Tcl_Parse)); 01519 01520 if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) { 01521 TclStackFree(interp, parsePtr); 01522 return NULL; 01523 } 01524 01525 if (termPtr != NULL) { 01526 *termPtr = start + parsePtr->tokenPtr->size; 01527 } 01528 if (parsePtr->numTokens == 1) { 01529 /* 01530 * There isn't a variable name after all: the $ is just a $. 01531 */ 01532 01533 TclStackFree(interp, parsePtr); 01534 return "$"; 01535 } 01536 01537 code = TclSubstTokens(interp, parsePtr->tokenPtr, parsePtr->numTokens, 01538 NULL, 1); 01539 TclStackFree(interp, parsePtr); 01540 if (code != TCL_OK) { 01541 return NULL; 01542 } 01543 objPtr = Tcl_GetObjResult(interp); 01544 01545 /* 01546 * At this point we should have an object containing the value of a 01547 * variable. Just return the string from that object. 01548 * 01549 * This should have returned the object for the user to manage, but 01550 * instead we have some weak reference to the string value in the object, 01551 * which is why we make sure the object exists after resetting the result. 01552 * This isn't ideal, but it's the best we can do with the current 01553 * documented interface. -- hobbs 01554 */ 01555 01556 if (!Tcl_IsShared(objPtr)) { 01557 Tcl_IncrRefCount(objPtr); 01558 } 01559 Tcl_ResetResult(interp); 01560 return TclGetString(objPtr); 01561 } 01562 01563 /* 01564 *---------------------------------------------------------------------- 01565 * 01566 * Tcl_ParseBraces -- 01567 * 01568 * Given a string in braces such as a Tcl command argument or a string 01569 * value in a Tcl expression, this function parses the string and returns 01570 * information about the parse. No more than numBytes bytes will be 01571 * scanned. 01572 * 01573 * Results: 01574 * The return value is TCL_OK if the string was parsed successfully and 01575 * TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an 01576 * error message is left in its result. On a successful return, tokenPtr 01577 * and numTokens fields of parsePtr are filled in with information about 01578 * the string that was parsed. Other fields in parsePtr are undefined. 01579 * termPtr is set to point to the character just after the last one in 01580 * the braced string. 01581 * 01582 * Side effects: 01583 * If there is insufficient space in parsePtr to hold all the information 01584 * about the command, then additional space is malloc-ed. If the function 01585 * returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to 01586 * release any additional space that was allocated. 01587 * 01588 *---------------------------------------------------------------------- 01589 */ 01590 01591 int 01592 Tcl_ParseBraces( 01593 Tcl_Interp *interp, /* Interpreter to use for error reporting; if 01594 * NULL, then no error message is provided. */ 01595 const char *start, /* Start of string enclosed in braces. The 01596 * first character must be {'. */ 01597 register int numBytes, /* Total number of bytes in string. If < 0, 01598 * the string consists of all bytes up to the 01599 * first null character. */ 01600 register Tcl_Parse *parsePtr, 01601 /* Structure to fill in with information about 01602 * the string. */ 01603 int append, /* Non-zero means append tokens to existing 01604 * information in parsePtr; zero means ignore 01605 * existing tokens in parsePtr and 01606 * reinitialize it. */ 01607 const char **termPtr) /* If non-NULL, points to word in which to 01608 * store a pointer to the character just after 01609 * the terminating '}' if the parse was 01610 * successful. */ 01611 { 01612 Tcl_Token *tokenPtr; 01613 register const char *src; 01614 int startIndex, level, length; 01615 01616 if ((numBytes == 0) || (start == NULL)) { 01617 return TCL_ERROR; 01618 } 01619 if (numBytes < 0) { 01620 numBytes = strlen(start); 01621 } 01622 01623 if (!append) { 01624 TclParseInit(interp, start, numBytes, parsePtr); 01625 } 01626 01627 src = start; 01628 startIndex = parsePtr->numTokens; 01629 01630 TclGrowParseTokenArray(parsePtr, 1); 01631 tokenPtr = &parsePtr->tokenPtr[startIndex]; 01632 tokenPtr->type = TCL_TOKEN_TEXT; 01633 tokenPtr->start = src+1; 01634 tokenPtr->numComponents = 0; 01635 level = 1; 01636 while (1) { 01637 while (++src, --numBytes) { 01638 if (CHAR_TYPE(*src) != TYPE_NORMAL) { 01639 break; 01640 } 01641 } 01642 if (numBytes == 0) { 01643 goto missingBraceError; 01644 } 01645 01646 switch (*src) { 01647 case '{': 01648 level++; 01649 break; 01650 case '}': 01651 if (--level == 0) { 01652 /* 01653 * Decide if we need to finish emitting a partially-finished 01654 * token. There are 3 cases: 01655 * {abc \newline xyz} or {xyz} 01656 * - finish emitting "xyz" token 01657 * {abc \newline} 01658 * - don't emit token after \newline 01659 * {} - finish emitting zero-sized token 01660 * 01661 * The last case ensures that there is a token (even if empty) 01662 * that describes the braced string. 01663 */ 01664 01665 if ((src != tokenPtr->start) 01666 || (parsePtr->numTokens == startIndex)) { 01667 tokenPtr->size = (src - tokenPtr->start); 01668 parsePtr->numTokens++; 01669 } 01670 if (termPtr != NULL) { 01671 *termPtr = src+1; 01672 } 01673 return TCL_OK; 01674 } 01675 break; 01676 case '\\': 01677 TclParseBackslash(src, numBytes, &length, NULL); 01678 if ((length > 1) && (src[1] == '\n')) { 01679 /* 01680 * A backslash-newline sequence must be collapsed, even inside 01681 * braces, so we have to split the word into multiple tokens 01682 * so that the backslash-newline can be represented 01683 * explicitly. 01684 */ 01685 01686 if (numBytes == 2) { 01687 parsePtr->incomplete = 1; 01688 } 01689 tokenPtr->size = (src - tokenPtr->start); 01690 if (tokenPtr->size != 0) { 01691 parsePtr->numTokens++; 01692 } 01693 TclGrowParseTokenArray(parsePtr, 2); 01694 tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; 01695 tokenPtr->type = TCL_TOKEN_BS; 01696 tokenPtr->start = src; 01697 tokenPtr->size = length; 01698 tokenPtr->numComponents = 0; 01699 parsePtr->numTokens++; 01700 01701 src += length - 1; 01702 numBytes -= length - 1; 01703 tokenPtr++; 01704 tokenPtr->type = TCL_TOKEN_TEXT; 01705 tokenPtr->start = src + 1; 01706 tokenPtr->numComponents = 0; 01707 } else { 01708 src += length - 1; 01709 numBytes -= length - 1; 01710 } 01711 break; 01712 } 01713 } 01714 01715 missingBraceError: 01716 parsePtr->errorType = TCL_PARSE_MISSING_BRACE; 01717 parsePtr->term = start; 01718 parsePtr->incomplete = 1; 01719 if (parsePtr->interp == NULL) { 01720 /* 01721 * Skip straight to the exit code since we have no interpreter to put 01722 * error message in. 01723 */ 01724 01725 goto error; 01726 } 01727 01728 Tcl_SetResult(parsePtr->interp, "missing close-brace", TCL_STATIC); 01729 01730 /* 01731 * Guess if the problem is due to comments by searching the source string 01732 * for a possible open brace within the context of a comment. Since we 01733 * aren't performing a full Tcl parse, just look for an open brace 01734 * preceded by a '<whitespace>#' on the same line. 01735 */ 01736 01737 { 01738 register int openBrace = 0; 01739 01740 while (--src > start) { 01741 switch (*src) { 01742 case '{': 01743 openBrace = 1; 01744 break; 01745 case '\n': 01746 openBrace = 0; 01747 break; 01748 case '#' : 01749 if (openBrace && isspace(UCHAR(src[-1]))) { 01750 Tcl_AppendResult(parsePtr->interp, 01751 ": possible unbalanced brace in comment", NULL); 01752 goto error; 01753 } 01754 break; 01755 } 01756 } 01757 } 01758 01759 error: 01760 Tcl_FreeParse(parsePtr); 01761 return TCL_ERROR; 01762 } 01763 01764 /* 01765 *---------------------------------------------------------------------- 01766 * 01767 * Tcl_ParseQuotedString -- 01768 * 01769 * Given a double-quoted string such as a quoted Tcl command argument or 01770 * a quoted value in a Tcl expression, this function parses the string 01771 * and returns information about the parse. No more than numBytes bytes 01772 * will be scanned. 01773 * 01774 * Results: 01775 * The return value is TCL_OK if the string was parsed successfully and 01776 * TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an 01777 * error message is left in its result. On a successful return, tokenPtr 01778 * and numTokens fields of parsePtr are filled in with information about 01779 * the string that was parsed. Other fields in parsePtr are undefined. 01780 * termPtr is set to point to the character just after the quoted 01781 * string's terminating close-quote. 01782 * 01783 * Side effects: 01784 * If there is insufficient space in parsePtr to hold all the information 01785 * about the command, then additional space is malloc-ed. If the function 01786 * returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to 01787 * release any additional space that was allocated. 01788 * 01789 *---------------------------------------------------------------------- 01790 */ 01791 01792 int 01793 Tcl_ParseQuotedString( 01794 Tcl_Interp *interp, /* Interpreter to use for error reporting; if 01795 * NULL, then no error message is provided. */ 01796 const char *start, /* Start of the quoted string. The first 01797 * character must be '"'. */ 01798 register int numBytes, /* Total number of bytes in string. If < 0, 01799 * the string consists of all bytes up to the 01800 * first null character. */ 01801 register Tcl_Parse *parsePtr, 01802 /* Structure to fill in with information about 01803 * the string. */ 01804 int append, /* Non-zero means append tokens to existing 01805 * information in parsePtr; zero means ignore 01806 * existing tokens in parsePtr and 01807 * reinitialize it. */ 01808 const char **termPtr) /* If non-NULL, points to word in which to 01809 * store a pointer to the character just after 01810 * the quoted string's terminating close-quote 01811 * if the parse succeeds. */ 01812 { 01813 if ((numBytes == 0) || (start == NULL)) { 01814 return TCL_ERROR; 01815 } 01816 if (numBytes < 0) { 01817 numBytes = strlen(start); 01818 } 01819 01820 if (!append) { 01821 TclParseInit(interp, start, numBytes, parsePtr); 01822 } 01823 01824 if (TCL_OK != ParseTokens(start+1, numBytes-1, TYPE_QUOTE, TCL_SUBST_ALL, 01825 parsePtr)) { 01826 goto error; 01827 } 01828 if (*parsePtr->term != '"') { 01829 if (parsePtr->interp != NULL) { 01830 Tcl_SetResult(parsePtr->interp, "missing \"", TCL_STATIC); 01831 } 01832 parsePtr->errorType = TCL_PARSE_MISSING_QUOTE; 01833 parsePtr->term = start; 01834 parsePtr->incomplete = 1; 01835 goto error; 01836 } 01837 if (termPtr != NULL) { 01838 *termPtr = (parsePtr->term + 1); 01839 } 01840 return TCL_OK; 01841 01842 error: 01843 Tcl_FreeParse(parsePtr); 01844 return TCL_ERROR; 01845 } 01846 01847 /* 01848 *---------------------------------------------------------------------- 01849 * 01850 * Tcl_SubstObj -- 01851 * 01852 * This function performs the substitutions specified on the given string 01853 * as described in the user documentation for the "subst" Tcl command. 01854 * 01855 * Results: 01856 * A Tcl_Obj* containing the substituted string, or NULL to indicate that 01857 * an error occurred. 01858 * 01859 * Side effects: 01860 * See the user documentation. 01861 * 01862 *---------------------------------------------------------------------- 01863 */ 01864 01865 Tcl_Obj * 01866 Tcl_SubstObj( 01867 Tcl_Interp *interp, /* Interpreter in which substitution occurs */ 01868 Tcl_Obj *objPtr, /* The value to be substituted. */ 01869 int flags) /* What substitutions to do. */ 01870 { 01871 int length, tokensLeft, code; 01872 Tcl_Token *endTokenPtr; 01873 Tcl_Obj *result, *errMsg = NULL; 01874 CONST char *p = TclGetStringFromObj(objPtr, &length); 01875 Tcl_Parse *parsePtr = (Tcl_Parse *) 01876 TclStackAlloc(interp, sizeof(Tcl_Parse)); 01877 01878 TclParseInit(interp, p, length, parsePtr); 01879 01880 /* 01881 * First parse the string rep of objPtr, as if it were enclosed as a 01882 * "-quoted word in a normal Tcl command. Honor flags that selectively 01883 * inhibit types of substitution. 01884 */ 01885 01886 if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) { 01887 /* 01888 * There was a parse error. Save the error message for possible 01889 * reporting later. 01890 */ 01891 01892 errMsg = Tcl_GetObjResult(interp); 01893 Tcl_IncrRefCount(errMsg); 01894 01895 /* 01896 * We need to re-parse to get the portion of the string we can [subst] 01897 * before the parse error. Sadly, all the Tcl_Token's created by the 01898 * first parse attempt are gone, freed according to the public spec 01899 * for the Tcl_Parse* routines. The only clue we have is parse.term, 01900 * which points to either the unmatched opener, or to characters that 01901 * follow a close brace or close quote. 01902 * 01903 * Call ParseTokens again, working on the string up to parse.term. 01904 * Keep repeating until we get a good parse on a prefix. 01905 */ 01906 01907 do { 01908 parsePtr->numTokens = 0; 01909 parsePtr->tokensAvailable = NUM_STATIC_TOKENS; 01910 parsePtr->end = parsePtr->term; 01911 parsePtr->incomplete = 0; 01912 parsePtr->errorType = TCL_PARSE_SUCCESS; 01913 } while (TCL_OK != 01914 ParseTokens(p, parsePtr->end - p, 0, flags, parsePtr)); 01915 01916 /* 01917 * The good parse will have to be followed by {, (, or [. 01918 */ 01919 01920 switch (*(parsePtr->term)) { 01921 case '{': 01922 /* 01923 * Parse error was a missing } in a ${varname} variable 01924 * substitution at the toplevel. We will subst everything up to 01925 * that broken variable substitution before reporting the parse 01926 * error. Substituting the leftover '$' will have no side-effects, 01927 * so the current token stream is fine. 01928 */ 01929 break; 01930 01931 case '(': 01932 /* 01933 * Parse error was during the parsing of the index part of an 01934 * array variable substitution at the toplevel. 01935 */ 01936 01937 if (*(parsePtr->term - 1) == '$') { 01938 /* 01939 * Special case where removing the array index left us with 01940 * just a dollar sign (array variable with name the empty 01941 * string as its name), instead of with a scalar variable 01942 * reference. 01943 * 01944 * As in the previous case, existing token stream is OK. 01945 */ 01946 } else { 01947 /* 01948 * The current parse includes a successful parse of a scalar 01949 * variable substitution where there should have been an array 01950 * variable substitution. We remove that mistaken part of the 01951 * parse before moving on. A scalar variable substitution is 01952 * two tokens. 01953 */ 01954 01955 Tcl_Token *varTokenPtr = 01956 parsePtr->tokenPtr + parsePtr->numTokens - 2; 01957 01958 if (varTokenPtr->type != TCL_TOKEN_VARIABLE) { 01959 Tcl_Panic("Tcl_SubstObj: programming error"); 01960 } 01961 if (varTokenPtr[1].type != TCL_TOKEN_TEXT) { 01962 Tcl_Panic("Tcl_SubstObj: programming error"); 01963 } 01964 parsePtr->numTokens -= 2; 01965 } 01966 break; 01967 case '[': 01968 /* 01969 * Parse error occurred during parsing of a toplevel command 01970 * substitution. 01971 */ 01972 01973 parsePtr->end = p + length; 01974 p = parsePtr->term + 1; 01975 length = parsePtr->end - p; 01976 if (length == 0) { 01977 /* 01978 * No commands, just an unmatched [. As in previous cases, 01979 * existing token stream is OK. 01980 */ 01981 } else { 01982 /* 01983 * We want to add the parsing of as many commands as we can 01984 * within that substitution until we reach the actual parse 01985 * error. We'll do additional parsing to determine what length 01986 * to claim for the final TCL_TOKEN_COMMAND token. 01987 */ 01988 01989 Tcl_Token *tokenPtr; 01990 const char *lastTerm = parsePtr->term; 01991 Tcl_Parse *nestedPtr = (Tcl_Parse *) 01992 TclStackAlloc(interp, sizeof(Tcl_Parse)); 01993 01994 while (TCL_OK == 01995 Tcl_ParseCommand(NULL, p, length, 0, nestedPtr)) { 01996 Tcl_FreeParse(nestedPtr); 01997 p = nestedPtr->term + (nestedPtr->term < nestedPtr->end); 01998 length = nestedPtr->end - p; 01999 if ((length == 0) && (nestedPtr->term == nestedPtr->end)) { 02000 /* 02001 * If we run out of string, blame the missing close 02002 * bracket on the last command, and do not evaluate it 02003 * during substitution. 02004 */ 02005 02006 break; 02007 } 02008 lastTerm = nestedPtr->term; 02009 } 02010 TclStackFree(interp, nestedPtr); 02011 02012 if (lastTerm == parsePtr->term) { 02013 /* 02014 * Parse error in first command. No commands to subst, add 02015 * no more tokens. 02016 */ 02017 break; 02018 } 02019 02020 /* 02021 * Create a command substitution token for whatever commands 02022 * got parsed. 02023 */ 02024 02025 TclGrowParseTokenArray(parsePtr, 1); 02026 tokenPtr = &(parsePtr->tokenPtr[parsePtr->numTokens]); 02027 tokenPtr->start = parsePtr->term; 02028 tokenPtr->numComponents = 0; 02029 tokenPtr->type = TCL_TOKEN_COMMAND; 02030 tokenPtr->size = lastTerm - tokenPtr->start + 1; 02031 parsePtr->numTokens++; 02032 } 02033 break; 02034 02035 default: 02036 Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]); 02037 } 02038 } 02039 02040 /* 02041 * Next, substitute the parsed tokens just as in normal Tcl evaluation. 02042 */ 02043 02044 endTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; 02045 tokensLeft = parsePtr->numTokens; 02046 code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, 02047 &tokensLeft, 1); 02048 if (code == TCL_OK) { 02049 Tcl_FreeParse(parsePtr); 02050 TclStackFree(interp, parsePtr); 02051 if (errMsg != NULL) { 02052 Tcl_SetObjResult(interp, errMsg); 02053 Tcl_DecrRefCount(errMsg); 02054 return NULL; 02055 } 02056 return Tcl_GetObjResult(interp); 02057 } 02058 02059 result = Tcl_NewObj(); 02060 while (1) { 02061 switch (code) { 02062 case TCL_ERROR: 02063 Tcl_FreeParse(parsePtr); 02064 TclStackFree(interp, parsePtr); 02065 Tcl_DecrRefCount(result); 02066 if (errMsg != NULL) { 02067 Tcl_DecrRefCount(errMsg); 02068 } 02069 return NULL; 02070 case TCL_BREAK: 02071 tokensLeft = 0; /* Halt substitution */ 02072 default: 02073 Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp)); 02074 } 02075 02076 if (tokensLeft == 0) { 02077 Tcl_FreeParse(parsePtr); 02078 TclStackFree(interp, parsePtr); 02079 if (errMsg != NULL) { 02080 if (code != TCL_BREAK) { 02081 Tcl_DecrRefCount(result); 02082 Tcl_SetObjResult(interp, errMsg); 02083 Tcl_DecrRefCount(errMsg); 02084 return NULL; 02085 } 02086 Tcl_DecrRefCount(errMsg); 02087 } 02088 return result; 02089 } 02090 02091 code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, 02092 &tokensLeft, 1); 02093 } 02094 } 02095 02096 /* 02097 *---------------------------------------------------------------------- 02098 * 02099 * TclSubstTokens -- 02100 * 02101 * Accepts an array of count Tcl_Token's, and creates a result value in 02102 * the interp from concatenating the results of performing Tcl 02103 * substitution on each Tcl_Token. Substitution is interrupted if any 02104 * non-TCL_OK completion code arises. 02105 * 02106 * Results: 02107 * The return value is a standard Tcl completion code. The result in 02108 * interp is the substituted value, or an error message if TCL_ERROR is 02109 * returned. If tokensLeftPtr is not NULL, then it points to an int where 02110 * the number of tokens remaining to be processed is written. 02111 * 02112 * Side effects: 02113 * Can be anything, depending on the types of substitution done. 02114 * 02115 *---------------------------------------------------------------------- 02116 */ 02117 02118 int 02119 TclSubstTokens( 02120 Tcl_Interp *interp, /* Interpreter in which to lookup variables, 02121 * execute nested commands, and report 02122 * errors. */ 02123 Tcl_Token *tokenPtr, /* Pointer to first in an array of tokens to 02124 * evaluate and concatenate. */ 02125 int count, /* Number of tokens to consider at tokenPtr. 02126 * Must be at least 1. */ 02127 int *tokensLeftPtr, /* If not NULL, points to memory where an 02128 * integer representing the number of tokens 02129 * left to be substituted will be written */ 02130 int line) /* The line the script starts on. */ 02131 { 02132 Tcl_Obj *result; 02133 int code = TCL_OK; 02134 02135 /* 02136 * Each pass through this loop will substitute one token, and its 02137 * components, if any. The only thing tricky here is that we go to some 02138 * effort to pass Tcl_Obj's through untouched, to avoid string copying and 02139 * Tcl_Obj creation if possible, to aid performance and limit shimmering. 02140 * 02141 * Further optimization opportunities might be to check for the equivalent 02142 * of Tcl_SetObjResult(interp, Tcl_GetObjResult(interp)) and omit them. 02143 */ 02144 02145 result = NULL; 02146 for (; count>0 && code==TCL_OK ; count--, tokenPtr++) { 02147 Tcl_Obj *appendObj = NULL; 02148 const char *append = NULL; 02149 int appendByteLength = 0; 02150 char utfCharBytes[TCL_UTF_MAX]; 02151 02152 switch (tokenPtr->type) { 02153 case TCL_TOKEN_TEXT: 02154 append = tokenPtr->start; 02155 appendByteLength = tokenPtr->size; 02156 break; 02157 02158 case TCL_TOKEN_BS: 02159 appendByteLength = Tcl_UtfBackslash(tokenPtr->start, NULL, 02160 utfCharBytes); 02161 append = utfCharBytes; 02162 break; 02163 02164 case TCL_TOKEN_COMMAND: { 02165 Interp *iPtr = (Interp *) interp; 02166 02167 iPtr->numLevels++; 02168 code = TclInterpReady(interp); 02169 if (code == TCL_OK) { 02170 /* TIP #280: Transfer line information to nested command */ 02171 code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2, 02172 0, line); 02173 } 02174 iPtr->numLevels--; 02175 appendObj = Tcl_GetObjResult(interp); 02176 break; 02177 } 02178 02179 case TCL_TOKEN_VARIABLE: { 02180 Tcl_Obj *arrayIndex = NULL; 02181 Tcl_Obj *varName = NULL; 02182 02183 if (tokenPtr->numComponents > 1) { 02184 /* 02185 * Subst the index part of an array variable reference. 02186 */ 02187 02188 code = TclSubstTokens(interp, tokenPtr+2, 02189 tokenPtr->numComponents - 1, NULL, line); 02190 arrayIndex = Tcl_GetObjResult(interp); 02191 Tcl_IncrRefCount(arrayIndex); 02192 } 02193 02194 if (code == TCL_OK) { 02195 varName = Tcl_NewStringObj(tokenPtr[1].start, 02196 tokenPtr[1].size); 02197 appendObj = Tcl_ObjGetVar2(interp, varName, arrayIndex, 02198 TCL_LEAVE_ERR_MSG); 02199 Tcl_DecrRefCount(varName); 02200 if (appendObj == NULL) { 02201 code = TCL_ERROR; 02202 } 02203 } 02204 02205 switch (code) { 02206 case TCL_OK: /* Got value */ 02207 case TCL_ERROR: /* Already have error message */ 02208 case TCL_BREAK: /* Will not substitute anyway */ 02209 case TCL_CONTINUE: /* Will not substitute anyway */ 02210 break; 02211 default: 02212 /* 02213 * All other return codes, we will subst the result from the 02214 * code-throwing evaluation. 02215 */ 02216 02217 appendObj = Tcl_GetObjResult(interp); 02218 } 02219 02220 if (arrayIndex != NULL) { 02221 Tcl_DecrRefCount(arrayIndex); 02222 } 02223 count -= tokenPtr->numComponents; 02224 tokenPtr += tokenPtr->numComponents; 02225 break; 02226 } 02227 02228 default: 02229 Tcl_Panic("unexpected token type in TclSubstTokens: %d", 02230 tokenPtr->type); 02231 } 02232 02233 if ((code == TCL_BREAK) || (code == TCL_CONTINUE)) { 02234 /* 02235 * Inhibit substitution. 02236 */ 02237 continue; 02238 } 02239 02240 if (result == NULL) { 02241 /* 02242 * First pass through. If we have a Tcl_Obj, just use it. If not, 02243 * create one from our string. 02244 */ 02245 02246 if (appendObj != NULL) { 02247 result = appendObj; 02248 } else { 02249 result = Tcl_NewStringObj(append, appendByteLength); 02250 } 02251 Tcl_IncrRefCount(result); 02252 } else { 02253 /* 02254 * Subsequent passes. Append to result. 02255 */ 02256 02257 if (Tcl_IsShared(result)) { 02258 Tcl_DecrRefCount(result); 02259 result = Tcl_DuplicateObj(result); 02260 Tcl_IncrRefCount(result); 02261 } 02262 if (appendObj != NULL) { 02263 Tcl_AppendObjToObj(result, appendObj); 02264 } else { 02265 Tcl_AppendToObj(result, append, appendByteLength); 02266 } 02267 } 02268 } 02269 02270 if (code != TCL_ERROR) { /* Keep error message in result! */ 02271 if (result != NULL) { 02272 Tcl_SetObjResult(interp, result); 02273 } else { 02274 Tcl_ResetResult(interp); 02275 } 02276 } 02277 if (tokensLeftPtr != NULL) { 02278 *tokensLeftPtr = count; 02279 } 02280 if (result != NULL) { 02281 Tcl_DecrRefCount(result); 02282 } 02283 return code; 02284 } 02285 02286 /* 02287 *---------------------------------------------------------------------- 02288 * 02289 * CommandComplete -- 02290 * 02291 * This function is shared by TclCommandComplete and 02292 * Tcl_ObjCommandComplete; it does all the real work of seeing whether a 02293 * script is complete 02294 * 02295 * Results: 02296 * 1 is returned if the script is complete, 0 if there are open 02297 * delimiters such as " or (. 1 is also returned if there is a parse 02298 * error in the script other than unmatched delimiters. 02299 * 02300 * Side effects: 02301 * None. 02302 * 02303 *---------------------------------------------------------------------- 02304 */ 02305 02306 static inline int 02307 CommandComplete( 02308 const char *script, /* Script to check. */ 02309 int numBytes) /* Number of bytes in script. */ 02310 { 02311 Tcl_Parse parse; 02312 const char *p, *end; 02313 int result; 02314 02315 p = script; 02316 end = p + numBytes; 02317 while (Tcl_ParseCommand(NULL, p, end - p, 0, &parse) == TCL_OK) { 02318 p = parse.commandStart + parse.commandSize; 02319 if (p >= end) { 02320 break; 02321 } 02322 Tcl_FreeParse(&parse); 02323 } 02324 if (parse.incomplete) { 02325 result = 0; 02326 } else { 02327 result = 1; 02328 } 02329 Tcl_FreeParse(&parse); 02330 return result; 02331 } 02332 02333 /* 02334 *---------------------------------------------------------------------- 02335 * 02336 * Tcl_CommandComplete -- 02337 * 02338 * Given a partial or complete Tcl script, this function determines 02339 * whether the script is complete in the sense of having matched braces 02340 * and quotes and brackets. 02341 * 02342 * Results: 02343 * 1 is returned if the script is complete, 0 otherwise. 1 is also 02344 * returned if there is a parse error in the script other than unmatched 02345 * delimiters. 02346 * 02347 * Side effects: 02348 * None. 02349 * 02350 *---------------------------------------------------------------------- 02351 */ 02352 02353 int 02354 Tcl_CommandComplete( 02355 const char *script) /* Script to check. */ 02356 { 02357 return CommandComplete(script, (int) strlen(script)); 02358 } 02359 02360 /* 02361 *---------------------------------------------------------------------- 02362 * 02363 * TclObjCommandComplete -- 02364 * 02365 * Given a partial or complete Tcl command in a Tcl object, this function 02366 * determines whether the command is complete in the sense of having 02367 * matched braces and quotes and brackets. 02368 * 02369 * Results: 02370 * 1 is returned if the command is complete, 0 otherwise. 02371 * 02372 * Side effects: 02373 * None. 02374 * 02375 *---------------------------------------------------------------------- 02376 */ 02377 02378 int 02379 TclObjCommandComplete( 02380 Tcl_Obj *objPtr) /* Points to object holding script to 02381 * check. */ 02382 { 02383 int length; 02384 const char *script = Tcl_GetStringFromObj(objPtr, &length); 02385 02386 return CommandComplete(script, length); 02387 } 02388 02389 /* 02390 *---------------------------------------------------------------------- 02391 * 02392 * TclIsLocalScalar -- 02393 * 02394 * Check to see if a given string is a legal scalar variable name with no 02395 * namespace qualifiers or substitutions. 02396 * 02397 * Results: 02398 * Returns 1 if the variable is a local scalar. 02399 * 02400 * Side effects: 02401 * None. 02402 * 02403 *---------------------------------------------------------------------- 02404 */ 02405 02406 int 02407 TclIsLocalScalar( 02408 const char *src, 02409 int len) 02410 { 02411 const char *p; 02412 const char *lastChar = src + (len - 1); 02413 02414 for (p=src ; p<=lastChar ; p++) { 02415 if ((CHAR_TYPE(*p) != TYPE_NORMAL) && 02416 (CHAR_TYPE(*p) != TYPE_COMMAND_END)) { 02417 /* 02418 * TCL_COMMAND_END is returned for the last character of the 02419 * string. By this point we know it isn't an array or namespace 02420 * reference. 02421 */ 02422 02423 return 0; 02424 } 02425 if (*p == '(') { 02426 if (*lastChar == ')') { /* We have an array element */ 02427 return 0; 02428 } 02429 } else if (*p == ':') { 02430 if ((p != lastChar) && *(p+1) == ':') { /* qualified name */ 02431 return 0; 02432 } 02433 } 02434 } 02435 02436 return 1; 02437 } 02438 02439 /* 02440 * Local Variables: 02441 * mode: c 02442 * c-basic-offset: 4 02443 * fill-column: 78 02444 * End: 02445 */
Generated on Wed Mar 12 12:18:20 2008 by 1.5.1 |