tclScan.c

Go to the documentation of this file.
00001 /*
00002  * tclScan.c --
00003  *
00004  *      This file contains the implementation of the "scan" command.
00005  *
00006  * Copyright (c) 1998 by Scriptics Corporation.
00007  *
00008  * See the file "license.terms" for information on usage and redistribution of
00009  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
00010  *
00011  * RCS: @(#) $Id: tclScan.c,v 1.27 2007/12/13 15:23:20 dgp Exp $
00012  */
00013 
00014 #include "tclInt.h"
00015 
00016 /*
00017  * Flag values used by Tcl_ScanObjCmd.
00018  */
00019 
00020 #define SCAN_NOSKIP     0x1             /* Don't skip blanks. */
00021 #define SCAN_SUPPRESS   0x2             /* Suppress assignment. */
00022 #define SCAN_UNSIGNED   0x4             /* Read an unsigned value. */
00023 #define SCAN_WIDTH      0x8             /* A width value was supplied. */
00024 
00025 #define SCAN_LONGER     0x400           /* Asked for a wide value. */
00026 #define SCAN_BIG        0x800           /* Asked for a bignum value. */
00027 
00028 /*
00029  * The following structure contains the information associated with a
00030  * character set.
00031  */
00032 
00033 typedef struct CharSet {
00034     int exclude;                /* 1 if this is an exclusion set. */
00035     int nchars;
00036     Tcl_UniChar *chars;
00037     int nranges;
00038     struct Range {
00039         Tcl_UniChar start;
00040         Tcl_UniChar end;
00041     } *ranges;
00042 } CharSet;
00043 
00044 /*
00045  * Declarations for functions used only in this file.
00046  */
00047 
00048 static char *           BuildCharSet(CharSet *cset, char *format);
00049 static int              CharInSet(CharSet *cset, int ch);
00050 static void             ReleaseCharSet(CharSet *cset);
00051 static int              ValidateFormat(Tcl_Interp *interp, char *format,
00052                             int numVars, int *totalVars);
00053 
00054 /*
00055  *----------------------------------------------------------------------
00056  *
00057  * BuildCharSet --
00058  *
00059  *      This function examines a character set format specification and builds
00060  *      a CharSet containing the individual characters and character ranges
00061  *      specified.
00062  *
00063  * Results:
00064  *      Returns the next format position.
00065  *
00066  * Side effects:
00067  *      Initializes the charset.
00068  *
00069  *----------------------------------------------------------------------
00070  */
00071 
00072 static char *
00073 BuildCharSet(
00074     CharSet *cset,
00075     char *format)               /* Points to first char of set. */
00076 {
00077     Tcl_UniChar ch, start;
00078     int offset, nranges;
00079     char *end;
00080 
00081     memset(cset, 0, sizeof(CharSet));
00082 
00083     offset = Tcl_UtfToUniChar(format, &ch);
00084     if (ch == '^') {
00085         cset->exclude = 1;
00086         format += offset;
00087         offset = Tcl_UtfToUniChar(format, &ch);
00088     }
00089     end = format + offset;
00090 
00091     /*
00092      * Find the close bracket so we can overallocate the set.
00093      */
00094 
00095     if (ch == ']') {
00096         end += Tcl_UtfToUniChar(end, &ch);
00097     }
00098     nranges = 0;
00099     while (ch != ']') {
00100         if (ch == '-') {
00101             nranges++;
00102         }
00103         end += Tcl_UtfToUniChar(end, &ch);
00104     }
00105 
00106     cset->chars = (Tcl_UniChar *)
00107             ckalloc(sizeof(Tcl_UniChar) * (end - format - 1));
00108     if (nranges > 0) {
00109         cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges);
00110     } else {
00111         cset->ranges = NULL;
00112     }
00113 
00114     /*
00115      * Now build the character set.
00116      */
00117 
00118     cset->nchars = cset->nranges = 0;
00119     format += Tcl_UtfToUniChar(format, &ch);
00120     start = ch;
00121     if (ch == ']' || ch == '-') {
00122         cset->chars[cset->nchars++] = ch;
00123         format += Tcl_UtfToUniChar(format, &ch);
00124     }
00125     while (ch != ']') {
00126         if (*format == '-') {
00127             /*
00128              * This may be the first character of a range, so don't add it
00129              * yet.
00130              */
00131 
00132             start = ch;
00133         } else if (ch == '-') {
00134             /*
00135              * Check to see if this is the last character in the set, in which
00136              * case it is not a range and we should add the previous character
00137              * as well as the dash.
00138              */
00139 
00140             if (*format == ']') {
00141                 cset->chars[cset->nchars++] = start;
00142                 cset->chars[cset->nchars++] = ch;
00143             } else {
00144                 format += Tcl_UtfToUniChar(format, &ch);
00145 
00146                 /*
00147                  * Check to see if the range is in reverse order.
00148                  */
00149 
00150                 if (start < ch) {
00151                     cset->ranges[cset->nranges].start = start;
00152                     cset->ranges[cset->nranges].end = ch;
00153                 } else {
00154                     cset->ranges[cset->nranges].start = ch;
00155                     cset->ranges[cset->nranges].end = start;
00156                 }
00157                 cset->nranges++;
00158             }
00159         } else {
00160             cset->chars[cset->nchars++] = ch;
00161         }
00162         format += Tcl_UtfToUniChar(format, &ch);
00163     }
00164     return format;
00165 }
00166 
00167 /*
00168  *----------------------------------------------------------------------
00169  *
00170  * CharInSet --
00171  *
00172  *      Check to see if a character matches the given set.
00173  *
00174  * Results:
00175  *      Returns non-zero if the character matches the given set.
00176  *
00177  * Side effects:
00178  *      None.
00179  *
00180  *----------------------------------------------------------------------
00181  */
00182 
00183 static int
00184 CharInSet(
00185     CharSet *cset,
00186     int c)                      /* Character to test, passed as int because of
00187                                  * non-ANSI prototypes. */
00188 {
00189     Tcl_UniChar ch = (Tcl_UniChar) c;
00190     int i, match = 0;
00191 
00192     for (i = 0; i < cset->nchars; i++) {
00193         if (cset->chars[i] == ch) {
00194             match = 1;
00195             break;
00196         }
00197     }
00198     if (!match) {
00199         for (i = 0; i < cset->nranges; i++) {
00200             if ((cset->ranges[i].start <= ch) && (ch <= cset->ranges[i].end)) {
00201                 match = 1;
00202                 break;
00203             }
00204         }
00205     }
00206     return (cset->exclude ? !match : match);
00207 }
00208 
00209 /*
00210  *----------------------------------------------------------------------
00211  *
00212  * ReleaseCharSet --
00213  *
00214  *      Free the storage associated with a character set.
00215  *
00216  * Results:
00217  *      None.
00218  *
00219  * Side effects:
00220  *      None.
00221  *
00222  *----------------------------------------------------------------------
00223  */
00224 
00225 static void
00226 ReleaseCharSet(
00227     CharSet *cset)
00228 {
00229     ckfree((char *)cset->chars);
00230     if (cset->ranges) {
00231         ckfree((char *)cset->ranges);
00232     }
00233 }
00234 
00235 /*
00236  *----------------------------------------------------------------------
00237  *
00238  * ValidateFormat --
00239  *
00240  *      Parse the format string and verify that it is properly formed and that
00241  *      there are exactly enough variables on the command line.
00242  *
00243  * Results:
00244  *      A standard Tcl result.
00245  *
00246  * Side effects:
00247  *      May place an error in the interpreter result.
00248  *
00249  *----------------------------------------------------------------------
00250  */
00251 
00252 static int
00253 ValidateFormat(
00254     Tcl_Interp *interp,         /* Current interpreter. */
00255     char *format,               /* The format string. */
00256     int numVars,                /* The number of variables passed to the scan
00257                                  * command. */
00258     int *totalSubs)             /* The number of variables that will be
00259                                  * required. */
00260 {
00261     int gotXpg, gotSequential, value, i, flags;
00262     char *end;
00263     Tcl_UniChar ch;
00264     int objIndex, xpgSize, nspace = numVars;
00265     int *nassign = (int *) TclStackAlloc(interp, nspace * sizeof(int));
00266     char buf[TCL_UTF_MAX+1];
00267 
00268     /*
00269      * Initialize an array that records the number of times a variable is
00270      * assigned to by the format string. We use this to detect if a variable
00271      * is multiply assigned or left unassigned.
00272      */
00273 
00274     for (i = 0; i < nspace; i++) {
00275         nassign[i] = 0;
00276     }
00277 
00278     xpgSize = objIndex = gotXpg = gotSequential = 0;
00279 
00280     while (*format != '\0') {
00281         format += Tcl_UtfToUniChar(format, &ch);
00282 
00283         flags = 0;
00284 
00285         if (ch != '%') {
00286             continue;
00287         }
00288         format += Tcl_UtfToUniChar(format, &ch);
00289         if (ch == '%') {
00290             continue;
00291         }
00292         if (ch == '*') {
00293             flags |= SCAN_SUPPRESS;
00294             format += Tcl_UtfToUniChar(format, &ch);
00295             goto xpgCheckDone;
00296         }
00297 
00298         if ((ch < 0x80) && isdigit(UCHAR(ch))) {        /* INTL: "C" locale. */
00299             /*
00300              * Check for an XPG3-style %n$ specification. Note: there must
00301              * not be a mixture of XPG3 specs and non-XPG3 specs in the same
00302              * format string.
00303              */
00304 
00305             value = strtoul(format-1, &end, 10);        /* INTL: "C" locale. */
00306             if (*end != '$') {
00307                 goto notXpg;
00308             }
00309             format = end+1;
00310             format += Tcl_UtfToUniChar(format, &ch);
00311             gotXpg = 1;
00312             if (gotSequential) {
00313                 goto mixedXPG;
00314             }
00315             objIndex = value - 1;
00316             if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
00317                 goto badIndex;
00318             } else if (numVars == 0) {
00319                 /*
00320                  * In the case where no vars are specified, the user can
00321                  * specify %9999$ legally, so we have to consider special
00322                  * rules for growing the assign array. 'value' is guaranteed
00323                  * to be > 0.
00324                  */
00325                 xpgSize = (xpgSize > value) ? xpgSize : value;
00326             }
00327             goto xpgCheckDone;
00328         }
00329 
00330     notXpg:
00331         gotSequential = 1;
00332         if (gotXpg) {
00333         mixedXPG:
00334             Tcl_SetResult(interp,
00335                     "cannot mix \"%\" and \"%n$\" conversion specifiers",
00336                     TCL_STATIC);
00337             goto error;
00338         }
00339 
00340     xpgCheckDone:
00341         /*
00342          * Parse any width specifier.
00343          */
00344 
00345         if ((ch < 0x80) && isdigit(UCHAR(ch))) {        /* INTL: "C" locale. */
00346             value = strtoul(format-1, &format, 10);     /* INTL: "C" locale. */
00347             flags |= SCAN_WIDTH;
00348             format += Tcl_UtfToUniChar(format, &ch);
00349         }
00350 
00351         /*
00352          * Handle any size specifier.
00353          */
00354 
00355         switch (ch) {
00356         case 'l':
00357             if (*format == 'l') {
00358                 flags |= SCAN_BIG;
00359                 format += 1;
00360                 format += Tcl_UtfToUniChar(format, &ch);
00361                 break;
00362             }
00363         case 'L':
00364             flags |= SCAN_LONGER;
00365         case 'h':
00366             format += Tcl_UtfToUniChar(format, &ch);
00367         }
00368 
00369         if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
00370             goto badIndex;
00371         }
00372 
00373         /*
00374          * Handle the various field types.
00375          */
00376 
00377         switch (ch) {
00378         case 'c':
00379             if (flags & SCAN_WIDTH) {
00380                 Tcl_SetResult(interp,
00381                         "field width may not be specified in %c conversion",
00382                         TCL_STATIC);
00383                 goto error;
00384             }
00385             /*
00386              * Fall through!
00387              */
00388         case 'n':
00389         case 's':
00390             if (flags & (SCAN_LONGER|SCAN_BIG)) {
00391             invalidFieldSize:
00392                 buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
00393                 Tcl_AppendResult(interp,
00394                         "field size modifier may not be specified in %", buf,
00395                         " conversion", NULL);
00396                 goto error;
00397             }
00398             /*
00399              * Fall through!
00400              */
00401         case 'd':
00402         case 'e':
00403         case 'f':
00404         case 'g':
00405         case 'i':
00406         case 'o':
00407         case 'x':
00408             break;
00409         case 'u':
00410             if (flags & SCAN_BIG) {
00411                 Tcl_SetResult(interp,
00412                         "unsigned bignum scans are invalid", TCL_STATIC);
00413                 goto error;
00414             }
00415             break;
00416             /*
00417              * Bracket terms need special checking
00418              */
00419         case '[':
00420             if (flags & (SCAN_LONGER|SCAN_BIG)) {
00421                 goto invalidFieldSize;
00422             }
00423             if (*format == '\0') {
00424                 goto badSet;
00425             }
00426             format += Tcl_UtfToUniChar(format, &ch);
00427             if (ch == '^') {
00428                 if (*format == '\0') {
00429                     goto badSet;
00430                 }
00431                 format += Tcl_UtfToUniChar(format, &ch);
00432             }
00433             if (ch == ']') {
00434                 if (*format == '\0') {
00435                     goto badSet;
00436                 }
00437                 format += Tcl_UtfToUniChar(format, &ch);
00438             }
00439             while (ch != ']') {
00440                 if (*format == '\0') {
00441                     goto badSet;
00442                 }
00443                 format += Tcl_UtfToUniChar(format, &ch);
00444             }
00445             break;
00446         badSet:
00447             Tcl_SetResult(interp, "unmatched [ in format string",
00448                     TCL_STATIC);
00449             goto error;
00450         default:
00451             {
00452                 char buf[TCL_UTF_MAX+1];
00453 
00454                 buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
00455                 Tcl_AppendResult(interp, "bad scan conversion character \"",
00456                         buf, "\"", NULL);
00457                 goto error;
00458             }
00459         }
00460         if (!(flags & SCAN_SUPPRESS)) {
00461             if (objIndex >= nspace) {
00462                 /*
00463                  * Expand the nassign buffer. If we are using XPG specifiers,
00464                  * make sure that we grow to a large enough size. xpgSize is
00465                  * guaranteed to be at least one larger than objIndex.
00466                  */
00467 
00468                 value = nspace;
00469                 if (xpgSize) {
00470                     nspace = xpgSize;
00471                 } else {
00472                     nspace += 16;       /* formerly STATIC_LIST_SIZE */
00473                 }
00474                 nassign = (int *) TclStackRealloc(interp, nassign,
00475                         nspace * sizeof(int));
00476                 for (i = value; i < nspace; i++) {
00477                     nassign[i] = 0;
00478                 }
00479             }
00480             nassign[objIndex]++;
00481             objIndex++;
00482         }
00483     }
00484 
00485     /*
00486      * Verify that all of the variable were assigned exactly once.
00487      */
00488 
00489     if (numVars == 0) {
00490         if (xpgSize) {
00491             numVars = xpgSize;
00492         } else {
00493             numVars = objIndex;
00494         }
00495     }
00496     if (totalSubs) {
00497         *totalSubs = numVars;
00498     }
00499     for (i = 0; i < numVars; i++) {
00500         if (nassign[i] > 1) {
00501             Tcl_SetResult(interp,
00502                     "variable is assigned by multiple \"%n$\" conversion specifiers",
00503                     TCL_STATIC);
00504             goto error;
00505         } else if (!xpgSize && (nassign[i] == 0)) {
00506             /*
00507              * If the space is empty, and xpgSize is 0 (means XPG wasn't used,
00508              * and/or numVars != 0), then too many vars were given
00509              */
00510 
00511             Tcl_SetResult(interp,
00512                     "variable is not assigned by any conversion specifiers",
00513                     TCL_STATIC);
00514             goto error;
00515         }
00516     }
00517 
00518     TclStackFree(interp, nassign);
00519     return TCL_OK;
00520 
00521   badIndex:
00522     if (gotXpg) {
00523         Tcl_SetResult(interp, "\"%n$\" argument index out of range",
00524                 TCL_STATIC);
00525     } else {
00526         Tcl_SetResult(interp,
00527                 "different numbers of variable names and field specifiers",
00528                 TCL_STATIC);
00529     }
00530 
00531   error:
00532     TclStackFree(interp, nassign);
00533     return TCL_ERROR;
00534 }
00535 
00536 /*
00537  *----------------------------------------------------------------------
00538  *
00539  * Tcl_ScanObjCmd --
00540  *
00541  *      This function is invoked to process the "scan" Tcl command. See the
00542  *      user documentation for details on what it does.
00543  *
00544  * Results:
00545  *      A standard Tcl result.
00546  *
00547  * Side effects:
00548  *      See the user documentation.
00549  *
00550  *----------------------------------------------------------------------
00551  */
00552 
00553         /* ARGSUSED */
00554 int
00555 Tcl_ScanObjCmd(
00556     ClientData dummy,           /* Not used. */
00557     Tcl_Interp *interp,         /* Current interpreter. */
00558     int objc,                   /* Number of arguments. */
00559     Tcl_Obj *CONST objv[])      /* Argument objects. */
00560 {
00561     char *format;
00562     int numVars, nconversions, totalVars = -1;
00563     int objIndex, offset, i, result, code;
00564     long value;
00565     CONST char *string, *end, *baseString;
00566     char op = 0;
00567     int width, underflow = 0;
00568     Tcl_WideInt wideValue;
00569     Tcl_UniChar ch, sch;
00570     Tcl_Obj **objs = NULL, *objPtr = NULL;
00571     int flags;
00572     char buf[513];              /* Temporary buffer to hold scanned number
00573                                  * strings before they are passed to
00574                                  * strtoul. */
00575 
00576     if (objc < 3) {
00577         Tcl_WrongNumArgs(interp, 1, objv,
00578                 "string format ?varName varName ...?");
00579         return TCL_ERROR;
00580     }
00581 
00582     format = Tcl_GetStringFromObj(objv[2], NULL);
00583     numVars = objc-3;
00584 
00585     /*
00586      * Check for errors in the format string.
00587      */
00588 
00589     if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) {
00590         return TCL_ERROR;
00591     }
00592 
00593     /*
00594      * Allocate space for the result objects.
00595      */
00596 
00597     if (totalVars > 0) {
00598         objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars);
00599         for (i = 0; i < totalVars; i++) {
00600             objs[i] = NULL;
00601         }
00602     }
00603 
00604     string = Tcl_GetStringFromObj(objv[1], NULL);
00605     baseString = string;
00606 
00607     /*
00608      * Iterate over the format string filling in the result objects until we
00609      * reach the end of input, the end of the format string, or there is a
00610      * mismatch.
00611      */
00612 
00613     objIndex = 0;
00614     nconversions = 0;
00615     while (*format != '\0') {
00616         int parseFlag = TCL_PARSE_NO_WHITESPACE;
00617         format += Tcl_UtfToUniChar(format, &ch);
00618 
00619         flags = 0;
00620 
00621         /*
00622          * If we see whitespace in the format, skip whitespace in the string.
00623          */
00624 
00625         if (Tcl_UniCharIsSpace(ch)) {
00626             offset = Tcl_UtfToUniChar(string, &sch);
00627             while (Tcl_UniCharIsSpace(sch)) {
00628                 if (*string == '\0') {
00629                     goto done;
00630                 }
00631                 string += offset;
00632                 offset = Tcl_UtfToUniChar(string, &sch);
00633             }
00634             continue;
00635         }
00636 
00637         if (ch != '%') {
00638         literal:
00639             if (*string == '\0') {
00640                 underflow = 1;
00641                 goto done;
00642             }
00643             string += Tcl_UtfToUniChar(string, &sch);
00644             if (ch != sch) {
00645                 goto done;
00646             }
00647             continue;
00648         }
00649 
00650         format += Tcl_UtfToUniChar(format, &ch);
00651         if (ch == '%') {
00652             goto literal;
00653         }
00654 
00655         /*
00656          * Check for assignment suppression ('*') or an XPG3-style assignment
00657          * ('%n$').
00658          */
00659 
00660         if (ch == '*') {
00661             flags |= SCAN_SUPPRESS;
00662             format += Tcl_UtfToUniChar(format, &ch);
00663         } else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
00664             char *formatEnd;
00665             value = strtoul(format-1, &formatEnd, 10);/* INTL: "C" locale. */
00666             if (*formatEnd == '$') {
00667                 format = formatEnd+1;
00668                 format += Tcl_UtfToUniChar(format, &ch);
00669                 objIndex = (int) value - 1;
00670             }
00671         }
00672 
00673         /*
00674          * Parse any width specifier.
00675          */
00676 
00677         if ((ch < 0x80) && isdigit(UCHAR(ch))) {        /* INTL: "C" locale. */
00678             width = (int) strtoul(format-1, &format, 10);/* INTL: "C" locale. */
00679             format += Tcl_UtfToUniChar(format, &ch);
00680         } else {
00681             width = 0;
00682         }
00683 
00684         /*
00685          * Handle any size specifier.
00686          */
00687 
00688         switch (ch) {
00689         case 'l':
00690             if (*format == 'l') {
00691                 flags |= SCAN_BIG;
00692                 format += 1;
00693                 format += Tcl_UtfToUniChar(format, &ch);
00694                 break;
00695             }
00696         case 'L':
00697             flags |= SCAN_LONGER;
00698             /*
00699              * Fall through so we skip to the next character.
00700              */
00701         case 'h':
00702             format += Tcl_UtfToUniChar(format, &ch);
00703         }
00704 
00705         /*
00706          * Handle the various field types.
00707          */
00708 
00709         switch (ch) {
00710         case 'n':
00711             if (!(flags & SCAN_SUPPRESS)) {
00712                 objPtr = Tcl_NewIntObj(string - baseString);
00713                 Tcl_IncrRefCount(objPtr);
00714                 objs[objIndex++] = objPtr;
00715             }
00716             nconversions++;
00717             continue;
00718 
00719         case 'd':
00720             op = 'i';
00721             parseFlag |= TCL_PARSE_DECIMAL_ONLY;
00722             break;
00723         case 'i':
00724             op = 'i';
00725             parseFlag |= TCL_PARSE_SCAN_PREFIXES;
00726             break;
00727         case 'o':
00728             op = 'i';
00729             parseFlag |= TCL_PARSE_OCTAL_ONLY | TCL_PARSE_SCAN_PREFIXES;
00730             break;
00731         case 'x':
00732             op = 'i';
00733             parseFlag |= TCL_PARSE_HEXADECIMAL_ONLY;
00734             break;
00735         case 'u':
00736             op = 'i';
00737             parseFlag |= TCL_PARSE_DECIMAL_ONLY;
00738             flags |= SCAN_UNSIGNED;
00739             break;
00740 
00741         case 'f':
00742         case 'e':
00743         case 'g':
00744             op = 'f';
00745             break;
00746 
00747         case 's':
00748             op = 's';
00749             break;
00750 
00751         case 'c':
00752             op = 'c';
00753             flags |= SCAN_NOSKIP;
00754             break;
00755         case '[':
00756             op = '[';
00757             flags |= SCAN_NOSKIP;
00758             break;
00759         }
00760 
00761         /*
00762          * At this point, we will need additional characters from the string
00763          * to proceed.
00764          */
00765 
00766         if (*string == '\0') {
00767             underflow = 1;
00768             goto done;
00769         }
00770 
00771         /*
00772          * Skip any leading whitespace at the beginning of a field unless the
00773          * format suppresses this behavior.
00774          */
00775 
00776         if (!(flags & SCAN_NOSKIP)) {
00777             while (*string != '\0') {
00778                 offset = Tcl_UtfToUniChar(string, &sch);
00779                 if (!Tcl_UniCharIsSpace(sch)) {
00780                     break;
00781                 }
00782                 string += offset;
00783             }
00784             if (*string == '\0') {
00785                 underflow = 1;
00786                 goto done;
00787             }
00788         }
00789 
00790         /*
00791          * Perform the requested scanning operation.
00792          */
00793 
00794         switch (op) {
00795         case 's':
00796             /*
00797              * Scan a string up to width characters or whitespace.
00798              */
00799 
00800             if (width == 0) {
00801                 width = ~0;
00802             }
00803             end = string;
00804             while (*end != '\0') {
00805                 offset = Tcl_UtfToUniChar(end, &sch);
00806                 if (Tcl_UniCharIsSpace(sch)) {
00807                     break;
00808                 }
00809                 end += offset;
00810                 if (--width == 0) {
00811                     break;
00812                 }
00813             }
00814             if (!(flags & SCAN_SUPPRESS)) {
00815                 objPtr = Tcl_NewStringObj(string, end-string);
00816                 Tcl_IncrRefCount(objPtr);
00817                 objs[objIndex++] = objPtr;
00818             }
00819             string = end;
00820             break;
00821 
00822         case '[': {
00823             CharSet cset;
00824 
00825             if (width == 0) {
00826                 width = ~0;
00827             }
00828             end = string;
00829 
00830             format = BuildCharSet(&cset, format);
00831             while (*end != '\0') {
00832                 offset = Tcl_UtfToUniChar(end, &sch);
00833                 if (!CharInSet(&cset, (int)sch)) {
00834                     break;
00835                 }
00836                 end += offset;
00837                 if (--width == 0) {
00838                     break;
00839                 }
00840             }
00841             ReleaseCharSet(&cset);
00842 
00843             if (string == end) {
00844                 /*
00845                  * Nothing matched the range, stop processing.
00846                  */
00847                 goto done;
00848             }
00849             if (!(flags & SCAN_SUPPRESS)) {
00850                 objPtr = Tcl_NewStringObj(string, end-string);
00851                 Tcl_IncrRefCount(objPtr);
00852                 objs[objIndex++] = objPtr;
00853             }
00854             string = end;
00855 
00856             break;
00857         }
00858         case 'c':
00859             /*
00860              * Scan a single Unicode character.
00861              */
00862 
00863             string += Tcl_UtfToUniChar(string, &sch);
00864             if (!(flags & SCAN_SUPPRESS)) {
00865                 objPtr = Tcl_NewIntObj((int)sch);
00866                 Tcl_IncrRefCount(objPtr);
00867                 objs[objIndex++] = objPtr;
00868             }
00869             break;
00870 
00871         case 'i':
00872             /*
00873              * Scan an unsigned or signed integer.
00874              */
00875             objPtr = Tcl_NewLongObj(0);
00876             Tcl_IncrRefCount(objPtr);
00877             if (width == 0) {
00878                 width = ~0;
00879             }
00880             if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width,
00881                     &end, TCL_PARSE_INTEGER_ONLY | parseFlag)) {
00882                 Tcl_DecrRefCount(objPtr);
00883                 if (width < 0) {
00884                     if (*end == '\0') {
00885                         underflow = 1;
00886                     }
00887                 } else {
00888                     if (end == string + width) {
00889                         underflow = 1;
00890                     }
00891                 }
00892                 goto done;
00893             }
00894             string = end;
00895             if (flags & SCAN_SUPPRESS) {
00896                 Tcl_DecrRefCount(objPtr);
00897                 break;
00898             }
00899             if (flags & SCAN_LONGER) {
00900                 if (Tcl_GetWideIntFromObj(NULL, objPtr, &wideValue) != TCL_OK) {
00901                     wideValue = ~(Tcl_WideUInt)0 >> 1;  /* WIDE_MAX */
00902                     if (TclGetString(objPtr)[0] == '-') {
00903                         wideValue++;    /* WIDE_MAX + 1 = WIDE_MIN */
00904                     }
00905                 }
00906                 if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) {
00907                     sprintf(buf, "%" TCL_LL_MODIFIER "u",
00908                             (Tcl_WideUInt)wideValue);
00909                     Tcl_SetStringObj(objPtr, buf, -1);
00910                 } else {
00911                     Tcl_SetWideIntObj(objPtr, wideValue);
00912                 }
00913             } else if (!(flags & SCAN_BIG)) {
00914                 if (TclGetLongFromObj(NULL, objPtr, &value) != TCL_OK) {
00915                     if (TclGetString(objPtr)[0] == '-') {
00916                         value = LONG_MIN;
00917                     } else {
00918                         value = LONG_MAX;
00919                     }
00920                 }
00921                 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
00922                     sprintf(buf, "%lu", value); /* INTL: ISO digit */
00923                     Tcl_SetStringObj(objPtr, buf, -1);
00924                 } else {
00925                     Tcl_SetLongObj(objPtr, value);
00926                 }
00927             }
00928             objs[objIndex++] = objPtr;
00929             break;
00930 
00931         case 'f':
00932             /*
00933              * Scan a floating point number
00934              */
00935 
00936             objPtr = Tcl_NewDoubleObj(0.0);
00937             Tcl_IncrRefCount(objPtr);
00938             if (width == 0) {
00939                 width = ~0;
00940             }
00941             if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width,
00942                     &end, TCL_PARSE_DECIMAL_ONLY | TCL_PARSE_NO_WHITESPACE)) {
00943                 Tcl_DecrRefCount(objPtr);
00944                 if (width < 0) {
00945                     if (*end == '\0') {
00946                         underflow = 1;
00947                     }
00948                 } else {
00949                     if (end == string + width) {
00950                         underflow = 1;
00951                     }
00952                 }
00953                 goto done;
00954             } else if (flags & SCAN_SUPPRESS) {
00955                 Tcl_DecrRefCount(objPtr);
00956                 string = end;
00957             } else {
00958                 double dvalue;
00959                 if (Tcl_GetDoubleFromObj(NULL, objPtr, &dvalue) != TCL_OK) {
00960 #ifdef ACCEPT_NAN
00961                     if (objPtr->typePtr == &tclDoubleType) {
00962                         dValue = objPtr->internalRep.doubleValue;
00963                     } else
00964 #endif
00965                     {
00966                         Tcl_DecrRefCount(objPtr);
00967                         goto done;
00968                     }
00969                 }
00970                 Tcl_SetDoubleObj(objPtr, dvalue);
00971                 objs[objIndex++] = objPtr;
00972                 string = end;
00973             }
00974         }
00975         nconversions++;
00976     }
00977 
00978   done:
00979     result = 0;
00980     code = TCL_OK;
00981 
00982     if (numVars) {
00983         /*
00984          * In this case, variables were specified (classic scan).
00985          */
00986 
00987         for (i = 0; i < totalVars; i++) {
00988             if (objs[i] == NULL) {
00989                 continue;
00990             }
00991             result++;
00992             if (Tcl_ObjSetVar2(interp, objv[i+3], NULL, objs[i], 0) == NULL) {
00993                 Tcl_AppendResult(interp, "couldn't set variable \"",
00994                         TclGetString(objv[i+3]), "\"", NULL);
00995                 code = TCL_ERROR;
00996             }
00997             Tcl_DecrRefCount(objs[i]);
00998         }
00999     } else {
01000         /*
01001          * Here no vars were specified, we want a list returned (inline scan)
01002          */
01003 
01004         objPtr = Tcl_NewObj();
01005         for (i = 0; i < totalVars; i++) {
01006             if (objs[i] != NULL) {
01007                 Tcl_ListObjAppendElement(NULL, objPtr, objs[i]);
01008                 Tcl_DecrRefCount(objs[i]);
01009             } else {
01010                 /*
01011                  * More %-specifiers than matching chars, so we just spit out
01012                  * empty strings for these.
01013                  */
01014 
01015                 Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj());
01016             }
01017         }
01018     }
01019     if (objs != NULL) {
01020         ckfree((char*) objs);
01021     }
01022     if (code == TCL_OK) {
01023         if (underflow && (nconversions == 0)) {
01024             if (numVars) {
01025                 objPtr = Tcl_NewIntObj(-1);
01026             } else {
01027                 if (objPtr) {
01028                     Tcl_SetListObj(objPtr, 0, NULL);
01029                 } else {
01030                     objPtr = Tcl_NewObj();
01031                 }
01032             }
01033         } else if (numVars) {
01034             objPtr = Tcl_NewIntObj(result);
01035         }
01036         Tcl_SetObjResult(interp, objPtr);
01037     }
01038     return code;
01039 }
01040 
01041 /*
01042  * Local Variables:
01043  * mode: c
01044  * c-basic-offset: 4
01045  * fill-column: 78
01046  * End:
01047  */



Generated on Wed Mar 12 12:18:21 2008 by  doxygen 1.5.1