tclStringObj.c

Go to the documentation of this file.
00001 /*
00002  * tclStringObj.c --
00003  *
00004  *      This file contains functions that implement string operations on Tcl
00005  *      objects. Some string operations work with UTF strings and others
00006  *      require Unicode format. Functions that require knowledge of the width
00007  *      of each character, such as indexing, operate on Unicode data.
00008  *
00009  *      A Unicode string is an internationalized string. Conceptually, a
00010  *      Unicode string is an array of 16-bit quantities organized as a
00011  *      sequence of properly formed UTF-8 characters. There is a one-to-one
00012  *      map between Unicode and UTF characters. Because Unicode characters
00013  *      have a fixed width, operations such as indexing operate on Unicode
00014  *      data. The String object is optimized for the case where each UTF char
00015  *      in a string is only one byte. In this case, we store the value of
00016  *      numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
00017  *      is explicitly called).
00018  *
00019  *      The String object type stores one or both formats. The default
00020  *      behavior is to store UTF. Once Unicode is calculated by a function, it
00021  *      is stored in the internal rep for future access (without an additional
00022  *      O(n) cost).
00023  *
00024  *      To allow many appends to be done to an object without constantly
00025  *      reallocating the space for the string or Unicode representation, we
00026  *      allocate double the space for the string or Unicode and use the
00027  *      internal representation to keep track of how much space is used vs.
00028  *      allocated.
00029  *
00030  * Copyright (c) 1995-1997 Sun Microsystems, Inc.
00031  * Copyright (c) 1999 by Scriptics Corporation.
00032  *
00033  * See the file "license.terms" for information on usage and redistribution of
00034  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
00035  *
00036  * RCS: @(#) $Id: tclStringObj.c,v 1.69 2008/01/10 16:09:23 dgp Exp $ */
00037 
00038 #include "tclInt.h"
00039 #include "tommath.h"
00040 
00041 /*
00042  * Prototypes for functions defined later in this file:
00043  */
00044 
00045 static void             AppendUnicodeToUnicodeRep(Tcl_Obj *objPtr,
00046                             const Tcl_UniChar *unicode, int appendNumChars);
00047 static void             AppendUnicodeToUtfRep(Tcl_Obj *objPtr,
00048                             const Tcl_UniChar *unicode, int numChars);
00049 static void             AppendUtfToUnicodeRep(Tcl_Obj *objPtr,
00050                             const char *bytes, int numBytes);
00051 static void             AppendUtfToUtfRep(Tcl_Obj *objPtr,
00052                             const char *bytes, int numBytes);
00053 static void             FillUnicodeRep(Tcl_Obj *objPtr);
00054 static void             AppendPrintfToObjVA(Tcl_Obj *objPtr,
00055                             const char *format, va_list argList);
00056 static void             FreeStringInternalRep(Tcl_Obj *objPtr);
00057 static void             DupStringInternalRep(Tcl_Obj *objPtr,
00058                             Tcl_Obj *copyPtr);
00059 static int              SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
00060 static void             UpdateStringOfString(Tcl_Obj *objPtr);
00061 
00062 /*
00063  * The structure below defines the string Tcl object type by means of
00064  * functions that can be invoked by generic object code.
00065  */
00066 
00067 Tcl_ObjType tclStringType = {
00068     "string",                   /* name */
00069     FreeStringInternalRep,      /* freeIntRepPro */
00070     DupStringInternalRep,       /* dupIntRepProc */
00071     UpdateStringOfString,       /* updateStringProc */
00072     SetStringFromAny            /* setFromAnyProc */
00073 };
00074 
00075 /*
00076  * The following structure is the internal rep for a String object. It keeps
00077  * track of how much memory has been used and how much has been allocated for
00078  * the Unicode and UTF string to enable growing and shrinking of the UTF and
00079  * Unicode reps of the String object with fewer mallocs. To optimize string
00080  * length and indexing operations, this structure also stores the number of
00081  * characters (same of UTF and Unicode!) once that value has been computed.
00082  *
00083  * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
00084  * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This
00085  * can be officially modified by altering the definition of Tcl_UniChar in
00086  * tcl.h, but do not do that unless you are sure what you're doing!
00087  */
00088 
00089 typedef struct String {
00090     int numChars;               /* The number of chars in the string. -1 means
00091                                  * this value has not been calculated. >= 0
00092                                  * means that there is a valid Unicode rep, or
00093                                  * that the number of UTF bytes == the number
00094                                  * of chars. */
00095     size_t allocated;           /* The amount of space actually allocated for
00096                                  * the UTF string (minus 1 byte for the
00097                                  * termination char). */
00098     size_t uallocated;          /* The amount of space actually allocated for
00099                                  * the Unicode string (minus 2 bytes for the
00100                                  * termination char). */
00101     int hasUnicode;             /* Boolean determining whether the string has
00102                                  * a Unicode representation. */
00103     Tcl_UniChar unicode[2];     /* The array of Unicode chars. The actual size
00104                                  * of this field depends on the 'uallocated'
00105                                  * field above. */
00106 } String;
00107 
00108 #define STRING_UALLOC(numChars) \
00109         (numChars * sizeof(Tcl_UniChar))
00110 #define STRING_SIZE(ualloc) \
00111         ((unsigned) ((ualloc) \
00112                  ? sizeof(String) - sizeof(Tcl_UniChar) + (ualloc) \
00113                  : sizeof(String)))
00114 #define GET_STRING(objPtr) \
00115         ((String *) (objPtr)->internalRep.otherValuePtr)
00116 #define SET_STRING(objPtr, stringPtr) \
00117         ((objPtr)->internalRep.otherValuePtr = (void *) (stringPtr))
00118 
00119 /*
00120  * TCL STRING GROWTH ALGORITHM
00121  *
00122  * When growing strings (during an append, for example), the following growth
00123  * algorithm is used:
00124  *
00125  *   Attempt to allocate 2 * (originalLength + appendLength)
00126  *   On failure:
00127  *      attempt to allocate originalLength + 2*appendLength +
00128  *                      TCL_GROWTH_MIN_ALLOC
00129  *
00130  * This algorithm allows very good performance, as it rapidly increases the
00131  * memory allocated for a given string, which minimizes the number of
00132  * reallocations that must be performed. However, using only the doubling
00133  * algorithm can lead to a significant waste of memory. In particular, it may
00134  * fail even when there is sufficient memory available to complete the append
00135  * request (but there is not 2*totalLength memory available). So when the
00136  * doubling fails (because there is not enough memory available), the
00137  * algorithm requests a smaller amount of memory, which is still enough to
00138  * cover the request, but which hopefully will be less than the total
00139  * available memory.
00140  *
00141  * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling of very
00142  * small appends. Without this extra slush factor, a sequence of several small
00143  * appends would cause several memory allocations. As long as
00144  * TCL_GROWTH_MIN_ALLOC is a reasonable size, we can avoid that behavior.
00145  *
00146  * The growth algorithm can be tuned by adjusting the following parameters:
00147  *
00148  * TCL_GROWTH_MIN_ALLOC         Additional space, in bytes, to allocate when
00149  *                              the double allocation has failed. Default is
00150  *                              1024 (1 kilobyte).
00151  */
00152 
00153 #ifndef TCL_GROWTH_MIN_ALLOC
00154 #define TCL_GROWTH_MIN_ALLOC    1024
00155 #endif
00156 
00157 /*
00158  *----------------------------------------------------------------------
00159  *
00160  * Tcl_NewStringObj --
00161  *
00162  *      This function is normally called when not debugging: i.e., when
00163  *      TCL_MEM_DEBUG is not defined. It creates a new string object and
00164  *      initializes it from the byte pointer and length arguments.
00165  *
00166  *      When TCL_MEM_DEBUG is defined, this function just returns the result
00167  *      of calling the debugging version Tcl_DbNewStringObj.
00168  *
00169  * Results:
00170  *      A newly created string object is returned that has ref count zero.
00171  *
00172  * Side effects:
00173  *      The new object's internal string representation will be set to a copy
00174  *      of the length bytes starting at "bytes". If "length" is negative, use
00175  *      bytes up to the first NUL byte; i.e., assume "bytes" points to a
00176  *      C-style NUL-terminated string. The object's type is set to NULL. An
00177  *      extra NUL is added to the end of the new object's byte array.
00178  *
00179  *----------------------------------------------------------------------
00180  */
00181 
00182 #ifdef TCL_MEM_DEBUG
00183 #undef Tcl_NewStringObj
00184 Tcl_Obj *
00185 Tcl_NewStringObj(
00186     const char *bytes,          /* Points to the first of the length bytes
00187                                  * used to initialize the new object. */
00188     int length)                 /* The number of bytes to copy from "bytes"
00189                                  * when initializing the new object. If
00190                                  * negative, use bytes up to the first NUL
00191                                  * byte. */
00192 {
00193     return Tcl_DbNewStringObj(bytes, length, "unknown", 0);
00194 }
00195 #else /* if not TCL_MEM_DEBUG */
00196 Tcl_Obj *
00197 Tcl_NewStringObj(
00198     const char *bytes,          /* Points to the first of the length bytes
00199                                  * used to initialize the new object. */
00200     int length)                 /* The number of bytes to copy from "bytes"
00201                                  * when initializing the new object. If
00202                                  * negative, use bytes up to the first NUL
00203                                  * byte. */
00204 {
00205     register Tcl_Obj *objPtr;
00206 
00207     if (length < 0) {
00208         length = (bytes? strlen(bytes) : 0);
00209     }
00210     TclNewStringObj(objPtr, bytes, length);
00211     return objPtr;
00212 }
00213 #endif /* TCL_MEM_DEBUG */
00214 
00215 /*
00216  *----------------------------------------------------------------------
00217  *
00218  * Tcl_DbNewStringObj --
00219  *
00220  *      This function is normally called when debugging: i.e., when
00221  *      TCL_MEM_DEBUG is defined. It creates new string objects. It is the
00222  *      same as the Tcl_NewStringObj function above except that it calls
00223  *      Tcl_DbCkalloc directly with the file name and line number from its
00224  *      caller. This simplifies debugging since then the [memory active]
00225  *      command will report the correct file name and line number when
00226  *      reporting objects that haven't been freed.
00227  *
00228  *      When TCL_MEM_DEBUG is not defined, this function just returns the
00229  *      result of calling Tcl_NewStringObj.
00230  *
00231  * Results:
00232  *      A newly created string object is returned that has ref count zero.
00233  *
00234  * Side effects:
00235  *      The new object's internal string representation will be set to a copy
00236  *      of the length bytes starting at "bytes". If "length" is negative, use
00237  *      bytes up to the first NUL byte; i.e., assume "bytes" points to a
00238  *      C-style NUL-terminated string. The object's type is set to NULL. An
00239  *      extra NUL is added to the end of the new object's byte array.
00240  *
00241  *----------------------------------------------------------------------
00242  */
00243 
00244 #ifdef TCL_MEM_DEBUG
00245 Tcl_Obj *
00246 Tcl_DbNewStringObj(
00247     const char *bytes,          /* Points to the first of the length bytes
00248                                  * used to initialize the new object. */
00249     int length,                 /* The number of bytes to copy from "bytes"
00250                                  * when initializing the new object. If
00251                                  * negative, use bytes up to the first NUL
00252                                  * byte. */
00253     const char *file,           /* The name of the source file calling this
00254                                  * function; used for debugging. */
00255     int line)                   /* Line number in the source file; used for
00256                                  * debugging. */
00257 {
00258     register Tcl_Obj *objPtr;
00259 
00260     if (length < 0) {
00261         length = (bytes? strlen(bytes) : 0);
00262     }
00263     TclDbNewObj(objPtr, file, line);
00264     TclInitStringRep(objPtr, bytes, length);
00265     return objPtr;
00266 }
00267 #else /* if not TCL_MEM_DEBUG */
00268 Tcl_Obj *
00269 Tcl_DbNewStringObj(
00270     const char *bytes,          /* Points to the first of the length bytes
00271                                  * used to initialize the new object. */
00272     register int length,        /* The number of bytes to copy from "bytes"
00273                                  * when initializing the new object. If
00274                                  * negative, use bytes up to the first NUL
00275                                  * byte. */
00276     const char *file,           /* The name of the source file calling this
00277                                  * function; used for debugging. */
00278     int line)                   /* Line number in the source file; used for
00279                                  * debugging. */
00280 {
00281     return Tcl_NewStringObj(bytes, length);
00282 }
00283 #endif /* TCL_MEM_DEBUG */
00284 
00285 /*
00286  *---------------------------------------------------------------------------
00287  *
00288  * Tcl_NewUnicodeObj --
00289  *
00290  *      This function is creates a new String object and initializes it from
00291  *      the given Unicode String. If the Utf String is the same size as the
00292  *      Unicode string, don't duplicate the data.
00293  *
00294  * Results:
00295  *      The newly created object is returned. This object will have no initial
00296  *      string representation. The returned object has a ref count of 0.
00297  *
00298  * Side effects:
00299  *      Memory allocated for new object and copy of Unicode argument.
00300  *
00301  *---------------------------------------------------------------------------
00302  */
00303 
00304 Tcl_Obj *
00305 Tcl_NewUnicodeObj(
00306     const Tcl_UniChar *unicode, /* The unicode string used to initialize the
00307                                  * new object. */
00308     int numChars)               /* Number of characters in the unicode
00309                                  * string. */
00310 {
00311     Tcl_Obj *objPtr;
00312     String *stringPtr;
00313     size_t uallocated;
00314 
00315     if (numChars < 0) {
00316         numChars = 0;
00317         if (unicode) {
00318             while (unicode[numChars] != 0) {
00319                 numChars++;
00320             }
00321         }
00322     }
00323     uallocated = STRING_UALLOC(numChars);
00324 
00325     /*
00326      * Create a new obj with an invalid string rep.
00327      */
00328 
00329     TclNewObj(objPtr);
00330     Tcl_InvalidateStringRep(objPtr);
00331     objPtr->typePtr = &tclStringType;
00332 
00333     stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
00334     stringPtr->numChars = numChars;
00335     stringPtr->uallocated = uallocated;
00336     stringPtr->hasUnicode = (numChars > 0);
00337     stringPtr->allocated = 0;
00338     memcpy(stringPtr->unicode, unicode, uallocated);
00339     stringPtr->unicode[numChars] = 0;
00340     SET_STRING(objPtr, stringPtr);
00341     return objPtr;
00342 }
00343 
00344 /*
00345  *----------------------------------------------------------------------
00346  *
00347  * Tcl_GetCharLength --
00348  *
00349  *      Get the length of the Unicode string from the Tcl object.
00350  *
00351  * Results:
00352  *      Pointer to unicode string representing the unicode object.
00353  *
00354  * Side effects:
00355  *      Frees old internal rep. Allocates memory for new "String" internal
00356  *      rep.
00357  *
00358  *----------------------------------------------------------------------
00359  */
00360 
00361 int
00362 Tcl_GetCharLength(
00363     Tcl_Obj *objPtr)            /* The String object to get the num chars
00364                                  * of. */
00365 {
00366     String *stringPtr;
00367 
00368     SetStringFromAny(NULL, objPtr);
00369     stringPtr = GET_STRING(objPtr);
00370 
00371     /*
00372      * If numChars is unknown, then calculate the number of characaters while
00373      * populating the Unicode string.
00374      */
00375 
00376     if (stringPtr->numChars == -1) {
00377         register int i = objPtr->length;
00378         register unsigned char *str = (unsigned char *) objPtr->bytes;
00379 
00380         /*
00381          * This is a speed sensitive function, so run specially over the
00382          * string to count continuous ascii characters before resorting to the
00383          * Tcl_NumUtfChars call. This is a long form of:
00384          stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes,objPtr->length);
00385          *
00386          * TODO: Consider macro-izing this.
00387          */
00388 
00389         while (i && (*str < 0xC0)) {
00390             i--;
00391             str++;
00392         }
00393         stringPtr->numChars = objPtr->length - i;
00394         if (i) {
00395             stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes
00396                     + (objPtr->length - i), i);
00397         }
00398 
00399         if (stringPtr->numChars == objPtr->length) {
00400             /*
00401              * Since we've just calculated the number of chars, and all UTF
00402              * chars are 1-byte long, we don't need to store the unicode
00403              * string.
00404              */
00405 
00406             stringPtr->hasUnicode = 0;
00407         } else {
00408             /*
00409              * Since we've just calucalated the number of chars, and not all
00410              * UTF chars are 1-byte long, go ahead and populate the unicode
00411              * string.
00412              */
00413 
00414             FillUnicodeRep(objPtr);
00415 
00416             /*
00417              * We need to fetch the pointer again because we have just
00418              * reallocated the structure to make room for the Unicode data.
00419              */
00420 
00421             stringPtr = GET_STRING(objPtr);
00422         }
00423     }
00424     return stringPtr->numChars;
00425 }
00426 
00427 /*
00428  *----------------------------------------------------------------------
00429  *
00430  * Tcl_GetUniChar --
00431  *
00432  *      Get the index'th Unicode character from the String object. The index
00433  *      is assumed to be in the appropriate range.
00434  *
00435  * Results:
00436  *      Returns the index'th Unicode character in the Object.
00437  *
00438  * Side effects:
00439  *      Fills unichar with the index'th Unicode character.
00440  *
00441  *----------------------------------------------------------------------
00442  */
00443 
00444 Tcl_UniChar
00445 Tcl_GetUniChar(
00446     Tcl_Obj *objPtr,            /* The object to get the Unicode charater
00447                                  * from. */
00448     int index)                  /* Get the index'th Unicode character. */
00449 {
00450     Tcl_UniChar unichar;
00451     String *stringPtr;
00452 
00453     SetStringFromAny(NULL, objPtr);
00454     stringPtr = GET_STRING(objPtr);
00455 
00456     if (stringPtr->numChars == -1) {
00457         /*
00458          * We haven't yet calculated the length, so we don't have the Unicode
00459          * str. We need to know the number of chars before we can do indexing.
00460          */
00461 
00462         Tcl_GetCharLength(objPtr);
00463 
00464         /*
00465          * We need to fetch the pointer again because we may have just
00466          * reallocated the structure.
00467          */
00468 
00469         stringPtr = GET_STRING(objPtr);
00470     }
00471     if (stringPtr->hasUnicode == 0) {
00472         /*
00473          * All of the characters in the Utf string are 1 byte chars, so we
00474          * don't store the unicode char. We get the Utf string and convert the
00475          * index'th byte to a Unicode character.
00476          */
00477 
00478         unichar = (Tcl_UniChar) objPtr->bytes[index];
00479     } else {
00480         unichar = stringPtr->unicode[index];
00481     }
00482     return unichar;
00483 }
00484 
00485 /*
00486  *----------------------------------------------------------------------
00487  *
00488  * Tcl_GetUnicode --
00489  *
00490  *      Get the Unicode form of the String object. If the object is not
00491  *      already a String object, it will be converted to one. If the String
00492  *      object does not have a Unicode rep, then one is create from the UTF
00493  *      string format.
00494  *
00495  * Results:
00496  *      Returns a pointer to the object's internal Unicode string.
00497  *
00498  * Side effects:
00499  *      Converts the object to have the String internal rep.
00500  *
00501  *----------------------------------------------------------------------
00502  */
00503 
00504 Tcl_UniChar *
00505 Tcl_GetUnicode(
00506     Tcl_Obj *objPtr)            /* The object to find the unicode string
00507                                  * for. */
00508 {
00509     String *stringPtr;
00510 
00511     SetStringFromAny(NULL, objPtr);
00512     stringPtr = GET_STRING(objPtr);
00513 
00514     if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
00515         /*
00516          * We haven't yet calculated the length, or all of the characters in
00517          * the Utf string are 1 byte chars (so we didn't store the unicode
00518          * str). Since this function must return a unicode string, and one has
00519          * not yet been stored, force the Unicode to be calculated and stored
00520          * now.
00521          */
00522 
00523         FillUnicodeRep(objPtr);
00524 
00525         /*
00526          * We need to fetch the pointer again because we have just reallocated
00527          * the structure to make room for the Unicode data.
00528          */
00529 
00530         stringPtr = GET_STRING(objPtr);
00531     }
00532     return stringPtr->unicode;
00533 }
00534 
00535 /*
00536  *----------------------------------------------------------------------
00537  *
00538  * Tcl_GetUnicodeFromObj --
00539  *
00540  *      Get the Unicode form of the String object with length. If the object
00541  *      is not already a String object, it will be converted to one. If the
00542  *      String object does not have a Unicode rep, then one is create from the
00543  *      UTF string format.
00544  *
00545  * Results:
00546  *      Returns a pointer to the object's internal Unicode string.
00547  *
00548  * Side effects:
00549  *      Converts the object to have the String internal rep.
00550  *
00551  *----------------------------------------------------------------------
00552  */
00553 
00554 Tcl_UniChar *
00555 Tcl_GetUnicodeFromObj(
00556     Tcl_Obj *objPtr,            /* The object to find the unicode string
00557                                  * for. */
00558     int *lengthPtr)             /* If non-NULL, the location where the string
00559                                  * rep's unichar length should be stored. If
00560                                  * NULL, no length is stored. */
00561 {
00562     String *stringPtr;
00563 
00564     SetStringFromAny(NULL, objPtr);
00565     stringPtr = GET_STRING(objPtr);
00566 
00567     if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
00568         /*
00569          * We haven't yet calculated the length, or all of the characters in
00570          * the Utf string are 1 byte chars (so we didn't store the unicode
00571          * str). Since this function must return a unicode string, and one has
00572          * not yet been stored, force the Unicode to be calculated and stored
00573          * now.
00574          */
00575 
00576         FillUnicodeRep(objPtr);
00577 
00578         /*
00579          * We need to fetch the pointer again because we have just reallocated
00580          * the structure to make room for the Unicode data.
00581          */
00582 
00583         stringPtr = GET_STRING(objPtr);
00584     }
00585 
00586     if (lengthPtr != NULL) {
00587         *lengthPtr = stringPtr->numChars;
00588     }
00589     return stringPtr->unicode;
00590 }
00591 
00592 /*
00593  *----------------------------------------------------------------------
00594  *
00595  * Tcl_GetRange --
00596  *
00597  *      Create a Tcl Object that contains the chars between first and last of
00598  *      the object indicated by "objPtr". If the object is not already a
00599  *      String object, convert it to one. The first and last indices are
00600  *      assumed to be in the appropriate range.
00601  *
00602  * Results:
00603  *      Returns a new Tcl Object of the String type.
00604  *
00605  * Side effects:
00606  *      Changes the internal rep of "objPtr" to the String type.
00607  *
00608  *----------------------------------------------------------------------
00609  */
00610 
00611 Tcl_Obj *
00612 Tcl_GetRange(
00613     Tcl_Obj *objPtr,            /* The Tcl object to find the range of. */
00614     int first,                  /* First index of the range. */
00615     int last)                   /* Last index of the range. */
00616 {
00617     Tcl_Obj *newObjPtr;         /* The Tcl object to find the range of. */
00618     String *stringPtr;
00619 
00620     SetStringFromAny(NULL, objPtr);
00621     stringPtr = GET_STRING(objPtr);
00622 
00623     if (stringPtr->numChars == -1) {
00624         /*
00625          * We haven't yet calculated the length, so we don't have the Unicode
00626          * str. We need to know the number of chars before we can do indexing.
00627          */
00628 
00629         Tcl_GetCharLength(objPtr);
00630 
00631         /*
00632          * We need to fetch the pointer again because we may have just
00633          * reallocated the structure.
00634          */
00635 
00636         stringPtr = GET_STRING(objPtr);
00637     }
00638 
00639     if (objPtr->bytes && (stringPtr->numChars == objPtr->length)) {
00640         char *str = TclGetString(objPtr);
00641 
00642         /*
00643          * All of the characters in the Utf string are 1 byte chars, so we
00644          * don't store the unicode char. Create a new string object containing
00645          * the specified range of chars.
00646          */
00647 
00648         newObjPtr = Tcl_NewStringObj(&str[first], last-first+1);
00649 
00650         /*
00651          * Since we know the new string only has 1-byte chars, we can set it's
00652          * numChars field.
00653          */
00654 
00655         SetStringFromAny(NULL, newObjPtr);
00656         stringPtr = GET_STRING(newObjPtr);
00657         stringPtr->numChars = last-first+1;
00658     } else {
00659         newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first,
00660                 last-first+1);
00661     }
00662     return newObjPtr;
00663 }
00664 
00665 /*
00666  *----------------------------------------------------------------------
00667  *
00668  * Tcl_SetStringObj --
00669  *
00670  *      Modify an object to hold a string that is a copy of the bytes
00671  *      indicated by the byte pointer and length arguments.
00672  *
00673  * Results:
00674  *      None.
00675  *
00676  * Side effects:
00677  *      The object's string representation will be set to a copy of the
00678  *      "length" bytes starting at "bytes". If "length" is negative, use bytes
00679  *      up to the first NUL byte; i.e., assume "bytes" points to a C-style
00680  *      NUL-terminated string. The object's old string and internal
00681  *      representations are freed and the object's type is set NULL.
00682  *
00683  *----------------------------------------------------------------------
00684  */
00685 
00686 void
00687 Tcl_SetStringObj(
00688     register Tcl_Obj *objPtr,   /* Object whose internal rep to init. */
00689     const char *bytes,          /* Points to the first of the length bytes
00690                                  * used to initialize the object. */
00691     register int length)        /* The number of bytes to copy from "bytes"
00692                                  * when initializing the object. If negative,
00693                                  * use bytes up to the first NUL byte.*/
00694 {
00695     /*
00696      * Free any old string rep, then set the string rep to a copy of the
00697      * length bytes starting at "bytes".
00698      */
00699 
00700     if (Tcl_IsShared(objPtr)) {
00701         Tcl_Panic("%s called with shared object", "Tcl_SetStringObj");
00702     }
00703 
00704     /*
00705      * Set the type to NULL and free any internal rep for the old type.
00706      */
00707 
00708     TclFreeIntRep(objPtr);
00709     objPtr->typePtr = NULL;
00710 
00711     Tcl_InvalidateStringRep(objPtr);
00712     if (length < 0) {
00713         length = (bytes? strlen(bytes) : 0);
00714     }
00715     TclInitStringRep(objPtr, bytes, length);
00716 }
00717 
00718 /*
00719  *----------------------------------------------------------------------
00720  *
00721  * Tcl_SetObjLength --
00722  *
00723  *      This function changes the length of the string representation of an
00724  *      object.
00725  *
00726  * Results:
00727  *      None.
00728  *
00729  * Side effects:
00730  *      If the size of objPtr's string representation is greater than length,
00731  *      then it is reduced to length and a new terminating null byte is stored
00732  *      in the strength. If the length of the string representation is greater
00733  *      than length, the storage space is reallocated to the given length; a
00734  *      null byte is stored at the end, but other bytes past the end of the
00735  *      original string representation are undefined. The object's internal
00736  *      representation is changed to "expendable string".
00737  *
00738  *----------------------------------------------------------------------
00739  */
00740 
00741 void
00742 Tcl_SetObjLength(
00743     register Tcl_Obj *objPtr,   /* Pointer to object. This object must not
00744                                  * currently be shared. */
00745     register int length)        /* Number of bytes desired for string
00746                                  * representation of object, not including
00747                                  * terminating null byte. */
00748 {
00749     String *stringPtr;
00750 
00751     if (Tcl_IsShared(objPtr)) {
00752         Tcl_Panic("%s called with shared object", "Tcl_SetObjLength");
00753     }
00754     SetStringFromAny(NULL, objPtr);
00755 
00756     stringPtr = GET_STRING(objPtr);
00757 
00758     /*
00759      * Check that we're not extending a pure unicode string.
00760      */
00761 
00762     if (length > (int) stringPtr->allocated &&
00763             (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) {
00764         /*
00765          * Not enough space in current string. Reallocate the string space and
00766          * free the old string.
00767          */
00768 
00769         if (objPtr->bytes != tclEmptyStringRep) {
00770             objPtr->bytes = ckrealloc((char *) objPtr->bytes,
00771                     (unsigned) (length + 1));
00772         } else {
00773             char *newBytes = ckalloc((unsigned) (length+1));
00774 
00775             if (objPtr->bytes != NULL && objPtr->length != 0) {
00776                 memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length);
00777                 Tcl_InvalidateStringRep(objPtr);
00778             }
00779             objPtr->bytes = newBytes;
00780         }
00781         stringPtr->allocated = length;
00782 
00783         /*
00784          * Invalidate the unicode data.
00785          */
00786 
00787         stringPtr->hasUnicode = 0;
00788     }
00789 
00790     if (objPtr->bytes != NULL) {
00791         objPtr->length = length;
00792         if (objPtr->bytes != tclEmptyStringRep) {
00793             /*
00794              * Ensure the string is NUL-terminated.
00795              */
00796 
00797             objPtr->bytes[length] = 0;
00798         }
00799 
00800         /*
00801          * Invalidate the unicode data.
00802          */
00803 
00804         stringPtr->numChars = -1;
00805         stringPtr->hasUnicode = 0;
00806     } else {
00807         /*
00808          * Changing length of pure unicode string.
00809          */
00810 
00811         size_t uallocated = STRING_UALLOC(length);
00812 
00813         if (uallocated > stringPtr->uallocated) {
00814             stringPtr = (String *) ckrealloc((char*) stringPtr,
00815                     STRING_SIZE(uallocated));
00816             SET_STRING(objPtr, stringPtr);
00817             stringPtr->uallocated = uallocated;
00818         }
00819         stringPtr->numChars = length;
00820         stringPtr->hasUnicode = (length > 0);
00821 
00822         /*
00823          * Ensure the string is NUL-terminated.
00824          */
00825 
00826         stringPtr->unicode[length] = 0;
00827         stringPtr->allocated = 0;
00828         objPtr->length = 0;
00829     }
00830 }
00831 
00832 /*
00833  *----------------------------------------------------------------------
00834  *
00835  * Tcl_AttemptSetObjLength --
00836  *
00837  *      This function changes the length of the string representation of an
00838  *      object. It uses the attempt* (non-panic'ing) memory allocators.
00839  *
00840  * Results:
00841  *      1 if the requested memory was allocated, 0 otherwise.
00842  *
00843  * Side effects:
00844  *      If the size of objPtr's string representation is greater than length,
00845  *      then it is reduced to length and a new terminating null byte is stored
00846  *      in the strength. If the length of the string representation is greater
00847  *      than length, the storage space is reallocated to the given length; a
00848  *      null byte is stored at the end, but other bytes past the end of the
00849  *      original string representation are undefined. The object's internal
00850  *      representation is changed to "expendable string".
00851  *
00852  *----------------------------------------------------------------------
00853  */
00854 
00855 int
00856 Tcl_AttemptSetObjLength(
00857     register Tcl_Obj *objPtr,   /* Pointer to object. This object must not
00858                                  * currently be shared. */
00859     register int length)        /* Number of bytes desired for string
00860                                  * representation of object, not including
00861                                  * terminating null byte. */
00862 {
00863     String *stringPtr;
00864 
00865     if (Tcl_IsShared(objPtr)) {
00866         Tcl_Panic("%s called with shared object", "Tcl_AttemptSetObjLength");
00867     }
00868     SetStringFromAny(NULL, objPtr);
00869 
00870     stringPtr = GET_STRING(objPtr);
00871 
00872     /*
00873      * Check that we're not extending a pure unicode string.
00874      */
00875 
00876     if (length > (int) stringPtr->allocated &&
00877             (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) {
00878         char *newBytes;
00879 
00880         /*
00881          * Not enough space in current string. Reallocate the string space and
00882          * free the old string.
00883          */
00884 
00885         if (objPtr->bytes != tclEmptyStringRep) {
00886             newBytes = attemptckrealloc(objPtr->bytes,
00887                     (unsigned)(length + 1));
00888             if (newBytes == NULL) {
00889                 return 0;
00890             }
00891         } else {
00892             newBytes = attemptckalloc((unsigned) (length + 1));
00893             if (newBytes == NULL) {
00894                 return 0;
00895             }
00896             if (objPtr->bytes != NULL && objPtr->length != 0) {
00897                 memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length);
00898                 Tcl_InvalidateStringRep(objPtr);
00899             }
00900         }
00901         objPtr->bytes = newBytes;
00902         stringPtr->allocated = length;
00903 
00904         /*
00905          * Invalidate the unicode data.
00906          */
00907 
00908         stringPtr->hasUnicode = 0;
00909     }
00910 
00911     if (objPtr->bytes != NULL) {
00912         objPtr->length = length;
00913         if (objPtr->bytes != tclEmptyStringRep) {
00914             /*
00915              * Ensure the string is NULL-terminated.
00916              */
00917 
00918             objPtr->bytes[length] = 0;
00919         }
00920 
00921         /*
00922          * Invalidate the unicode data.
00923          */
00924 
00925         stringPtr->numChars = -1;
00926         stringPtr->hasUnicode = 0;
00927     } else {
00928         /*
00929          * Changing length of pure unicode string.
00930          */
00931 
00932         size_t uallocated = STRING_UALLOC(length);
00933 
00934         if (uallocated > stringPtr->uallocated) {
00935             stringPtr = (String *) attemptckrealloc((char*) stringPtr,
00936                     STRING_SIZE(uallocated));
00937             if (stringPtr == NULL) {
00938                 return 0;
00939             }
00940             SET_STRING(objPtr, stringPtr);
00941             stringPtr->uallocated = uallocated;
00942         }
00943         stringPtr->numChars = length;
00944         stringPtr->hasUnicode = (length > 0);
00945 
00946         /*
00947          * Ensure the string is NUL-terminated.
00948          */
00949 
00950         stringPtr->unicode[length] = 0;
00951         stringPtr->allocated = 0;
00952         objPtr->length = 0;
00953     }
00954     return 1;
00955 }
00956 
00957 /*
00958  *---------------------------------------------------------------------------
00959  *
00960  * TclSetUnicodeObj --
00961  *
00962  *      Modify an object to hold the Unicode string indicated by "unicode".
00963  *
00964  * Results:
00965  *      None.
00966  *
00967  * Side effects:
00968  *      Memory allocated for new "String" internal rep.
00969  *
00970  *---------------------------------------------------------------------------
00971  */
00972 
00973 void
00974 Tcl_SetUnicodeObj(
00975     Tcl_Obj *objPtr,            /* The object to set the string of. */
00976     const Tcl_UniChar *unicode, /* The unicode string used to initialize the
00977                                  * object. */
00978     int numChars)               /* Number of characters in the unicode
00979                                  * string. */
00980 {
00981     String *stringPtr;
00982     size_t uallocated;
00983 
00984     if (numChars < 0) {
00985         numChars = 0;
00986         if (unicode) {
00987             while (unicode[numChars] != 0) {
00988                 numChars++;
00989             }
00990         }
00991     }
00992     uallocated = STRING_UALLOC(numChars);
00993 
00994     /*
00995      * Free the internal rep if one exists, and invalidate the string rep.
00996      */
00997 
00998     TclFreeIntRep(objPtr);
00999     objPtr->typePtr = &tclStringType;
01000 
01001     /*
01002      * Allocate enough space for the String structure + Unicode string.
01003      */
01004 
01005     stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
01006     stringPtr->numChars = numChars;
01007     stringPtr->uallocated = uallocated;
01008     stringPtr->hasUnicode = (numChars > 0);
01009     stringPtr->allocated = 0;
01010     memcpy(stringPtr->unicode, unicode, uallocated);
01011     stringPtr->unicode[numChars] = 0;
01012 
01013     SET_STRING(objPtr, stringPtr);
01014     Tcl_InvalidateStringRep(objPtr);
01015     return;
01016 }
01017 
01018 /*
01019  *----------------------------------------------------------------------
01020  *
01021  * Tcl_AppendLimitedToObj --
01022  *
01023  *      This function appends a limited number of bytes from a sequence of
01024  *      bytes to an object, marking any limitation with an ellipsis.
01025  *
01026  * Results:
01027  *      None.
01028  *
01029  * Side effects:
01030  *      The bytes at *bytes are appended to the string representation of
01031  *      objPtr.
01032  *
01033  *----------------------------------------------------------------------
01034  */
01035 
01036 void
01037 Tcl_AppendLimitedToObj(
01038     register Tcl_Obj *objPtr,   /* Points to the object to append to. */
01039     const char *bytes,          /* Points to the bytes to append to the
01040                                  * object. */
01041     register int length,        /* The number of bytes available to be
01042                                  * appended from "bytes". If < 0, then all
01043                                  * bytes up to a NUL byte are available. */
01044     register int limit,         /* The maximum number of bytes to append to
01045                                  * the object. */
01046     const char *ellipsis)       /* Ellipsis marker string, appended to the
01047                                  * object to indicate not all available bytes
01048                                  * at "bytes" were appended. */
01049 {
01050     String *stringPtr;
01051     int toCopy = 0;
01052 
01053     if (Tcl_IsShared(objPtr)) {
01054         Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj");
01055     }
01056 
01057     SetStringFromAny(NULL, objPtr);
01058 
01059     if (length < 0) {
01060         length = (bytes ? strlen(bytes) : 0);
01061     }
01062     if (length == 0) {
01063         return;
01064     }
01065 
01066     if (length <= limit) {
01067         toCopy = length;
01068     } else {
01069         if (ellipsis == NULL) {
01070             ellipsis = "...";
01071         }
01072         toCopy = Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes;
01073     }
01074 
01075     /*
01076      * If objPtr has a valid Unicode rep, then append the Unicode conversion
01077      * of "bytes" to the objPtr's Unicode rep, otherwise append "bytes" to
01078      * objPtr's string rep.
01079      */
01080 
01081     stringPtr = GET_STRING(objPtr);
01082     if (stringPtr->hasUnicode != 0) {
01083         AppendUtfToUnicodeRep(objPtr, bytes, toCopy);
01084     } else {
01085         AppendUtfToUtfRep(objPtr, bytes, toCopy);
01086     }
01087 
01088     if (length <= limit) {
01089         return;
01090     }
01091 
01092     stringPtr = GET_STRING(objPtr);
01093     if (stringPtr->hasUnicode != 0) {
01094         AppendUtfToUnicodeRep(objPtr, ellipsis, -1);
01095     } else {
01096         AppendUtfToUtfRep(objPtr, ellipsis, -1);
01097     }
01098 }
01099 
01100 /*
01101  *----------------------------------------------------------------------
01102  *
01103  * Tcl_AppendToObj --
01104  *
01105  *      This function appends a sequence of bytes to an object.
01106  *
01107  * Results:
01108  *      None.
01109  *
01110  * Side effects:
01111  *      The bytes at *bytes are appended to the string representation of
01112  *      objPtr.
01113  *
01114  *----------------------------------------------------------------------
01115  */
01116 
01117 void
01118 Tcl_AppendToObj(
01119     register Tcl_Obj *objPtr,   /* Points to the object to append to. */
01120     const char *bytes,          /* Points to the bytes to append to the
01121                                  * object. */
01122     register int length)        /* The number of bytes to append from "bytes".
01123                                  * If < 0, then append all bytes up to NUL
01124                                  * byte. */
01125 {
01126     Tcl_AppendLimitedToObj(objPtr, bytes, length, INT_MAX, NULL);
01127 }
01128 
01129 /*
01130  *----------------------------------------------------------------------
01131  *
01132  * Tcl_AppendUnicodeToObj --
01133  *
01134  *      This function appends a Unicode string to an object in the most
01135  *      efficient manner possible. Length must be >= 0.
01136  *
01137  * Results:
01138  *      None.
01139  *
01140  * Side effects:
01141  *      Invalidates the string rep and creates a new Unicode string.
01142  *
01143  *----------------------------------------------------------------------
01144  */
01145 
01146 void
01147 Tcl_AppendUnicodeToObj(
01148     register Tcl_Obj *objPtr,   /* Points to the object to append to. */
01149     const Tcl_UniChar *unicode, /* The unicode string to append to the
01150                                  * object. */
01151     int length)                 /* Number of chars in "unicode". */
01152 {
01153     String *stringPtr;
01154 
01155     if (Tcl_IsShared(objPtr)) {
01156         Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj");
01157     }
01158 
01159     if (length == 0) {
01160         return;
01161     }
01162 
01163     SetStringFromAny(NULL, objPtr);
01164     stringPtr = GET_STRING(objPtr);
01165 
01166     /*
01167      * If objPtr has a valid Unicode rep, then append the "unicode" to the
01168      * objPtr's Unicode rep, otherwise the UTF conversion of "unicode" to
01169      * objPtr's string rep.
01170      */
01171 
01172     if (stringPtr->hasUnicode != 0) {
01173         AppendUnicodeToUnicodeRep(objPtr, unicode, length);
01174     } else {
01175         AppendUnicodeToUtfRep(objPtr, unicode, length);
01176     }
01177 }
01178 
01179 /*
01180  *----------------------------------------------------------------------
01181  *
01182  * Tcl_AppendObjToObj --
01183  *
01184  *      This function appends the string rep of one object to another.
01185  *      "objPtr" cannot be a shared object.
01186  *
01187  * Results:
01188  *      None.
01189  *
01190  * Side effects:
01191  *      The string rep of appendObjPtr is appended to the string
01192  *      representation of objPtr.
01193  *
01194  *----------------------------------------------------------------------
01195  */
01196 
01197 void
01198 Tcl_AppendObjToObj(
01199     Tcl_Obj *objPtr,            /* Points to the object to append to. */
01200     Tcl_Obj *appendObjPtr)      /* Object to append. */
01201 {
01202     String *stringPtr;
01203     int length, numChars, allOneByteChars;
01204     char *bytes;
01205 
01206     SetStringFromAny(NULL, objPtr);
01207 
01208     /*
01209      * If objPtr has a valid Unicode rep, then get a Unicode string from
01210      * appendObjPtr and append it.
01211      */
01212 
01213     stringPtr = GET_STRING(objPtr);
01214     if (stringPtr->hasUnicode != 0) {
01215         /*
01216          * If appendObjPtr is not of the "String" type, don't convert it.
01217          */
01218 
01219         if (appendObjPtr->typePtr == &tclStringType) {
01220             stringPtr = GET_STRING(appendObjPtr);
01221             if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
01222                 /*
01223                  * If appendObjPtr is a string obj with no valid Unicode rep,
01224                  * then fill its unicode rep.
01225                  */
01226 
01227                 FillUnicodeRep(appendObjPtr);
01228                 stringPtr = GET_STRING(appendObjPtr);
01229             }
01230             AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode,
01231                     stringPtr->numChars);
01232         } else {
01233             bytes = TclGetStringFromObj(appendObjPtr, &length);
01234             AppendUtfToUnicodeRep(objPtr, bytes, length);
01235         }
01236         return;
01237     }
01238 
01239     /*
01240      * Append to objPtr's UTF string rep. If we know the number of characters
01241      * in both objects before appending, then set the combined number of
01242      * characters in the final (appended-to) object.
01243      */
01244 
01245     bytes = TclGetStringFromObj(appendObjPtr, &length);
01246 
01247     allOneByteChars = 0;
01248     numChars = stringPtr->numChars;
01249     if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) {
01250         stringPtr = GET_STRING(appendObjPtr);
01251         if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) {
01252             numChars += stringPtr->numChars;
01253             allOneByteChars = 1;
01254         }
01255     }
01256 
01257     AppendUtfToUtfRep(objPtr, bytes, length);
01258 
01259     if (allOneByteChars) {
01260         stringPtr = GET_STRING(objPtr);
01261         stringPtr->numChars = numChars;
01262     }
01263 }
01264 
01265 /*
01266  *----------------------------------------------------------------------
01267  *
01268  * AppendUnicodeToUnicodeRep --
01269  *
01270  *      This function appends the contents of "unicode" to the Unicode rep of
01271  *      "objPtr". objPtr must already have a valid Unicode rep.
01272  *
01273  * Results:
01274  *      None.
01275  *
01276  * Side effects:
01277  *      objPtr's internal rep is reallocated.
01278  *
01279  *----------------------------------------------------------------------
01280  */
01281 
01282 static void
01283 AppendUnicodeToUnicodeRep(
01284     Tcl_Obj *objPtr,            /* Points to the object to append to. */
01285     const Tcl_UniChar *unicode, /* String to append. */
01286     int appendNumChars)         /* Number of chars of "unicode" to append. */
01287 {
01288     String *stringPtr, *tmpString;
01289     size_t numChars;
01290 
01291     if (appendNumChars < 0) {
01292         appendNumChars = 0;
01293         if (unicode) {
01294             while (unicode[appendNumChars] != 0) {
01295                 appendNumChars++;
01296             }
01297         }
01298     }
01299     if (appendNumChars == 0) {
01300         return;
01301     }
01302 
01303     SetStringFromAny(NULL, objPtr);
01304     stringPtr = GET_STRING(objPtr);
01305 
01306     /*
01307      * If not enough space has been allocated for the unicode rep, reallocate
01308      * the internal rep object with additional space. First try to double the
01309      * required allocation; if that fails, try a more modest increase. See the
01310      * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an
01311      * explanation of this growth algorithm.
01312      */
01313 
01314     numChars = stringPtr->numChars + appendNumChars;
01315 
01316     if (STRING_UALLOC(numChars) >= stringPtr->uallocated) {
01317         stringPtr->uallocated = STRING_UALLOC(2 * numChars);
01318         tmpString = (String *) attemptckrealloc((char *)stringPtr,
01319                 STRING_SIZE(stringPtr->uallocated));
01320         if (tmpString == NULL) {
01321             stringPtr->uallocated =
01322                     STRING_UALLOC(numChars + appendNumChars)
01323                     + TCL_GROWTH_MIN_ALLOC;
01324             tmpString = (String *) ckrealloc((char *)stringPtr,
01325                     STRING_SIZE(stringPtr->uallocated));
01326         }
01327         stringPtr = tmpString;
01328         SET_STRING(objPtr, stringPtr);
01329     }
01330 
01331     /*
01332      * Copy the new string onto the end of the old string, then add the
01333      * trailing null.
01334      */
01335 
01336     memcpy(stringPtr->unicode + stringPtr->numChars, unicode,
01337             appendNumChars * sizeof(Tcl_UniChar));
01338     stringPtr->unicode[numChars] = 0;
01339     stringPtr->numChars = numChars;
01340 
01341     Tcl_InvalidateStringRep(objPtr);
01342 }
01343 
01344 /*
01345  *----------------------------------------------------------------------
01346  *
01347  * AppendUnicodeToUtfRep --
01348  *
01349  *      This function converts the contents of "unicode" to UTF and appends
01350  *      the UTF to the string rep of "objPtr".
01351  *
01352  * Results:
01353  *      None.
01354  *
01355  * Side effects:
01356  *      objPtr's internal rep is reallocated.
01357  *
01358  *----------------------------------------------------------------------
01359  */
01360 
01361 static void
01362 AppendUnicodeToUtfRep(
01363     Tcl_Obj *objPtr,            /* Points to the object to append to. */
01364     const Tcl_UniChar *unicode, /* String to convert to UTF. */
01365     int numChars)               /* Number of chars of "unicode" to convert. */
01366 {
01367     Tcl_DString dsPtr;
01368     const char *bytes;
01369 
01370     if (numChars < 0) {
01371         numChars = 0;
01372         if (unicode) {
01373             while (unicode[numChars] != 0) {
01374                 numChars++;
01375             }
01376         }
01377     }
01378     if (numChars == 0) {
01379         return;
01380     }
01381 
01382     Tcl_DStringInit(&dsPtr);
01383     bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr);
01384     AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr));
01385     Tcl_DStringFree(&dsPtr);
01386 }
01387 
01388 /*
01389  *----------------------------------------------------------------------
01390  *
01391  * AppendUtfToUnicodeRep --
01392  *
01393  *      This function converts the contents of "bytes" to Unicode and appends
01394  *      the Unicode to the Unicode rep of "objPtr". objPtr must already have a
01395  *      valid Unicode rep.
01396  *
01397  * Results:
01398  *      None.
01399  *
01400  * Side effects:
01401  *      objPtr's internal rep is reallocated.
01402  *
01403  *----------------------------------------------------------------------
01404  */
01405 
01406 static void
01407 AppendUtfToUnicodeRep(
01408     Tcl_Obj *objPtr,            /* Points to the object to append to. */
01409     const char *bytes,          /* String to convert to Unicode. */
01410     int numBytes)               /* Number of bytes of "bytes" to convert. */
01411 {
01412     Tcl_DString dsPtr;
01413     int numChars;
01414     Tcl_UniChar *unicode;
01415 
01416     if (numBytes < 0) {
01417         numBytes = (bytes ? strlen(bytes) : 0);
01418     }
01419     if (numBytes == 0) {
01420         return;
01421     }
01422 
01423     Tcl_DStringInit(&dsPtr);
01424     numChars = Tcl_NumUtfChars(bytes, numBytes);
01425     unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr);
01426     AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
01427     Tcl_DStringFree(&dsPtr);
01428 }
01429 
01430 /*
01431  *----------------------------------------------------------------------
01432  *
01433  * AppendUtfToUtfRep --
01434  *
01435  *      This function appends "numBytes" bytes of "bytes" to the UTF string
01436  *      rep of "objPtr". objPtr must already have a valid String rep.
01437  *
01438  * Results:
01439  *      None.
01440  *
01441  * Side effects:
01442  *      objPtr's internal rep is reallocated.
01443  *
01444  *----------------------------------------------------------------------
01445  */
01446 
01447 static void
01448 AppendUtfToUtfRep(
01449     Tcl_Obj *objPtr,            /* Points to the object to append to. */
01450     const char *bytes,          /* String to append. */
01451     int numBytes)               /* Number of bytes of "bytes" to append. */
01452 {
01453     String *stringPtr;
01454     int newLength, oldLength;
01455 
01456     if (numBytes < 0) {
01457         numBytes = (bytes ? strlen(bytes) : 0);
01458     }
01459     if (numBytes == 0) {
01460         return;
01461     }
01462 
01463     /*
01464      * Copy the new string onto the end of the old string, then add the
01465      * trailing null.
01466      */
01467 
01468     oldLength = objPtr->length;
01469     newLength = numBytes + oldLength;
01470 
01471     stringPtr = GET_STRING(objPtr);
01472     if (newLength > (int) stringPtr->allocated) {
01473         /*
01474          * There isn't currently enough space in the string representation so
01475          * allocate additional space. First, try to double the length
01476          * required. If that fails, try a more modest allocation. See the "TCL
01477          * STRING GROWTH ALGORITHM" comment at the top of this file for an
01478          * explanation of this growth algorithm.
01479          */
01480 
01481         if (Tcl_AttemptSetObjLength(objPtr, 2 * newLength) == 0) {
01482             Tcl_SetObjLength(objPtr,
01483                     newLength + numBytes + TCL_GROWTH_MIN_ALLOC);
01484         }
01485     }
01486 
01487     /*
01488      * Invalidate the unicode data.
01489      */
01490 
01491     stringPtr->numChars = -1;
01492     stringPtr->hasUnicode = 0;
01493 
01494     memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes);
01495     objPtr->bytes[newLength] = 0;
01496     objPtr->length = newLength;
01497 }
01498 
01499 /*
01500  *----------------------------------------------------------------------
01501  *
01502  * Tcl_AppendStringsToObjVA --
01503  *
01504  *      This function appends one or more null-terminated strings to an
01505  *      object.
01506  *
01507  * Results:
01508  *      None.
01509  *
01510  * Side effects:
01511  *      The contents of all the string arguments are appended to the string
01512  *      representation of objPtr.
01513  *
01514  *----------------------------------------------------------------------
01515  */
01516 
01517 void
01518 Tcl_AppendStringsToObjVA(
01519     Tcl_Obj *objPtr,            /* Points to the object to append to. */
01520     va_list argList)            /* Variable argument list. */
01521 {
01522 #define STATIC_LIST_SIZE 16
01523     String *stringPtr;
01524     int newLength, oldLength, attemptLength;
01525     register char *string, *dst;
01526     char *static_list[STATIC_LIST_SIZE];
01527     char **args = static_list;
01528     int nargs_space = STATIC_LIST_SIZE;
01529     int nargs, i;
01530 
01531     if (Tcl_IsShared(objPtr)) {
01532         Tcl_Panic("%s called with shared object", "Tcl_AppendStringsToObj");
01533     }
01534 
01535     SetStringFromAny(NULL, objPtr);
01536 
01537     /*
01538      * Figure out how much space is needed for all the strings, and expand the
01539      * string representation if it isn't big enough. If no bytes would be
01540      * appended, just return. Note that on some platforms (notably OS/390) the
01541      * argList is an array so we need to use memcpy.
01542      */
01543 
01544     nargs = 0;
01545     newLength = 0;
01546     oldLength = objPtr->length;
01547     while (1) {
01548         string = va_arg(argList, char *);
01549         if (string == NULL) {
01550             break;
01551         }
01552         if (nargs >= nargs_space) {
01553             /*
01554              * Expand the args buffer.
01555              */
01556 
01557             nargs_space += STATIC_LIST_SIZE;
01558             if (args == static_list) {
01559                 args = (void *) ckalloc(nargs_space * sizeof(char *));
01560                 for (i = 0; i < nargs; ++i) {
01561                     args[i] = static_list[i];
01562                 }
01563             } else {
01564                 args = (void *) ckrealloc((void *) args,
01565                         nargs_space * sizeof(char *));
01566             }
01567         }
01568         newLength += strlen(string);
01569         args[nargs++] = string;
01570     }
01571     if (newLength == 0) {
01572         goto done;
01573     }
01574 
01575     stringPtr = GET_STRING(objPtr);
01576     if (oldLength + newLength > (int) stringPtr->allocated) {
01577         /*
01578          * There isn't currently enough space in the string representation, so
01579          * allocate additional space. If the current string representation
01580          * isn't empty (i.e. it looks like we're doing a series of appends)
01581          * then try to allocate extra space to accomodate future growth: first
01582          * try to double the required memory; if that fails, try a more modest
01583          * allocation. See the "TCL STRING GROWTH ALGORITHM" comment at the
01584          * top of this file for an explanation of this growth algorithm.
01585          * Otherwise, if the current string representation is empty, exactly
01586          * enough memory is allocated.
01587          */
01588 
01589         if (oldLength == 0) {
01590             Tcl_SetObjLength(objPtr, newLength);
01591         } else {
01592             attemptLength = 2 * (oldLength + newLength);
01593             if (Tcl_AttemptSetObjLength(objPtr, attemptLength) == 0) {
01594                 attemptLength = oldLength + (2 * newLength) +
01595                         TCL_GROWTH_MIN_ALLOC;
01596                 Tcl_SetObjLength(objPtr, attemptLength);
01597             }
01598         }
01599     }
01600 
01601     /*
01602      * Make a second pass through the arguments, appending all the strings to
01603      * the object.
01604      */
01605 
01606     dst = objPtr->bytes + oldLength;
01607     for (i = 0; i < nargs; ++i) {
01608         string = args[i];
01609         if (string == NULL) {
01610             break;
01611         }
01612         while (*string != 0) {
01613             *dst = *string;
01614             dst++;
01615             string++;
01616         }
01617     }
01618 
01619     /*
01620      * Add a null byte to terminate the string. However, be careful: it's
01621      * possible that the object is totally empty (if it was empty originally
01622      * and there was nothing to append). In this case dst is NULL; just leave
01623      * everything alone.
01624      */
01625 
01626     if (dst != NULL) {
01627         *dst = 0;
01628     }
01629     objPtr->length = oldLength + newLength;
01630 
01631   done:
01632     /*
01633      * If we had to allocate a buffer from the heap, free it now.
01634      */
01635 
01636     if (args != static_list) {
01637         ckfree((void *) args);
01638     }
01639 #undef STATIC_LIST_SIZE
01640 }
01641 
01642 /*
01643  *----------------------------------------------------------------------
01644  *
01645  * Tcl_AppendStringsToObj --
01646  *
01647  *      This function appends one or more null-terminated strings to an
01648  *      object.
01649  *
01650  * Results:
01651  *      None.
01652  *
01653  * Side effects:
01654  *      The contents of all the string arguments are appended to the string
01655  *      representation of objPtr.
01656  *
01657  *----------------------------------------------------------------------
01658  */
01659 
01660 void
01661 Tcl_AppendStringsToObj(
01662     Tcl_Obj *objPtr,
01663     ...)
01664 {
01665     va_list argList;
01666 
01667     va_start(argList, objPtr);
01668     Tcl_AppendStringsToObjVA(objPtr, argList);
01669     va_end(argList);
01670 }
01671 
01672 /*
01673  *----------------------------------------------------------------------
01674  *
01675  * Tcl_AppendFormatToObj --
01676  *
01677  *      This function appends a list of Tcl_Obj's to a Tcl_Obj according to
01678  *      the formatting instructions embedded in the format string. The
01679  *      formatting instructions are inspired by sprintf(). Returns TCL_OK when
01680  *      successful. If there's an error in the arguments, TCL_ERROR is
01681  *      returned, and an error message is written to the interp, if non-NULL.
01682  *
01683  * Results:
01684  *      A standard Tcl result.
01685  *
01686  * Side effects:
01687  *      None.
01688  *
01689  *----------------------------------------------------------------------
01690  */
01691 
01692 int
01693 Tcl_AppendFormatToObj(
01694     Tcl_Interp *interp,
01695     Tcl_Obj *appendObj,
01696     const char *format,
01697     int objc,
01698     Tcl_Obj *const objv[])
01699 {
01700     const char *span = format, *msg;
01701     int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0;
01702     int originalLength;
01703     static const char *mixedXPG =
01704             "cannot mix \"%\" and \"%n$\" conversion specifiers";
01705     static const char *badIndex[2] = {
01706         "not enough arguments for all format specifiers",
01707         "\"%n$\" argument index out of range"
01708     };
01709 
01710     if (Tcl_IsShared(appendObj)) {
01711         Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj");
01712     }
01713     TclGetStringFromObj(appendObj, &originalLength);
01714 
01715     /*
01716      * Format string is NUL-terminated.
01717      */
01718 
01719     while (*format != '\0') {
01720         char *end;
01721         int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag;
01722         int width, gotPrecision, precision, useShort, useWide, useBig;
01723         int newXpg, numChars, allocSegment = 0;
01724         Tcl_Obj *segment;
01725         Tcl_UniChar ch;
01726         int step = Tcl_UtfToUniChar(format, &ch);
01727 
01728         format += step;
01729         if (ch != '%') {
01730             numBytes += step;
01731             continue;
01732         }
01733         if (numBytes) {
01734             Tcl_AppendToObj(appendObj, span, numBytes);
01735             numBytes = 0;
01736         }
01737 
01738         /*
01739          * Saw a % : process the format specifier.
01740          *
01741          * Step 0. Handle special case of escaped format marker (i.e., %%).
01742          */
01743 
01744         step = Tcl_UtfToUniChar(format, &ch);
01745         if (ch == '%') {
01746             span = format;
01747             numBytes = step;
01748             format += step;
01749             continue;
01750         }
01751 
01752         /*
01753          * Step 1. XPG3 position specifier
01754          */
01755 
01756         newXpg = 0;
01757         if (isdigit(UCHAR(ch))) {
01758             int position = strtoul(format, &end, 10);
01759             if (*end == '$') {
01760                 newXpg = 1;
01761                 objIndex = position - 1;
01762                 format = end + 1;
01763                 step = Tcl_UtfToUniChar(format, &ch);
01764             }
01765         }
01766         if (newXpg) {
01767             if (gotSequential) {
01768                 msg = mixedXPG;
01769                 goto errorMsg;
01770             }
01771             gotXpg = 1;
01772         } else {
01773             if (gotXpg) {
01774                 msg = mixedXPG;
01775                 goto errorMsg;
01776             }
01777             gotSequential = 1;
01778         }
01779         if ((objIndex < 0) || (objIndex >= objc)) {
01780             msg = badIndex[gotXpg];
01781             goto errorMsg;
01782         }
01783 
01784         /*
01785          * Step 2. Set of flags.
01786          */
01787 
01788         gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0;
01789         sawFlag = 1;
01790         do {
01791             switch (ch) {
01792             case '-':
01793                 gotMinus = 1;
01794                 break;
01795             case '#':
01796                 gotHash = 1;
01797                 break;
01798             case '0':
01799                 gotZero = 1;
01800                 break;
01801             case ' ':
01802                 gotSpace = 1;
01803                 break;
01804             case '+':
01805                 gotPlus = 1;
01806                 break;
01807             default:
01808                 sawFlag = 0;
01809             }
01810             if (sawFlag) {
01811                 format += step;
01812                 step = Tcl_UtfToUniChar(format, &ch);
01813             }
01814         } while (sawFlag);
01815 
01816         /*
01817          * Step 3. Minimum field width.
01818          */
01819 
01820         width = 0;
01821         if (isdigit(UCHAR(ch))) {
01822             width = strtoul(format, &end, 10);
01823             format = end;
01824             step = Tcl_UtfToUniChar(format, &ch);
01825         } else if (ch == '*') {
01826             if (objIndex >= objc - 1) {
01827                 msg = badIndex[gotXpg];
01828                 goto errorMsg;
01829             }
01830             if (TclGetIntFromObj(interp, objv[objIndex], &width) != TCL_OK) {
01831                 goto error;
01832             }
01833             if (width < 0) {
01834                 width = -width;
01835                 gotMinus = 1;
01836             }
01837             objIndex++;
01838             format += step;
01839             step = Tcl_UtfToUniChar(format, &ch);
01840         }
01841 
01842         /*
01843          * Step 4. Precision.
01844          */
01845 
01846         gotPrecision = precision = 0;
01847         if (ch == '.') {
01848             gotPrecision = 1;
01849             format += step;
01850             step = Tcl_UtfToUniChar(format, &ch);
01851         }
01852         if (isdigit(UCHAR(ch))) {
01853             precision = strtoul(format, &end, 10);
01854             format = end;
01855             step = Tcl_UtfToUniChar(format, &ch);
01856         } else if (ch == '*') {
01857             if (objIndex >= objc - 1) {
01858                 msg = badIndex[gotXpg];
01859                 goto errorMsg;
01860             }
01861             if (TclGetIntFromObj(interp, objv[objIndex], &precision)
01862                     != TCL_OK) {
01863                 goto error;
01864             }
01865 
01866             /*
01867              * TODO: Check this truncation logic.
01868              */
01869 
01870             if (precision < 0) {
01871                 precision = 0;
01872             }
01873             objIndex++;
01874             format += step;
01875             step = Tcl_UtfToUniChar(format, &ch);
01876         }
01877 
01878         /*
01879          * Step 5. Length modifier.
01880          */
01881 
01882         useShort = useWide = useBig = 0;
01883         if (ch == 'h') {
01884             useShort = 1;
01885             format += step;
01886             step = Tcl_UtfToUniChar(format, &ch);
01887         } else if (ch == 'l') {
01888             format += step;
01889             step = Tcl_UtfToUniChar(format, &ch);
01890             if (ch == 'l') {
01891                 useBig = 1;
01892                 format += step;
01893                 step = Tcl_UtfToUniChar(format, &ch);
01894             } else {
01895 #ifndef TCL_WIDE_INT_IS_LONG
01896                 useWide = 1;
01897 #endif
01898             }
01899         }
01900 
01901         format += step;
01902         span = format;
01903 
01904         /*
01905          * Step 6. The actual conversion character.
01906          */
01907 
01908         segment = objv[objIndex];
01909         if (ch == 'i') {
01910             ch = 'd';
01911         }
01912         switch (ch) {
01913         case '\0':
01914             msg = "format string ended in middle of field specifier";
01915             goto errorMsg;
01916         case 's': {
01917             numChars = Tcl_GetCharLength(segment);
01918             if (gotPrecision && (precision < numChars)) {
01919                 segment = Tcl_GetRange(segment, 0, precision - 1);
01920                 Tcl_IncrRefCount(segment);
01921                 allocSegment = 1;
01922             }
01923             break;
01924         }
01925         case 'c': {
01926             char buf[TCL_UTF_MAX];
01927             int code, length;
01928 
01929             if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {
01930                 goto error;
01931             }
01932             length = Tcl_UniCharToUtf(code, buf);
01933             segment = Tcl_NewStringObj(buf, length);
01934             Tcl_IncrRefCount(segment);
01935             allocSegment = 1;
01936             break;
01937         }
01938 
01939         case 'u':
01940             if (useBig) {
01941                 msg = "unsigned bignum format is invalid";
01942                 goto errorMsg;
01943             }
01944         case 'd':
01945         case 'o':
01946         case 'x':
01947         case 'X': {
01948             short int s = 0;    /* Silence compiler warning; only defined and
01949                                  * used when useShort is true. */
01950             long l;
01951             Tcl_WideInt w;
01952             mp_int big;
01953             int isNegative = 0;
01954 
01955             if (useBig) {
01956                 if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) {
01957                     goto error;
01958                 }
01959                 isNegative = (mp_cmp_d(&big, 0) == MP_LT);
01960             } else if (useWide) {
01961                 if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
01962                     Tcl_Obj *objPtr;
01963 
01964                     if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
01965                         goto error;
01966                     }
01967                     mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big);
01968                     objPtr = Tcl_NewBignumObj(&big);
01969                     Tcl_IncrRefCount(objPtr);
01970                     Tcl_GetWideIntFromObj(NULL, objPtr, &w);
01971                     Tcl_DecrRefCount(objPtr);
01972                 }
01973                 isNegative = (w < (Tcl_WideInt)0);
01974             } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) {
01975                 if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
01976                     Tcl_Obj *objPtr;
01977 
01978                     if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
01979                         goto error;
01980                     }
01981                     mp_mod_2d(&big, (int) CHAR_BIT * sizeof(long), &big);
01982                     objPtr = Tcl_NewBignumObj(&big);
01983                     Tcl_IncrRefCount(objPtr);
01984                     TclGetLongFromObj(NULL, objPtr, &l);
01985                     Tcl_DecrRefCount(objPtr);
01986                 } else {
01987                     l = Tcl_WideAsLong(w);
01988                 }
01989                 if (useShort) {
01990                     s = (short int) l;
01991                     isNegative = (s < (short int)0);
01992                 } else {
01993                     isNegative = (l < (long)0);
01994                 }
01995             } else if (useShort) {
01996                 s = (short int) l;
01997                 isNegative = (s < (short int)0);
01998             } else {
01999                 isNegative = (l < (long)0);
02000             }
02001 
02002             segment = Tcl_NewObj();
02003             allocSegment = 1;
02004             Tcl_IncrRefCount(segment);
02005 
02006             if ((isNegative || gotPlus) && (useBig || (ch == 'd'))) {
02007                 Tcl_AppendToObj(segment, (isNegative ? "-" : "+"), 1);
02008             }
02009 
02010             if (gotHash) {
02011                 switch (ch) {
02012                 case 'o':
02013                     Tcl_AppendToObj(segment, "0", 1);
02014                     precision--;
02015                     break;
02016                 case 'x':
02017                 case 'X':
02018                     Tcl_AppendToObj(segment, "0x", 2);
02019                     break;
02020                 }
02021             }
02022 
02023             switch (ch) {
02024             case 'd': {
02025                 int length;
02026                 Tcl_Obj *pure;
02027                 const char *bytes;
02028 
02029                 if (useShort) {
02030                     pure = Tcl_NewIntObj((int)(s));
02031                 } else if (useWide) {
02032                     pure = Tcl_NewWideIntObj(w);
02033                 } else if (useBig) {
02034                     pure = Tcl_NewBignumObj(&big);
02035                 } else {
02036                     pure = Tcl_NewLongObj(l);
02037                 }
02038                 Tcl_IncrRefCount(pure);
02039                 bytes = TclGetStringFromObj(pure, &length);
02040 
02041                 /*
02042                  * Already did the sign above.
02043                  */
02044 
02045                 if (*bytes == '-') {
02046                     length--;
02047                     bytes++;
02048                 }
02049 
02050                 /*
02051                  * Canonical decimal string reps for integers are composed
02052                  * entirely of one-byte encoded characters, so "length" is the
02053                  * number of chars.
02054                  */
02055 
02056                 if (gotPrecision) {
02057                     while (length < precision) {
02058                         Tcl_AppendToObj(segment, "0", 1);
02059                         length++;
02060                     }
02061                     gotZero = 0;
02062                 }
02063                 if (gotZero) {
02064                     length += Tcl_GetCharLength(segment);
02065                     while (length < width) {
02066                         Tcl_AppendToObj(segment, "0", 1);
02067                         length++;
02068                     }
02069                 }
02070                 Tcl_AppendToObj(segment, bytes, -1);
02071                 Tcl_DecrRefCount(pure);
02072                 break;
02073             }
02074 
02075             case 'u':
02076             case 'o':
02077             case 'x':
02078             case 'X': {
02079                 Tcl_WideUInt bits = (Tcl_WideUInt)0;
02080                 int length, numBits = 4, numDigits = 0, base = 16;
02081                 int index = 0, shift = 0;
02082                 Tcl_Obj *pure;
02083                 char *bytes;
02084 
02085                 if (ch == 'u') {
02086                     base = 10;
02087                 }
02088                 if (ch == 'o') {
02089                     base = 8;
02090                     numBits = 3;
02091                 }
02092                 if (useShort) {
02093                     unsigned short int us = (unsigned short int) s;
02094 
02095                     bits = (Tcl_WideUInt) us;
02096                     while (us) {
02097                         numDigits++;
02098                         us /= base;
02099                     }
02100                 } else if (useWide) {
02101                     Tcl_WideUInt uw = (Tcl_WideUInt) w;
02102 
02103                     bits = uw;
02104                     while (uw) {
02105                         numDigits++;
02106                         uw /= base;
02107                     }
02108                 } else if (useBig && big.used) {
02109                     int leftover = (big.used * DIGIT_BIT) % numBits;
02110                     mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover);
02111 
02112                     numDigits = 1 + ((big.used * DIGIT_BIT) / numBits);
02113                     while ((mask & big.dp[big.used-1]) == 0) {
02114                         numDigits--;
02115                         mask >>= numBits;
02116                     }
02117                 } else if (!useBig) {
02118                     unsigned long int ul = (unsigned long int) l;
02119 
02120                     bits = (Tcl_WideUInt) ul;
02121                     while (ul) {
02122                         numDigits++;
02123                         ul /= base;
02124                     }
02125                 }
02126 
02127                 /*
02128                  * Need to be sure zero becomes "0", not "".
02129                  */
02130 
02131                 if ((numDigits == 0) && !((ch == 'o') && gotHash)) {
02132                     numDigits = 1;
02133                 }
02134                 pure = Tcl_NewObj();
02135                 Tcl_SetObjLength(pure, numDigits);
02136                 bytes = TclGetString(pure);
02137                 length = numDigits;
02138                 while (numDigits--) {
02139                     int digitOffset;
02140 
02141                     if (useBig && big.used) {
02142                         if ((size_t) shift <
02143                                 CHAR_BIT*sizeof(Tcl_WideUInt) - DIGIT_BIT) {
02144                             bits |= (((Tcl_WideUInt)big.dp[index++]) <<shift);
02145                             shift += DIGIT_BIT;
02146                         }
02147                         shift -= numBits;
02148                     }
02149                     digitOffset = (int) (bits % base);
02150                     if (digitOffset > 9) {
02151                         bytes[numDigits] = 'a' + digitOffset - 10;
02152                     } else {
02153                         bytes[numDigits] = '0' + digitOffset;
02154                     }
02155                     bits /= base;
02156                 }
02157                 if (gotPrecision) {
02158                     while (length < precision) {
02159                         Tcl_AppendToObj(segment, "0", 1);
02160                         length++;
02161                     }
02162                     gotZero = 0;
02163                 }
02164                 if (gotZero) {
02165                     length += Tcl_GetCharLength(segment);
02166                     while (length < width) {
02167                         Tcl_AppendToObj(segment, "0", 1);
02168                         length++;
02169                     }
02170                 }
02171                 Tcl_AppendObjToObj(segment, pure);
02172                 Tcl_DecrRefCount(pure);
02173                 break;
02174             }
02175 
02176             }
02177             break;
02178         }
02179 
02180         case 'e':
02181         case 'E':
02182         case 'f':
02183         case 'g':
02184         case 'G': {
02185 #define MAX_FLOAT_SIZE 320
02186             char spec[2*TCL_INTEGER_SPACE + 9], *p = spec;
02187             double d;
02188             int length = MAX_FLOAT_SIZE;
02189             char *bytes;
02190 
02191             if (Tcl_GetDoubleFromObj(interp, segment, &d) != TCL_OK) {
02192                 /* TODO: Figure out ACCEPT_NAN here */
02193                 goto error;
02194             }
02195             *p++ = '%';
02196             if (gotMinus) {
02197                 *p++ = '-';
02198             }
02199             if (gotHash) {
02200                 *p++ = '#';
02201             }
02202             if (gotZero) {
02203                 *p++ = '0';
02204             }
02205             if (gotSpace) {
02206                 *p++ = ' ';
02207             }
02208             if (gotPlus) {
02209                 *p++ = '+';
02210             }
02211             if (width) {
02212                 p += sprintf(p, "%d", width);
02213             }
02214             if (gotPrecision) {
02215                 *p++ = '.';
02216                 p += sprintf(p, "%d", precision);
02217                 length += precision;
02218             }
02219 
02220             /*
02221              * Don't pass length modifiers!
02222              */
02223 
02224             *p++ = (char) ch;
02225             *p = '\0';
02226 
02227             segment = Tcl_NewObj();
02228             allocSegment = 1;
02229             Tcl_SetObjLength(segment, length);
02230             bytes = TclGetString(segment);
02231             Tcl_SetObjLength(segment, sprintf(bytes, spec, d));
02232             break;
02233         }
02234         default:
02235             if (interp != NULL) {
02236                 char buf[40];
02237 
02238                 sprintf(buf, "bad field specifier \"%c\"", ch);
02239                 Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, -1));
02240             }
02241             goto error;
02242         }
02243 
02244         switch (ch) {
02245         case 'E':
02246         case 'G':
02247         case 'X': {
02248             Tcl_SetObjLength(segment, Tcl_UtfToUpper(TclGetString(segment)));
02249         }
02250         }
02251 
02252         numChars = Tcl_GetCharLength(segment);
02253         if (!gotMinus) {
02254             while (numChars < width) {
02255                 Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1);
02256                 numChars++;
02257             }
02258         }
02259         Tcl_AppendObjToObj(appendObj, segment);
02260         if (allocSegment) {
02261             Tcl_DecrRefCount(segment);
02262         }
02263         while (numChars < width) {
02264             Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1);
02265             numChars++;
02266         }
02267 
02268         objIndex += gotSequential;
02269     }
02270     if (numBytes) {
02271         Tcl_AppendToObj(appendObj, span, numBytes);
02272         numBytes = 0;
02273     }
02274 
02275     return TCL_OK;
02276 
02277   errorMsg:
02278     if (interp != NULL) {
02279         Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1));
02280     }
02281   error:
02282     Tcl_SetObjLength(appendObj, originalLength);
02283     return TCL_ERROR;
02284 }
02285 
02286 /*
02287  *---------------------------------------------------------------------------
02288  *
02289  * Tcl_Format--
02290  *
02291  * Results:
02292  *      A refcount zero Tcl_Obj.
02293  *
02294  * Side effects:
02295  *      None.
02296  *
02297  *---------------------------------------------------------------------------
02298  */
02299 
02300 Tcl_Obj *
02301 Tcl_Format(
02302     Tcl_Interp *interp,
02303     const char *format,
02304     int objc,
02305     Tcl_Obj *const objv[])
02306 {
02307     int result;
02308     Tcl_Obj *objPtr = Tcl_NewObj();
02309     result = Tcl_AppendFormatToObj(interp, objPtr, format, objc, objv);
02310     if (result != TCL_OK) {
02311         Tcl_DecrRefCount(objPtr);
02312         return NULL;
02313     }
02314     return objPtr;
02315 }
02316 
02317 /*
02318  *---------------------------------------------------------------------------
02319  *
02320  * AppendPrintfToObjVA --
02321  *
02322  * Results:
02323  *
02324  * Side effects:
02325  *
02326  *---------------------------------------------------------------------------
02327  */
02328 
02329 static void
02330 AppendPrintfToObjVA(
02331     Tcl_Obj *objPtr,
02332     const char *format,
02333     va_list argList)
02334 {
02335     int code, objc;
02336     Tcl_Obj **objv, *list = Tcl_NewObj();
02337     const char *p;
02338     char *end;
02339 
02340     p = format;
02341     Tcl_IncrRefCount(list);
02342     while (*p != '\0') {
02343         int size = 0, seekingConversion = 1, gotPrecision = 0;
02344         int lastNum = -1;
02345 
02346         if (*p++ != '%') {
02347             continue;
02348         }
02349         if (*p == '%') {
02350             p++;
02351             continue;
02352         }
02353         do {
02354             switch (*p) {
02355 
02356             case '\0':
02357                 seekingConversion = 0;
02358                 break;
02359             case 's': {
02360                 const char *q, *end, *bytes = va_arg(argList, char *);
02361                 seekingConversion = 0;
02362 
02363                 /*
02364                  * The buffer to copy characters from starts at bytes and ends
02365                  * at either the first NUL byte, or after lastNum bytes, when
02366                  * caller has indicated a limit.
02367                  */
02368 
02369                 end = bytes;
02370                 while ((!gotPrecision || lastNum--) && (*end != '\0')) {
02371                     end++;
02372                 }
02373 
02374                 /*
02375                  * Within that buffer, we trim both ends if needed so that we
02376                  * copy only whole characters, and avoid copying any partial
02377                  * multi-byte characters.
02378                  */
02379 
02380                 q = Tcl_UtfPrev(end, bytes);
02381                 if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
02382                     end = q;
02383                 }
02384 
02385                 q = bytes + TCL_UTF_MAX;
02386                 while ((bytes < end) && (bytes < q)
02387                         && ((*bytes & 0xC0) == 0x80)) {
02388                     bytes++;
02389                 }
02390 
02391                 Tcl_ListObjAppendElement(NULL, list,
02392                         Tcl_NewStringObj(bytes , (int)(end - bytes)));
02393 
02394                 break;
02395             }
02396             case 'c':
02397             case 'i':
02398             case 'u':
02399             case 'd':
02400             case 'o':
02401             case 'x':
02402             case 'X':
02403                 seekingConversion = 0;
02404                 switch (size) {
02405                 case -1:
02406                 case 0:
02407                     Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
02408                             (long int)va_arg(argList, int)));
02409                     break;
02410                 case 1:
02411                     Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
02412                             va_arg(argList, long int)));
02413                     break;
02414                 }
02415                 break;
02416             case 'e':
02417             case 'E':
02418             case 'f':
02419             case 'g':
02420             case 'G':
02421                 Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj(
02422                         va_arg(argList, double)));
02423                 seekingConversion = 0;
02424                 break;
02425             case '*':
02426                 lastNum = (int)va_arg(argList, int);
02427                 Tcl_ListObjAppendElement(NULL, list, Tcl_NewIntObj(lastNum));
02428                 p++;
02429                 break;
02430             case '0': case '1': case '2': case '3': case '4':
02431             case '5': case '6': case '7': case '8': case '9':
02432                 lastNum = (int) strtoul(p, &end, 10);
02433                 p = end;
02434                 break;
02435             case '.':
02436                 gotPrecision = 1;
02437                 p++;
02438                 break;
02439             /* TODO: support for wide (and bignum?) arguments */
02440             case 'l':
02441                 size = 1;
02442                 p++;
02443                 break;
02444             case 'h':
02445                 size = -1;
02446             default:
02447                 p++;
02448             }
02449         } while (seekingConversion);
02450     }
02451     TclListObjGetElements(NULL, list, &objc, &objv);
02452     code = Tcl_AppendFormatToObj(NULL, objPtr, format, objc, objv);
02453     if (code != TCL_OK) {
02454         Tcl_AppendPrintfToObj(objPtr,
02455                 "Unable to format \"%s\" with supplied arguments: %s",
02456                 format, Tcl_GetString(list));
02457     }
02458     Tcl_DecrRefCount(list);
02459 }
02460 
02461 /*
02462  *---------------------------------------------------------------------------
02463  *
02464  * Tcl_AppendPrintfToObj --
02465  *
02466  * Results:
02467  *      A standard Tcl result.
02468  *
02469  * Side effects:
02470  *      None.
02471  *
02472  *---------------------------------------------------------------------------
02473  */
02474 
02475 void
02476 Tcl_AppendPrintfToObj(
02477     Tcl_Obj *objPtr,
02478     const char *format,
02479     ...)
02480 {
02481     va_list argList;
02482 
02483     va_start(argList, format);
02484     AppendPrintfToObjVA(objPtr, format, argList);
02485     va_end(argList);
02486 }
02487 
02488 /*
02489  *---------------------------------------------------------------------------
02490  *
02491  * Tcl_ObjPrintf --
02492  *
02493  * Results:
02494  *      A refcount zero Tcl_Obj.
02495  *
02496  * Side effects:
02497  *      None.
02498  *
02499  *---------------------------------------------------------------------------
02500  */
02501 
02502 Tcl_Obj *
02503 Tcl_ObjPrintf(
02504     const char *format,
02505     ...)
02506 {
02507     va_list argList;
02508     Tcl_Obj *objPtr = Tcl_NewObj();
02509 
02510     va_start(argList, format);
02511     AppendPrintfToObjVA(objPtr, format, argList);
02512     va_end(argList);
02513     return objPtr;
02514 }
02515 
02516 /*
02517  *---------------------------------------------------------------------------
02518  *
02519  * TclStringObjReverse --
02520  *
02521  *      Implements the [string reverse] operation.
02522  *
02523  * Results:
02524  *      An unshared Tcl value which is the [string reverse] of the argument
02525  *      supplied.  When sharing rules permit, the returned value might be
02526  *      the argument with modifications done in place.
02527  *
02528  * Side effects:
02529  *      May allocate a new Tcl_Obj.
02530  *
02531  *---------------------------------------------------------------------------
02532  */
02533 
02534 Tcl_Obj *
02535 TclStringObjReverse(
02536     Tcl_Obj *objPtr)
02537 {
02538     String *stringPtr;
02539     int numChars = Tcl_GetCharLength(objPtr);
02540     int i = 0, lastCharIdx = numChars - 1;
02541     char *bytes;
02542 
02543     if (numChars <= 1) {
02544         return objPtr;
02545     }
02546 
02547     stringPtr = GET_STRING(objPtr);
02548     if (stringPtr->hasUnicode) {
02549         Tcl_UniChar *source = stringPtr->unicode;
02550 
02551         if (Tcl_IsShared(objPtr)) {
02552             Tcl_UniChar *dest, ch = 0;
02553 
02554             /*
02555              * Create a non-empty, pure unicode value, so we can coax
02556              * Tcl_SetObjLength into growing the unicode rep buffer.
02557              */
02558 
02559             Tcl_Obj *resultPtr = Tcl_NewUnicodeObj(&ch, 1);
02560             Tcl_SetObjLength(resultPtr, numChars);
02561             dest = Tcl_GetUnicode(resultPtr);
02562 
02563             while (i < numChars) {
02564                 dest[i++] = source[lastCharIdx--];
02565             }
02566             return resultPtr;
02567         }
02568 
02569         while (i < lastCharIdx) {
02570             Tcl_UniChar tmp = source[lastCharIdx];
02571             source[lastCharIdx--] = source[i];
02572             source[i++] = tmp;
02573         }
02574         Tcl_InvalidateStringRep(objPtr);
02575         return objPtr;
02576     }
02577 
02578     bytes = TclGetString(objPtr);
02579     if (Tcl_IsShared(objPtr)) {
02580         char *dest;
02581         Tcl_Obj *resultPtr = Tcl_NewObj();
02582         Tcl_SetObjLength(resultPtr, numChars);
02583         dest = TclGetString(resultPtr);
02584         while (i < numChars) {
02585             dest[i++] = bytes[lastCharIdx--];
02586         }
02587         return resultPtr;
02588     }
02589 
02590     while (i < lastCharIdx) {
02591         char tmp = bytes[lastCharIdx];
02592         bytes[lastCharIdx--] = bytes[i];
02593         bytes[i++] = tmp;
02594     }
02595     return objPtr;
02596 }
02597 
02598 /*
02599  *---------------------------------------------------------------------------
02600  *
02601  * FillUnicodeRep --
02602  *
02603  *      Populate the Unicode internal rep with the Unicode form of its string
02604  *      rep. The object must alread have a "String" internal rep.
02605  *
02606  * Results:
02607  *      None.
02608  *
02609  * Side effects:
02610  *      Reallocates the String internal rep.
02611  *
02612  *---------------------------------------------------------------------------
02613  */
02614 
02615 static void
02616 FillUnicodeRep(
02617     Tcl_Obj *objPtr)            /* The object in which to fill the unicode
02618                                  * rep. */
02619 {
02620     String *stringPtr;
02621     size_t uallocated;
02622     char *srcEnd, *src = objPtr->bytes;
02623     Tcl_UniChar *dst;
02624 
02625     stringPtr = GET_STRING(objPtr);
02626     if (stringPtr->numChars == -1) {
02627         stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length);
02628     }
02629     stringPtr->hasUnicode = (stringPtr->numChars > 0);
02630 
02631     uallocated = STRING_UALLOC(stringPtr->numChars);
02632     if (uallocated > stringPtr->uallocated) {
02633         /*
02634          * If not enough space has been allocated for the unicode rep,
02635          * reallocate the internal rep object.
02636          *
02637          * There isn't currently enough space in the Unicode representation so
02638          * allocate additional space. If the current Unicode representation
02639          * isn't empty (i.e. it looks like we've done some appends) then
02640          * overallocate the space so that we won't have to do as much
02641          * reallocation in the future.
02642          */
02643 
02644         if (stringPtr->uallocated > 0) {
02645             uallocated *= 2;
02646         }
02647         stringPtr = (String *) ckrealloc((char*) stringPtr,
02648                 STRING_SIZE(uallocated));
02649         stringPtr->uallocated = uallocated;
02650     }
02651 
02652     /*
02653      * Convert src to Unicode and store the coverted data in "unicode".
02654      */
02655 
02656     srcEnd = src + objPtr->length;
02657     for (dst = stringPtr->unicode; src < srcEnd; dst++) {
02658         src += TclUtfToUniChar(src, dst);
02659     }
02660     *dst = 0;
02661 
02662     SET_STRING(objPtr, stringPtr);
02663 }
02664 
02665 /*
02666  *----------------------------------------------------------------------
02667  *
02668  * DupStringInternalRep --
02669  *
02670  *      Initialize the internal representation of a new Tcl_Obj to a copy of
02671  *      the internal representation of an existing string object.
02672  *
02673  * Results:
02674  *      None.
02675  *
02676  * Side effects:
02677  *      copyPtr's internal rep is set to a copy of srcPtr's internal
02678  *      representation.
02679  *
02680  *----------------------------------------------------------------------
02681  */
02682 
02683 static void
02684 DupStringInternalRep(
02685     register Tcl_Obj *srcPtr,   /* Object with internal rep to copy. Must have
02686                                  * an internal rep of type "String". */
02687     register Tcl_Obj *copyPtr)  /* Object with internal rep to set. Must not
02688                                  * currently have an internal rep.*/
02689 {
02690     String *srcStringPtr = GET_STRING(srcPtr);
02691     String *copyStringPtr = NULL;
02692 
02693     /*
02694      * If the src obj is a string of 1-byte Utf chars, then copy the string
02695      * rep of the source object and create an "empty" Unicode internal rep for
02696      * the new object. Otherwise, copy Unicode internal rep, and invalidate
02697      * the string rep of the new object.
02698      */
02699 
02700     if (srcStringPtr->hasUnicode == 0) {
02701         copyStringPtr = (String *) ckalloc(STRING_SIZE(STRING_UALLOC(0)));
02702         copyStringPtr->uallocated = STRING_UALLOC(0);
02703     } else {
02704         copyStringPtr = (String *) ckalloc(
02705                 STRING_SIZE(srcStringPtr->uallocated));
02706         copyStringPtr->uallocated = srcStringPtr->uallocated;
02707 
02708         memcpy(copyStringPtr->unicode, srcStringPtr->unicode,
02709                 (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar));
02710         copyStringPtr->unicode[srcStringPtr->numChars] = 0;
02711     }
02712     copyStringPtr->numChars = srcStringPtr->numChars;
02713     copyStringPtr->hasUnicode = srcStringPtr->hasUnicode;
02714     copyStringPtr->allocated = srcStringPtr->allocated;
02715 
02716     /*
02717      * Tricky point: the string value was copied by generic object management
02718      * code, so it doesn't contain any extra bytes that might exist in the
02719      * source object.
02720      */
02721 
02722     copyStringPtr->allocated = copyPtr->length;
02723 
02724     SET_STRING(copyPtr, copyStringPtr);
02725     copyPtr->typePtr = &tclStringType;
02726 }
02727 
02728 /*
02729  *----------------------------------------------------------------------
02730  *
02731  * SetStringFromAny --
02732  *
02733  *      Create an internal representation of type "String" for an object.
02734  *
02735  * Results:
02736  *      This operation always succeeds and returns TCL_OK.
02737  *
02738  * Side effects:
02739  *      Any old internal reputation for objPtr is freed and the internal
02740  *      representation is set to "String".
02741  *
02742  *----------------------------------------------------------------------
02743  */
02744 
02745 static int
02746 SetStringFromAny(
02747     Tcl_Interp *interp,         /* Used for error reporting if not NULL. */
02748     register Tcl_Obj *objPtr)   /* The object to convert. */
02749 {
02750     /*
02751      * The Unicode object is optimized for the case where each UTF char in a
02752      * string is only one byte. In this case, we store the value of numChars,
02753      * but we don't copy the bytes to the unicodeObj->unicode.
02754      */
02755 
02756     if (objPtr->typePtr != &tclStringType) {
02757         String *stringPtr;
02758 
02759         if (objPtr->typePtr != NULL) {
02760             if (objPtr->bytes == NULL) {
02761                 objPtr->typePtr->updateStringProc(objPtr);
02762             }
02763             TclFreeIntRep(objPtr);
02764         }
02765         objPtr->typePtr = &tclStringType;
02766 
02767         /*
02768          * Allocate enough space for the basic String structure.
02769          */
02770 
02771         stringPtr = (String *) ckalloc(STRING_SIZE(STRING_UALLOC(0)));
02772         stringPtr->numChars = -1;
02773         stringPtr->uallocated = STRING_UALLOC(0);
02774         stringPtr->hasUnicode = 0;
02775 
02776         if (objPtr->bytes != NULL) {
02777             stringPtr->allocated = objPtr->length;
02778             objPtr->bytes[objPtr->length] = 0;
02779         } else {
02780             objPtr->length = 0;
02781         }
02782         SET_STRING(objPtr, stringPtr);
02783     }
02784     return TCL_OK;
02785 }
02786 
02787 /*
02788  *----------------------------------------------------------------------
02789  *
02790  * UpdateStringOfString --
02791  *
02792  *      Update the string representation for an object whose internal
02793  *      representation is "String".
02794  *
02795  * Results:
02796  *      None.
02797  *
02798  * Side effects:
02799  *      The object's string may be set by converting its Unicode represention
02800  *      to UTF format.
02801  *
02802  *----------------------------------------------------------------------
02803  */
02804 
02805 static void
02806 UpdateStringOfString(
02807     Tcl_Obj *objPtr)            /* Object with string rep to update. */
02808 {
02809     int i, size;
02810     Tcl_UniChar *unicode;
02811     char dummy[TCL_UTF_MAX];
02812     char *dst;
02813     String *stringPtr;
02814 
02815     stringPtr = GET_STRING(objPtr);
02816     if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) {
02817         if (stringPtr->numChars <= 0) {
02818             /*
02819              * If there is no Unicode rep, or the string has 0 chars, then set
02820              * the string rep to an empty string.
02821              */
02822 
02823             objPtr->bytes = tclEmptyStringRep;
02824             objPtr->length = 0;
02825             return;
02826         }
02827 
02828         unicode = stringPtr->unicode;
02829 
02830         /*
02831          * Translate the Unicode string to UTF. "size" will hold the amount of
02832          * space the UTF string needs.
02833          */
02834 
02835         size = 0;
02836         for (i = 0; i < stringPtr->numChars; i++) {
02837             size += Tcl_UniCharToUtf((int) unicode[i], dummy);
02838         }
02839 
02840         dst = (char *) ckalloc((unsigned) (size + 1));
02841         objPtr->bytes = dst;
02842         objPtr->length = size;
02843         stringPtr->allocated = size;
02844 
02845         for (i = 0; i < stringPtr->numChars; i++) {
02846             dst += Tcl_UniCharToUtf(unicode[i], dst);
02847         }
02848         *dst = '\0';
02849     }
02850     return;
02851 }
02852 
02853 /*
02854  *----------------------------------------------------------------------
02855  *
02856  * FreeStringInternalRep --
02857  *
02858  *      Deallocate the storage associated with a String data object's internal
02859  *      representation.
02860  *
02861  * Results:
02862  *      None.
02863  *
02864  * Side effects:
02865  *      Frees memory.
02866  *
02867  *----------------------------------------------------------------------
02868  */
02869 
02870 static void
02871 FreeStringInternalRep(
02872     Tcl_Obj *objPtr)            /* Object with internal rep to free. */
02873 {
02874     ckfree((char *) GET_STRING(objPtr));
02875 }
02876 
02877 /*
02878  * Local Variables:
02879  * mode: c
02880  * c-basic-offset: 4
02881  * fill-column: 78
02882  * End:
02883  */



Generated on Wed Mar 12 12:18:21 2008 by  doxygen 1.5.1