tclStringObj.cGo to the documentation of this file.00001 /* 00002 * tclStringObj.c -- 00003 * 00004 * This file contains functions that implement string operations on Tcl 00005 * objects. Some string operations work with UTF strings and others 00006 * require Unicode format. Functions that require knowledge of the width 00007 * of each character, such as indexing, operate on Unicode data. 00008 * 00009 * A Unicode string is an internationalized string. Conceptually, a 00010 * Unicode string is an array of 16-bit quantities organized as a 00011 * sequence of properly formed UTF-8 characters. There is a one-to-one 00012 * map between Unicode and UTF characters. Because Unicode characters 00013 * have a fixed width, operations such as indexing operate on Unicode 00014 * data. The String object is optimized for the case where each UTF char 00015 * in a string is only one byte. In this case, we store the value of 00016 * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode 00017 * is explicitly called). 00018 * 00019 * The String object type stores one or both formats. The default 00020 * behavior is to store UTF. Once Unicode is calculated by a function, it 00021 * is stored in the internal rep for future access (without an additional 00022 * O(n) cost). 00023 * 00024 * To allow many appends to be done to an object without constantly 00025 * reallocating the space for the string or Unicode representation, we 00026 * allocate double the space for the string or Unicode and use the 00027 * internal representation to keep track of how much space is used vs. 00028 * allocated. 00029 * 00030 * Copyright (c) 1995-1997 Sun Microsystems, Inc. 00031 * Copyright (c) 1999 by Scriptics Corporation. 00032 * 00033 * See the file "license.terms" for information on usage and redistribution of 00034 * this file, and for a DISCLAIMER OF ALL WARRANTIES. 00035 * 00036 * RCS: @(#) $Id: tclStringObj.c,v 1.69 2008/01/10 16:09:23 dgp Exp $ */ 00037 00038 #include "tclInt.h" 00039 #include "tommath.h" 00040 00041 /* 00042 * Prototypes for functions defined later in this file: 00043 */ 00044 00045 static void AppendUnicodeToUnicodeRep(Tcl_Obj *objPtr, 00046 const Tcl_UniChar *unicode, int appendNumChars); 00047 static void AppendUnicodeToUtfRep(Tcl_Obj *objPtr, 00048 const Tcl_UniChar *unicode, int numChars); 00049 static void AppendUtfToUnicodeRep(Tcl_Obj *objPtr, 00050 const char *bytes, int numBytes); 00051 static void AppendUtfToUtfRep(Tcl_Obj *objPtr, 00052 const char *bytes, int numBytes); 00053 static void FillUnicodeRep(Tcl_Obj *objPtr); 00054 static void AppendPrintfToObjVA(Tcl_Obj *objPtr, 00055 const char *format, va_list argList); 00056 static void FreeStringInternalRep(Tcl_Obj *objPtr); 00057 static void DupStringInternalRep(Tcl_Obj *objPtr, 00058 Tcl_Obj *copyPtr); 00059 static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); 00060 static void UpdateStringOfString(Tcl_Obj *objPtr); 00061 00062 /* 00063 * The structure below defines the string Tcl object type by means of 00064 * functions that can be invoked by generic object code. 00065 */ 00066 00067 Tcl_ObjType tclStringType = { 00068 "string", /* name */ 00069 FreeStringInternalRep, /* freeIntRepPro */ 00070 DupStringInternalRep, /* dupIntRepProc */ 00071 UpdateStringOfString, /* updateStringProc */ 00072 SetStringFromAny /* setFromAnyProc */ 00073 }; 00074 00075 /* 00076 * The following structure is the internal rep for a String object. It keeps 00077 * track of how much memory has been used and how much has been allocated for 00078 * the Unicode and UTF string to enable growing and shrinking of the UTF and 00079 * Unicode reps of the String object with fewer mallocs. To optimize string 00080 * length and indexing operations, this structure also stores the number of 00081 * characters (same of UTF and Unicode!) once that value has been computed. 00082 * 00083 * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 00084 * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This 00085 * can be officially modified by altering the definition of Tcl_UniChar in 00086 * tcl.h, but do not do that unless you are sure what you're doing! 00087 */ 00088 00089 typedef struct String { 00090 int numChars; /* The number of chars in the string. -1 means 00091 * this value has not been calculated. >= 0 00092 * means that there is a valid Unicode rep, or 00093 * that the number of UTF bytes == the number 00094 * of chars. */ 00095 size_t allocated; /* The amount of space actually allocated for 00096 * the UTF string (minus 1 byte for the 00097 * termination char). */ 00098 size_t uallocated; /* The amount of space actually allocated for 00099 * the Unicode string (minus 2 bytes for the 00100 * termination char). */ 00101 int hasUnicode; /* Boolean determining whether the string has 00102 * a Unicode representation. */ 00103 Tcl_UniChar unicode[2]; /* The array of Unicode chars. The actual size 00104 * of this field depends on the 'uallocated' 00105 * field above. */ 00106 } String; 00107 00108 #define STRING_UALLOC(numChars) \ 00109 (numChars * sizeof(Tcl_UniChar)) 00110 #define STRING_SIZE(ualloc) \ 00111 ((unsigned) ((ualloc) \ 00112 ? sizeof(String) - sizeof(Tcl_UniChar) + (ualloc) \ 00113 : sizeof(String))) 00114 #define GET_STRING(objPtr) \ 00115 ((String *) (objPtr)->internalRep.otherValuePtr) 00116 #define SET_STRING(objPtr, stringPtr) \ 00117 ((objPtr)->internalRep.otherValuePtr = (void *) (stringPtr)) 00118 00119 /* 00120 * TCL STRING GROWTH ALGORITHM 00121 * 00122 * When growing strings (during an append, for example), the following growth 00123 * algorithm is used: 00124 * 00125 * Attempt to allocate 2 * (originalLength + appendLength) 00126 * On failure: 00127 * attempt to allocate originalLength + 2*appendLength + 00128 * TCL_GROWTH_MIN_ALLOC 00129 * 00130 * This algorithm allows very good performance, as it rapidly increases the 00131 * memory allocated for a given string, which minimizes the number of 00132 * reallocations that must be performed. However, using only the doubling 00133 * algorithm can lead to a significant waste of memory. In particular, it may 00134 * fail even when there is sufficient memory available to complete the append 00135 * request (but there is not 2*totalLength memory available). So when the 00136 * doubling fails (because there is not enough memory available), the 00137 * algorithm requests a smaller amount of memory, which is still enough to 00138 * cover the request, but which hopefully will be less than the total 00139 * available memory. 00140 * 00141 * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling of very 00142 * small appends. Without this extra slush factor, a sequence of several small 00143 * appends would cause several memory allocations. As long as 00144 * TCL_GROWTH_MIN_ALLOC is a reasonable size, we can avoid that behavior. 00145 * 00146 * The growth algorithm can be tuned by adjusting the following parameters: 00147 * 00148 * TCL_GROWTH_MIN_ALLOC Additional space, in bytes, to allocate when 00149 * the double allocation has failed. Default is 00150 * 1024 (1 kilobyte). 00151 */ 00152 00153 #ifndef TCL_GROWTH_MIN_ALLOC 00154 #define TCL_GROWTH_MIN_ALLOC 1024 00155 #endif 00156 00157 /* 00158 *---------------------------------------------------------------------- 00159 * 00160 * Tcl_NewStringObj -- 00161 * 00162 * This function is normally called when not debugging: i.e., when 00163 * TCL_MEM_DEBUG is not defined. It creates a new string object and 00164 * initializes it from the byte pointer and length arguments. 00165 * 00166 * When TCL_MEM_DEBUG is defined, this function just returns the result 00167 * of calling the debugging version Tcl_DbNewStringObj. 00168 * 00169 * Results: 00170 * A newly created string object is returned that has ref count zero. 00171 * 00172 * Side effects: 00173 * The new object's internal string representation will be set to a copy 00174 * of the length bytes starting at "bytes". If "length" is negative, use 00175 * bytes up to the first NUL byte; i.e., assume "bytes" points to a 00176 * C-style NUL-terminated string. The object's type is set to NULL. An 00177 * extra NUL is added to the end of the new object's byte array. 00178 * 00179 *---------------------------------------------------------------------- 00180 */ 00181 00182 #ifdef TCL_MEM_DEBUG 00183 #undef Tcl_NewStringObj 00184 Tcl_Obj * 00185 Tcl_NewStringObj( 00186 const char *bytes, /* Points to the first of the length bytes 00187 * used to initialize the new object. */ 00188 int length) /* The number of bytes to copy from "bytes" 00189 * when initializing the new object. If 00190 * negative, use bytes up to the first NUL 00191 * byte. */ 00192 { 00193 return Tcl_DbNewStringObj(bytes, length, "unknown", 0); 00194 } 00195 #else /* if not TCL_MEM_DEBUG */ 00196 Tcl_Obj * 00197 Tcl_NewStringObj( 00198 const char *bytes, /* Points to the first of the length bytes 00199 * used to initialize the new object. */ 00200 int length) /* The number of bytes to copy from "bytes" 00201 * when initializing the new object. If 00202 * negative, use bytes up to the first NUL 00203 * byte. */ 00204 { 00205 register Tcl_Obj *objPtr; 00206 00207 if (length < 0) { 00208 length = (bytes? strlen(bytes) : 0); 00209 } 00210 TclNewStringObj(objPtr, bytes, length); 00211 return objPtr; 00212 } 00213 #endif /* TCL_MEM_DEBUG */ 00214 00215 /* 00216 *---------------------------------------------------------------------- 00217 * 00218 * Tcl_DbNewStringObj -- 00219 * 00220 * This function is normally called when debugging: i.e., when 00221 * TCL_MEM_DEBUG is defined. It creates new string objects. It is the 00222 * same as the Tcl_NewStringObj function above except that it calls 00223 * Tcl_DbCkalloc directly with the file name and line number from its 00224 * caller. This simplifies debugging since then the [memory active] 00225 * command will report the correct file name and line number when 00226 * reporting objects that haven't been freed. 00227 * 00228 * When TCL_MEM_DEBUG is not defined, this function just returns the 00229 * result of calling Tcl_NewStringObj. 00230 * 00231 * Results: 00232 * A newly created string object is returned that has ref count zero. 00233 * 00234 * Side effects: 00235 * The new object's internal string representation will be set to a copy 00236 * of the length bytes starting at "bytes". If "length" is negative, use 00237 * bytes up to the first NUL byte; i.e., assume "bytes" points to a 00238 * C-style NUL-terminated string. The object's type is set to NULL. An 00239 * extra NUL is added to the end of the new object's byte array. 00240 * 00241 *---------------------------------------------------------------------- 00242 */ 00243 00244 #ifdef TCL_MEM_DEBUG 00245 Tcl_Obj * 00246 Tcl_DbNewStringObj( 00247 const char *bytes, /* Points to the first of the length bytes 00248 * used to initialize the new object. */ 00249 int length, /* The number of bytes to copy from "bytes" 00250 * when initializing the new object. If 00251 * negative, use bytes up to the first NUL 00252 * byte. */ 00253 const char *file, /* The name of the source file calling this 00254 * function; used for debugging. */ 00255 int line) /* Line number in the source file; used for 00256 * debugging. */ 00257 { 00258 register Tcl_Obj *objPtr; 00259 00260 if (length < 0) { 00261 length = (bytes? strlen(bytes) : 0); 00262 } 00263 TclDbNewObj(objPtr, file, line); 00264 TclInitStringRep(objPtr, bytes, length); 00265 return objPtr; 00266 } 00267 #else /* if not TCL_MEM_DEBUG */ 00268 Tcl_Obj * 00269 Tcl_DbNewStringObj( 00270 const char *bytes, /* Points to the first of the length bytes 00271 * used to initialize the new object. */ 00272 register int length, /* The number of bytes to copy from "bytes" 00273 * when initializing the new object. If 00274 * negative, use bytes up to the first NUL 00275 * byte. */ 00276 const char *file, /* The name of the source file calling this 00277 * function; used for debugging. */ 00278 int line) /* Line number in the source file; used for 00279 * debugging. */ 00280 { 00281 return Tcl_NewStringObj(bytes, length); 00282 } 00283 #endif /* TCL_MEM_DEBUG */ 00284 00285 /* 00286 *--------------------------------------------------------------------------- 00287 * 00288 * Tcl_NewUnicodeObj -- 00289 * 00290 * This function is creates a new String object and initializes it from 00291 * the given Unicode String. If the Utf String is the same size as the 00292 * Unicode string, don't duplicate the data. 00293 * 00294 * Results: 00295 * The newly created object is returned. This object will have no initial 00296 * string representation. The returned object has a ref count of 0. 00297 * 00298 * Side effects: 00299 * Memory allocated for new object and copy of Unicode argument. 00300 * 00301 *--------------------------------------------------------------------------- 00302 */ 00303 00304 Tcl_Obj * 00305 Tcl_NewUnicodeObj( 00306 const Tcl_UniChar *unicode, /* The unicode string used to initialize the 00307 * new object. */ 00308 int numChars) /* Number of characters in the unicode 00309 * string. */ 00310 { 00311 Tcl_Obj *objPtr; 00312 String *stringPtr; 00313 size_t uallocated; 00314 00315 if (numChars < 0) { 00316 numChars = 0; 00317 if (unicode) { 00318 while (unicode[numChars] != 0) { 00319 numChars++; 00320 } 00321 } 00322 } 00323 uallocated = STRING_UALLOC(numChars); 00324 00325 /* 00326 * Create a new obj with an invalid string rep. 00327 */ 00328 00329 TclNewObj(objPtr); 00330 Tcl_InvalidateStringRep(objPtr); 00331 objPtr->typePtr = &tclStringType; 00332 00333 stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); 00334 stringPtr->numChars = numChars; 00335 stringPtr->uallocated = uallocated; 00336 stringPtr->hasUnicode = (numChars > 0); 00337 stringPtr->allocated = 0; 00338 memcpy(stringPtr->unicode, unicode, uallocated); 00339 stringPtr->unicode[numChars] = 0; 00340 SET_STRING(objPtr, stringPtr); 00341 return objPtr; 00342 } 00343 00344 /* 00345 *---------------------------------------------------------------------- 00346 * 00347 * Tcl_GetCharLength -- 00348 * 00349 * Get the length of the Unicode string from the Tcl object. 00350 * 00351 * Results: 00352 * Pointer to unicode string representing the unicode object. 00353 * 00354 * Side effects: 00355 * Frees old internal rep. Allocates memory for new "String" internal 00356 * rep. 00357 * 00358 *---------------------------------------------------------------------- 00359 */ 00360 00361 int 00362 Tcl_GetCharLength( 00363 Tcl_Obj *objPtr) /* The String object to get the num chars 00364 * of. */ 00365 { 00366 String *stringPtr; 00367 00368 SetStringFromAny(NULL, objPtr); 00369 stringPtr = GET_STRING(objPtr); 00370 00371 /* 00372 * If numChars is unknown, then calculate the number of characaters while 00373 * populating the Unicode string. 00374 */ 00375 00376 if (stringPtr->numChars == -1) { 00377 register int i = objPtr->length; 00378 register unsigned char *str = (unsigned char *) objPtr->bytes; 00379 00380 /* 00381 * This is a speed sensitive function, so run specially over the 00382 * string to count continuous ascii characters before resorting to the 00383 * Tcl_NumUtfChars call. This is a long form of: 00384 stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes,objPtr->length); 00385 * 00386 * TODO: Consider macro-izing this. 00387 */ 00388 00389 while (i && (*str < 0xC0)) { 00390 i--; 00391 str++; 00392 } 00393 stringPtr->numChars = objPtr->length - i; 00394 if (i) { 00395 stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes 00396 + (objPtr->length - i), i); 00397 } 00398 00399 if (stringPtr->numChars == objPtr->length) { 00400 /* 00401 * Since we've just calculated the number of chars, and all UTF 00402 * chars are 1-byte long, we don't need to store the unicode 00403 * string. 00404 */ 00405 00406 stringPtr->hasUnicode = 0; 00407 } else { 00408 /* 00409 * Since we've just calucalated the number of chars, and not all 00410 * UTF chars are 1-byte long, go ahead and populate the unicode 00411 * string. 00412 */ 00413 00414 FillUnicodeRep(objPtr); 00415 00416 /* 00417 * We need to fetch the pointer again because we have just 00418 * reallocated the structure to make room for the Unicode data. 00419 */ 00420 00421 stringPtr = GET_STRING(objPtr); 00422 } 00423 } 00424 return stringPtr->numChars; 00425 } 00426 00427 /* 00428 *---------------------------------------------------------------------- 00429 * 00430 * Tcl_GetUniChar -- 00431 * 00432 * Get the index'th Unicode character from the String object. The index 00433 * is assumed to be in the appropriate range. 00434 * 00435 * Results: 00436 * Returns the index'th Unicode character in the Object. 00437 * 00438 * Side effects: 00439 * Fills unichar with the index'th Unicode character. 00440 * 00441 *---------------------------------------------------------------------- 00442 */ 00443 00444 Tcl_UniChar 00445 Tcl_GetUniChar( 00446 Tcl_Obj *objPtr, /* The object to get the Unicode charater 00447 * from. */ 00448 int index) /* Get the index'th Unicode character. */ 00449 { 00450 Tcl_UniChar unichar; 00451 String *stringPtr; 00452 00453 SetStringFromAny(NULL, objPtr); 00454 stringPtr = GET_STRING(objPtr); 00455 00456 if (stringPtr->numChars == -1) { 00457 /* 00458 * We haven't yet calculated the length, so we don't have the Unicode 00459 * str. We need to know the number of chars before we can do indexing. 00460 */ 00461 00462 Tcl_GetCharLength(objPtr); 00463 00464 /* 00465 * We need to fetch the pointer again because we may have just 00466 * reallocated the structure. 00467 */ 00468 00469 stringPtr = GET_STRING(objPtr); 00470 } 00471 if (stringPtr->hasUnicode == 0) { 00472 /* 00473 * All of the characters in the Utf string are 1 byte chars, so we 00474 * don't store the unicode char. We get the Utf string and convert the 00475 * index'th byte to a Unicode character. 00476 */ 00477 00478 unichar = (Tcl_UniChar) objPtr->bytes[index]; 00479 } else { 00480 unichar = stringPtr->unicode[index]; 00481 } 00482 return unichar; 00483 } 00484 00485 /* 00486 *---------------------------------------------------------------------- 00487 * 00488 * Tcl_GetUnicode -- 00489 * 00490 * Get the Unicode form of the String object. If the object is not 00491 * already a String object, it will be converted to one. If the String 00492 * object does not have a Unicode rep, then one is create from the UTF 00493 * string format. 00494 * 00495 * Results: 00496 * Returns a pointer to the object's internal Unicode string. 00497 * 00498 * Side effects: 00499 * Converts the object to have the String internal rep. 00500 * 00501 *---------------------------------------------------------------------- 00502 */ 00503 00504 Tcl_UniChar * 00505 Tcl_GetUnicode( 00506 Tcl_Obj *objPtr) /* The object to find the unicode string 00507 * for. */ 00508 { 00509 String *stringPtr; 00510 00511 SetStringFromAny(NULL, objPtr); 00512 stringPtr = GET_STRING(objPtr); 00513 00514 if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { 00515 /* 00516 * We haven't yet calculated the length, or all of the characters in 00517 * the Utf string are 1 byte chars (so we didn't store the unicode 00518 * str). Since this function must return a unicode string, and one has 00519 * not yet been stored, force the Unicode to be calculated and stored 00520 * now. 00521 */ 00522 00523 FillUnicodeRep(objPtr); 00524 00525 /* 00526 * We need to fetch the pointer again because we have just reallocated 00527 * the structure to make room for the Unicode data. 00528 */ 00529 00530 stringPtr = GET_STRING(objPtr); 00531 } 00532 return stringPtr->unicode; 00533 } 00534 00535 /* 00536 *---------------------------------------------------------------------- 00537 * 00538 * Tcl_GetUnicodeFromObj -- 00539 * 00540 * Get the Unicode form of the String object with length. If the object 00541 * is not already a String object, it will be converted to one. If the 00542 * String object does not have a Unicode rep, then one is create from the 00543 * UTF string format. 00544 * 00545 * Results: 00546 * Returns a pointer to the object's internal Unicode string. 00547 * 00548 * Side effects: 00549 * Converts the object to have the String internal rep. 00550 * 00551 *---------------------------------------------------------------------- 00552 */ 00553 00554 Tcl_UniChar * 00555 Tcl_GetUnicodeFromObj( 00556 Tcl_Obj *objPtr, /* The object to find the unicode string 00557 * for. */ 00558 int *lengthPtr) /* If non-NULL, the location where the string 00559 * rep's unichar length should be stored. If 00560 * NULL, no length is stored. */ 00561 { 00562 String *stringPtr; 00563 00564 SetStringFromAny(NULL, objPtr); 00565 stringPtr = GET_STRING(objPtr); 00566 00567 if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { 00568 /* 00569 * We haven't yet calculated the length, or all of the characters in 00570 * the Utf string are 1 byte chars (so we didn't store the unicode 00571 * str). Since this function must return a unicode string, and one has 00572 * not yet been stored, force the Unicode to be calculated and stored 00573 * now. 00574 */ 00575 00576 FillUnicodeRep(objPtr); 00577 00578 /* 00579 * We need to fetch the pointer again because we have just reallocated 00580 * the structure to make room for the Unicode data. 00581 */ 00582 00583 stringPtr = GET_STRING(objPtr); 00584 } 00585 00586 if (lengthPtr != NULL) { 00587 *lengthPtr = stringPtr->numChars; 00588 } 00589 return stringPtr->unicode; 00590 } 00591 00592 /* 00593 *---------------------------------------------------------------------- 00594 * 00595 * Tcl_GetRange -- 00596 * 00597 * Create a Tcl Object that contains the chars between first and last of 00598 * the object indicated by "objPtr". If the object is not already a 00599 * String object, convert it to one. The first and last indices are 00600 * assumed to be in the appropriate range. 00601 * 00602 * Results: 00603 * Returns a new Tcl Object of the String type. 00604 * 00605 * Side effects: 00606 * Changes the internal rep of "objPtr" to the String type. 00607 * 00608 *---------------------------------------------------------------------- 00609 */ 00610 00611 Tcl_Obj * 00612 Tcl_GetRange( 00613 Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ 00614 int first, /* First index of the range. */ 00615 int last) /* Last index of the range. */ 00616 { 00617 Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ 00618 String *stringPtr; 00619 00620 SetStringFromAny(NULL, objPtr); 00621 stringPtr = GET_STRING(objPtr); 00622 00623 if (stringPtr->numChars == -1) { 00624 /* 00625 * We haven't yet calculated the length, so we don't have the Unicode 00626 * str. We need to know the number of chars before we can do indexing. 00627 */ 00628 00629 Tcl_GetCharLength(objPtr); 00630 00631 /* 00632 * We need to fetch the pointer again because we may have just 00633 * reallocated the structure. 00634 */ 00635 00636 stringPtr = GET_STRING(objPtr); 00637 } 00638 00639 if (objPtr->bytes && (stringPtr->numChars == objPtr->length)) { 00640 char *str = TclGetString(objPtr); 00641 00642 /* 00643 * All of the characters in the Utf string are 1 byte chars, so we 00644 * don't store the unicode char. Create a new string object containing 00645 * the specified range of chars. 00646 */ 00647 00648 newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); 00649 00650 /* 00651 * Since we know the new string only has 1-byte chars, we can set it's 00652 * numChars field. 00653 */ 00654 00655 SetStringFromAny(NULL, newObjPtr); 00656 stringPtr = GET_STRING(newObjPtr); 00657 stringPtr->numChars = last-first+1; 00658 } else { 00659 newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first, 00660 last-first+1); 00661 } 00662 return newObjPtr; 00663 } 00664 00665 /* 00666 *---------------------------------------------------------------------- 00667 * 00668 * Tcl_SetStringObj -- 00669 * 00670 * Modify an object to hold a string that is a copy of the bytes 00671 * indicated by the byte pointer and length arguments. 00672 * 00673 * Results: 00674 * None. 00675 * 00676 * Side effects: 00677 * The object's string representation will be set to a copy of the 00678 * "length" bytes starting at "bytes". If "length" is negative, use bytes 00679 * up to the first NUL byte; i.e., assume "bytes" points to a C-style 00680 * NUL-terminated string. The object's old string and internal 00681 * representations are freed and the object's type is set NULL. 00682 * 00683 *---------------------------------------------------------------------- 00684 */ 00685 00686 void 00687 Tcl_SetStringObj( 00688 register Tcl_Obj *objPtr, /* Object whose internal rep to init. */ 00689 const char *bytes, /* Points to the first of the length bytes 00690 * used to initialize the object. */ 00691 register int length) /* The number of bytes to copy from "bytes" 00692 * when initializing the object. If negative, 00693 * use bytes up to the first NUL byte.*/ 00694 { 00695 /* 00696 * Free any old string rep, then set the string rep to a copy of the 00697 * length bytes starting at "bytes". 00698 */ 00699 00700 if (Tcl_IsShared(objPtr)) { 00701 Tcl_Panic("%s called with shared object", "Tcl_SetStringObj"); 00702 } 00703 00704 /* 00705 * Set the type to NULL and free any internal rep for the old type. 00706 */ 00707 00708 TclFreeIntRep(objPtr); 00709 objPtr->typePtr = NULL; 00710 00711 Tcl_InvalidateStringRep(objPtr); 00712 if (length < 0) { 00713 length = (bytes? strlen(bytes) : 0); 00714 } 00715 TclInitStringRep(objPtr, bytes, length); 00716 } 00717 00718 /* 00719 *---------------------------------------------------------------------- 00720 * 00721 * Tcl_SetObjLength -- 00722 * 00723 * This function changes the length of the string representation of an 00724 * object. 00725 * 00726 * Results: 00727 * None. 00728 * 00729 * Side effects: 00730 * If the size of objPtr's string representation is greater than length, 00731 * then it is reduced to length and a new terminating null byte is stored 00732 * in the strength. If the length of the string representation is greater 00733 * than length, the storage space is reallocated to the given length; a 00734 * null byte is stored at the end, but other bytes past the end of the 00735 * original string representation are undefined. The object's internal 00736 * representation is changed to "expendable string". 00737 * 00738 *---------------------------------------------------------------------- 00739 */ 00740 00741 void 00742 Tcl_SetObjLength( 00743 register Tcl_Obj *objPtr, /* Pointer to object. This object must not 00744 * currently be shared. */ 00745 register int length) /* Number of bytes desired for string 00746 * representation of object, not including 00747 * terminating null byte. */ 00748 { 00749 String *stringPtr; 00750 00751 if (Tcl_IsShared(objPtr)) { 00752 Tcl_Panic("%s called with shared object", "Tcl_SetObjLength"); 00753 } 00754 SetStringFromAny(NULL, objPtr); 00755 00756 stringPtr = GET_STRING(objPtr); 00757 00758 /* 00759 * Check that we're not extending a pure unicode string. 00760 */ 00761 00762 if (length > (int) stringPtr->allocated && 00763 (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) { 00764 /* 00765 * Not enough space in current string. Reallocate the string space and 00766 * free the old string. 00767 */ 00768 00769 if (objPtr->bytes != tclEmptyStringRep) { 00770 objPtr->bytes = ckrealloc((char *) objPtr->bytes, 00771 (unsigned) (length + 1)); 00772 } else { 00773 char *newBytes = ckalloc((unsigned) (length+1)); 00774 00775 if (objPtr->bytes != NULL && objPtr->length != 0) { 00776 memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length); 00777 Tcl_InvalidateStringRep(objPtr); 00778 } 00779 objPtr->bytes = newBytes; 00780 } 00781 stringPtr->allocated = length; 00782 00783 /* 00784 * Invalidate the unicode data. 00785 */ 00786 00787 stringPtr->hasUnicode = 0; 00788 } 00789 00790 if (objPtr->bytes != NULL) { 00791 objPtr->length = length; 00792 if (objPtr->bytes != tclEmptyStringRep) { 00793 /* 00794 * Ensure the string is NUL-terminated. 00795 */ 00796 00797 objPtr->bytes[length] = 0; 00798 } 00799 00800 /* 00801 * Invalidate the unicode data. 00802 */ 00803 00804 stringPtr->numChars = -1; 00805 stringPtr->hasUnicode = 0; 00806 } else { 00807 /* 00808 * Changing length of pure unicode string. 00809 */ 00810 00811 size_t uallocated = STRING_UALLOC(length); 00812 00813 if (uallocated > stringPtr->uallocated) { 00814 stringPtr = (String *) ckrealloc((char*) stringPtr, 00815 STRING_SIZE(uallocated)); 00816 SET_STRING(objPtr, stringPtr); 00817 stringPtr->uallocated = uallocated; 00818 } 00819 stringPtr->numChars = length; 00820 stringPtr->hasUnicode = (length > 0); 00821 00822 /* 00823 * Ensure the string is NUL-terminated. 00824 */ 00825 00826 stringPtr->unicode[length] = 0; 00827 stringPtr->allocated = 0; 00828 objPtr->length = 0; 00829 } 00830 } 00831 00832 /* 00833 *---------------------------------------------------------------------- 00834 * 00835 * Tcl_AttemptSetObjLength -- 00836 * 00837 * This function changes the length of the string representation of an 00838 * object. It uses the attempt* (non-panic'ing) memory allocators. 00839 * 00840 * Results: 00841 * 1 if the requested memory was allocated, 0 otherwise. 00842 * 00843 * Side effects: 00844 * If the size of objPtr's string representation is greater than length, 00845 * then it is reduced to length and a new terminating null byte is stored 00846 * in the strength. If the length of the string representation is greater 00847 * than length, the storage space is reallocated to the given length; a 00848 * null byte is stored at the end, but other bytes past the end of the 00849 * original string representation are undefined. The object's internal 00850 * representation is changed to "expendable string". 00851 * 00852 *---------------------------------------------------------------------- 00853 */ 00854 00855 int 00856 Tcl_AttemptSetObjLength( 00857 register Tcl_Obj *objPtr, /* Pointer to object. This object must not 00858 * currently be shared. */ 00859 register int length) /* Number of bytes desired for string 00860 * representation of object, not including 00861 * terminating null byte. */ 00862 { 00863 String *stringPtr; 00864 00865 if (Tcl_IsShared(objPtr)) { 00866 Tcl_Panic("%s called with shared object", "Tcl_AttemptSetObjLength"); 00867 } 00868 SetStringFromAny(NULL, objPtr); 00869 00870 stringPtr = GET_STRING(objPtr); 00871 00872 /* 00873 * Check that we're not extending a pure unicode string. 00874 */ 00875 00876 if (length > (int) stringPtr->allocated && 00877 (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) { 00878 char *newBytes; 00879 00880 /* 00881 * Not enough space in current string. Reallocate the string space and 00882 * free the old string. 00883 */ 00884 00885 if (objPtr->bytes != tclEmptyStringRep) { 00886 newBytes = attemptckrealloc(objPtr->bytes, 00887 (unsigned)(length + 1)); 00888 if (newBytes == NULL) { 00889 return 0; 00890 } 00891 } else { 00892 newBytes = attemptckalloc((unsigned) (length + 1)); 00893 if (newBytes == NULL) { 00894 return 0; 00895 } 00896 if (objPtr->bytes != NULL && objPtr->length != 0) { 00897 memcpy(newBytes, objPtr->bytes, (size_t) objPtr->length); 00898 Tcl_InvalidateStringRep(objPtr); 00899 } 00900 } 00901 objPtr->bytes = newBytes; 00902 stringPtr->allocated = length; 00903 00904 /* 00905 * Invalidate the unicode data. 00906 */ 00907 00908 stringPtr->hasUnicode = 0; 00909 } 00910 00911 if (objPtr->bytes != NULL) { 00912 objPtr->length = length; 00913 if (objPtr->bytes != tclEmptyStringRep) { 00914 /* 00915 * Ensure the string is NULL-terminated. 00916 */ 00917 00918 objPtr->bytes[length] = 0; 00919 } 00920 00921 /* 00922 * Invalidate the unicode data. 00923 */ 00924 00925 stringPtr->numChars = -1; 00926 stringPtr->hasUnicode = 0; 00927 } else { 00928 /* 00929 * Changing length of pure unicode string. 00930 */ 00931 00932 size_t uallocated = STRING_UALLOC(length); 00933 00934 if (uallocated > stringPtr->uallocated) { 00935 stringPtr = (String *) attemptckrealloc((char*) stringPtr, 00936 STRING_SIZE(uallocated)); 00937 if (stringPtr == NULL) { 00938 return 0; 00939 } 00940 SET_STRING(objPtr, stringPtr); 00941 stringPtr->uallocated = uallocated; 00942 } 00943 stringPtr->numChars = length; 00944 stringPtr->hasUnicode = (length > 0); 00945 00946 /* 00947 * Ensure the string is NUL-terminated. 00948 */ 00949 00950 stringPtr->unicode[length] = 0; 00951 stringPtr->allocated = 0; 00952 objPtr->length = 0; 00953 } 00954 return 1; 00955 } 00956 00957 /* 00958 *--------------------------------------------------------------------------- 00959 * 00960 * TclSetUnicodeObj -- 00961 * 00962 * Modify an object to hold the Unicode string indicated by "unicode". 00963 * 00964 * Results: 00965 * None. 00966 * 00967 * Side effects: 00968 * Memory allocated for new "String" internal rep. 00969 * 00970 *--------------------------------------------------------------------------- 00971 */ 00972 00973 void 00974 Tcl_SetUnicodeObj( 00975 Tcl_Obj *objPtr, /* The object to set the string of. */ 00976 const Tcl_UniChar *unicode, /* The unicode string used to initialize the 00977 * object. */ 00978 int numChars) /* Number of characters in the unicode 00979 * string. */ 00980 { 00981 String *stringPtr; 00982 size_t uallocated; 00983 00984 if (numChars < 0) { 00985 numChars = 0; 00986 if (unicode) { 00987 while (unicode[numChars] != 0) { 00988 numChars++; 00989 } 00990 } 00991 } 00992 uallocated = STRING_UALLOC(numChars); 00993 00994 /* 00995 * Free the internal rep if one exists, and invalidate the string rep. 00996 */ 00997 00998 TclFreeIntRep(objPtr); 00999 objPtr->typePtr = &tclStringType; 01000 01001 /* 01002 * Allocate enough space for the String structure + Unicode string. 01003 */ 01004 01005 stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); 01006 stringPtr->numChars = numChars; 01007 stringPtr->uallocated = uallocated; 01008 stringPtr->hasUnicode = (numChars > 0); 01009 stringPtr->allocated = 0; 01010 memcpy(stringPtr->unicode, unicode, uallocated); 01011 stringPtr->unicode[numChars] = 0; 01012 01013 SET_STRING(objPtr, stringPtr); 01014 Tcl_InvalidateStringRep(objPtr); 01015 return; 01016 } 01017 01018 /* 01019 *---------------------------------------------------------------------- 01020 * 01021 * Tcl_AppendLimitedToObj -- 01022 * 01023 * This function appends a limited number of bytes from a sequence of 01024 * bytes to an object, marking any limitation with an ellipsis. 01025 * 01026 * Results: 01027 * None. 01028 * 01029 * Side effects: 01030 * The bytes at *bytes are appended to the string representation of 01031 * objPtr. 01032 * 01033 *---------------------------------------------------------------------- 01034 */ 01035 01036 void 01037 Tcl_AppendLimitedToObj( 01038 register Tcl_Obj *objPtr, /* Points to the object to append to. */ 01039 const char *bytes, /* Points to the bytes to append to the 01040 * object. */ 01041 register int length, /* The number of bytes available to be 01042 * appended from "bytes". If < 0, then all 01043 * bytes up to a NUL byte are available. */ 01044 register int limit, /* The maximum number of bytes to append to 01045 * the object. */ 01046 const char *ellipsis) /* Ellipsis marker string, appended to the 01047 * object to indicate not all available bytes 01048 * at "bytes" were appended. */ 01049 { 01050 String *stringPtr; 01051 int toCopy = 0; 01052 01053 if (Tcl_IsShared(objPtr)) { 01054 Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); 01055 } 01056 01057 SetStringFromAny(NULL, objPtr); 01058 01059 if (length < 0) { 01060 length = (bytes ? strlen(bytes) : 0); 01061 } 01062 if (length == 0) { 01063 return; 01064 } 01065 01066 if (length <= limit) { 01067 toCopy = length; 01068 } else { 01069 if (ellipsis == NULL) { 01070 ellipsis = "..."; 01071 } 01072 toCopy = Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes; 01073 } 01074 01075 /* 01076 * If objPtr has a valid Unicode rep, then append the Unicode conversion 01077 * of "bytes" to the objPtr's Unicode rep, otherwise append "bytes" to 01078 * objPtr's string rep. 01079 */ 01080 01081 stringPtr = GET_STRING(objPtr); 01082 if (stringPtr->hasUnicode != 0) { 01083 AppendUtfToUnicodeRep(objPtr, bytes, toCopy); 01084 } else { 01085 AppendUtfToUtfRep(objPtr, bytes, toCopy); 01086 } 01087 01088 if (length <= limit) { 01089 return; 01090 } 01091 01092 stringPtr = GET_STRING(objPtr); 01093 if (stringPtr->hasUnicode != 0) { 01094 AppendUtfToUnicodeRep(objPtr, ellipsis, -1); 01095 } else { 01096 AppendUtfToUtfRep(objPtr, ellipsis, -1); 01097 } 01098 } 01099 01100 /* 01101 *---------------------------------------------------------------------- 01102 * 01103 * Tcl_AppendToObj -- 01104 * 01105 * This function appends a sequence of bytes to an object. 01106 * 01107 * Results: 01108 * None. 01109 * 01110 * Side effects: 01111 * The bytes at *bytes are appended to the string representation of 01112 * objPtr. 01113 * 01114 *---------------------------------------------------------------------- 01115 */ 01116 01117 void 01118 Tcl_AppendToObj( 01119 register Tcl_Obj *objPtr, /* Points to the object to append to. */ 01120 const char *bytes, /* Points to the bytes to append to the 01121 * object. */ 01122 register int length) /* The number of bytes to append from "bytes". 01123 * If < 0, then append all bytes up to NUL 01124 * byte. */ 01125 { 01126 Tcl_AppendLimitedToObj(objPtr, bytes, length, INT_MAX, NULL); 01127 } 01128 01129 /* 01130 *---------------------------------------------------------------------- 01131 * 01132 * Tcl_AppendUnicodeToObj -- 01133 * 01134 * This function appends a Unicode string to an object in the most 01135 * efficient manner possible. Length must be >= 0. 01136 * 01137 * Results: 01138 * None. 01139 * 01140 * Side effects: 01141 * Invalidates the string rep and creates a new Unicode string. 01142 * 01143 *---------------------------------------------------------------------- 01144 */ 01145 01146 void 01147 Tcl_AppendUnicodeToObj( 01148 register Tcl_Obj *objPtr, /* Points to the object to append to. */ 01149 const Tcl_UniChar *unicode, /* The unicode string to append to the 01150 * object. */ 01151 int length) /* Number of chars in "unicode". */ 01152 { 01153 String *stringPtr; 01154 01155 if (Tcl_IsShared(objPtr)) { 01156 Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); 01157 } 01158 01159 if (length == 0) { 01160 return; 01161 } 01162 01163 SetStringFromAny(NULL, objPtr); 01164 stringPtr = GET_STRING(objPtr); 01165 01166 /* 01167 * If objPtr has a valid Unicode rep, then append the "unicode" to the 01168 * objPtr's Unicode rep, otherwise the UTF conversion of "unicode" to 01169 * objPtr's string rep. 01170 */ 01171 01172 if (stringPtr->hasUnicode != 0) { 01173 AppendUnicodeToUnicodeRep(objPtr, unicode, length); 01174 } else { 01175 AppendUnicodeToUtfRep(objPtr, unicode, length); 01176 } 01177 } 01178 01179 /* 01180 *---------------------------------------------------------------------- 01181 * 01182 * Tcl_AppendObjToObj -- 01183 * 01184 * This function appends the string rep of one object to another. 01185 * "objPtr" cannot be a shared object. 01186 * 01187 * Results: 01188 * None. 01189 * 01190 * Side effects: 01191 * The string rep of appendObjPtr is appended to the string 01192 * representation of objPtr. 01193 * 01194 *---------------------------------------------------------------------- 01195 */ 01196 01197 void 01198 Tcl_AppendObjToObj( 01199 Tcl_Obj *objPtr, /* Points to the object to append to. */ 01200 Tcl_Obj *appendObjPtr) /* Object to append. */ 01201 { 01202 String *stringPtr; 01203 int length, numChars, allOneByteChars; 01204 char *bytes; 01205 01206 SetStringFromAny(NULL, objPtr); 01207 01208 /* 01209 * If objPtr has a valid Unicode rep, then get a Unicode string from 01210 * appendObjPtr and append it. 01211 */ 01212 01213 stringPtr = GET_STRING(objPtr); 01214 if (stringPtr->hasUnicode != 0) { 01215 /* 01216 * If appendObjPtr is not of the "String" type, don't convert it. 01217 */ 01218 01219 if (appendObjPtr->typePtr == &tclStringType) { 01220 stringPtr = GET_STRING(appendObjPtr); 01221 if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { 01222 /* 01223 * If appendObjPtr is a string obj with no valid Unicode rep, 01224 * then fill its unicode rep. 01225 */ 01226 01227 FillUnicodeRep(appendObjPtr); 01228 stringPtr = GET_STRING(appendObjPtr); 01229 } 01230 AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode, 01231 stringPtr->numChars); 01232 } else { 01233 bytes = TclGetStringFromObj(appendObjPtr, &length); 01234 AppendUtfToUnicodeRep(objPtr, bytes, length); 01235 } 01236 return; 01237 } 01238 01239 /* 01240 * Append to objPtr's UTF string rep. If we know the number of characters 01241 * in both objects before appending, then set the combined number of 01242 * characters in the final (appended-to) object. 01243 */ 01244 01245 bytes = TclGetStringFromObj(appendObjPtr, &length); 01246 01247 allOneByteChars = 0; 01248 numChars = stringPtr->numChars; 01249 if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) { 01250 stringPtr = GET_STRING(appendObjPtr); 01251 if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) { 01252 numChars += stringPtr->numChars; 01253 allOneByteChars = 1; 01254 } 01255 } 01256 01257 AppendUtfToUtfRep(objPtr, bytes, length); 01258 01259 if (allOneByteChars) { 01260 stringPtr = GET_STRING(objPtr); 01261 stringPtr->numChars = numChars; 01262 } 01263 } 01264 01265 /* 01266 *---------------------------------------------------------------------- 01267 * 01268 * AppendUnicodeToUnicodeRep -- 01269 * 01270 * This function appends the contents of "unicode" to the Unicode rep of 01271 * "objPtr". objPtr must already have a valid Unicode rep. 01272 * 01273 * Results: 01274 * None. 01275 * 01276 * Side effects: 01277 * objPtr's internal rep is reallocated. 01278 * 01279 *---------------------------------------------------------------------- 01280 */ 01281 01282 static void 01283 AppendUnicodeToUnicodeRep( 01284 Tcl_Obj *objPtr, /* Points to the object to append to. */ 01285 const Tcl_UniChar *unicode, /* String to append. */ 01286 int appendNumChars) /* Number of chars of "unicode" to append. */ 01287 { 01288 String *stringPtr, *tmpString; 01289 size_t numChars; 01290 01291 if (appendNumChars < 0) { 01292 appendNumChars = 0; 01293 if (unicode) { 01294 while (unicode[appendNumChars] != 0) { 01295 appendNumChars++; 01296 } 01297 } 01298 } 01299 if (appendNumChars == 0) { 01300 return; 01301 } 01302 01303 SetStringFromAny(NULL, objPtr); 01304 stringPtr = GET_STRING(objPtr); 01305 01306 /* 01307 * If not enough space has been allocated for the unicode rep, reallocate 01308 * the internal rep object with additional space. First try to double the 01309 * required allocation; if that fails, try a more modest increase. See the 01310 * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an 01311 * explanation of this growth algorithm. 01312 */ 01313 01314 numChars = stringPtr->numChars + appendNumChars; 01315 01316 if (STRING_UALLOC(numChars) >= stringPtr->uallocated) { 01317 stringPtr->uallocated = STRING_UALLOC(2 * numChars); 01318 tmpString = (String *) attemptckrealloc((char *)stringPtr, 01319 STRING_SIZE(stringPtr->uallocated)); 01320 if (tmpString == NULL) { 01321 stringPtr->uallocated = 01322 STRING_UALLOC(numChars + appendNumChars) 01323 + TCL_GROWTH_MIN_ALLOC; 01324 tmpString = (String *) ckrealloc((char *)stringPtr, 01325 STRING_SIZE(stringPtr->uallocated)); 01326 } 01327 stringPtr = tmpString; 01328 SET_STRING(objPtr, stringPtr); 01329 } 01330 01331 /* 01332 * Copy the new string onto the end of the old string, then add the 01333 * trailing null. 01334 */ 01335 01336 memcpy(stringPtr->unicode + stringPtr->numChars, unicode, 01337 appendNumChars * sizeof(Tcl_UniChar)); 01338 stringPtr->unicode[numChars] = 0; 01339 stringPtr->numChars = numChars; 01340 01341 Tcl_InvalidateStringRep(objPtr); 01342 } 01343 01344 /* 01345 *---------------------------------------------------------------------- 01346 * 01347 * AppendUnicodeToUtfRep -- 01348 * 01349 * This function converts the contents of "unicode" to UTF and appends 01350 * the UTF to the string rep of "objPtr". 01351 * 01352 * Results: 01353 * None. 01354 * 01355 * Side effects: 01356 * objPtr's internal rep is reallocated. 01357 * 01358 *---------------------------------------------------------------------- 01359 */ 01360 01361 static void 01362 AppendUnicodeToUtfRep( 01363 Tcl_Obj *objPtr, /* Points to the object to append to. */ 01364 const Tcl_UniChar *unicode, /* String to convert to UTF. */ 01365 int numChars) /* Number of chars of "unicode" to convert. */ 01366 { 01367 Tcl_DString dsPtr; 01368 const char *bytes; 01369 01370 if (numChars < 0) { 01371 numChars = 0; 01372 if (unicode) { 01373 while (unicode[numChars] != 0) { 01374 numChars++; 01375 } 01376 } 01377 } 01378 if (numChars == 0) { 01379 return; 01380 } 01381 01382 Tcl_DStringInit(&dsPtr); 01383 bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr); 01384 AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr)); 01385 Tcl_DStringFree(&dsPtr); 01386 } 01387 01388 /* 01389 *---------------------------------------------------------------------- 01390 * 01391 * AppendUtfToUnicodeRep -- 01392 * 01393 * This function converts the contents of "bytes" to Unicode and appends 01394 * the Unicode to the Unicode rep of "objPtr". objPtr must already have a 01395 * valid Unicode rep. 01396 * 01397 * Results: 01398 * None. 01399 * 01400 * Side effects: 01401 * objPtr's internal rep is reallocated. 01402 * 01403 *---------------------------------------------------------------------- 01404 */ 01405 01406 static void 01407 AppendUtfToUnicodeRep( 01408 Tcl_Obj *objPtr, /* Points to the object to append to. */ 01409 const char *bytes, /* String to convert to Unicode. */ 01410 int numBytes) /* Number of bytes of "bytes" to convert. */ 01411 { 01412 Tcl_DString dsPtr; 01413 int numChars; 01414 Tcl_UniChar *unicode; 01415 01416 if (numBytes < 0) { 01417 numBytes = (bytes ? strlen(bytes) : 0); 01418 } 01419 if (numBytes == 0) { 01420 return; 01421 } 01422 01423 Tcl_DStringInit(&dsPtr); 01424 numChars = Tcl_NumUtfChars(bytes, numBytes); 01425 unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr); 01426 AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); 01427 Tcl_DStringFree(&dsPtr); 01428 } 01429 01430 /* 01431 *---------------------------------------------------------------------- 01432 * 01433 * AppendUtfToUtfRep -- 01434 * 01435 * This function appends "numBytes" bytes of "bytes" to the UTF string 01436 * rep of "objPtr". objPtr must already have a valid String rep. 01437 * 01438 * Results: 01439 * None. 01440 * 01441 * Side effects: 01442 * objPtr's internal rep is reallocated. 01443 * 01444 *---------------------------------------------------------------------- 01445 */ 01446 01447 static void 01448 AppendUtfToUtfRep( 01449 Tcl_Obj *objPtr, /* Points to the object to append to. */ 01450 const char *bytes, /* String to append. */ 01451 int numBytes) /* Number of bytes of "bytes" to append. */ 01452 { 01453 String *stringPtr; 01454 int newLength, oldLength; 01455 01456 if (numBytes < 0) { 01457 numBytes = (bytes ? strlen(bytes) : 0); 01458 } 01459 if (numBytes == 0) { 01460 return; 01461 } 01462 01463 /* 01464 * Copy the new string onto the end of the old string, then add the 01465 * trailing null. 01466 */ 01467 01468 oldLength = objPtr->length; 01469 newLength = numBytes + oldLength; 01470 01471 stringPtr = GET_STRING(objPtr); 01472 if (newLength > (int) stringPtr->allocated) { 01473 /* 01474 * There isn't currently enough space in the string representation so 01475 * allocate additional space. First, try to double the length 01476 * required. If that fails, try a more modest allocation. See the "TCL 01477 * STRING GROWTH ALGORITHM" comment at the top of this file for an 01478 * explanation of this growth algorithm. 01479 */ 01480 01481 if (Tcl_AttemptSetObjLength(objPtr, 2 * newLength) == 0) { 01482 Tcl_SetObjLength(objPtr, 01483 newLength + numBytes + TCL_GROWTH_MIN_ALLOC); 01484 } 01485 } 01486 01487 /* 01488 * Invalidate the unicode data. 01489 */ 01490 01491 stringPtr->numChars = -1; 01492 stringPtr->hasUnicode = 0; 01493 01494 memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes); 01495 objPtr->bytes[newLength] = 0; 01496 objPtr->length = newLength; 01497 } 01498 01499 /* 01500 *---------------------------------------------------------------------- 01501 * 01502 * Tcl_AppendStringsToObjVA -- 01503 * 01504 * This function appends one or more null-terminated strings to an 01505 * object. 01506 * 01507 * Results: 01508 * None. 01509 * 01510 * Side effects: 01511 * The contents of all the string arguments are appended to the string 01512 * representation of objPtr. 01513 * 01514 *---------------------------------------------------------------------- 01515 */ 01516 01517 void 01518 Tcl_AppendStringsToObjVA( 01519 Tcl_Obj *objPtr, /* Points to the object to append to. */ 01520 va_list argList) /* Variable argument list. */ 01521 { 01522 #define STATIC_LIST_SIZE 16 01523 String *stringPtr; 01524 int newLength, oldLength, attemptLength; 01525 register char *string, *dst; 01526 char *static_list[STATIC_LIST_SIZE]; 01527 char **args = static_list; 01528 int nargs_space = STATIC_LIST_SIZE; 01529 int nargs, i; 01530 01531 if (Tcl_IsShared(objPtr)) { 01532 Tcl_Panic("%s called with shared object", "Tcl_AppendStringsToObj"); 01533 } 01534 01535 SetStringFromAny(NULL, objPtr); 01536 01537 /* 01538 * Figure out how much space is needed for all the strings, and expand the 01539 * string representation if it isn't big enough. If no bytes would be 01540 * appended, just return. Note that on some platforms (notably OS/390) the 01541 * argList is an array so we need to use memcpy. 01542 */ 01543 01544 nargs = 0; 01545 newLength = 0; 01546 oldLength = objPtr->length; 01547 while (1) { 01548 string = va_arg(argList, char *); 01549 if (string == NULL) { 01550 break; 01551 } 01552 if (nargs >= nargs_space) { 01553 /* 01554 * Expand the args buffer. 01555 */ 01556 01557 nargs_space += STATIC_LIST_SIZE; 01558 if (args == static_list) { 01559 args = (void *) ckalloc(nargs_space * sizeof(char *)); 01560 for (i = 0; i < nargs; ++i) { 01561 args[i] = static_list[i]; 01562 } 01563 } else { 01564 args = (void *) ckrealloc((void *) args, 01565 nargs_space * sizeof(char *)); 01566 } 01567 } 01568 newLength += strlen(string); 01569 args[nargs++] = string; 01570 } 01571 if (newLength == 0) { 01572 goto done; 01573 } 01574 01575 stringPtr = GET_STRING(objPtr); 01576 if (oldLength + newLength > (int) stringPtr->allocated) { 01577 /* 01578 * There isn't currently enough space in the string representation, so 01579 * allocate additional space. If the current string representation 01580 * isn't empty (i.e. it looks like we're doing a series of appends) 01581 * then try to allocate extra space to accomodate future growth: first 01582 * try to double the required memory; if that fails, try a more modest 01583 * allocation. See the "TCL STRING GROWTH ALGORITHM" comment at the 01584 * top of this file for an explanation of this growth algorithm. 01585 * Otherwise, if the current string representation is empty, exactly 01586 * enough memory is allocated. 01587 */ 01588 01589 if (oldLength == 0) { 01590 Tcl_SetObjLength(objPtr, newLength); 01591 } else { 01592 attemptLength = 2 * (oldLength + newLength); 01593 if (Tcl_AttemptSetObjLength(objPtr, attemptLength) == 0) { 01594 attemptLength = oldLength + (2 * newLength) + 01595 TCL_GROWTH_MIN_ALLOC; 01596 Tcl_SetObjLength(objPtr, attemptLength); 01597 } 01598 } 01599 } 01600 01601 /* 01602 * Make a second pass through the arguments, appending all the strings to 01603 * the object. 01604 */ 01605 01606 dst = objPtr->bytes + oldLength; 01607 for (i = 0; i < nargs; ++i) { 01608 string = args[i]; 01609 if (string == NULL) { 01610 break; 01611 } 01612 while (*string != 0) { 01613 *dst = *string; 01614 dst++; 01615 string++; 01616 } 01617 } 01618 01619 /* 01620 * Add a null byte to terminate the string. However, be careful: it's 01621 * possible that the object is totally empty (if it was empty originally 01622 * and there was nothing to append). In this case dst is NULL; just leave 01623 * everything alone. 01624 */ 01625 01626 if (dst != NULL) { 01627 *dst = 0; 01628 } 01629 objPtr->length = oldLength + newLength; 01630 01631 done: 01632 /* 01633 * If we had to allocate a buffer from the heap, free it now. 01634 */ 01635 01636 if (args != static_list) { 01637 ckfree((void *) args); 01638 } 01639 #undef STATIC_LIST_SIZE 01640 } 01641 01642 /* 01643 *---------------------------------------------------------------------- 01644 * 01645 * Tcl_AppendStringsToObj -- 01646 * 01647 * This function appends one or more null-terminated strings to an 01648 * object. 01649 * 01650 * Results: 01651 * None. 01652 * 01653 * Side effects: 01654 * The contents of all the string arguments are appended to the string 01655 * representation of objPtr. 01656 * 01657 *---------------------------------------------------------------------- 01658 */ 01659 01660 void 01661 Tcl_AppendStringsToObj( 01662 Tcl_Obj *objPtr, 01663 ...) 01664 { 01665 va_list argList; 01666 01667 va_start(argList, objPtr); 01668 Tcl_AppendStringsToObjVA(objPtr, argList); 01669 va_end(argList); 01670 } 01671 01672 /* 01673 *---------------------------------------------------------------------- 01674 * 01675 * Tcl_AppendFormatToObj -- 01676 * 01677 * This function appends a list of Tcl_Obj's to a Tcl_Obj according to 01678 * the formatting instructions embedded in the format string. The 01679 * formatting instructions are inspired by sprintf(). Returns TCL_OK when 01680 * successful. If there's an error in the arguments, TCL_ERROR is 01681 * returned, and an error message is written to the interp, if non-NULL. 01682 * 01683 * Results: 01684 * A standard Tcl result. 01685 * 01686 * Side effects: 01687 * None. 01688 * 01689 *---------------------------------------------------------------------- 01690 */ 01691 01692 int 01693 Tcl_AppendFormatToObj( 01694 Tcl_Interp *interp, 01695 Tcl_Obj *appendObj, 01696 const char *format, 01697 int objc, 01698 Tcl_Obj *const objv[]) 01699 { 01700 const char *span = format, *msg; 01701 int numBytes = 0, objIndex = 0, gotXpg = 0, gotSequential = 0; 01702 int originalLength; 01703 static const char *mixedXPG = 01704 "cannot mix \"%\" and \"%n$\" conversion specifiers"; 01705 static const char *badIndex[2] = { 01706 "not enough arguments for all format specifiers", 01707 "\"%n$\" argument index out of range" 01708 }; 01709 01710 if (Tcl_IsShared(appendObj)) { 01711 Tcl_Panic("%s called with shared object", "Tcl_AppendFormatToObj"); 01712 } 01713 TclGetStringFromObj(appendObj, &originalLength); 01714 01715 /* 01716 * Format string is NUL-terminated. 01717 */ 01718 01719 while (*format != '\0') { 01720 char *end; 01721 int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag; 01722 int width, gotPrecision, precision, useShort, useWide, useBig; 01723 int newXpg, numChars, allocSegment = 0; 01724 Tcl_Obj *segment; 01725 Tcl_UniChar ch; 01726 int step = Tcl_UtfToUniChar(format, &ch); 01727 01728 format += step; 01729 if (ch != '%') { 01730 numBytes += step; 01731 continue; 01732 } 01733 if (numBytes) { 01734 Tcl_AppendToObj(appendObj, span, numBytes); 01735 numBytes = 0; 01736 } 01737 01738 /* 01739 * Saw a % : process the format specifier. 01740 * 01741 * Step 0. Handle special case of escaped format marker (i.e., %%). 01742 */ 01743 01744 step = Tcl_UtfToUniChar(format, &ch); 01745 if (ch == '%') { 01746 span = format; 01747 numBytes = step; 01748 format += step; 01749 continue; 01750 } 01751 01752 /* 01753 * Step 1. XPG3 position specifier 01754 */ 01755 01756 newXpg = 0; 01757 if (isdigit(UCHAR(ch))) { 01758 int position = strtoul(format, &end, 10); 01759 if (*end == '$') { 01760 newXpg = 1; 01761 objIndex = position - 1; 01762 format = end + 1; 01763 step = Tcl_UtfToUniChar(format, &ch); 01764 } 01765 } 01766 if (newXpg) { 01767 if (gotSequential) { 01768 msg = mixedXPG; 01769 goto errorMsg; 01770 } 01771 gotXpg = 1; 01772 } else { 01773 if (gotXpg) { 01774 msg = mixedXPG; 01775 goto errorMsg; 01776 } 01777 gotSequential = 1; 01778 } 01779 if ((objIndex < 0) || (objIndex >= objc)) { 01780 msg = badIndex[gotXpg]; 01781 goto errorMsg; 01782 } 01783 01784 /* 01785 * Step 2. Set of flags. 01786 */ 01787 01788 gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0; 01789 sawFlag = 1; 01790 do { 01791 switch (ch) { 01792 case '-': 01793 gotMinus = 1; 01794 break; 01795 case '#': 01796 gotHash = 1; 01797 break; 01798 case '0': 01799 gotZero = 1; 01800 break; 01801 case ' ': 01802 gotSpace = 1; 01803 break; 01804 case '+': 01805 gotPlus = 1; 01806 break; 01807 default: 01808 sawFlag = 0; 01809 } 01810 if (sawFlag) { 01811 format += step; 01812 step = Tcl_UtfToUniChar(format, &ch); 01813 } 01814 } while (sawFlag); 01815 01816 /* 01817 * Step 3. Minimum field width. 01818 */ 01819 01820 width = 0; 01821 if (isdigit(UCHAR(ch))) { 01822 width = strtoul(format, &end, 10); 01823 format = end; 01824 step = Tcl_UtfToUniChar(format, &ch); 01825 } else if (ch == '*') { 01826 if (objIndex >= objc - 1) { 01827 msg = badIndex[gotXpg]; 01828 goto errorMsg; 01829 } 01830 if (TclGetIntFromObj(interp, objv[objIndex], &width) != TCL_OK) { 01831 goto error; 01832 } 01833 if (width < 0) { 01834 width = -width; 01835 gotMinus = 1; 01836 } 01837 objIndex++; 01838 format += step; 01839 step = Tcl_UtfToUniChar(format, &ch); 01840 } 01841 01842 /* 01843 * Step 4. Precision. 01844 */ 01845 01846 gotPrecision = precision = 0; 01847 if (ch == '.') { 01848 gotPrecision = 1; 01849 format += step; 01850 step = Tcl_UtfToUniChar(format, &ch); 01851 } 01852 if (isdigit(UCHAR(ch))) { 01853 precision = strtoul(format, &end, 10); 01854 format = end; 01855 step = Tcl_UtfToUniChar(format, &ch); 01856 } else if (ch == '*') { 01857 if (objIndex >= objc - 1) { 01858 msg = badIndex[gotXpg]; 01859 goto errorMsg; 01860 } 01861 if (TclGetIntFromObj(interp, objv[objIndex], &precision) 01862 != TCL_OK) { 01863 goto error; 01864 } 01865 01866 /* 01867 * TODO: Check this truncation logic. 01868 */ 01869 01870 if (precision < 0) { 01871 precision = 0; 01872 } 01873 objIndex++; 01874 format += step; 01875 step = Tcl_UtfToUniChar(format, &ch); 01876 } 01877 01878 /* 01879 * Step 5. Length modifier. 01880 */ 01881 01882 useShort = useWide = useBig = 0; 01883 if (ch == 'h') { 01884 useShort = 1; 01885 format += step; 01886 step = Tcl_UtfToUniChar(format, &ch); 01887 } else if (ch == 'l') { 01888 format += step; 01889 step = Tcl_UtfToUniChar(format, &ch); 01890 if (ch == 'l') { 01891 useBig = 1; 01892 format += step; 01893 step = Tcl_UtfToUniChar(format, &ch); 01894 } else { 01895 #ifndef TCL_WIDE_INT_IS_LONG 01896 useWide = 1; 01897 #endif 01898 } 01899 } 01900 01901 format += step; 01902 span = format; 01903 01904 /* 01905 * Step 6. The actual conversion character. 01906 */ 01907 01908 segment = objv[objIndex]; 01909 if (ch == 'i') { 01910 ch = 'd'; 01911 } 01912 switch (ch) { 01913 case '\0': 01914 msg = "format string ended in middle of field specifier"; 01915 goto errorMsg; 01916 case 's': { 01917 numChars = Tcl_GetCharLength(segment); 01918 if (gotPrecision && (precision < numChars)) { 01919 segment = Tcl_GetRange(segment, 0, precision - 1); 01920 Tcl_IncrRefCount(segment); 01921 allocSegment = 1; 01922 } 01923 break; 01924 } 01925 case 'c': { 01926 char buf[TCL_UTF_MAX]; 01927 int code, length; 01928 01929 if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { 01930 goto error; 01931 } 01932 length = Tcl_UniCharToUtf(code, buf); 01933 segment = Tcl_NewStringObj(buf, length); 01934 Tcl_IncrRefCount(segment); 01935 allocSegment = 1; 01936 break; 01937 } 01938 01939 case 'u': 01940 if (useBig) { 01941 msg = "unsigned bignum format is invalid"; 01942 goto errorMsg; 01943 } 01944 case 'd': 01945 case 'o': 01946 case 'x': 01947 case 'X': { 01948 short int s = 0; /* Silence compiler warning; only defined and 01949 * used when useShort is true. */ 01950 long l; 01951 Tcl_WideInt w; 01952 mp_int big; 01953 int isNegative = 0; 01954 01955 if (useBig) { 01956 if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) { 01957 goto error; 01958 } 01959 isNegative = (mp_cmp_d(&big, 0) == MP_LT); 01960 } else if (useWide) { 01961 if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { 01962 Tcl_Obj *objPtr; 01963 01964 if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) { 01965 goto error; 01966 } 01967 mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big); 01968 objPtr = Tcl_NewBignumObj(&big); 01969 Tcl_IncrRefCount(objPtr); 01970 Tcl_GetWideIntFromObj(NULL, objPtr, &w); 01971 Tcl_DecrRefCount(objPtr); 01972 } 01973 isNegative = (w < (Tcl_WideInt)0); 01974 } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) { 01975 if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) { 01976 Tcl_Obj *objPtr; 01977 01978 if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) { 01979 goto error; 01980 } 01981 mp_mod_2d(&big, (int) CHAR_BIT * sizeof(long), &big); 01982 objPtr = Tcl_NewBignumObj(&big); 01983 Tcl_IncrRefCount(objPtr); 01984 TclGetLongFromObj(NULL, objPtr, &l); 01985 Tcl_DecrRefCount(objPtr); 01986 } else { 01987 l = Tcl_WideAsLong(w); 01988 } 01989 if (useShort) { 01990 s = (short int) l; 01991 isNegative = (s < (short int)0); 01992 } else { 01993 isNegative = (l < (long)0); 01994 } 01995 } else if (useShort) { 01996 s = (short int) l; 01997 isNegative = (s < (short int)0); 01998 } else { 01999 isNegative = (l < (long)0); 02000 } 02001 02002 segment = Tcl_NewObj(); 02003 allocSegment = 1; 02004 Tcl_IncrRefCount(segment); 02005 02006 if ((isNegative || gotPlus) && (useBig || (ch == 'd'))) { 02007 Tcl_AppendToObj(segment, (isNegative ? "-" : "+"), 1); 02008 } 02009 02010 if (gotHash) { 02011 switch (ch) { 02012 case 'o': 02013 Tcl_AppendToObj(segment, "0", 1); 02014 precision--; 02015 break; 02016 case 'x': 02017 case 'X': 02018 Tcl_AppendToObj(segment, "0x", 2); 02019 break; 02020 } 02021 } 02022 02023 switch (ch) { 02024 case 'd': { 02025 int length; 02026 Tcl_Obj *pure; 02027 const char *bytes; 02028 02029 if (useShort) { 02030 pure = Tcl_NewIntObj((int)(s)); 02031 } else if (useWide) { 02032 pure = Tcl_NewWideIntObj(w); 02033 } else if (useBig) { 02034 pure = Tcl_NewBignumObj(&big); 02035 } else { 02036 pure = Tcl_NewLongObj(l); 02037 } 02038 Tcl_IncrRefCount(pure); 02039 bytes = TclGetStringFromObj(pure, &length); 02040 02041 /* 02042 * Already did the sign above. 02043 */ 02044 02045 if (*bytes == '-') { 02046 length--; 02047 bytes++; 02048 } 02049 02050 /* 02051 * Canonical decimal string reps for integers are composed 02052 * entirely of one-byte encoded characters, so "length" is the 02053 * number of chars. 02054 */ 02055 02056 if (gotPrecision) { 02057 while (length < precision) { 02058 Tcl_AppendToObj(segment, "0", 1); 02059 length++; 02060 } 02061 gotZero = 0; 02062 } 02063 if (gotZero) { 02064 length += Tcl_GetCharLength(segment); 02065 while (length < width) { 02066 Tcl_AppendToObj(segment, "0", 1); 02067 length++; 02068 } 02069 } 02070 Tcl_AppendToObj(segment, bytes, -1); 02071 Tcl_DecrRefCount(pure); 02072 break; 02073 } 02074 02075 case 'u': 02076 case 'o': 02077 case 'x': 02078 case 'X': { 02079 Tcl_WideUInt bits = (Tcl_WideUInt)0; 02080 int length, numBits = 4, numDigits = 0, base = 16; 02081 int index = 0, shift = 0; 02082 Tcl_Obj *pure; 02083 char *bytes; 02084 02085 if (ch == 'u') { 02086 base = 10; 02087 } 02088 if (ch == 'o') { 02089 base = 8; 02090 numBits = 3; 02091 } 02092 if (useShort) { 02093 unsigned short int us = (unsigned short int) s; 02094 02095 bits = (Tcl_WideUInt) us; 02096 while (us) { 02097 numDigits++; 02098 us /= base; 02099 } 02100 } else if (useWide) { 02101 Tcl_WideUInt uw = (Tcl_WideUInt) w; 02102 02103 bits = uw; 02104 while (uw) { 02105 numDigits++; 02106 uw /= base; 02107 } 02108 } else if (useBig && big.used) { 02109 int leftover = (big.used * DIGIT_BIT) % numBits; 02110 mp_digit mask = (~(mp_digit)0) << (DIGIT_BIT-leftover); 02111 02112 numDigits = 1 + ((big.used * DIGIT_BIT) / numBits); 02113 while ((mask & big.dp[big.used-1]) == 0) { 02114 numDigits--; 02115 mask >>= numBits; 02116 } 02117 } else if (!useBig) { 02118 unsigned long int ul = (unsigned long int) l; 02119 02120 bits = (Tcl_WideUInt) ul; 02121 while (ul) { 02122 numDigits++; 02123 ul /= base; 02124 } 02125 } 02126 02127 /* 02128 * Need to be sure zero becomes "0", not "". 02129 */ 02130 02131 if ((numDigits == 0) && !((ch == 'o') && gotHash)) { 02132 numDigits = 1; 02133 } 02134 pure = Tcl_NewObj(); 02135 Tcl_SetObjLength(pure, numDigits); 02136 bytes = TclGetString(pure); 02137 length = numDigits; 02138 while (numDigits--) { 02139 int digitOffset; 02140 02141 if (useBig && big.used) { 02142 if ((size_t) shift < 02143 CHAR_BIT*sizeof(Tcl_WideUInt) - DIGIT_BIT) { 02144 bits |= (((Tcl_WideUInt)big.dp[index++]) <<shift); 02145 shift += DIGIT_BIT; 02146 } 02147 shift -= numBits; 02148 } 02149 digitOffset = (int) (bits % base); 02150 if (digitOffset > 9) { 02151 bytes[numDigits] = 'a' + digitOffset - 10; 02152 } else { 02153 bytes[numDigits] = '0' + digitOffset; 02154 } 02155 bits /= base; 02156 } 02157 if (gotPrecision) { 02158 while (length < precision) { 02159 Tcl_AppendToObj(segment, "0", 1); 02160 length++; 02161 } 02162 gotZero = 0; 02163 } 02164 if (gotZero) { 02165 length += Tcl_GetCharLength(segment); 02166 while (length < width) { 02167 Tcl_AppendToObj(segment, "0", 1); 02168 length++; 02169 } 02170 } 02171 Tcl_AppendObjToObj(segment, pure); 02172 Tcl_DecrRefCount(pure); 02173 break; 02174 } 02175 02176 } 02177 break; 02178 } 02179 02180 case 'e': 02181 case 'E': 02182 case 'f': 02183 case 'g': 02184 case 'G': { 02185 #define MAX_FLOAT_SIZE 320 02186 char spec[2*TCL_INTEGER_SPACE + 9], *p = spec; 02187 double d; 02188 int length = MAX_FLOAT_SIZE; 02189 char *bytes; 02190 02191 if (Tcl_GetDoubleFromObj(interp, segment, &d) != TCL_OK) { 02192 /* TODO: Figure out ACCEPT_NAN here */ 02193 goto error; 02194 } 02195 *p++ = '%'; 02196 if (gotMinus) { 02197 *p++ = '-'; 02198 } 02199 if (gotHash) { 02200 *p++ = '#'; 02201 } 02202 if (gotZero) { 02203 *p++ = '0'; 02204 } 02205 if (gotSpace) { 02206 *p++ = ' '; 02207 } 02208 if (gotPlus) { 02209 *p++ = '+'; 02210 } 02211 if (width) { 02212 p += sprintf(p, "%d", width); 02213 } 02214 if (gotPrecision) { 02215 *p++ = '.'; 02216 p += sprintf(p, "%d", precision); 02217 length += precision; 02218 } 02219 02220 /* 02221 * Don't pass length modifiers! 02222 */ 02223 02224 *p++ = (char) ch; 02225 *p = '\0'; 02226 02227 segment = Tcl_NewObj(); 02228 allocSegment = 1; 02229 Tcl_SetObjLength(segment, length); 02230 bytes = TclGetString(segment); 02231 Tcl_SetObjLength(segment, sprintf(bytes, spec, d)); 02232 break; 02233 } 02234 default: 02235 if (interp != NULL) { 02236 char buf[40]; 02237 02238 sprintf(buf, "bad field specifier \"%c\"", ch); 02239 Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, -1)); 02240 } 02241 goto error; 02242 } 02243 02244 switch (ch) { 02245 case 'E': 02246 case 'G': 02247 case 'X': { 02248 Tcl_SetObjLength(segment, Tcl_UtfToUpper(TclGetString(segment))); 02249 } 02250 } 02251 02252 numChars = Tcl_GetCharLength(segment); 02253 if (!gotMinus) { 02254 while (numChars < width) { 02255 Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); 02256 numChars++; 02257 } 02258 } 02259 Tcl_AppendObjToObj(appendObj, segment); 02260 if (allocSegment) { 02261 Tcl_DecrRefCount(segment); 02262 } 02263 while (numChars < width) { 02264 Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); 02265 numChars++; 02266 } 02267 02268 objIndex += gotSequential; 02269 } 02270 if (numBytes) { 02271 Tcl_AppendToObj(appendObj, span, numBytes); 02272 numBytes = 0; 02273 } 02274 02275 return TCL_OK; 02276 02277 errorMsg: 02278 if (interp != NULL) { 02279 Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1)); 02280 } 02281 error: 02282 Tcl_SetObjLength(appendObj, originalLength); 02283 return TCL_ERROR; 02284 } 02285 02286 /* 02287 *--------------------------------------------------------------------------- 02288 * 02289 * Tcl_Format-- 02290 * 02291 * Results: 02292 * A refcount zero Tcl_Obj. 02293 * 02294 * Side effects: 02295 * None. 02296 * 02297 *--------------------------------------------------------------------------- 02298 */ 02299 02300 Tcl_Obj * 02301 Tcl_Format( 02302 Tcl_Interp *interp, 02303 const char *format, 02304 int objc, 02305 Tcl_Obj *const objv[]) 02306 { 02307 int result; 02308 Tcl_Obj *objPtr = Tcl_NewObj(); 02309 result = Tcl_AppendFormatToObj(interp, objPtr, format, objc, objv); 02310 if (result != TCL_OK) { 02311 Tcl_DecrRefCount(objPtr); 02312 return NULL; 02313 } 02314 return objPtr; 02315 } 02316 02317 /* 02318 *--------------------------------------------------------------------------- 02319 * 02320 * AppendPrintfToObjVA -- 02321 * 02322 * Results: 02323 * 02324 * Side effects: 02325 * 02326 *--------------------------------------------------------------------------- 02327 */ 02328 02329 static void 02330 AppendPrintfToObjVA( 02331 Tcl_Obj *objPtr, 02332 const char *format, 02333 va_list argList) 02334 { 02335 int code, objc; 02336 Tcl_Obj **objv, *list = Tcl_NewObj(); 02337 const char *p; 02338 char *end; 02339 02340 p = format; 02341 Tcl_IncrRefCount(list); 02342 while (*p != '\0') { 02343 int size = 0, seekingConversion = 1, gotPrecision = 0; 02344 int lastNum = -1; 02345 02346 if (*p++ != '%') { 02347 continue; 02348 } 02349 if (*p == '%') { 02350 p++; 02351 continue; 02352 } 02353 do { 02354 switch (*p) { 02355 02356 case '\0': 02357 seekingConversion = 0; 02358 break; 02359 case 's': { 02360 const char *q, *end, *bytes = va_arg(argList, char *); 02361 seekingConversion = 0; 02362 02363 /* 02364 * The buffer to copy characters from starts at bytes and ends 02365 * at either the first NUL byte, or after lastNum bytes, when 02366 * caller has indicated a limit. 02367 */ 02368 02369 end = bytes; 02370 while ((!gotPrecision || lastNum--) && (*end != '\0')) { 02371 end++; 02372 } 02373 02374 /* 02375 * Within that buffer, we trim both ends if needed so that we 02376 * copy only whole characters, and avoid copying any partial 02377 * multi-byte characters. 02378 */ 02379 02380 q = Tcl_UtfPrev(end, bytes); 02381 if (!Tcl_UtfCharComplete(q, (int)(end - q))) { 02382 end = q; 02383 } 02384 02385 q = bytes + TCL_UTF_MAX; 02386 while ((bytes < end) && (bytes < q) 02387 && ((*bytes & 0xC0) == 0x80)) { 02388 bytes++; 02389 } 02390 02391 Tcl_ListObjAppendElement(NULL, list, 02392 Tcl_NewStringObj(bytes , (int)(end - bytes))); 02393 02394 break; 02395 } 02396 case 'c': 02397 case 'i': 02398 case 'u': 02399 case 'd': 02400 case 'o': 02401 case 'x': 02402 case 'X': 02403 seekingConversion = 0; 02404 switch (size) { 02405 case -1: 02406 case 0: 02407 Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj( 02408 (long int)va_arg(argList, int))); 02409 break; 02410 case 1: 02411 Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj( 02412 va_arg(argList, long int))); 02413 break; 02414 } 02415 break; 02416 case 'e': 02417 case 'E': 02418 case 'f': 02419 case 'g': 02420 case 'G': 02421 Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj( 02422 va_arg(argList, double))); 02423 seekingConversion = 0; 02424 break; 02425 case '*': 02426 lastNum = (int)va_arg(argList, int); 02427 Tcl_ListObjAppendElement(NULL, list, Tcl_NewIntObj(lastNum)); 02428 p++; 02429 break; 02430 case '0': case '1': case '2': case '3': case '4': 02431 case '5': case '6': case '7': case '8': case '9': 02432 lastNum = (int) strtoul(p, &end, 10); 02433 p = end; 02434 break; 02435 case '.': 02436 gotPrecision = 1; 02437 p++; 02438 break; 02439 /* TODO: support for wide (and bignum?) arguments */ 02440 case 'l': 02441 size = 1; 02442 p++; 02443 break; 02444 case 'h': 02445 size = -1; 02446 default: 02447 p++; 02448 } 02449 } while (seekingConversion); 02450 } 02451 TclListObjGetElements(NULL, list, &objc, &objv); 02452 code = Tcl_AppendFormatToObj(NULL, objPtr, format, objc, objv); 02453 if (code != TCL_OK) { 02454 Tcl_AppendPrintfToObj(objPtr, 02455 "Unable to format \"%s\" with supplied arguments: %s", 02456 format, Tcl_GetString(list)); 02457 } 02458 Tcl_DecrRefCount(list); 02459 } 02460 02461 /* 02462 *--------------------------------------------------------------------------- 02463 * 02464 * Tcl_AppendPrintfToObj -- 02465 * 02466 * Results: 02467 * A standard Tcl result. 02468 * 02469 * Side effects: 02470 * None. 02471 * 02472 *--------------------------------------------------------------------------- 02473 */ 02474 02475 void 02476 Tcl_AppendPrintfToObj( 02477 Tcl_Obj *objPtr, 02478 const char *format, 02479 ...) 02480 { 02481 va_list argList; 02482 02483 va_start(argList, format); 02484 AppendPrintfToObjVA(objPtr, format, argList); 02485 va_end(argList); 02486 } 02487 02488 /* 02489 *--------------------------------------------------------------------------- 02490 * 02491 * Tcl_ObjPrintf -- 02492 * 02493 * Results: 02494 * A refcount zero Tcl_Obj. 02495 * 02496 * Side effects: 02497 * None. 02498 * 02499 *--------------------------------------------------------------------------- 02500 */ 02501 02502 Tcl_Obj * 02503 Tcl_ObjPrintf( 02504 const char *format, 02505 ...) 02506 { 02507 va_list argList; 02508 Tcl_Obj *objPtr = Tcl_NewObj(); 02509 02510 va_start(argList, format); 02511 AppendPrintfToObjVA(objPtr, format, argList); 02512 va_end(argList); 02513 return objPtr; 02514 } 02515 02516 /* 02517 *--------------------------------------------------------------------------- 02518 * 02519 * TclStringObjReverse -- 02520 * 02521 * Implements the [string reverse] operation. 02522 * 02523 * Results: 02524 * An unshared Tcl value which is the [string reverse] of the argument 02525 * supplied. When sharing rules permit, the returned value might be 02526 * the argument with modifications done in place. 02527 * 02528 * Side effects: 02529 * May allocate a new Tcl_Obj. 02530 * 02531 *--------------------------------------------------------------------------- 02532 */ 02533 02534 Tcl_Obj * 02535 TclStringObjReverse( 02536 Tcl_Obj *objPtr) 02537 { 02538 String *stringPtr; 02539 int numChars = Tcl_GetCharLength(objPtr); 02540 int i = 0, lastCharIdx = numChars - 1; 02541 char *bytes; 02542 02543 if (numChars <= 1) { 02544 return objPtr; 02545 } 02546 02547 stringPtr = GET_STRING(objPtr); 02548 if (stringPtr->hasUnicode) { 02549 Tcl_UniChar *source = stringPtr->unicode; 02550 02551 if (Tcl_IsShared(objPtr)) { 02552 Tcl_UniChar *dest, ch = 0; 02553 02554 /* 02555 * Create a non-empty, pure unicode value, so we can coax 02556 * Tcl_SetObjLength into growing the unicode rep buffer. 02557 */ 02558 02559 Tcl_Obj *resultPtr = Tcl_NewUnicodeObj(&ch, 1); 02560 Tcl_SetObjLength(resultPtr, numChars); 02561 dest = Tcl_GetUnicode(resultPtr); 02562 02563 while (i < numChars) { 02564 dest[i++] = source[lastCharIdx--]; 02565 } 02566 return resultPtr; 02567 } 02568 02569 while (i < lastCharIdx) { 02570 Tcl_UniChar tmp = source[lastCharIdx]; 02571 source[lastCharIdx--] = source[i]; 02572 source[i++] = tmp; 02573 } 02574 Tcl_InvalidateStringRep(objPtr); 02575 return objPtr; 02576 } 02577 02578 bytes = TclGetString(objPtr); 02579 if (Tcl_IsShared(objPtr)) { 02580 char *dest; 02581 Tcl_Obj *resultPtr = Tcl_NewObj(); 02582 Tcl_SetObjLength(resultPtr, numChars); 02583 dest = TclGetString(resultPtr); 02584 while (i < numChars) { 02585 dest[i++] = bytes[lastCharIdx--]; 02586 } 02587 return resultPtr; 02588 } 02589 02590 while (i < lastCharIdx) { 02591 char tmp = bytes[lastCharIdx]; 02592 bytes[lastCharIdx--] = bytes[i]; 02593 bytes[i++] = tmp; 02594 } 02595 return objPtr; 02596 } 02597 02598 /* 02599 *--------------------------------------------------------------------------- 02600 * 02601 * FillUnicodeRep -- 02602 * 02603 * Populate the Unicode internal rep with the Unicode form of its string 02604 * rep. The object must alread have a "String" internal rep. 02605 * 02606 * Results: 02607 * None. 02608 * 02609 * Side effects: 02610 * Reallocates the String internal rep. 02611 * 02612 *--------------------------------------------------------------------------- 02613 */ 02614 02615 static void 02616 FillUnicodeRep( 02617 Tcl_Obj *objPtr) /* The object in which to fill the unicode 02618 * rep. */ 02619 { 02620 String *stringPtr; 02621 size_t uallocated; 02622 char *srcEnd, *src = objPtr->bytes; 02623 Tcl_UniChar *dst; 02624 02625 stringPtr = GET_STRING(objPtr); 02626 if (stringPtr->numChars == -1) { 02627 stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length); 02628 } 02629 stringPtr->hasUnicode = (stringPtr->numChars > 0); 02630 02631 uallocated = STRING_UALLOC(stringPtr->numChars); 02632 if (uallocated > stringPtr->uallocated) { 02633 /* 02634 * If not enough space has been allocated for the unicode rep, 02635 * reallocate the internal rep object. 02636 * 02637 * There isn't currently enough space in the Unicode representation so 02638 * allocate additional space. If the current Unicode representation 02639 * isn't empty (i.e. it looks like we've done some appends) then 02640 * overallocate the space so that we won't have to do as much 02641 * reallocation in the future. 02642 */ 02643 02644 if (stringPtr->uallocated > 0) { 02645 uallocated *= 2; 02646 } 02647 stringPtr = (String *) ckrealloc((char*) stringPtr, 02648 STRING_SIZE(uallocated)); 02649 stringPtr->uallocated = uallocated; 02650 } 02651 02652 /* 02653 * Convert src to Unicode and store the coverted data in "unicode". 02654 */ 02655 02656 srcEnd = src + objPtr->length; 02657 for (dst = stringPtr->unicode; src < srcEnd; dst++) { 02658 src += TclUtfToUniChar(src, dst); 02659 } 02660 *dst = 0; 02661 02662 SET_STRING(objPtr, stringPtr); 02663 } 02664 02665 /* 02666 *---------------------------------------------------------------------- 02667 * 02668 * DupStringInternalRep -- 02669 * 02670 * Initialize the internal representation of a new Tcl_Obj to a copy of 02671 * the internal representation of an existing string object. 02672 * 02673 * Results: 02674 * None. 02675 * 02676 * Side effects: 02677 * copyPtr's internal rep is set to a copy of srcPtr's internal 02678 * representation. 02679 * 02680 *---------------------------------------------------------------------- 02681 */ 02682 02683 static void 02684 DupStringInternalRep( 02685 register Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have 02686 * an internal rep of type "String". */ 02687 register Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not 02688 * currently have an internal rep.*/ 02689 { 02690 String *srcStringPtr = GET_STRING(srcPtr); 02691 String *copyStringPtr = NULL; 02692 02693 /* 02694 * If the src obj is a string of 1-byte Utf chars, then copy the string 02695 * rep of the source object and create an "empty" Unicode internal rep for 02696 * the new object. Otherwise, copy Unicode internal rep, and invalidate 02697 * the string rep of the new object. 02698 */ 02699 02700 if (srcStringPtr->hasUnicode == 0) { 02701 copyStringPtr = (String *) ckalloc(STRING_SIZE(STRING_UALLOC(0))); 02702 copyStringPtr->uallocated = STRING_UALLOC(0); 02703 } else { 02704 copyStringPtr = (String *) ckalloc( 02705 STRING_SIZE(srcStringPtr->uallocated)); 02706 copyStringPtr->uallocated = srcStringPtr->uallocated; 02707 02708 memcpy(copyStringPtr->unicode, srcStringPtr->unicode, 02709 (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar)); 02710 copyStringPtr->unicode[srcStringPtr->numChars] = 0; 02711 } 02712 copyStringPtr->numChars = srcStringPtr->numChars; 02713 copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; 02714 copyStringPtr->allocated = srcStringPtr->allocated; 02715 02716 /* 02717 * Tricky point: the string value was copied by generic object management 02718 * code, so it doesn't contain any extra bytes that might exist in the 02719 * source object. 02720 */ 02721 02722 copyStringPtr->allocated = copyPtr->length; 02723 02724 SET_STRING(copyPtr, copyStringPtr); 02725 copyPtr->typePtr = &tclStringType; 02726 } 02727 02728 /* 02729 *---------------------------------------------------------------------- 02730 * 02731 * SetStringFromAny -- 02732 * 02733 * Create an internal representation of type "String" for an object. 02734 * 02735 * Results: 02736 * This operation always succeeds and returns TCL_OK. 02737 * 02738 * Side effects: 02739 * Any old internal reputation for objPtr is freed and the internal 02740 * representation is set to "String". 02741 * 02742 *---------------------------------------------------------------------- 02743 */ 02744 02745 static int 02746 SetStringFromAny( 02747 Tcl_Interp *interp, /* Used for error reporting if not NULL. */ 02748 register Tcl_Obj *objPtr) /* The object to convert. */ 02749 { 02750 /* 02751 * The Unicode object is optimized for the case where each UTF char in a 02752 * string is only one byte. In this case, we store the value of numChars, 02753 * but we don't copy the bytes to the unicodeObj->unicode. 02754 */ 02755 02756 if (objPtr->typePtr != &tclStringType) { 02757 String *stringPtr; 02758 02759 if (objPtr->typePtr != NULL) { 02760 if (objPtr->bytes == NULL) { 02761 objPtr->typePtr->updateStringProc(objPtr); 02762 } 02763 TclFreeIntRep(objPtr); 02764 } 02765 objPtr->typePtr = &tclStringType; 02766 02767 /* 02768 * Allocate enough space for the basic String structure. 02769 */ 02770 02771 stringPtr = (String *) ckalloc(STRING_SIZE(STRING_UALLOC(0))); 02772 stringPtr->numChars = -1; 02773 stringPtr->uallocated = STRING_UALLOC(0); 02774 stringPtr->hasUnicode = 0; 02775 02776 if (objPtr->bytes != NULL) { 02777 stringPtr->allocated = objPtr->length; 02778 objPtr->bytes[objPtr->length] = 0; 02779 } else { 02780 objPtr->length = 0; 02781 } 02782 SET_STRING(objPtr, stringPtr); 02783 } 02784 return TCL_OK; 02785 } 02786 02787 /* 02788 *---------------------------------------------------------------------- 02789 * 02790 * UpdateStringOfString -- 02791 * 02792 * Update the string representation for an object whose internal 02793 * representation is "String". 02794 * 02795 * Results: 02796 * None. 02797 * 02798 * Side effects: 02799 * The object's string may be set by converting its Unicode represention 02800 * to UTF format. 02801 * 02802 *---------------------------------------------------------------------- 02803 */ 02804 02805 static void 02806 UpdateStringOfString( 02807 Tcl_Obj *objPtr) /* Object with string rep to update. */ 02808 { 02809 int i, size; 02810 Tcl_UniChar *unicode; 02811 char dummy[TCL_UTF_MAX]; 02812 char *dst; 02813 String *stringPtr; 02814 02815 stringPtr = GET_STRING(objPtr); 02816 if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) { 02817 if (stringPtr->numChars <= 0) { 02818 /* 02819 * If there is no Unicode rep, or the string has 0 chars, then set 02820 * the string rep to an empty string. 02821 */ 02822 02823 objPtr->bytes = tclEmptyStringRep; 02824 objPtr->length = 0; 02825 return; 02826 } 02827 02828 unicode = stringPtr->unicode; 02829 02830 /* 02831 * Translate the Unicode string to UTF. "size" will hold the amount of 02832 * space the UTF string needs. 02833 */ 02834 02835 size = 0; 02836 for (i = 0; i < stringPtr->numChars; i++) { 02837 size += Tcl_UniCharToUtf((int) unicode[i], dummy); 02838 } 02839 02840 dst = (char *) ckalloc((unsigned) (size + 1)); 02841 objPtr->bytes = dst; 02842 objPtr->length = size; 02843 stringPtr->allocated = size; 02844 02845 for (i = 0; i < stringPtr->numChars; i++) { 02846 dst += Tcl_UniCharToUtf(unicode[i], dst); 02847 } 02848 *dst = '\0'; 02849 } 02850 return; 02851 } 02852 02853 /* 02854 *---------------------------------------------------------------------- 02855 * 02856 * FreeStringInternalRep -- 02857 * 02858 * Deallocate the storage associated with a String data object's internal 02859 * representation. 02860 * 02861 * Results: 02862 * None. 02863 * 02864 * Side effects: 02865 * Frees memory. 02866 * 02867 *---------------------------------------------------------------------- 02868 */ 02869 02870 static void 02871 FreeStringInternalRep( 02872 Tcl_Obj *objPtr) /* Object with internal rep to free. */ 02873 { 02874 ckfree((char *) GET_STRING(objPtr)); 02875 } 02876 02877 /* 02878 * Local Variables: 02879 * mode: c 02880 * c-basic-offset: 4 02881 * fill-column: 78 02882 * End: 02883 */
Generated on Wed Mar 12 12:18:21 2008 by 1.5.1 |