tclEncoding.cGo to the documentation of this file.00001 /* 00002 * tclEncoding.c -- 00003 * 00004 * Contains the implementation of the encoding conversion package. 00005 * 00006 * Copyright (c) 1996-1998 Sun Microsystems, Inc. 00007 * 00008 * See the file "license.terms" for information on usage and redistribution of 00009 * this file, and for a DISCLAIMER OF ALL WARRANTIES. 00010 * 00011 * RCS: @(#) $Id: tclEncoding.c,v 1.58 2007/12/13 15:23:16 dgp Exp $ 00012 */ 00013 00014 #include "tclInt.h" 00015 00016 typedef size_t (LengthProc)(CONST char *src); 00017 00018 /* 00019 * The following data structure represents an encoding, which describes how to 00020 * convert between various character sets and UTF-8. 00021 */ 00022 00023 typedef struct Encoding { 00024 char *name; /* Name of encoding. Malloced because (1) hash 00025 * table entry that owns this encoding may be 00026 * freed prior to this encoding being freed, 00027 * (2) string passed in the Tcl_EncodingType 00028 * structure may not be persistent. */ 00029 Tcl_EncodingConvertProc *toUtfProc; 00030 /* Function to convert from external encoding 00031 * into UTF-8. */ 00032 Tcl_EncodingConvertProc *fromUtfProc; 00033 /* Function to convert from UTF-8 into 00034 * external encoding. */ 00035 Tcl_EncodingFreeProc *freeProc; 00036 /* If non-NULL, function to call when this 00037 * encoding is deleted. */ 00038 int nullSize; /* Number of 0x00 bytes that signify 00039 * end-of-string in this encoding. This number 00040 * is used to determine the source string 00041 * length when the srcLen argument is 00042 * negative. This number can be 1 or 2. */ 00043 ClientData clientData; /* Arbitrary value associated with encoding 00044 * type. Passed to conversion functions. */ 00045 LengthProc *lengthProc; /* Function to compute length of 00046 * null-terminated strings in this encoding. 00047 * If nullSize is 1, this is strlen; if 00048 * nullSize is 2, this is a function that 00049 * returns the number of bytes in a 0x0000 00050 * terminated string. */ 00051 int refCount; /* Number of uses of this structure. */ 00052 Tcl_HashEntry *hPtr; /* Hash table entry that owns this encoding. */ 00053 } Encoding; 00054 00055 /* 00056 * The following structure is the clientData for a dynamically-loaded, 00057 * table-driven encoding created by LoadTableEncoding(). It maps between 00058 * Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only) 00059 * encoding. 00060 */ 00061 00062 typedef struct TableEncodingData { 00063 int fallback; /* Character (in this encoding) to substitute 00064 * when this encoding cannot represent a UTF-8 00065 * character. */ 00066 char prefixBytes[256]; /* If a byte in the input stream is a lead 00067 * byte for a 2-byte sequence, the 00068 * corresponding entry in this array is 1, 00069 * otherwise it is 0. */ 00070 unsigned short **toUnicode; /* Two dimensional sparse matrix to map 00071 * characters from the encoding to Unicode. 00072 * Each element of the toUnicode array points 00073 * to an array of 256 shorts. If there is no 00074 * corresponding character in Unicode, the 00075 * value in the matrix is 0x0000. 00076 * malloc'd. */ 00077 unsigned short **fromUnicode; 00078 /* Two dimensional sparse matrix to map 00079 * characters from Unicode to the encoding. 00080 * Each element of the fromUnicode array 00081 * points to an array of 256 shorts. If there 00082 * is no corresponding character the encoding, 00083 * the value in the matrix is 0x0000. 00084 * malloc'd. */ 00085 } TableEncodingData; 00086 00087 /* 00088 * The following structures is the clientData for a dynamically-loaded, 00089 * escape-driven encoding that is itself comprised of other simpler encodings. 00090 * An example is "iso-2022-jp", which uses escape sequences to switch between 00091 * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven" 00092 * does not necessarily mean that the ESCAPE character is the character used 00093 * for switching character sets. 00094 */ 00095 00096 typedef struct EscapeSubTable { 00097 unsigned int sequenceLen; /* Length of following string. */ 00098 char sequence[16]; /* Escape code that marks this encoding. */ 00099 char name[32]; /* Name for encoding. */ 00100 Encoding *encodingPtr; /* Encoding loaded using above name, or NULL 00101 * if this sub-encoding has not been needed 00102 * yet. */ 00103 } EscapeSubTable; 00104 00105 typedef struct EscapeEncodingData { 00106 int fallback; /* Character (in this encoding) to substitute 00107 * when this encoding cannot represent a UTF-8 00108 * character. */ 00109 unsigned int initLen; /* Length of following string. */ 00110 char init[16]; /* String to emit or expect before first char 00111 * in conversion. */ 00112 unsigned int finalLen; /* Length of following string. */ 00113 char final[16]; /* String to emit or expect after last char in 00114 * conversion. */ 00115 char prefixBytes[256]; /* If a byte in the input stream is the first 00116 * character of one of the escape sequences in 00117 * the following array, the corresponding 00118 * entry in this array is 1, otherwise it is 00119 * 0. */ 00120 int numSubTables; /* Length of following array. */ 00121 EscapeSubTable subTables[1];/* Information about each EscapeSubTable used 00122 * by this encoding type. The actual size will 00123 * be as large as necessary to hold all 00124 * EscapeSubTables. */ 00125 } EscapeEncodingData; 00126 00127 /* 00128 * Constants used when loading an encoding file to identify the type of the 00129 * file. 00130 */ 00131 00132 #define ENCODING_SINGLEBYTE 0 00133 #define ENCODING_DOUBLEBYTE 1 00134 #define ENCODING_MULTIBYTE 2 00135 #define ENCODING_ESCAPE 3 00136 00137 /* 00138 * A list of directories in which Tcl should look for *.enc files. This list 00139 * is shared by all threads. Access is governed by a mutex lock. 00140 */ 00141 00142 static TclInitProcessGlobalValueProc InitializeEncodingSearchPath; 00143 static ProcessGlobalValue encodingSearchPath = { 00144 0, 0, NULL, NULL, InitializeEncodingSearchPath, NULL, NULL 00145 }; 00146 00147 /* 00148 * A map from encoding names to the directories in which their data files have 00149 * been seen. The string value of the map is shared by all threads. Access to 00150 * the shared string is governed by a mutex lock. 00151 */ 00152 00153 static ProcessGlobalValue encodingFileMap = { 00154 0, 0, NULL, NULL, NULL, NULL, NULL 00155 }; 00156 00157 /* 00158 * A list of directories making up the "library path". Historically this 00159 * search path has served many uses, but the only one remaining is a base for 00160 * the encodingSearchPath above. If the application does not explicitly set 00161 * the encodingSearchPath, then it will be initialized by appending /encoding 00162 * to each directory in this "libraryPath". 00163 */ 00164 00165 static ProcessGlobalValue libraryPath = { 00166 0, 0, NULL, NULL, TclpInitLibraryPath, NULL, NULL 00167 }; 00168 00169 static int encodingsInitialized = 0; 00170 00171 /* 00172 * Hash table that keeps track of all loaded Encodings. Keys are the string 00173 * names that represent the encoding, values are (Encoding *). 00174 */ 00175 00176 static Tcl_HashTable encodingTable; 00177 TCL_DECLARE_MUTEX(encodingMutex) 00178 00179 /* 00180 * The following are used to hold the default and current system encodings. 00181 * If NULL is passed to one of the conversion routines, the current setting of 00182 * the system encoding will be used to perform the conversion. 00183 */ 00184 00185 static Tcl_Encoding defaultEncoding; 00186 static Tcl_Encoding systemEncoding; 00187 00188 /* 00189 * The following variable is used in the sparse matrix code for a 00190 * TableEncoding to represent a page in the table that has no entries. 00191 */ 00192 00193 static unsigned short emptyPage[256]; 00194 00195 /* 00196 * Functions used only in this module. 00197 */ 00198 00199 static int BinaryProc(ClientData clientData, 00200 CONST char *src, int srcLen, int flags, 00201 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00202 int *srcReadPtr, int *dstWrotePtr, 00203 int *dstCharsPtr); 00204 static void DupEncodingIntRep(Tcl_Obj *srcPtr, Tcl_Obj *dupPtr); 00205 static void EscapeFreeProc(ClientData clientData); 00206 static int EscapeFromUtfProc(ClientData clientData, 00207 CONST char *src, int srcLen, int flags, 00208 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00209 int *srcReadPtr, int *dstWrotePtr, 00210 int *dstCharsPtr); 00211 static int EscapeToUtfProc(ClientData clientData, 00212 CONST char *src, int srcLen, int flags, 00213 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00214 int *srcReadPtr, int *dstWrotePtr, 00215 int *dstCharsPtr); 00216 static void FillEncodingFileMap(void); 00217 static void FreeEncoding(Tcl_Encoding encoding); 00218 static void FreeEncodingIntRep(Tcl_Obj *objPtr); 00219 static Encoding * GetTableEncoding(EscapeEncodingData *dataPtr, 00220 int state); 00221 static Tcl_Encoding LoadEncodingFile(Tcl_Interp *interp, CONST char *name); 00222 static Tcl_Encoding LoadTableEncoding(CONST char *name, int type, 00223 Tcl_Channel chan); 00224 static Tcl_Encoding LoadEscapeEncoding(CONST char *name, Tcl_Channel chan); 00225 static Tcl_Channel OpenEncodingFileChannel(Tcl_Interp *interp, 00226 CONST char *name); 00227 static void TableFreeProc(ClientData clientData); 00228 static int TableFromUtfProc(ClientData clientData, 00229 CONST char *src, int srcLen, int flags, 00230 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00231 int *srcReadPtr, int *dstWrotePtr, 00232 int *dstCharsPtr); 00233 static int TableToUtfProc(ClientData clientData, CONST char *src, 00234 int srcLen, int flags, Tcl_EncodingState *statePtr, 00235 char *dst, int dstLen, int *srcReadPtr, 00236 int *dstWrotePtr, int *dstCharsPtr); 00237 static size_t unilen(CONST char *src); 00238 static int UnicodeToUtfProc(ClientData clientData, 00239 CONST char *src, int srcLen, int flags, 00240 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00241 int *srcReadPtr, int *dstWrotePtr, 00242 int *dstCharsPtr); 00243 static int UtfToUnicodeProc(ClientData clientData, 00244 CONST char *src, int srcLen, int flags, 00245 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00246 int *srcReadPtr, int *dstWrotePtr, 00247 int *dstCharsPtr); 00248 static int UtfToUtfProc(ClientData clientData, 00249 CONST char *src, int srcLen, int flags, 00250 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00251 int *srcReadPtr, int *dstWrotePtr, 00252 int *dstCharsPtr, int pureNullMode); 00253 static int UtfIntToUtfExtProc(ClientData clientData, 00254 CONST char *src, int srcLen, int flags, 00255 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00256 int *srcReadPtr, int *dstWrotePtr, 00257 int *dstCharsPtr); 00258 static int UtfExtToUtfIntProc(ClientData clientData, 00259 CONST char *src, int srcLen, int flags, 00260 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00261 int *srcReadPtr, int *dstWrotePtr, 00262 int *dstCharsPtr); 00263 static int Iso88591FromUtfProc(ClientData clientData, 00264 CONST char *src, int srcLen, int flags, 00265 Tcl_EncodingState *statePtr, char *dst, int dstLen, 00266 int *srcReadPtr, int *dstWrotePtr, 00267 int *dstCharsPtr); 00268 static int Iso88591ToUtfProc(ClientData clientData, 00269 CONST char *src, int srcLen, int flags, 00270 Tcl_EncodingState *statePtr, char *dst, 00271 int dstLen, int *srcReadPtr, int *dstWrotePtr, 00272 int *dstCharsPtr); 00273 00274 /* 00275 * A Tcl_ObjType for holding a cached Tcl_Encoding in the otherValuePtr field 00276 * of the intrep. This should help the lifetime of encodings be more useful. 00277 * See concerns raised in [Bug 1077262]. 00278 */ 00279 00280 static Tcl_ObjType encodingType = { 00281 "encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL 00282 }; 00283 00284 /* 00285 *---------------------------------------------------------------------- 00286 * 00287 * Tcl_GetEncodingFromObj -- 00288 * 00289 * Writes to (*encodingPtr) the Tcl_Encoding value of (*objPtr), if 00290 * possible, and returns TCL_OK. If no such encoding exists, TCL_ERROR is 00291 * returned, and if interp is non-NULL, an error message is written 00292 * there. 00293 * 00294 * Results: 00295 * Standard Tcl return code. 00296 * 00297 * Side effects: 00298 * Caches the Tcl_Encoding value as the internal rep of (*objPtr). 00299 * 00300 *---------------------------------------------------------------------- 00301 */ 00302 00303 int 00304 Tcl_GetEncodingFromObj( 00305 Tcl_Interp *interp, 00306 Tcl_Obj *objPtr, 00307 Tcl_Encoding *encodingPtr) 00308 { 00309 CONST char *name = Tcl_GetString(objPtr); 00310 if (objPtr->typePtr != &encodingType) { 00311 Tcl_Encoding encoding = Tcl_GetEncoding(interp, name); 00312 00313 if (encoding == NULL) { 00314 return TCL_ERROR; 00315 } 00316 TclFreeIntRep(objPtr); 00317 objPtr->internalRep.otherValuePtr = (VOID *) encoding; 00318 objPtr->typePtr = &encodingType; 00319 } 00320 *encodingPtr = Tcl_GetEncoding(NULL, name); 00321 return TCL_OK; 00322 } 00323 00324 /* 00325 *---------------------------------------------------------------------- 00326 * 00327 * FreeEncodingIntRep -- 00328 * 00329 * The Tcl_FreeInternalRepProc for the "encoding" Tcl_ObjType. 00330 * 00331 *---------------------------------------------------------------------- 00332 */ 00333 00334 static void 00335 FreeEncodingIntRep( 00336 Tcl_Obj *objPtr) 00337 { 00338 Tcl_FreeEncoding((Tcl_Encoding) objPtr->internalRep.otherValuePtr); 00339 } 00340 00341 /* 00342 *---------------------------------------------------------------------- 00343 * 00344 * DupEncodingIntRep -- 00345 * 00346 * The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType. 00347 * 00348 *---------------------------------------------------------------------- 00349 */ 00350 00351 static void 00352 DupEncodingIntRep( 00353 Tcl_Obj *srcPtr, 00354 Tcl_Obj *dupPtr) 00355 { 00356 dupPtr->internalRep.otherValuePtr = (VOID *) 00357 Tcl_GetEncoding(NULL, srcPtr->bytes); 00358 } 00359 00360 /* 00361 *---------------------------------------------------------------------- 00362 * 00363 * Tcl_GetEncodingSearchPath -- 00364 * 00365 * Keeps the per-thread copy of the encoding search path current with 00366 * changes to the global copy. 00367 * 00368 * Results: 00369 * Returns a "list" (Tcl_Obj *) that contains the encoding search path. 00370 * 00371 *---------------------------------------------------------------------- 00372 */ 00373 00374 Tcl_Obj * 00375 Tcl_GetEncodingSearchPath(void) 00376 { 00377 return TclGetProcessGlobalValue(&encodingSearchPath); 00378 } 00379 00380 /* 00381 *---------------------------------------------------------------------- 00382 * 00383 * Tcl_SetEncodingSearchPath -- 00384 * 00385 * Keeps the per-thread copy of the encoding search path current with 00386 * changes to the global copy. 00387 * 00388 *---------------------------------------------------------------------- 00389 */ 00390 00391 int 00392 Tcl_SetEncodingSearchPath( 00393 Tcl_Obj *searchPath) 00394 { 00395 int dummy; 00396 00397 if (TCL_ERROR == Tcl_ListObjLength(NULL, searchPath, &dummy)) { 00398 return TCL_ERROR; 00399 } 00400 TclSetProcessGlobalValue(&encodingSearchPath, searchPath, NULL); 00401 return TCL_OK; 00402 } 00403 00404 /* 00405 *---------------------------------------------------------------------- 00406 * 00407 * TclGetLibraryPath -- 00408 * 00409 * Keeps the per-thread copy of the library path current with changes to 00410 * the global copy. 00411 * 00412 * Results: 00413 * Returns a "list" (Tcl_Obj *) that contains the library path. 00414 * 00415 *---------------------------------------------------------------------- 00416 */ 00417 00418 Tcl_Obj * 00419 TclGetLibraryPath(void) 00420 { 00421 return TclGetProcessGlobalValue(&libraryPath); 00422 } 00423 00424 /* 00425 *---------------------------------------------------------------------- 00426 * 00427 * TclSetLibraryPath -- 00428 * 00429 * Keeps the per-thread copy of the library path current with changes to 00430 * the global copy. 00431 * 00432 * NOTE: this routine returns void, so there's no way to report the error 00433 * that searchPath is not a valid list. In that case, this routine will 00434 * silently do nothing. 00435 * 00436 *---------------------------------------------------------------------- 00437 */ 00438 00439 void 00440 TclSetLibraryPath( 00441 Tcl_Obj *path) 00442 { 00443 int dummy; 00444 00445 if (TCL_ERROR == Tcl_ListObjLength(NULL, path, &dummy)) { 00446 return; 00447 } 00448 TclSetProcessGlobalValue(&libraryPath, path, NULL); 00449 } 00450 00451 /* 00452 *--------------------------------------------------------------------------- 00453 * 00454 * FillEncodingFileMap -- 00455 * 00456 * Called to bring the encoding file map in sync with the current value 00457 * of the encoding search path. 00458 * 00459 * Scan the directories on the encoding search path, find the *.enc 00460 * files, and store the found pathnames in a map associated with the 00461 * encoding name. 00462 * 00463 * In particular, if $dir is on the encoding search path, and the file 00464 * $dir/foo.enc is found, then store a "foo" -> $dir entry in the map. 00465 * Later, any need for the "foo" encoding will quickly * be able to 00466 * construct the $dir/foo.enc pathname for reading the encoding data. 00467 * 00468 * Results: 00469 * None. 00470 * 00471 * Side effects: 00472 * Entries are added to the encoding file map. 00473 * 00474 *--------------------------------------------------------------------------- 00475 */ 00476 00477 static void 00478 FillEncodingFileMap(void) 00479 { 00480 int i, numDirs = 0; 00481 Tcl_Obj *map, *searchPath; 00482 00483 searchPath = Tcl_GetEncodingSearchPath(); 00484 Tcl_IncrRefCount(searchPath); 00485 Tcl_ListObjLength(NULL, searchPath, &numDirs); 00486 map = Tcl_NewDictObj(); 00487 Tcl_IncrRefCount(map); 00488 00489 for (i = numDirs-1; i >= 0; i--) { 00490 /* 00491 * Iterate backwards through the search path so as we overwrite 00492 * entries found, we favor files earlier on the search path. 00493 */ 00494 00495 int j, numFiles; 00496 Tcl_Obj *directory, *matchFileList = Tcl_NewObj(); 00497 Tcl_Obj **filev; 00498 Tcl_GlobTypeData readableFiles = { 00499 TCL_GLOB_TYPE_FILE, TCL_GLOB_PERM_R, NULL, NULL 00500 }; 00501 00502 Tcl_ListObjIndex(NULL, searchPath, i, &directory); 00503 Tcl_IncrRefCount(directory); 00504 Tcl_IncrRefCount(matchFileList); 00505 Tcl_FSMatchInDirectory(NULL, matchFileList, directory, "*.enc", 00506 &readableFiles); 00507 00508 Tcl_ListObjGetElements(NULL, matchFileList, &numFiles, &filev); 00509 for (j=0; j<numFiles; j++) { 00510 Tcl_Obj *encodingName, *file; 00511 00512 file = TclPathPart(NULL, filev[j], TCL_PATH_TAIL); 00513 encodingName = TclPathPart(NULL, file, TCL_PATH_ROOT); 00514 Tcl_DictObjPut(NULL, map, encodingName, directory); 00515 Tcl_DecrRefCount(file); 00516 Tcl_DecrRefCount(encodingName); 00517 } 00518 Tcl_DecrRefCount(matchFileList); 00519 Tcl_DecrRefCount(directory); 00520 } 00521 Tcl_DecrRefCount(searchPath); 00522 TclSetProcessGlobalValue(&encodingFileMap, map, NULL); 00523 Tcl_DecrRefCount(map); 00524 } 00525 00526 /* 00527 *--------------------------------------------------------------------------- 00528 * 00529 * TclInitEncodingSubsystem -- 00530 * 00531 * Initialize all resources used by this subsystem on a per-process 00532 * basis. 00533 * 00534 * Results: 00535 * None. 00536 * 00537 * Side effects: 00538 * Depends on the memory, object, and IO subsystems. 00539 * 00540 *--------------------------------------------------------------------------- 00541 */ 00542 00543 void 00544 TclInitEncodingSubsystem(void) 00545 { 00546 Tcl_EncodingType type; 00547 00548 if (encodingsInitialized) { 00549 return; 00550 } 00551 00552 Tcl_MutexLock(&encodingMutex); 00553 Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); 00554 Tcl_MutexUnlock(&encodingMutex); 00555 00556 /* 00557 * Create a few initial encodings. Note that the UTF-8 to UTF-8 00558 * translation is not a no-op, because it will turn a stream of improperly 00559 * formed UTF-8 into a properly formed stream. 00560 */ 00561 00562 type.encodingName = "identity"; 00563 type.toUtfProc = BinaryProc; 00564 type.fromUtfProc = BinaryProc; 00565 type.freeProc = NULL; 00566 type.nullSize = 1; 00567 type.clientData = NULL; 00568 00569 defaultEncoding = Tcl_CreateEncoding(&type); 00570 systemEncoding = Tcl_GetEncoding(NULL, type.encodingName); 00571 00572 type.encodingName = "utf-8"; 00573 type.toUtfProc = UtfExtToUtfIntProc; 00574 type.fromUtfProc = UtfIntToUtfExtProc; 00575 type.freeProc = NULL; 00576 type.nullSize = 1; 00577 type.clientData = NULL; 00578 Tcl_CreateEncoding(&type); 00579 00580 type.encodingName = "unicode"; 00581 type.toUtfProc = UnicodeToUtfProc; 00582 type.fromUtfProc = UtfToUnicodeProc; 00583 type.freeProc = NULL; 00584 type.nullSize = 2; 00585 type.clientData = NULL; 00586 Tcl_CreateEncoding(&type); 00587 00588 /* 00589 * Need the iso8859-1 encoding in order to process binary data, so force 00590 * it to always be embedded. Note that this encoding *must* be a proper 00591 * table encoding or some of the escape encodings crash! Hence the ugly 00592 * code to duplicate the structure of a table encoding here. 00593 */ 00594 00595 { 00596 TableEncodingData *dataPtr = (TableEncodingData *) 00597 ckalloc(sizeof(TableEncodingData)); 00598 unsigned size; 00599 unsigned short i; 00600 00601 memset(dataPtr, 0, sizeof(TableEncodingData)); 00602 dataPtr->fallback = '?'; 00603 00604 size = 256*(sizeof(unsigned short *) + sizeof(unsigned short)); 00605 dataPtr->toUnicode = (unsigned short **) ckalloc(size); 00606 memset(dataPtr->toUnicode, 0, size); 00607 dataPtr->fromUnicode = (unsigned short **) ckalloc(size); 00608 memset(dataPtr->fromUnicode, 0, size); 00609 00610 dataPtr->toUnicode[0] = (unsigned short *) (dataPtr->toUnicode + 256); 00611 dataPtr->fromUnicode[0] = (unsigned short *) 00612 (dataPtr->fromUnicode + 256); 00613 for (i=1 ; i<256 ; i++) { 00614 dataPtr->toUnicode[i] = emptyPage; 00615 dataPtr->fromUnicode[i] = emptyPage; 00616 } 00617 00618 for (i=0 ; i<256 ; i++) { 00619 dataPtr->toUnicode[0][i] = i; 00620 dataPtr->fromUnicode[0][i] = i; 00621 } 00622 00623 type.encodingName = "iso8859-1"; 00624 type.toUtfProc = Iso88591ToUtfProc; 00625 type.fromUtfProc = Iso88591FromUtfProc; 00626 type.freeProc = TableFreeProc; 00627 type.nullSize = 1; 00628 type.clientData = dataPtr; 00629 Tcl_CreateEncoding(&type); 00630 } 00631 00632 encodingsInitialized = 1; 00633 } 00634 00635 /* 00636 *---------------------------------------------------------------------- 00637 * 00638 * TclFinalizeEncodingSubsystem -- 00639 * 00640 * Release the state associated with the encoding subsystem. 00641 * 00642 * Results: 00643 * None. 00644 * 00645 * Side effects: 00646 * Frees all of the encodings. 00647 * 00648 *---------------------------------------------------------------------- 00649 */ 00650 00651 void 00652 TclFinalizeEncodingSubsystem(void) 00653 { 00654 Tcl_HashSearch search; 00655 Tcl_HashEntry *hPtr; 00656 00657 Tcl_MutexLock(&encodingMutex); 00658 encodingsInitialized = 0; 00659 FreeEncoding(systemEncoding); 00660 00661 hPtr = Tcl_FirstHashEntry(&encodingTable, &search); 00662 while (hPtr != NULL) { 00663 /* 00664 * Call FreeEncoding instead of doing it directly to handle refcounts 00665 * like escape encodings use. [Bug 524674] Make sure to call 00666 * Tcl_FirstHashEntry repeatedly so that all encodings are eventually 00667 * cleaned up. 00668 */ 00669 00670 FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr)); 00671 hPtr = Tcl_FirstHashEntry(&encodingTable, &search); 00672 } 00673 00674 Tcl_DeleteHashTable(&encodingTable); 00675 Tcl_MutexUnlock(&encodingMutex); 00676 } 00677 00678 /* 00679 *------------------------------------------------------------------------- 00680 * 00681 * Tcl_GetDefaultEncodingDir -- 00682 * 00683 * Legacy public interface to retrieve first directory in the encoding 00684 * searchPath. 00685 * 00686 * Results: 00687 * The directory pathname, as a string, or NULL for an empty encoding 00688 * search path. 00689 * 00690 * Side effects: 00691 * None. 00692 * 00693 *------------------------------------------------------------------------- 00694 */ 00695 00696 CONST char * 00697 Tcl_GetDefaultEncodingDir(void) 00698 { 00699 int numDirs; 00700 Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath(); 00701 00702 Tcl_ListObjLength(NULL, searchPath, &numDirs); 00703 if (numDirs == 0) { 00704 return NULL; 00705 } 00706 Tcl_ListObjIndex(NULL, searchPath, 0, &first); 00707 00708 return Tcl_GetString(first); 00709 } 00710 00711 /* 00712 *------------------------------------------------------------------------- 00713 * 00714 * Tcl_SetDefaultEncodingDir -- 00715 * 00716 * Legacy public interface to set the first directory in the encoding 00717 * search path. 00718 * 00719 * Results: 00720 * None. 00721 * 00722 * Side effects: 00723 * Modifies the encoding search path. 00724 * 00725 *------------------------------------------------------------------------- 00726 */ 00727 00728 void 00729 Tcl_SetDefaultEncodingDir( 00730 CONST char *path) 00731 { 00732 Tcl_Obj *searchPath = Tcl_GetEncodingSearchPath(); 00733 Tcl_Obj *directory = Tcl_NewStringObj(path, -1); 00734 00735 searchPath = Tcl_DuplicateObj(searchPath); 00736 Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory); 00737 Tcl_SetEncodingSearchPath(searchPath); 00738 } 00739 00740 /* 00741 *------------------------------------------------------------------------- 00742 * 00743 * Tcl_GetEncoding -- 00744 * 00745 * Given the name of a encoding, find the corresponding Tcl_Encoding 00746 * token. If the encoding did not already exist, Tcl attempts to 00747 * dynamically load an encoding by that name. 00748 * 00749 * Results: 00750 * Returns a token that represents the encoding. If the name didn't refer 00751 * to any known or loadable encoding, NULL is returned. If NULL was 00752 * returned, an error message is left in interp's result object, unless 00753 * interp was NULL. 00754 * 00755 * Side effects: 00756 * The new encoding type is entered into a table visible to all 00757 * interpreters, keyed off the encoding's name. For each call to this 00758 * function, there should eventually be a call to Tcl_FreeEncoding, so 00759 * that the database can be cleaned up when encodings aren't needed 00760 * anymore. 00761 * 00762 *------------------------------------------------------------------------- 00763 */ 00764 00765 Tcl_Encoding 00766 Tcl_GetEncoding( 00767 Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ 00768 CONST char *name) /* The name of the desired encoding. */ 00769 { 00770 Tcl_HashEntry *hPtr; 00771 Encoding *encodingPtr; 00772 00773 Tcl_MutexLock(&encodingMutex); 00774 if (name == NULL) { 00775 encodingPtr = (Encoding *) systemEncoding; 00776 encodingPtr->refCount++; 00777 Tcl_MutexUnlock(&encodingMutex); 00778 return systemEncoding; 00779 } 00780 00781 hPtr = Tcl_FindHashEntry(&encodingTable, name); 00782 if (hPtr != NULL) { 00783 encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); 00784 encodingPtr->refCount++; 00785 Tcl_MutexUnlock(&encodingMutex); 00786 return (Tcl_Encoding) encodingPtr; 00787 } 00788 Tcl_MutexUnlock(&encodingMutex); 00789 00790 return LoadEncodingFile(interp, name); 00791 } 00792 00793 /* 00794 *--------------------------------------------------------------------------- 00795 * 00796 * Tcl_FreeEncoding -- 00797 * 00798 * This function is called to release an encoding allocated by 00799 * Tcl_CreateEncoding() or Tcl_GetEncoding(). 00800 * 00801 * Results: 00802 * None. 00803 * 00804 * Side effects: 00805 * The reference count associated with the encoding is decremented and 00806 * the encoding may be deleted if nothing is using it anymore. 00807 * 00808 *--------------------------------------------------------------------------- 00809 */ 00810 00811 void 00812 Tcl_FreeEncoding( 00813 Tcl_Encoding encoding) 00814 { 00815 Tcl_MutexLock(&encodingMutex); 00816 FreeEncoding(encoding); 00817 Tcl_MutexUnlock(&encodingMutex); 00818 } 00819 00820 /* 00821 *---------------------------------------------------------------------- 00822 * 00823 * FreeEncoding -- 00824 * 00825 * This function is called to release an encoding by functions that 00826 * already have the encodingMutex. 00827 * 00828 * Results: 00829 * None. 00830 * 00831 * Side effects: 00832 * The reference count associated with the encoding is decremented and 00833 * the encoding may be deleted if nothing is using it anymore. 00834 * 00835 *---------------------------------------------------------------------- 00836 */ 00837 00838 static void 00839 FreeEncoding( 00840 Tcl_Encoding encoding) 00841 { 00842 Encoding *encodingPtr; 00843 00844 encodingPtr = (Encoding *) encoding; 00845 if (encodingPtr == NULL) { 00846 return; 00847 } 00848 encodingPtr->refCount--; 00849 if (encodingPtr->refCount == 0) { 00850 if (encodingPtr->freeProc != NULL) { 00851 (*encodingPtr->freeProc)(encodingPtr->clientData); 00852 } 00853 if (encodingPtr->hPtr != NULL) { 00854 Tcl_DeleteHashEntry(encodingPtr->hPtr); 00855 } 00856 ckfree((char *) encodingPtr->name); 00857 ckfree((char *) encodingPtr); 00858 } 00859 } 00860 00861 /* 00862 *------------------------------------------------------------------------- 00863 * 00864 * Tcl_GetEncodingName -- 00865 * 00866 * Given an encoding, return the name that was used to constuct the 00867 * encoding. 00868 * 00869 * Results: 00870 * The name of the encoding. 00871 * 00872 * Side effects: 00873 * None. 00874 * 00875 *--------------------------------------------------------------------------- 00876 */ 00877 00878 CONST char * 00879 Tcl_GetEncodingName( 00880 Tcl_Encoding encoding) /* The encoding whose name to fetch. */ 00881 { 00882 if (encoding == NULL) { 00883 encoding = systemEncoding; 00884 } 00885 00886 return ((Encoding *) encoding)->name; 00887 } 00888 00889 /* 00890 *------------------------------------------------------------------------- 00891 * 00892 * Tcl_GetEncodingNames -- 00893 * 00894 * Get the list of all known encodings, including the ones stored as 00895 * files on disk in the encoding path. 00896 * 00897 * Results: 00898 * Modifies interp's result object to hold a list of all the available 00899 * encodings. 00900 * 00901 * Side effects: 00902 * None. 00903 * 00904 *------------------------------------------------------------------------- 00905 */ 00906 00907 void 00908 Tcl_GetEncodingNames( 00909 Tcl_Interp *interp) /* Interp to hold result. */ 00910 { 00911 Tcl_HashTable table; 00912 Tcl_HashSearch search; 00913 Tcl_HashEntry *hPtr; 00914 Tcl_Obj *map, *name, *result = Tcl_NewObj(); 00915 Tcl_DictSearch mapSearch; 00916 int dummy, done = 0; 00917 00918 Tcl_InitObjHashTable(&table); 00919 00920 /* 00921 * Copy encoding names from loaded encoding table to table. 00922 */ 00923 00924 Tcl_MutexLock(&encodingMutex); 00925 for (hPtr = Tcl_FirstHashEntry(&encodingTable, &search); hPtr != NULL; 00926 hPtr = Tcl_NextHashEntry(&search)) { 00927 Encoding *encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); 00928 Tcl_CreateHashEntry(&table, 00929 (char *) Tcl_NewStringObj(encodingPtr->name, -1), &dummy); 00930 } 00931 Tcl_MutexUnlock(&encodingMutex); 00932 00933 FillEncodingFileMap(); 00934 map = TclGetProcessGlobalValue(&encodingFileMap); 00935 00936 /* 00937 * Copy encoding names from encoding file map to table. 00938 */ 00939 00940 Tcl_DictObjFirst(NULL, map, &mapSearch, &name, NULL, &done); 00941 for (; !done; Tcl_DictObjNext(&mapSearch, &name, NULL, &done)) { 00942 Tcl_CreateHashEntry(&table, (char *) name, &dummy); 00943 } 00944 00945 /* 00946 * Pull all encoding names from table into the result list. 00947 */ 00948 00949 for (hPtr = Tcl_FirstHashEntry(&table, &search); hPtr != NULL; 00950 hPtr = Tcl_NextHashEntry(&search)) { 00951 Tcl_ListObjAppendElement(NULL, result, 00952 (Tcl_Obj *) Tcl_GetHashKey(&table, hPtr)); 00953 } 00954 Tcl_SetObjResult(interp, result); 00955 Tcl_DeleteHashTable(&table); 00956 } 00957 00958 /* 00959 *------------------------------------------------------------------------ 00960 * 00961 * Tcl_SetSystemEncoding -- 00962 * 00963 * Sets the default encoding that should be used whenever the user passes 00964 * a NULL value in to one of the conversion routines. If the supplied 00965 * name is NULL, the system encoding is reset to the default system 00966 * encoding. 00967 * 00968 * Results: 00969 * The return value is TCL_OK if the system encoding was successfully set 00970 * to the encoding specified by name, TCL_ERROR otherwise. If TCL_ERROR 00971 * is returned, an error message is left in interp's result object, 00972 * unless interp was NULL. 00973 * 00974 * Side effects: 00975 * The reference count of the new system encoding is incremented. The 00976 * reference count of the old system encoding is decremented and it may 00977 * be freed. 00978 * 00979 *------------------------------------------------------------------------ 00980 */ 00981 00982 int 00983 Tcl_SetSystemEncoding( 00984 Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ 00985 CONST char *name) /* The name of the desired encoding, or NULL 00986 * to reset to default encoding. */ 00987 { 00988 Tcl_Encoding encoding; 00989 Encoding *encodingPtr; 00990 00991 if (name == NULL) { 00992 Tcl_MutexLock(&encodingMutex); 00993 encoding = defaultEncoding; 00994 encodingPtr = (Encoding *) encoding; 00995 encodingPtr->refCount++; 00996 Tcl_MutexUnlock(&encodingMutex); 00997 } else { 00998 encoding = Tcl_GetEncoding(interp, name); 00999 if (encoding == NULL) { 01000 return TCL_ERROR; 01001 } 01002 } 01003 01004 Tcl_MutexLock(&encodingMutex); 01005 FreeEncoding(systemEncoding); 01006 systemEncoding = encoding; 01007 Tcl_MutexUnlock(&encodingMutex); 01008 01009 return TCL_OK; 01010 } 01011 01012 /* 01013 *--------------------------------------------------------------------------- 01014 * 01015 * Tcl_CreateEncoding -- 01016 * 01017 * This function is called to define a new encoding and the functions 01018 * that are used to convert between the specified encoding and Unicode. 01019 * 01020 * Results: 01021 * Returns a token that represents the encoding. If an encoding with the 01022 * same name already existed, the old encoding token remains valid and 01023 * continues to behave as it used to, and will eventually be garbage 01024 * collected when the last reference to it goes away. Any subsequent 01025 * calls to Tcl_GetEncoding with the specified name will retrieve the 01026 * most recent encoding token. 01027 * 01028 * Side effects: 01029 * The new encoding type is entered into a table visible to all 01030 * interpreters, keyed off the encoding's name. For each call to this 01031 * function, there should eventually be a call to Tcl_FreeEncoding, so 01032 * that the database can be cleaned up when encodings aren't needed 01033 * anymore. 01034 * 01035 *--------------------------------------------------------------------------- 01036 */ 01037 01038 Tcl_Encoding 01039 Tcl_CreateEncoding( 01040 const Tcl_EncodingType *typePtr) 01041 /* The encoding type. */ 01042 { 01043 Tcl_HashEntry *hPtr; 01044 int isNew; 01045 Encoding *encodingPtr; 01046 char *name; 01047 01048 Tcl_MutexLock(&encodingMutex); 01049 hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &isNew); 01050 if (isNew == 0) { 01051 /* 01052 * Remove old encoding from hash table, but don't delete it until last 01053 * reference goes away. 01054 */ 01055 01056 encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); 01057 encodingPtr->hPtr = NULL; 01058 } 01059 01060 name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1); 01061 01062 encodingPtr = (Encoding *) ckalloc(sizeof(Encoding)); 01063 encodingPtr->name = strcpy(name, typePtr->encodingName); 01064 encodingPtr->toUtfProc = typePtr->toUtfProc; 01065 encodingPtr->fromUtfProc = typePtr->fromUtfProc; 01066 encodingPtr->freeProc = typePtr->freeProc; 01067 encodingPtr->nullSize = typePtr->nullSize; 01068 encodingPtr->clientData = typePtr->clientData; 01069 if (typePtr->nullSize == 1) { 01070 encodingPtr->lengthProc = (LengthProc *) strlen; 01071 } else { 01072 encodingPtr->lengthProc = (LengthProc *) unilen; 01073 } 01074 encodingPtr->refCount = 1; 01075 encodingPtr->hPtr = hPtr; 01076 Tcl_SetHashValue(hPtr, encodingPtr); 01077 01078 Tcl_MutexUnlock(&encodingMutex); 01079 01080 return (Tcl_Encoding) encodingPtr; 01081 } 01082 01083 /* 01084 *------------------------------------------------------------------------- 01085 * 01086 * Tcl_ExternalToUtfDString -- 01087 * 01088 * Convert a source buffer from the specified encoding into UTF-8. If any 01089 * of the bytes in the source buffer are invalid or cannot be represented 01090 * in the target encoding, a default fallback character will be 01091 * substituted. 01092 * 01093 * Results: 01094 * The converted bytes are stored in the DString, which is then NULL 01095 * terminated. The return value is a pointer to the value stored in the 01096 * DString. 01097 * 01098 * Side effects: 01099 * None. 01100 * 01101 *------------------------------------------------------------------------- 01102 */ 01103 01104 char * 01105 Tcl_ExternalToUtfDString( 01106 Tcl_Encoding encoding, /* The encoding for the source string, or NULL 01107 * for the default system encoding. */ 01108 CONST char *src, /* Source string in specified encoding. */ 01109 int srcLen, /* Source string length in bytes, or < 0 for 01110 * encoding-specific string length. */ 01111 Tcl_DString *dstPtr) /* Uninitialized or free DString in which the 01112 * converted string is stored. */ 01113 { 01114 char *dst; 01115 Tcl_EncodingState state; 01116 Encoding *encodingPtr; 01117 int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; 01118 01119 Tcl_DStringInit(dstPtr); 01120 dst = Tcl_DStringValue(dstPtr); 01121 dstLen = dstPtr->spaceAvl - 1; 01122 01123 if (encoding == NULL) { 01124 encoding = systemEncoding; 01125 } 01126 encodingPtr = (Encoding *) encoding; 01127 01128 if (src == NULL) { 01129 srcLen = 0; 01130 } else if (srcLen < 0) { 01131 srcLen = (*encodingPtr->lengthProc)(src); 01132 } 01133 01134 flags = TCL_ENCODING_START | TCL_ENCODING_END; 01135 01136 while (1) { 01137 result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, 01138 srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, 01139 &dstChars); 01140 soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); 01141 01142 if (result != TCL_CONVERT_NOSPACE) { 01143 Tcl_DStringSetLength(dstPtr, soFar); 01144 return Tcl_DStringValue(dstPtr); 01145 } 01146 01147 flags &= ~TCL_ENCODING_START; 01148 src += srcRead; 01149 srcLen -= srcRead; 01150 if (Tcl_DStringLength(dstPtr) == 0) { 01151 Tcl_DStringSetLength(dstPtr, dstLen); 01152 } 01153 Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); 01154 dst = Tcl_DStringValue(dstPtr) + soFar; 01155 dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; 01156 } 01157 } 01158 01159 /* 01160 *------------------------------------------------------------------------- 01161 * 01162 * Tcl_ExternalToUtf -- 01163 * 01164 * Convert a source buffer from the specified encoding into UTF-8. 01165 * 01166 * Results: 01167 * The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE, 01168 * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, as 01169 * documented in tcl.h. 01170 * 01171 * Side effects: 01172 * The converted bytes are stored in the output buffer. 01173 * 01174 *------------------------------------------------------------------------- 01175 */ 01176 01177 int 01178 Tcl_ExternalToUtf( 01179 Tcl_Interp *interp, /* Interp for error return, if not NULL. */ 01180 Tcl_Encoding encoding, /* The encoding for the source string, or NULL 01181 * for the default system encoding. */ 01182 CONST char *src, /* Source string in specified encoding. */ 01183 int srcLen, /* Source string length in bytes, or < 0 for 01184 * encoding-specific string length. */ 01185 int flags, /* Conversion control flags. */ 01186 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 01187 * information used during a piecewise 01188 * conversion. Contents of statePtr are 01189 * initialized and/or reset by conversion 01190 * routine under control of flags argument. */ 01191 char *dst, /* Output buffer in which converted string is 01192 * stored. */ 01193 int dstLen, /* The maximum length of output buffer in 01194 * bytes. */ 01195 int *srcReadPtr, /* Filled with the number of bytes from the 01196 * source string that were converted. This may 01197 * be less than the original source length if 01198 * there was a problem converting some source 01199 * characters. */ 01200 int *dstWrotePtr, /* Filled with the number of bytes that were 01201 * stored in the output buffer as a result of 01202 * the conversion. */ 01203 int *dstCharsPtr) /* Filled with the number of characters that 01204 * correspond to the bytes stored in the 01205 * output buffer. */ 01206 { 01207 Encoding *encodingPtr; 01208 int result, srcRead, dstWrote, dstChars; 01209 Tcl_EncodingState state; 01210 01211 if (encoding == NULL) { 01212 encoding = systemEncoding; 01213 } 01214 encodingPtr = (Encoding *) encoding; 01215 01216 if (src == NULL) { 01217 srcLen = 0; 01218 } else if (srcLen < 0) { 01219 srcLen = (*encodingPtr->lengthProc)(src); 01220 } 01221 if (statePtr == NULL) { 01222 flags |= TCL_ENCODING_START | TCL_ENCODING_END; 01223 statePtr = &state; 01224 } 01225 if (srcReadPtr == NULL) { 01226 srcReadPtr = &srcRead; 01227 } 01228 if (dstWrotePtr == NULL) { 01229 dstWrotePtr = &dstWrote; 01230 } 01231 if (dstCharsPtr == NULL) { 01232 dstCharsPtr = &dstChars; 01233 } 01234 01235 /* 01236 * If there are any null characters in the middle of the buffer, they will 01237 * converted to the UTF-8 null character (\xC080). To get the actual \0 at 01238 * the end of the destination buffer, we need to append it manually. 01239 */ 01240 01241 dstLen--; 01242 result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, srcLen, 01243 flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, 01244 dstCharsPtr); 01245 dst[*dstWrotePtr] = '\0'; 01246 01247 return result; 01248 } 01249 01250 /* 01251 *------------------------------------------------------------------------- 01252 * 01253 * Tcl_UtfToExternalDString -- 01254 * 01255 * Convert a source buffer from UTF-8 into the specified encoding. If any 01256 * of the bytes in the source buffer are invalid or cannot be represented 01257 * in the target encoding, a default fallback character will be 01258 * substituted. 01259 * 01260 * Results: 01261 * The converted bytes are stored in the DString, which is then NULL 01262 * terminated in an encoding-specific manner. The return value is a 01263 * pointer to the value stored in the DString. 01264 * 01265 * Side effects: 01266 * None. 01267 * 01268 *------------------------------------------------------------------------- 01269 */ 01270 01271 char * 01272 Tcl_UtfToExternalDString( 01273 Tcl_Encoding encoding, /* The encoding for the converted string, or 01274 * NULL for the default system encoding. */ 01275 CONST char *src, /* Source string in UTF-8. */ 01276 int srcLen, /* Source string length in bytes, or < 0 for 01277 * strlen(). */ 01278 Tcl_DString *dstPtr) /* Uninitialized or free DString in which the 01279 * converted string is stored. */ 01280 { 01281 char *dst; 01282 Tcl_EncodingState state; 01283 Encoding *encodingPtr; 01284 int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; 01285 01286 Tcl_DStringInit(dstPtr); 01287 dst = Tcl_DStringValue(dstPtr); 01288 dstLen = dstPtr->spaceAvl - 1; 01289 01290 if (encoding == NULL) { 01291 encoding = systemEncoding; 01292 } 01293 encodingPtr = (Encoding *) encoding; 01294 01295 if (src == NULL) { 01296 srcLen = 0; 01297 } else if (srcLen < 0) { 01298 srcLen = strlen(src); 01299 } 01300 flags = TCL_ENCODING_START | TCL_ENCODING_END; 01301 while (1) { 01302 result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, 01303 srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, 01304 &dstChars); 01305 soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); 01306 01307 if (result != TCL_CONVERT_NOSPACE) { 01308 if (encodingPtr->nullSize == 2) { 01309 Tcl_DStringSetLength(dstPtr, soFar + 1); 01310 } 01311 Tcl_DStringSetLength(dstPtr, soFar); 01312 return Tcl_DStringValue(dstPtr); 01313 } 01314 01315 flags &= ~TCL_ENCODING_START; 01316 src += srcRead; 01317 srcLen -= srcRead; 01318 if (Tcl_DStringLength(dstPtr) == 0) { 01319 Tcl_DStringSetLength(dstPtr, dstLen); 01320 } 01321 Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); 01322 dst = Tcl_DStringValue(dstPtr) + soFar; 01323 dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; 01324 } 01325 } 01326 01327 /* 01328 *------------------------------------------------------------------------- 01329 * 01330 * Tcl_UtfToExternal -- 01331 * 01332 * Convert a buffer from UTF-8 into the specified encoding. 01333 * 01334 * Results: 01335 * The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE, 01336 * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, as 01337 * documented in tcl.h. 01338 * 01339 * Side effects: 01340 * The converted bytes are stored in the output buffer. 01341 * 01342 *------------------------------------------------------------------------- 01343 */ 01344 01345 int 01346 Tcl_UtfToExternal( 01347 Tcl_Interp *interp, /* Interp for error return, if not NULL. */ 01348 Tcl_Encoding encoding, /* The encoding for the converted string, or 01349 * NULL for the default system encoding. */ 01350 CONST char *src, /* Source string in UTF-8. */ 01351 int srcLen, /* Source string length in bytes, or < 0 for 01352 * strlen(). */ 01353 int flags, /* Conversion control flags. */ 01354 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 01355 * information used during a piecewise 01356 * conversion. Contents of statePtr are 01357 * initialized and/or reset by conversion 01358 * routine under control of flags argument. */ 01359 char *dst, /* Output buffer in which converted string 01360 * is stored. */ 01361 int dstLen, /* The maximum length of output buffer in 01362 * bytes. */ 01363 int *srcReadPtr, /* Filled with the number of bytes from the 01364 * source string that were converted. This may 01365 * be less than the original source length if 01366 * there was a problem converting some source 01367 * characters. */ 01368 int *dstWrotePtr, /* Filled with the number of bytes that were 01369 * stored in the output buffer as a result of 01370 * the conversion. */ 01371 int *dstCharsPtr) /* Filled with the number of characters that 01372 * correspond to the bytes stored in the 01373 * output buffer. */ 01374 { 01375 Encoding *encodingPtr; 01376 int result, srcRead, dstWrote, dstChars; 01377 Tcl_EncodingState state; 01378 01379 if (encoding == NULL) { 01380 encoding = systemEncoding; 01381 } 01382 encodingPtr = (Encoding *) encoding; 01383 01384 if (src == NULL) { 01385 srcLen = 0; 01386 } else if (srcLen < 0) { 01387 srcLen = strlen(src); 01388 } 01389 if (statePtr == NULL) { 01390 flags |= TCL_ENCODING_START | TCL_ENCODING_END; 01391 statePtr = &state; 01392 } 01393 if (srcReadPtr == NULL) { 01394 srcReadPtr = &srcRead; 01395 } 01396 if (dstWrotePtr == NULL) { 01397 dstWrotePtr = &dstWrote; 01398 } 01399 if (dstCharsPtr == NULL) { 01400 dstCharsPtr = &dstChars; 01401 } 01402 01403 dstLen -= encodingPtr->nullSize; 01404 result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, srcLen, 01405 flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, 01406 dstCharsPtr); 01407 if (encodingPtr->nullSize == 2) { 01408 dst[*dstWrotePtr + 1] = '\0'; 01409 } 01410 dst[*dstWrotePtr] = '\0'; 01411 01412 return result; 01413 } 01414 01415 /* 01416 *--------------------------------------------------------------------------- 01417 * 01418 * Tcl_FindExecutable -- 01419 * 01420 * This function computes the absolute path name of the current 01421 * application, given its argv[0] value. 01422 * 01423 * Results: 01424 * None. 01425 * 01426 * Side effects: 01427 * The absolute pathname for the application is computed and stored to be 01428 * returned later be [info nameofexecutable]. 01429 * 01430 *--------------------------------------------------------------------------- 01431 */ 01432 01433 void 01434 Tcl_FindExecutable( 01435 CONST char *argv0) /* The value of the application's argv[0] 01436 * (native). */ 01437 { 01438 TclInitSubsystems(); 01439 TclpSetInitialEncodings(); 01440 TclpFindExecutable(argv0); 01441 } 01442 01443 /* 01444 *--------------------------------------------------------------------------- 01445 * 01446 * OpenEncodingFileChannel -- 01447 * 01448 * Open the file believed to hold data for the encoding, "name". 01449 * 01450 * Results: 01451 * Returns the readable Tcl_Channel from opening the file, or NULL if the 01452 * file could not be successfully opened. If NULL was returned, an error 01453 * message is left in interp's result object, unless interp was NULL. 01454 * 01455 * Side effects: 01456 * Channel may be opened. Information about the filesystem may be cached 01457 * to speed later calls. 01458 * 01459 *--------------------------------------------------------------------------- 01460 */ 01461 01462 static Tcl_Channel 01463 OpenEncodingFileChannel( 01464 Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ 01465 CONST char *name) /* The name of the encoding file on disk and 01466 * also the name for new encoding. */ 01467 { 01468 Tcl_Obj *nameObj = Tcl_NewStringObj(name, -1); 01469 Tcl_Obj *fileNameObj = Tcl_DuplicateObj(nameObj); 01470 Tcl_Obj *searchPath = Tcl_DuplicateObj(Tcl_GetEncodingSearchPath()); 01471 Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap); 01472 Tcl_Obj **dir, *path, *directory = NULL; 01473 Tcl_Channel chan = NULL; 01474 int i, numDirs; 01475 01476 Tcl_ListObjGetElements(NULL, searchPath, &numDirs, &dir); 01477 Tcl_IncrRefCount(nameObj); 01478 Tcl_AppendToObj(fileNameObj, ".enc", -1); 01479 Tcl_IncrRefCount(fileNameObj); 01480 Tcl_DictObjGet(NULL, map, nameObj, &directory); 01481 01482 /* 01483 * Check that any cached directory is still on the encoding search path. 01484 */ 01485 01486 if (NULL != directory) { 01487 int verified = 0; 01488 01489 for (i=0; i<numDirs && !verified; i++) { 01490 if (dir[i] == directory) { 01491 verified = 1; 01492 } 01493 } 01494 if (!verified) { 01495 CONST char *dirString = Tcl_GetString(directory); 01496 for (i=0; i<numDirs && !verified; i++) { 01497 if (strcmp(dirString, Tcl_GetString(dir[i])) == 0) { 01498 verified = 1; 01499 } 01500 } 01501 } 01502 if (!verified) { 01503 /* 01504 * Directory no longer on the search path. Remove from cache. 01505 */ 01506 01507 map = Tcl_DuplicateObj(map); 01508 Tcl_DictObjRemove(NULL, map, nameObj); 01509 TclSetProcessGlobalValue(&encodingFileMap, map, NULL); 01510 directory = NULL; 01511 } 01512 } 01513 01514 if (NULL != directory) { 01515 /* 01516 * Got a directory from the cache. Try to use it first. 01517 */ 01518 01519 Tcl_IncrRefCount(directory); 01520 path = Tcl_FSJoinToPath(directory, 1, &fileNameObj); 01521 Tcl_IncrRefCount(path); 01522 Tcl_DecrRefCount(directory); 01523 chan = Tcl_FSOpenFileChannel(NULL, path, "r", 0); 01524 Tcl_DecrRefCount(path); 01525 } 01526 01527 /* 01528 * Scan the search path until we find it. 01529 */ 01530 01531 for (i=0; i<numDirs && (chan == NULL); i++) { 01532 path = Tcl_FSJoinToPath(dir[i], 1, &fileNameObj); 01533 Tcl_IncrRefCount(path); 01534 chan = Tcl_FSOpenFileChannel(NULL, path, "r", 0); 01535 Tcl_DecrRefCount(path); 01536 if (chan != NULL) { 01537 /* 01538 * Save directory in the cache. 01539 */ 01540 01541 map = Tcl_DuplicateObj(TclGetProcessGlobalValue(&encodingFileMap)); 01542 Tcl_DictObjPut(NULL, map, nameObj, dir[i]); 01543 TclSetProcessGlobalValue(&encodingFileMap, map, NULL); 01544 } 01545 } 01546 01547 if ((NULL == chan) && (interp != NULL)) { 01548 Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL); 01549 Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ENCODING", name, NULL); 01550 } 01551 Tcl_DecrRefCount(fileNameObj); 01552 Tcl_DecrRefCount(nameObj); 01553 Tcl_DecrRefCount(searchPath); 01554 01555 return chan; 01556 } 01557 01558 /* 01559 *--------------------------------------------------------------------------- 01560 * 01561 * LoadEncodingFile -- 01562 * 01563 * Read a file that describes an encoding and create a new Encoding from 01564 * the data. 01565 * 01566 * Results: 01567 * The return value is the newly loaded Encoding, or NULL if the file 01568 * didn't exist of was in the incorrect format. If NULL was returned, an 01569 * error message is left in interp's result object, unless interp was 01570 * NULL. 01571 * 01572 * Side effects: 01573 * File read from disk. 01574 * 01575 *--------------------------------------------------------------------------- 01576 */ 01577 01578 static Tcl_Encoding 01579 LoadEncodingFile( 01580 Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ 01581 CONST char *name) /* The name of the encoding file on disk and 01582 * also the name for new encoding. */ 01583 { 01584 Tcl_Channel chan = NULL; 01585 Tcl_Encoding encoding = NULL; 01586 int ch; 01587 01588 chan = OpenEncodingFileChannel(interp, name); 01589 if (chan == NULL) { 01590 return NULL; 01591 } 01592 01593 Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8"); 01594 01595 while (1) { 01596 Tcl_DString ds; 01597 01598 Tcl_DStringInit(&ds); 01599 Tcl_Gets(chan, &ds); 01600 ch = Tcl_DStringValue(&ds)[0]; 01601 Tcl_DStringFree(&ds); 01602 if (ch != '#') { 01603 break; 01604 } 01605 } 01606 01607 switch (ch) { 01608 case 'S': 01609 encoding = LoadTableEncoding(name, ENCODING_SINGLEBYTE, chan); 01610 break; 01611 case 'D': 01612 encoding = LoadTableEncoding(name, ENCODING_DOUBLEBYTE, chan); 01613 break; 01614 case 'M': 01615 encoding = LoadTableEncoding(name, ENCODING_MULTIBYTE, chan); 01616 break; 01617 case 'E': 01618 encoding = LoadEscapeEncoding(name, chan); 01619 break; 01620 } 01621 if ((encoding == NULL) && (interp != NULL)) { 01622 Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL); 01623 } 01624 Tcl_Close(NULL, chan); 01625 01626 return encoding; 01627 } 01628 01629 /* 01630 *------------------------------------------------------------------------- 01631 * 01632 * LoadTableEncoding -- 01633 * 01634 * Helper function for LoadEncodingTable(). Loads a table to that 01635 * converts between Unicode and some other encoding and creates an 01636 * encoding (using a TableEncoding structure) from that information. 01637 * 01638 * File contains binary data, but begins with a marker to indicate 01639 * byte-ordering, so that same binary file can be read on either endian 01640 * platforms. 01641 * 01642 * Results: 01643 * The return value is the new encoding, or NULL if the encoding could 01644 * not be created (because the file contained invalid data). 01645 * 01646 * Side effects: 01647 * None. 01648 * 01649 *------------------------------------------------------------------------- 01650 */ 01651 01652 static Tcl_Encoding 01653 LoadTableEncoding( 01654 CONST char *name, /* Name for new encoding. */ 01655 int type, /* Type of encoding (ENCODING_?????). */ 01656 Tcl_Channel chan) /* File containing new encoding. */ 01657 { 01658 Tcl_DString lineString; 01659 Tcl_Obj *objPtr; 01660 char *line; 01661 int i, hi, lo, numPages, symbol, fallback; 01662 unsigned char used[256]; 01663 unsigned int size; 01664 TableEncodingData *dataPtr; 01665 unsigned short *pageMemPtr; 01666 Tcl_EncodingType encType; 01667 01668 /* 01669 * Speed over memory. Use a full 256 character table to decode hex 01670 * sequences in the encoding files. 01671 */ 01672 01673 static char staticHex[] = { 01674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 ... 15 */ 01675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 ... 31 */ 01676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32 ... 47 */ 01677 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 48 ... 63 */ 01678 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 64 ... 79 */ 01679 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 ... 95 */ 01680 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 96 ... 111 */ 01681 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 112 ... 127 */ 01682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128 ... 143 */ 01683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144 ... 159 */ 01684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 160 ... 175 */ 01685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 176 ... 191 */ 01686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 192 ... 207 */ 01687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 208 ... 223 */ 01688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 224 ... 239 */ 01689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 240 ... 255 */ 01690 }; 01691 01692 Tcl_DStringInit(&lineString); 01693 Tcl_Gets(chan, &lineString); 01694 line = Tcl_DStringValue(&lineString); 01695 01696 fallback = (int) strtol(line, &line, 16); 01697 symbol = (int) strtol(line, &line, 10); 01698 numPages = (int) strtol(line, &line, 10); 01699 Tcl_DStringFree(&lineString); 01700 01701 if (numPages < 0) { 01702 numPages = 0; 01703 } else if (numPages > 256) { 01704 numPages = 256; 01705 } 01706 01707 memset(used, 0, sizeof(used)); 01708 01709 #undef PAGESIZE 01710 #define PAGESIZE (256 * sizeof(unsigned short)) 01711 01712 dataPtr = (TableEncodingData *) ckalloc(sizeof(TableEncodingData)); 01713 memset(dataPtr, 0, sizeof(TableEncodingData)); 01714 01715 dataPtr->fallback = fallback; 01716 01717 /* 01718 * Read the table that maps characters to Unicode. Performs a single 01719 * malloc to get the memory for the array and all the pages needed by the 01720 * array. 01721 */ 01722 01723 size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; 01724 dataPtr->toUnicode = (unsigned short **) ckalloc(size); 01725 memset(dataPtr->toUnicode, 0, size); 01726 pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256); 01727 01728 TclNewObj(objPtr); 01729 Tcl_IncrRefCount(objPtr); 01730 for (i = 0; i < numPages; i++) { 01731 int ch; 01732 char *p; 01733 01734 Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0); 01735 p = Tcl_GetString(objPtr); 01736 hi = (staticHex[UCHAR(p[0])] << 4) + staticHex[UCHAR(p[1])]; 01737 dataPtr->toUnicode[hi] = pageMemPtr; 01738 p += 2; 01739 for (lo = 0; lo < 256; lo++) { 01740 if ((lo & 0x0f) == 0) { 01741 p++; 01742 } 01743 ch = (staticHex[UCHAR(p[0])] << 12) + (staticHex[UCHAR(p[1])] << 8) 01744 + (staticHex[UCHAR(p[2])] << 4) + staticHex[UCHAR(p[3])]; 01745 if (ch != 0) { 01746 used[ch >> 8] = 1; 01747 } 01748 *pageMemPtr = (unsigned short) ch; 01749 pageMemPtr++; 01750 p += 4; 01751 } 01752 } 01753 TclDecrRefCount(objPtr); 01754 01755 if (type == ENCODING_DOUBLEBYTE) { 01756 memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes)); 01757 } else { 01758 for (hi = 1; hi < 256; hi++) { 01759 if (dataPtr->toUnicode[hi] != NULL) { 01760 dataPtr->prefixBytes[hi] = 1; 01761 } 01762 } 01763 } 01764 01765 /* 01766 * Invert toUnicode array to produce the fromUnicode array. Performs a 01767 * single malloc to get the memory for the array and all the pages needed 01768 * by the array. While reading in the toUnicode array, we remembered what 01769 * pages that would be needed for the fromUnicode array. 01770 */ 01771 01772 if (symbol) { 01773 used[0] = 1; 01774 } 01775 numPages = 0; 01776 for (hi = 0; hi < 256; hi++) { 01777 if (used[hi]) { 01778 numPages++; 01779 } 01780 } 01781 size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; 01782 dataPtr->fromUnicode = (unsigned short **) ckalloc(size); 01783 memset(dataPtr->fromUnicode, 0, size); 01784 pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256); 01785 01786 for (hi = 0; hi < 256; hi++) { 01787 if (dataPtr->toUnicode[hi] == NULL) { 01788 dataPtr->toUnicode[hi] = emptyPage; 01789 } else { 01790 for (lo = 0; lo < 256; lo++) { 01791 int ch; 01792 01793 ch = dataPtr->toUnicode[hi][lo]; 01794 if (ch != 0) { 01795 unsigned short *page; 01796 01797 page = dataPtr->fromUnicode[ch >> 8]; 01798 if (page == NULL) { 01799 page = pageMemPtr; 01800 pageMemPtr += 256; 01801 dataPtr->fromUnicode[ch >> 8] = page; 01802 } 01803 page[ch & 0xff] = (unsigned short) ((hi << 8) + lo); 01804 } 01805 } 01806 } 01807 } 01808 if (type == ENCODING_MULTIBYTE) { 01809 /* 01810 * If multibyte encodings don't have a backslash character, define 01811 * one. Otherwise, on Windows, native file names won't work because 01812 * the backslash in the file name will map to the unknown character 01813 * (question mark) when converting from UTF-8 to external encoding. 01814 */ 01815 01816 if (dataPtr->fromUnicode[0] != NULL) { 01817 if (dataPtr->fromUnicode[0]['\\'] == '\0') { 01818 dataPtr->fromUnicode[0]['\\'] = '\\'; 01819 } 01820 } 01821 } 01822 if (symbol) { 01823 unsigned short *page; 01824 01825 /* 01826 * Make a special symbol encoding that not only maps the symbol 01827 * characters from their Unicode code points down into page 0, but 01828 * also ensure that the characters on page 0 map to themselves. This 01829 * is so that a symbol font can be used to display a simple string 01830 * like "abcd" and have alpha, beta, chi, delta show up, rather than 01831 * have "unknown" chars show up because strictly speaking the symbol 01832 * font doesn't have glyphs for those low ascii chars. 01833 */ 01834 01835 page = dataPtr->fromUnicode[0]; 01836 if (page == NULL) { 01837 page = pageMemPtr; 01838 dataPtr->fromUnicode[0] = page; 01839 } 01840 for (lo = 0; lo < 256; lo++) { 01841 if (dataPtr->toUnicode[0][lo] != 0) { 01842 page[lo] = (unsigned short) lo; 01843 } 01844 } 01845 } 01846 for (hi = 0; hi < 256; hi++) { 01847 if (dataPtr->fromUnicode[hi] == NULL) { 01848 dataPtr->fromUnicode[hi] = emptyPage; 01849 } 01850 } 01851 01852 /* 01853 * For trailing 'R'everse encoding, see [Patch 689341] 01854 */ 01855 01856 Tcl_DStringInit(&lineString); 01857 do { 01858 int len; 01859 01860 /* 01861 * Skip leading empty lines. 01862 */ 01863 01864 while ((len = Tcl_Gets(chan, &lineString)) == 0) { 01865 /* empty body */ 01866 } 01867 01868 if (len < 0) { 01869 break; 01870 } 01871 line = Tcl_DStringValue(&lineString); 01872 if (line[0] != 'R') { 01873 break; 01874 } 01875 for (Tcl_DStringSetLength(&lineString, 0); 01876 (len = Tcl_Gets(chan, &lineString)) >= 0; 01877 Tcl_DStringSetLength(&lineString, 0)) { 01878 unsigned char* p; 01879 int to, from; 01880 01881 if (len < 5) { 01882 continue; 01883 } 01884 p = (unsigned char*) Tcl_DStringValue(&lineString); 01885 to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) 01886 + (staticHex[p[2]] << 4) + staticHex[p[3]]; 01887 if (to == 0) { 01888 continue; 01889 } 01890 for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) { 01891 from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) 01892 + (staticHex[p[2]] << 4) + staticHex[p[3]]; 01893 if (from == 0) { 01894 continue; 01895 } 01896 dataPtr->fromUnicode[from >> 8][from & 0xff] = to; 01897 } 01898 } 01899 } while (0); 01900 Tcl_DStringFree(&lineString); 01901 01902 encType.encodingName = name; 01903 encType.toUtfProc = TableToUtfProc; 01904 encType.fromUtfProc = TableFromUtfProc; 01905 encType.freeProc = TableFreeProc; 01906 encType.nullSize = (type == ENCODING_DOUBLEBYTE) ? 2 : 1; 01907 encType.clientData = (ClientData) dataPtr; 01908 01909 return Tcl_CreateEncoding(&encType); 01910 } 01911 01912 /* 01913 *------------------------------------------------------------------------- 01914 * 01915 * LoadEscapeEncoding -- 01916 * 01917 * Helper function for LoadEncodingTable(). Loads a state machine that 01918 * converts between Unicode and some other encoding. 01919 * 01920 * File contains text data that describes the escape sequences that are 01921 * used to choose an encoding and the associated names for the 01922 * sub-encodings. 01923 * 01924 * Results: 01925 * The return value is the new encoding, or NULL if the encoding could 01926 * not be created (because the file contained invalid data). 01927 * 01928 * Side effects: 01929 * None. 01930 * 01931 *------------------------------------------------------------------------- 01932 */ 01933 01934 static Tcl_Encoding 01935 LoadEscapeEncoding( 01936 CONST char *name, /* Name for new encoding. */ 01937 Tcl_Channel chan) /* File containing new encoding. */ 01938 { 01939 int i; 01940 unsigned int size; 01941 Tcl_DString escapeData; 01942 char init[16], final[16]; 01943 EscapeEncodingData *dataPtr; 01944 Tcl_EncodingType type; 01945 01946 init[0] = '\0'; 01947 final[0] = '\0'; 01948 Tcl_DStringInit(&escapeData); 01949 01950 while (1) { 01951 int argc; 01952 CONST char **argv; 01953 char *line; 01954 Tcl_DString lineString; 01955 01956 Tcl_DStringInit(&lineString); 01957 if (Tcl_Gets(chan, &lineString) < 0) { 01958 break; 01959 } 01960 line = Tcl_DStringValue(&lineString); 01961 if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) { 01962 continue; 01963 } 01964 if (argc >= 2) { 01965 if (strcmp(argv[0], "name") == 0) { 01966 /* do nothing */ 01967 } else if (strcmp(argv[0], "init") == 0) { 01968 strncpy(init, argv[1], sizeof(init)); 01969 init[sizeof(init) - 1] = '\0'; 01970 } else if (strcmp(argv[0], "final") == 0) { 01971 strncpy(final, argv[1], sizeof(final)); 01972 final[sizeof(final) - 1] = '\0'; 01973 } else { 01974 EscapeSubTable est; 01975 01976 strncpy(est.sequence, argv[1], sizeof(est.sequence)); 01977 est.sequence[sizeof(est.sequence) - 1] = '\0'; 01978 est.sequenceLen = strlen(est.sequence); 01979 01980 strncpy(est.name, argv[0], sizeof(est.name)); 01981 est.name[sizeof(est.name) - 1] = '\0'; 01982 01983 /* 01984 * To avoid infinite recursion in [encoding system iso2022-*] 01985 */ 01986 01987 Tcl_GetEncoding(NULL, est.name); 01988 01989 est.encodingPtr = NULL; 01990 Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); 01991 } 01992 } 01993 ckfree((char *) argv); 01994 Tcl_DStringFree(&lineString); 01995 } 01996 01997 size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable) 01998 + Tcl_DStringLength(&escapeData); 01999 dataPtr = (EscapeEncodingData *) ckalloc(size); 02000 dataPtr->initLen = strlen(init); 02001 strcpy(dataPtr->init, init); 02002 dataPtr->finalLen = strlen(final); 02003 strcpy(dataPtr->final, final); 02004 dataPtr->numSubTables = 02005 Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable); 02006 memcpy(dataPtr->subTables, Tcl_DStringValue(&escapeData), 02007 (size_t) Tcl_DStringLength(&escapeData)); 02008 Tcl_DStringFree(&escapeData); 02009 02010 memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes)); 02011 for (i = 0; i < dataPtr->numSubTables; i++) { 02012 dataPtr->prefixBytes[UCHAR(dataPtr->subTables[i].sequence[0])] = 1; 02013 } 02014 if (dataPtr->init[0] != '\0') { 02015 dataPtr->prefixBytes[UCHAR(dataPtr->init[0])] = 1; 02016 } 02017 if (dataPtr->final[0] != '\0') { 02018 dataPtr->prefixBytes[UCHAR(dataPtr->final[0])] = 1; 02019 } 02020 02021 type.encodingName = name; 02022 type.toUtfProc = EscapeToUtfProc; 02023 type.fromUtfProc = EscapeFromUtfProc; 02024 type.freeProc = EscapeFreeProc; 02025 type.nullSize = 1; 02026 type.clientData = (ClientData) dataPtr; 02027 02028 return Tcl_CreateEncoding(&type); 02029 } 02030 02031 /* 02032 *------------------------------------------------------------------------- 02033 * 02034 * BinaryProc -- 02035 * 02036 * The default conversion when no other conversion is specified. No 02037 * translation is done; source bytes are copied directly to destination 02038 * bytes. 02039 * 02040 * Results: 02041 * Returns TCL_OK if conversion was successful. 02042 * 02043 * Side effects: 02044 * None. 02045 * 02046 *------------------------------------------------------------------------- 02047 */ 02048 02049 static int 02050 BinaryProc( 02051 ClientData clientData, /* Not used. */ 02052 CONST char *src, /* Source string (unknown encoding). */ 02053 int srcLen, /* Source string length in bytes. */ 02054 int flags, /* Conversion control flags. */ 02055 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02056 * information used during a piecewise 02057 * conversion. Contents of statePtr are 02058 * initialized and/or reset by conversion 02059 * routine under control of flags argument. */ 02060 char *dst, /* Output buffer in which converted string is 02061 * stored. */ 02062 int dstLen, /* The maximum length of output buffer in 02063 * bytes. */ 02064 int *srcReadPtr, /* Filled with the number of bytes from the 02065 * source string that were converted. */ 02066 int *dstWrotePtr, /* Filled with the number of bytes that were 02067 * stored in the output buffer as a result of 02068 * the conversion. */ 02069 int *dstCharsPtr) /* Filled with the number of characters that 02070 * correspond to the bytes stored in the 02071 * output buffer. */ 02072 { 02073 int result; 02074 02075 result = TCL_OK; 02076 dstLen -= TCL_UTF_MAX - 1; 02077 if (dstLen < 0) { 02078 dstLen = 0; 02079 } 02080 if (srcLen > dstLen) { 02081 srcLen = dstLen; 02082 result = TCL_CONVERT_NOSPACE; 02083 } 02084 02085 *srcReadPtr = srcLen; 02086 *dstWrotePtr = srcLen; 02087 *dstCharsPtr = srcLen; 02088 memcpy(dst, src, (size_t) srcLen); 02089 return result; 02090 } 02091 02092 /* 02093 *------------------------------------------------------------------------- 02094 * 02095 * UtfExtToUtfIntProc -- 02096 * 02097 * Convert from UTF-8 to UTF-8. While converting null-bytes from the 02098 * Tcl's internal representation (0xc0, 0x80) to the official 02099 * representation (0x00). See UtfToUtfProc for details. 02100 * 02101 * Results: 02102 * Returns TCL_OK if conversion was successful. 02103 * 02104 * Side effects: 02105 * None. 02106 * 02107 *------------------------------------------------------------------------- 02108 */ 02109 02110 static int 02111 UtfIntToUtfExtProc( 02112 ClientData clientData, /* Not used. */ 02113 CONST char *src, /* Source string in UTF-8. */ 02114 int srcLen, /* Source string length in bytes. */ 02115 int flags, /* Conversion control flags. */ 02116 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02117 * information used during a piecewise 02118 * conversion. Contents of statePtr are 02119 * initialized and/or reset by conversion 02120 * routine under control of flags argument. */ 02121 char *dst, /* Output buffer in which converted string 02122 * is stored. */ 02123 int dstLen, /* The maximum length of output buffer in 02124 * bytes. */ 02125 int *srcReadPtr, /* Filled with the number of bytes from the 02126 * source string that were converted. This may 02127 * be less than the original source length if 02128 * there was a problem converting some source 02129 * characters. */ 02130 int *dstWrotePtr, /* Filled with the number of bytes that were 02131 * stored in the output buffer as a result of 02132 * the conversion. */ 02133 int *dstCharsPtr) /* Filled with the number of characters that 02134 * correspond to the bytes stored in the 02135 * output buffer. */ 02136 { 02137 return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, 02138 srcReadPtr, dstWrotePtr, dstCharsPtr, 1); 02139 } 02140 02141 /* 02142 *------------------------------------------------------------------------- 02143 * 02144 * UtfExtToUtfIntProc -- 02145 * 02146 * Convert from UTF-8 to UTF-8 while converting null-bytes from the 02147 * official representation (0x00) to Tcl's internal representation (0xc0, 02148 * 0x80). See UtfToUtfProc for details. 02149 * 02150 * Results: 02151 * Returns TCL_OK if conversion was successful. 02152 * 02153 * Side effects: 02154 * None. 02155 * 02156 *------------------------------------------------------------------------- 02157 */ 02158 static int 02159 UtfExtToUtfIntProc( 02160 ClientData clientData, /* Not used. */ 02161 CONST char *src, /* Source string in UTF-8. */ 02162 int srcLen, /* Source string length in bytes. */ 02163 int flags, /* Conversion control flags. */ 02164 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02165 * information used during a piecewise 02166 * conversion. Contents of statePtr are 02167 * initialized and/or reset by conversion 02168 * routine under control of flags argument. */ 02169 char *dst, /* Output buffer in which converted string is 02170 * stored. */ 02171 int dstLen, /* The maximum length of output buffer in 02172 * bytes. */ 02173 int *srcReadPtr, /* Filled with the number of bytes from the 02174 * source string that were converted. This may 02175 * be less than the original source length if 02176 * there was a problem converting some source 02177 * characters. */ 02178 int *dstWrotePtr, /* Filled with the number of bytes that were 02179 * stored in the output buffer as a result of 02180 * the conversion. */ 02181 int *dstCharsPtr) /* Filled with the number of characters that 02182 * correspond to the bytes stored in the 02183 * output buffer. */ 02184 { 02185 return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, 02186 srcReadPtr, dstWrotePtr, dstCharsPtr, 0); 02187 } 02188 02189 /* 02190 *------------------------------------------------------------------------- 02191 * 02192 * UtfToUtfProc -- 02193 * 02194 * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation 02195 * is not a no-op, because it will turn a stream of improperly formed 02196 * UTF-8 into a properly formed stream. 02197 * 02198 * Results: 02199 * Returns TCL_OK if conversion was successful. 02200 * 02201 * Side effects: 02202 * None. 02203 * 02204 *------------------------------------------------------------------------- 02205 */ 02206 02207 static int 02208 UtfToUtfProc( 02209 ClientData clientData, /* Not used. */ 02210 CONST char *src, /* Source string in UTF-8. */ 02211 int srcLen, /* Source string length in bytes. */ 02212 int flags, /* Conversion control flags. */ 02213 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02214 * information used during a piecewise 02215 * conversion. Contents of statePtr are 02216 * initialized and/or reset by conversion 02217 * routine under control of flags argument. */ 02218 char *dst, /* Output buffer in which converted string is 02219 * stored. */ 02220 int dstLen, /* The maximum length of output buffer in 02221 * bytes. */ 02222 int *srcReadPtr, /* Filled with the number of bytes from the 02223 * source string that were converted. This may 02224 * be less than the original source length if 02225 * there was a problem converting some source 02226 * characters. */ 02227 int *dstWrotePtr, /* Filled with the number of bytes that were 02228 * stored in the output buffer as a result of 02229 * the conversion. */ 02230 int *dstCharsPtr, /* Filled with the number of characters that 02231 * correspond to the bytes stored in the 02232 * output buffer. */ 02233 int pureNullMode) /* Convert embedded nulls from internal 02234 * representation to real null-bytes or vice 02235 * versa. */ 02236 { 02237 CONST char *srcStart, *srcEnd, *srcClose; 02238 char *dstStart, *dstEnd; 02239 int result, numChars; 02240 Tcl_UniChar ch; 02241 02242 result = TCL_OK; 02243 02244 srcStart = src; 02245 srcEnd = src + srcLen; 02246 srcClose = srcEnd; 02247 if ((flags & TCL_ENCODING_END) == 0) { 02248 srcClose -= TCL_UTF_MAX; 02249 } 02250 02251 dstStart = dst; 02252 dstEnd = dst + dstLen - TCL_UTF_MAX; 02253 02254 for (numChars = 0; src < srcEnd; numChars++) { 02255 if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { 02256 /* 02257 * If there is more string to follow, this will ensure that the 02258 * last UTF-8 character in the source buffer hasn't been cut off. 02259 */ 02260 02261 result = TCL_CONVERT_MULTIBYTE; 02262 break; 02263 } 02264 if (dst > dstEnd) { 02265 result = TCL_CONVERT_NOSPACE; 02266 break; 02267 } 02268 if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) { 02269 /* 02270 * Copy 7bit chatacters, but skip null-bytes when we are in input 02271 * mode, so that they get converted to 0xc080. 02272 */ 02273 02274 *dst++ = *src++; 02275 } else if (pureNullMode == 1 && UCHAR(*src) == 0xc0 && 02276 UCHAR(*(src+1)) == 0x80) { 02277 /* 02278 * Convert 0xc080 to real nulls when we are in output mode. 02279 */ 02280 02281 *dst++ = 0; 02282 src += 2; 02283 } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { 02284 /* 02285 * Always check before using Tcl_UtfToUniChar. Not doing can so 02286 * cause it run beyond the endof the buffer! If we happen such an 02287 * incomplete char its byts are made to represent themselves. 02288 */ 02289 02290 ch = (Tcl_UniChar) *src; 02291 src += 1; 02292 dst += Tcl_UniCharToUtf(ch, dst); 02293 } else { 02294 src += Tcl_UtfToUniChar(src, &ch); 02295 dst += Tcl_UniCharToUtf(ch, dst); 02296 } 02297 } 02298 02299 *srcReadPtr = src - srcStart; 02300 *dstWrotePtr = dst - dstStart; 02301 *dstCharsPtr = numChars; 02302 return result; 02303 } 02304 02305 /* 02306 *------------------------------------------------------------------------- 02307 * 02308 * UnicodeToUtfProc -- 02309 * 02310 * Convert from Unicode to UTF-8. 02311 * 02312 * Results: 02313 * Returns TCL_OK if conversion was successful. 02314 * 02315 * Side effects: 02316 * None. 02317 * 02318 *------------------------------------------------------------------------- 02319 */ 02320 02321 static int 02322 UnicodeToUtfProc( 02323 ClientData clientData, /* Not used. */ 02324 CONST char *src, /* Source string in Unicode. */ 02325 int srcLen, /* Source string length in bytes. */ 02326 int flags, /* Conversion control flags. */ 02327 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02328 * information used during a piecewise 02329 * conversion. Contents of statePtr are 02330 * initialized and/or reset by conversion 02331 * routine under control of flags argument. */ 02332 char *dst, /* Output buffer in which converted string is 02333 * stored. */ 02334 int dstLen, /* The maximum length of output buffer in 02335 * bytes. */ 02336 int *srcReadPtr, /* Filled with the number of bytes from the 02337 * source string that were converted. This may 02338 * be less than the original source length if 02339 * there was a problem converting some source 02340 * characters. */ 02341 int *dstWrotePtr, /* Filled with the number of bytes that were 02342 * stored in the output buffer as a result of 02343 * the conversion. */ 02344 int *dstCharsPtr) /* Filled with the number of characters that 02345 * correspond to the bytes stored in the 02346 * output buffer. */ 02347 { 02348 CONST char *srcStart, *srcEnd; 02349 char *dstEnd, *dstStart; 02350 int result, numChars; 02351 Tcl_UniChar ch; 02352 02353 result = TCL_OK; 02354 if ((srcLen % sizeof(Tcl_UniChar)) != 0) { 02355 result = TCL_CONVERT_MULTIBYTE; 02356 srcLen /= sizeof(Tcl_UniChar); 02357 srcLen *= sizeof(Tcl_UniChar); 02358 } 02359 02360 srcStart = src; 02361 srcEnd = src + srcLen; 02362 02363 dstStart = dst; 02364 dstEnd = dst + dstLen - TCL_UTF_MAX; 02365 02366 for (numChars = 0; src < srcEnd; numChars++) { 02367 if (dst > dstEnd) { 02368 result = TCL_CONVERT_NOSPACE; 02369 break; 02370 } 02371 /* 02372 * Special case for 1-byte utf chars for speed. Make sure we 02373 * work with Tcl_UniChar-size data. 02374 */ 02375 ch = *(Tcl_UniChar *)src; 02376 if (ch && ch < 0x80) { 02377 *dst++ = (ch & 0xFF); 02378 } else { 02379 dst += Tcl_UniCharToUtf(ch, dst); 02380 } 02381 src += sizeof(Tcl_UniChar); 02382 } 02383 02384 *srcReadPtr = src - srcStart; 02385 *dstWrotePtr = dst - dstStart; 02386 *dstCharsPtr = numChars; 02387 return result; 02388 } 02389 02390 /* 02391 *------------------------------------------------------------------------- 02392 * 02393 * UtfToUnicodeProc -- 02394 * 02395 * Convert from UTF-8 to Unicode. 02396 * 02397 * Results: 02398 * Returns TCL_OK if conversion was successful. 02399 * 02400 * Side effects: 02401 * None. 02402 * 02403 *------------------------------------------------------------------------- 02404 */ 02405 02406 static int 02407 UtfToUnicodeProc( 02408 ClientData clientData, /* TableEncodingData that specifies 02409 * encoding. */ 02410 CONST char *src, /* Source string in UTF-8. */ 02411 int srcLen, /* Source string length in bytes. */ 02412 int flags, /* Conversion control flags. */ 02413 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02414 * information used during a piecewise 02415 * conversion. Contents of statePtr are 02416 * initialized and/or reset by conversion 02417 * routine under control of flags argument. */ 02418 char *dst, /* Output buffer in which converted string is 02419 * stored. */ 02420 int dstLen, /* The maximum length of output buffer in 02421 * bytes. */ 02422 int *srcReadPtr, /* Filled with the number of bytes from the 02423 * source string that were converted. This may 02424 * be less than the original source length if 02425 * there was a problem converting some source 02426 * characters. */ 02427 int *dstWrotePtr, /* Filled with the number of bytes that were 02428 * stored in the output buffer as a result of 02429 * the conversion. */ 02430 int *dstCharsPtr) /* Filled with the number of characters that 02431 * correspond to the bytes stored in the 02432 * output buffer. */ 02433 { 02434 CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; 02435 int result, numChars; 02436 Tcl_UniChar ch; 02437 02438 srcStart = src; 02439 srcEnd = src + srcLen; 02440 srcClose = srcEnd; 02441 if ((flags & TCL_ENCODING_END) == 0) { 02442 srcClose -= TCL_UTF_MAX; 02443 } 02444 02445 dstStart = dst; 02446 dstEnd = dst + dstLen - sizeof(Tcl_UniChar); 02447 02448 result = TCL_OK; 02449 for (numChars = 0; src < srcEnd; numChars++) { 02450 if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { 02451 /* 02452 * If there is more string to follow, this will ensure that the 02453 * last UTF-8 character in the source buffer hasn't been cut off. 02454 */ 02455 02456 result = TCL_CONVERT_MULTIBYTE; 02457 break; 02458 } 02459 if (dst > dstEnd) { 02460 result = TCL_CONVERT_NOSPACE; 02461 break; 02462 } 02463 src += TclUtfToUniChar(src, &ch); 02464 /* 02465 * Need to handle this in a way that won't cause misalignment 02466 * by casting dst to a Tcl_UniChar. [Bug 1122671] 02467 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. 02468 */ 02469 #ifdef WORDS_BIGENDIAN 02470 *dst++ = (ch >> 8); 02471 *dst++ = (ch & 0xFF); 02472 #else 02473 *dst++ = (ch & 0xFF); 02474 *dst++ = (ch >> 8); 02475 #endif 02476 } 02477 *srcReadPtr = src - srcStart; 02478 *dstWrotePtr = dst - dstStart; 02479 *dstCharsPtr = numChars; 02480 return result; 02481 } 02482 02483 /* 02484 *------------------------------------------------------------------------- 02485 * 02486 * TableToUtfProc -- 02487 * 02488 * Convert from the encoding specified by the TableEncodingData into 02489 * UTF-8. 02490 * 02491 * Results: 02492 * Returns TCL_OK if conversion was successful. 02493 * 02494 * Side effects: 02495 * None. 02496 * 02497 *------------------------------------------------------------------------- 02498 */ 02499 02500 static int 02501 TableToUtfProc( 02502 ClientData clientData, /* TableEncodingData that specifies 02503 * encoding. */ 02504 CONST char *src, /* Source string in specified encoding. */ 02505 int srcLen, /* Source string length in bytes. */ 02506 int flags, /* Conversion control flags. */ 02507 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02508 * information used during a piecewise 02509 * conversion. Contents of statePtr are 02510 * initialized and/or reset by conversion 02511 * routine under control of flags argument. */ 02512 char *dst, /* Output buffer in which converted string is 02513 * stored. */ 02514 int dstLen, /* The maximum length of output buffer in 02515 * bytes. */ 02516 int *srcReadPtr, /* Filled with the number of bytes from the 02517 * source string that were converted. This may 02518 * be less than the original source length if 02519 * there was a problem converting some source 02520 * characters. */ 02521 int *dstWrotePtr, /* Filled with the number of bytes that were 02522 * stored in the output buffer as a result of 02523 * the conversion. */ 02524 int *dstCharsPtr) /* Filled with the number of characters that 02525 * correspond to the bytes stored in the 02526 * output buffer. */ 02527 { 02528 CONST char *srcStart, *srcEnd; 02529 char *dstEnd, *dstStart, *prefixBytes; 02530 int result, byte, numChars; 02531 Tcl_UniChar ch; 02532 unsigned short **toUnicode; 02533 unsigned short *pageZero; 02534 TableEncodingData *dataPtr; 02535 02536 srcStart = src; 02537 srcEnd = src + srcLen; 02538 02539 dstStart = dst; 02540 dstEnd = dst + dstLen - TCL_UTF_MAX; 02541 02542 dataPtr = (TableEncodingData *) clientData; 02543 toUnicode = dataPtr->toUnicode; 02544 prefixBytes = dataPtr->prefixBytes; 02545 pageZero = toUnicode[0]; 02546 02547 result = TCL_OK; 02548 for (numChars = 0; src < srcEnd; numChars++) { 02549 if (dst > dstEnd) { 02550 result = TCL_CONVERT_NOSPACE; 02551 break; 02552 } 02553 byte = *((unsigned char *) src); 02554 if (prefixBytes[byte]) { 02555 src++; 02556 if (src >= srcEnd) { 02557 src--; 02558 result = TCL_CONVERT_MULTIBYTE; 02559 break; 02560 } 02561 ch = toUnicode[byte][*((unsigned char *) src)]; 02562 } else { 02563 ch = pageZero[byte]; 02564 } 02565 if ((ch == 0) && (byte != 0)) { 02566 if (flags & TCL_ENCODING_STOPONERROR) { 02567 result = TCL_CONVERT_SYNTAX; 02568 break; 02569 } 02570 if (prefixBytes[byte]) { 02571 src--; 02572 } 02573 ch = (Tcl_UniChar) byte; 02574 } 02575 /* 02576 * Special case for 1-byte utf chars for speed. 02577 */ 02578 if (ch && ch < 0x80) { 02579 *dst++ = (char) ch; 02580 } else { 02581 dst += Tcl_UniCharToUtf(ch, dst); 02582 } 02583 src++; 02584 } 02585 02586 *srcReadPtr = src - srcStart; 02587 *dstWrotePtr = dst - dstStart; 02588 *dstCharsPtr = numChars; 02589 return result; 02590 } 02591 02592 /* 02593 *------------------------------------------------------------------------- 02594 * 02595 * TableFromUtfProc -- 02596 * 02597 * Convert from UTF-8 into the encoding specified by the 02598 * TableEncodingData. 02599 * 02600 * Results: 02601 * Returns TCL_OK if conversion was successful. 02602 * 02603 * Side effects: 02604 * None. 02605 * 02606 *------------------------------------------------------------------------- 02607 */ 02608 02609 static int 02610 TableFromUtfProc( 02611 ClientData clientData, /* TableEncodingData that specifies 02612 * encoding. */ 02613 CONST char *src, /* Source string in UTF-8. */ 02614 int srcLen, /* Source string length in bytes. */ 02615 int flags, /* Conversion control flags. */ 02616 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02617 * information used during a piecewise 02618 * conversion. Contents of statePtr are 02619 * initialized and/or reset by conversion 02620 * routine under control of flags argument. */ 02621 char *dst, /* Output buffer in which converted string is 02622 * stored. */ 02623 int dstLen, /* The maximum length of output buffer in 02624 * bytes. */ 02625 int *srcReadPtr, /* Filled with the number of bytes from the 02626 * source string that were converted. This may 02627 * be less than the original source length if 02628 * there was a problem converting some source 02629 * characters. */ 02630 int *dstWrotePtr, /* Filled with the number of bytes that were 02631 * stored in the output buffer as a result of 02632 * the conversion. */ 02633 int *dstCharsPtr) /* Filled with the number of characters that 02634 * correspond to the bytes stored in the 02635 * output buffer. */ 02636 { 02637 CONST char *srcStart, *srcEnd, *srcClose; 02638 char *dstStart, *dstEnd, *prefixBytes; 02639 Tcl_UniChar ch; 02640 int result, len, word, numChars; 02641 TableEncodingData *dataPtr; 02642 unsigned short **fromUnicode; 02643 02644 result = TCL_OK; 02645 02646 dataPtr = (TableEncodingData *) clientData; 02647 prefixBytes = dataPtr->prefixBytes; 02648 fromUnicode = dataPtr->fromUnicode; 02649 02650 srcStart = src; 02651 srcEnd = src + srcLen; 02652 srcClose = srcEnd; 02653 if ((flags & TCL_ENCODING_END) == 0) { 02654 srcClose -= TCL_UTF_MAX; 02655 } 02656 02657 dstStart = dst; 02658 dstEnd = dst + dstLen - 1; 02659 02660 for (numChars = 0; src < srcEnd; numChars++) { 02661 if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { 02662 /* 02663 * If there is more string to follow, this will ensure that the 02664 * last UTF-8 character in the source buffer hasn't been cut off. 02665 */ 02666 02667 result = TCL_CONVERT_MULTIBYTE; 02668 break; 02669 } 02670 len = TclUtfToUniChar(src, &ch); 02671 02672 #if TCL_UTF_MAX > 3 02673 /* 02674 * This prevents a crash condition. More evaluation is required for 02675 * full support of int Tcl_UniChar. [Bug 1004065] 02676 */ 02677 02678 if (ch & 0xffff0000) { 02679 word = 0; 02680 } else 02681 #endif 02682 word = fromUnicode[(ch >> 8)][ch & 0xff]; 02683 02684 if ((word == 0) && (ch != 0)) { 02685 if (flags & TCL_ENCODING_STOPONERROR) { 02686 result = TCL_CONVERT_UNKNOWN; 02687 break; 02688 } 02689 word = dataPtr->fallback; 02690 } 02691 if (prefixBytes[(word >> 8)] != 0) { 02692 if (dst + 1 > dstEnd) { 02693 result = TCL_CONVERT_NOSPACE; 02694 break; 02695 } 02696 dst[0] = (char) (word >> 8); 02697 dst[1] = (char) word; 02698 dst += 2; 02699 } else { 02700 if (dst > dstEnd) { 02701 result = TCL_CONVERT_NOSPACE; 02702 break; 02703 } 02704 dst[0] = (char) word; 02705 dst++; 02706 } 02707 src += len; 02708 } 02709 02710 *srcReadPtr = src - srcStart; 02711 *dstWrotePtr = dst - dstStart; 02712 *dstCharsPtr = numChars; 02713 return result; 02714 } 02715 02716 /* 02717 *------------------------------------------------------------------------- 02718 * 02719 * Iso88591ToUtfProc -- 02720 * 02721 * Convert from the "iso8859-1" encoding into UTF-8. 02722 * 02723 * Results: 02724 * Returns TCL_OK if conversion was successful. 02725 * 02726 * Side effects: 02727 * None. 02728 * 02729 *------------------------------------------------------------------------- 02730 */ 02731 02732 static int 02733 Iso88591ToUtfProc( 02734 ClientData clientData, /* Ignored. */ 02735 CONST char *src, /* Source string in specified encoding. */ 02736 int srcLen, /* Source string length in bytes. */ 02737 int flags, /* Conversion control flags. */ 02738 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02739 * information used during a piecewise 02740 * conversion. Contents of statePtr are 02741 * initialized and/or reset by conversion 02742 * routine under control of flags argument. */ 02743 char *dst, /* Output buffer in which converted string is 02744 * stored. */ 02745 int dstLen, /* The maximum length of output buffer in 02746 * bytes. */ 02747 int *srcReadPtr, /* Filled with the number of bytes from the 02748 * source string that were converted. This may 02749 * be less than the original source length if 02750 * there was a problem converting some source 02751 * characters. */ 02752 int *dstWrotePtr, /* Filled with the number of bytes that were 02753 * stored in the output buffer as a result of 02754 * the conversion. */ 02755 int *dstCharsPtr) /* Filled with the number of characters that 02756 * correspond to the bytes stored in the 02757 * output buffer. */ 02758 { 02759 CONST char *srcStart, *srcEnd; 02760 char *dstEnd, *dstStart; 02761 int result, numChars; 02762 02763 srcStart = src; 02764 srcEnd = src + srcLen; 02765 02766 dstStart = dst; 02767 dstEnd = dst + dstLen - TCL_UTF_MAX; 02768 02769 result = TCL_OK; 02770 for (numChars = 0; src < srcEnd; numChars++) { 02771 Tcl_UniChar ch; 02772 02773 if (dst > dstEnd) { 02774 result = TCL_CONVERT_NOSPACE; 02775 break; 02776 } 02777 ch = (Tcl_UniChar) *((unsigned char *) src); 02778 /* 02779 * Special case for 1-byte utf chars for speed. 02780 */ 02781 if (ch && ch < 0x80) { 02782 *dst++ = (char) ch; 02783 } else { 02784 dst += Tcl_UniCharToUtf(ch, dst); 02785 } 02786 src++; 02787 } 02788 02789 *srcReadPtr = src - srcStart; 02790 *dstWrotePtr = dst - dstStart; 02791 *dstCharsPtr = numChars; 02792 return result; 02793 } 02794 02795 /* 02796 *------------------------------------------------------------------------- 02797 * 02798 * Iso88591FromUtfProc -- 02799 * 02800 * Convert from UTF-8 into the encoding "iso8859-1". 02801 * 02802 * Results: 02803 * Returns TCL_OK if conversion was successful. 02804 * 02805 * Side effects: 02806 * None. 02807 * 02808 *------------------------------------------------------------------------- 02809 */ 02810 02811 static int 02812 Iso88591FromUtfProc( 02813 ClientData clientData, /* Ignored. */ 02814 CONST char *src, /* Source string in UTF-8. */ 02815 int srcLen, /* Source string length in bytes. */ 02816 int flags, /* Conversion control flags. */ 02817 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02818 * information used during a piecewise 02819 * conversion. Contents of statePtr are 02820 * initialized and/or reset by conversion 02821 * routine under control of flags argument. */ 02822 char *dst, /* Output buffer in which converted string is 02823 * stored. */ 02824 int dstLen, /* The maximum length of output buffer in 02825 * bytes. */ 02826 int *srcReadPtr, /* Filled with the number of bytes from the 02827 * source string that were converted. This may 02828 * be less than the original source length if 02829 * there was a problem converting some source 02830 * characters. */ 02831 int *dstWrotePtr, /* Filled with the number of bytes that were 02832 * stored in the output buffer as a result of 02833 * the conversion. */ 02834 int *dstCharsPtr) /* Filled with the number of characters that 02835 * correspond to the bytes stored in the 02836 * output buffer. */ 02837 { 02838 CONST char *srcStart, *srcEnd, *srcClose; 02839 char *dstStart, *dstEnd; 02840 int result, numChars; 02841 02842 result = TCL_OK; 02843 02844 srcStart = src; 02845 srcEnd = src + srcLen; 02846 srcClose = srcEnd; 02847 if ((flags & TCL_ENCODING_END) == 0) { 02848 srcClose -= TCL_UTF_MAX; 02849 } 02850 02851 dstStart = dst; 02852 dstEnd = dst + dstLen - 1; 02853 02854 for (numChars = 0; src < srcEnd; numChars++) { 02855 Tcl_UniChar ch; 02856 int len; 02857 02858 if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { 02859 /* 02860 * If there is more string to follow, this will ensure that the 02861 * last UTF-8 character in the source buffer hasn't been cut off. 02862 */ 02863 02864 result = TCL_CONVERT_MULTIBYTE; 02865 break; 02866 } 02867 len = TclUtfToUniChar(src, &ch); 02868 02869 /* 02870 * Check for illegal characters. 02871 */ 02872 02873 if (ch > 0xff) { 02874 if (flags & TCL_ENCODING_STOPONERROR) { 02875 result = TCL_CONVERT_UNKNOWN; 02876 break; 02877 } 02878 02879 /* 02880 * Plunge on, using '?' as a fallback character. 02881 */ 02882 02883 ch = (Tcl_UniChar) '?'; 02884 } 02885 02886 if (dst > dstEnd) { 02887 result = TCL_CONVERT_NOSPACE; 02888 break; 02889 } 02890 *(dst++) = (char) ch; 02891 src += len; 02892 } 02893 02894 *srcReadPtr = src - srcStart; 02895 *dstWrotePtr = dst - dstStart; 02896 *dstCharsPtr = numChars; 02897 return result; 02898 } 02899 02900 /* 02901 *--------------------------------------------------------------------------- 02902 * 02903 * TableFreeProc -- 02904 * 02905 * This function is invoked when an encoding is deleted. It deletes the 02906 * memory used by the TableEncodingData. 02907 * 02908 * Results: 02909 * None. 02910 * 02911 * Side effects: 02912 * Memory freed. 02913 * 02914 *--------------------------------------------------------------------------- 02915 */ 02916 02917 static void 02918 TableFreeProc( 02919 ClientData clientData) /* TableEncodingData that specifies 02920 * encoding. */ 02921 { 02922 TableEncodingData *dataPtr; 02923 02924 /* 02925 * Make sure we aren't freeing twice on shutdown. [Bug 219314] 02926 */ 02927 02928 dataPtr = (TableEncodingData *) clientData; 02929 ckfree((char *) dataPtr->toUnicode); 02930 ckfree((char *) dataPtr->fromUnicode); 02931 ckfree((char *) dataPtr); 02932 } 02933 02934 /* 02935 *------------------------------------------------------------------------- 02936 * 02937 * EscapeToUtfProc -- 02938 * 02939 * Convert from the encoding specified by the EscapeEncodingData into 02940 * UTF-8. 02941 * 02942 * Results: 02943 * Returns TCL_OK if conversion was successful. 02944 * 02945 * Side effects: 02946 * None. 02947 * 02948 *------------------------------------------------------------------------- 02949 */ 02950 02951 static int 02952 EscapeToUtfProc( 02953 ClientData clientData, /* EscapeEncodingData that specifies 02954 * encoding. */ 02955 CONST char *src, /* Source string in specified encoding. */ 02956 int srcLen, /* Source string length in bytes. */ 02957 int flags, /* Conversion control flags. */ 02958 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 02959 * information used during a piecewise 02960 * conversion. Contents of statePtr are 02961 * initialized and/or reset by conversion 02962 * routine under control of flags argument. */ 02963 char *dst, /* Output buffer in which converted string is 02964 * stored. */ 02965 int dstLen, /* The maximum length of output buffer in 02966 * bytes. */ 02967 int *srcReadPtr, /* Filled with the number of bytes from the 02968 * source string that were converted. This may 02969 * be less than the original source length if 02970 * there was a problem converting some source 02971 * characters. */ 02972 int *dstWrotePtr, /* Filled with the number of bytes that were 02973 * stored in the output buffer as a result of 02974 * the conversion. */ 02975 int *dstCharsPtr) /* Filled with the number of characters that 02976 * correspond to the bytes stored in the 02977 * output buffer. */ 02978 { 02979 EscapeEncodingData *dataPtr; 02980 char *prefixBytes, *tablePrefixBytes; 02981 unsigned short **tableToUnicode; 02982 Encoding *encodingPtr; 02983 int state, result, numChars; 02984 CONST char *srcStart, *srcEnd; 02985 char *dstStart, *dstEnd; 02986 02987 result = TCL_OK; 02988 02989 tablePrefixBytes = NULL; /* lint. */ 02990 tableToUnicode = NULL; /* lint. */ 02991 02992 dataPtr = (EscapeEncodingData *) clientData; 02993 prefixBytes = dataPtr->prefixBytes; 02994 encodingPtr = NULL; 02995 02996 srcStart = src; 02997 srcEnd = src + srcLen; 02998 02999 dstStart = dst; 03000 dstEnd = dst + dstLen - TCL_UTF_MAX; 03001 03002 state = PTR2INT(*statePtr); 03003 if (flags & TCL_ENCODING_START) { 03004 state = 0; 03005 } 03006 03007 for (numChars = 0; src < srcEnd; ) { 03008 int byte, hi, lo, ch; 03009 03010 if (dst > dstEnd) { 03011 result = TCL_CONVERT_NOSPACE; 03012 break; 03013 } 03014 byte = *((unsigned char *) src); 03015 if (prefixBytes[byte]) { 03016 unsigned int left, len, longest; 03017 int checked, i; 03018 EscapeSubTable *subTablePtr; 03019 03020 /* 03021 * Saw the beginning of an escape sequence. 03022 */ 03023 03024 left = srcEnd - src; 03025 len = dataPtr->initLen; 03026 longest = len; 03027 checked = 0; 03028 03029 if (len <= left) { 03030 checked++; 03031 if ((len > 0) && (memcmp(src, dataPtr->init, len) == 0)) { 03032 /* 03033 * If we see initialization string, skip it, even if we're 03034 * not at the beginning of the buffer. 03035 */ 03036 03037 src += len; 03038 continue; 03039 } 03040 } 03041 03042 len = dataPtr->finalLen; 03043 if (len > longest) { 03044 longest = len; 03045 } 03046 03047 if (len <= left) { 03048 checked++; 03049 if ((len > 0) && (memcmp(src, dataPtr->final, len) == 0)) { 03050 /* 03051 * If we see finalization string, skip it, even if we're 03052 * not at the end of the buffer. 03053 */ 03054 03055 src += len; 03056 continue; 03057 } 03058 } 03059 03060 subTablePtr = dataPtr->subTables; 03061 for (i = 0; i < dataPtr->numSubTables; i++) { 03062 len = subTablePtr->sequenceLen; 03063 if (len > longest) { 03064 longest = len; 03065 } 03066 if (len <= left) { 03067 checked++; 03068 if ((len > 0) && 03069 (memcmp(src, subTablePtr->sequence, len) == 0)) { 03070 state = i; 03071 encodingPtr = NULL; 03072 subTablePtr = NULL; 03073 src += len; 03074 break; 03075 } 03076 } 03077 subTablePtr++; 03078 } 03079 03080 if (subTablePtr == NULL) { 03081 /* 03082 * A match was found, the escape sequence was consumed, and 03083 * the state was updated. 03084 */ 03085 03086 continue; 03087 } 03088 03089 /* 03090 * We have a split-up or unrecognized escape sequence. If we 03091 * checked all the sequences, then it's a syntax error, otherwise 03092 * we need more bytes to determine a match. 03093 */ 03094 03095 if ((checked == dataPtr->numSubTables + 2) 03096 || (flags & TCL_ENCODING_END)) { 03097 if ((flags & TCL_ENCODING_STOPONERROR) == 0) { 03098 /* 03099 * Skip the unknown escape sequence. 03100 */ 03101 03102 src += longest; 03103 continue; 03104 } 03105 result = TCL_CONVERT_SYNTAX; 03106 } else { 03107 result = TCL_CONVERT_MULTIBYTE; 03108 } 03109 break; 03110 } 03111 03112 if (encodingPtr == NULL) { 03113 TableEncodingData *tableDataPtr; 03114 03115 encodingPtr = GetTableEncoding(dataPtr, state); 03116 tableDataPtr = (TableEncodingData *) encodingPtr->clientData; 03117 tablePrefixBytes = tableDataPtr->prefixBytes; 03118 tableToUnicode = tableDataPtr->toUnicode; 03119 } 03120 03121 if (tablePrefixBytes[byte]) { 03122 src++; 03123 if (src >= srcEnd) { 03124 src--; 03125 result = TCL_CONVERT_MULTIBYTE; 03126 break; 03127 } 03128 hi = byte; 03129 lo = *((unsigned char *) src); 03130 } else { 03131 hi = 0; 03132 lo = byte; 03133 } 03134 03135 ch = tableToUnicode[hi][lo]; 03136 dst += Tcl_UniCharToUtf(ch, dst); 03137 src++; 03138 numChars++; 03139 } 03140 03141 *statePtr = (Tcl_EncodingState) INT2PTR(state); 03142 *srcReadPtr = src - srcStart; 03143 *dstWrotePtr = dst - dstStart; 03144 *dstCharsPtr = numChars; 03145 return result; 03146 } 03147 03148 /* 03149 *------------------------------------------------------------------------- 03150 * 03151 * EscapeFromUtfProc -- 03152 * 03153 * Convert from UTF-8 into the encoding specified by the 03154 * EscapeEncodingData. 03155 * 03156 * Results: 03157 * Returns TCL_OK if conversion was successful. 03158 * 03159 * Side effects: 03160 * None. 03161 * 03162 *------------------------------------------------------------------------- 03163 */ 03164 03165 static int 03166 EscapeFromUtfProc( 03167 ClientData clientData, /* EscapeEncodingData that specifies 03168 * encoding. */ 03169 CONST char *src, /* Source string in UTF-8. */ 03170 int srcLen, /* Source string length in bytes. */ 03171 int flags, /* Conversion control flags. */ 03172 Tcl_EncodingState *statePtr,/* Place for conversion routine to store state 03173 * information used during a piecewise 03174 * conversion. Contents of statePtr are 03175 * initialized and/or reset by conversion 03176 * routine under control of flags argument. */ 03177 char *dst, /* Output buffer in which converted string is 03178 * stored. */ 03179 int dstLen, /* The maximum length of output buffer in 03180 * bytes. */ 03181 int *srcReadPtr, /* Filled with the number of bytes from the 03182 * source string that were converted. This may 03183 * be less than the original source length if 03184 * there was a problem converting some source 03185 * characters. */ 03186 int *dstWrotePtr, /* Filled with the number of bytes that were 03187 * stored in the output buffer as a result of 03188 * the conversion. */ 03189 int *dstCharsPtr) /* Filled with the number of characters that 03190 * correspond to the bytes stored in the 03191 * output buffer. */ 03192 { 03193 EscapeEncodingData *dataPtr; 03194 Encoding *encodingPtr; 03195 CONST char *srcStart, *srcEnd, *srcClose; 03196 char *dstStart, *dstEnd; 03197 int state, result, numChars; 03198 TableEncodingData *tableDataPtr; 03199 char *tablePrefixBytes; 03200 unsigned short **tableFromUnicode; 03201 03202 result = TCL_OK; 03203 03204 dataPtr = (EscapeEncodingData *) clientData; 03205 03206 srcStart = src; 03207 srcEnd = src + srcLen; 03208 srcClose = srcEnd; 03209 if ((flags & TCL_ENCODING_END) == 0) { 03210 srcClose -= TCL_UTF_MAX; 03211 } 03212 03213 dstStart = dst; 03214 dstEnd = dst + dstLen - 1; 03215 03216 /* 03217 * RFC1468 states that the text starts in ASCII, and switches to Japanese 03218 * characters, and that the text must end in ASCII. [Patch 474358] 03219 */ 03220 03221 if (flags & TCL_ENCODING_START) { 03222 state = 0; 03223 if ((dst + dataPtr->initLen) > dstEnd) { 03224 *srcReadPtr = 0; 03225 *dstWrotePtr = 0; 03226 return TCL_CONVERT_NOSPACE; 03227 } 03228 memcpy(dst, dataPtr->init, (size_t)dataPtr->initLen); 03229 dst += dataPtr->initLen; 03230 } else { 03231 state = PTR2INT(*statePtr); 03232 } 03233 03234 encodingPtr = GetTableEncoding(dataPtr, state); 03235 tableDataPtr = (TableEncodingData *) encodingPtr->clientData; 03236 tablePrefixBytes = tableDataPtr->prefixBytes; 03237 tableFromUnicode = tableDataPtr->fromUnicode; 03238 03239 for (numChars = 0; src < srcEnd; numChars++) { 03240 unsigned int len; 03241 int word; 03242 Tcl_UniChar ch; 03243 03244 if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { 03245 /* 03246 * If there is more string to follow, this will ensure that the 03247 * last UTF-8 character in the source buffer hasn't been cut off. 03248 */ 03249 03250 result = TCL_CONVERT_MULTIBYTE; 03251 break; 03252 } 03253 len = TclUtfToUniChar(src, &ch); 03254 word = tableFromUnicode[(ch >> 8)][ch & 0xff]; 03255 03256 if ((word == 0) && (ch != 0)) { 03257 int oldState; 03258 EscapeSubTable *subTablePtr; 03259 03260 oldState = state; 03261 for (state = 0; state < dataPtr->numSubTables; state++) { 03262 encodingPtr = GetTableEncoding(dataPtr, state); 03263 tableDataPtr = (TableEncodingData *) encodingPtr->clientData; 03264 word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xff]; 03265 if (word != 0) { 03266 break; 03267 } 03268 } 03269 03270 if (word == 0) { 03271 state = oldState; 03272 if (flags & TCL_ENCODING_STOPONERROR) { 03273 result = TCL_CONVERT_UNKNOWN; 03274 break; 03275 } 03276 encodingPtr = GetTableEncoding(dataPtr, state); 03277 tableDataPtr = (TableEncodingData *) encodingPtr->clientData; 03278 word = tableDataPtr->fallback; 03279 } 03280 03281 tablePrefixBytes = tableDataPtr->prefixBytes; 03282 tableFromUnicode = tableDataPtr->fromUnicode; 03283 03284 /* 03285 * The state variable has the value of oldState when word is 0. 03286 * In this case, the escape sequense should not be copied to dst 03287 * because the current character set is not changed. 03288 */ 03289 03290 if (state != oldState) { 03291 subTablePtr = &dataPtr->subTables[state]; 03292 if ((dst + subTablePtr->sequenceLen) > dstEnd) { 03293 /* 03294 * If there is no space to write the escape sequence, the 03295 * state variable must be changed to the value of oldState 03296 * variable because this escape sequence must be written 03297 * in the next conversion. 03298 */ 03299 03300 state = oldState; 03301 result = TCL_CONVERT_NOSPACE; 03302 break; 03303 } 03304 memcpy(dst, subTablePtr->sequence, 03305 (size_t) subTablePtr->sequenceLen); 03306 dst += subTablePtr->sequenceLen; 03307 } 03308 } 03309 03310 if (tablePrefixBytes[(word >> 8)] != 0) { 03311 if (dst + 1 > dstEnd) { 03312 result = TCL_CONVERT_NOSPACE; 03313 break; 03314 } 03315 dst[0] = (char) (word >> 8); 03316 dst[1] = (char) word; 03317 dst += 2; 03318 } else { 03319 if (dst > dstEnd) { 03320 result = TCL_CONVERT_NOSPACE; 03321 break; 03322 } 03323 dst[0] = (char) word; 03324 dst++; 03325 } 03326 src += len; 03327 } 03328 03329 if ((result == TCL_OK) && (flags & TCL_ENCODING_END)) { 03330 unsigned int len = dataPtr->subTables[0].sequenceLen; 03331 /* 03332 * Certain encodings like iso2022-jp need to write 03333 * an escape sequence after all characters have 03334 * been converted. This logic checks that enough 03335 * room is available in the buffer for the escape bytes. 03336 * The TCL_ENCODING_END flag is cleared after a final 03337 * escape sequence has been added to the buffer so 03338 * that another call to this method does not attempt 03339 * to append escape bytes a second time. 03340 */ 03341 if ((dst + dataPtr->finalLen + (state?len:0)) > dstEnd) { 03342 result = TCL_CONVERT_NOSPACE; 03343 } else { 03344 if (state) { 03345 memcpy(dst, dataPtr->subTables[0].sequence, (size_t) len); 03346 dst += len; 03347 } 03348 memcpy(dst, dataPtr->final, (size_t) dataPtr->finalLen); 03349 dst += dataPtr->finalLen; 03350 state &= ~TCL_ENCODING_END; 03351 } 03352 } 03353 03354 *statePtr = (Tcl_EncodingState) INT2PTR(state); 03355 *srcReadPtr = src - srcStart; 03356 *dstWrotePtr = dst - dstStart; 03357 *dstCharsPtr = numChars; 03358 return result; 03359 } 03360 03361 /* 03362 *--------------------------------------------------------------------------- 03363 * 03364 * EscapeFreeProc -- 03365 * 03366 * This function is invoked when an EscapeEncodingData encoding is 03367 * deleted. It deletes the memory used by the encoding. 03368 * 03369 * Results: 03370 * None. 03371 * 03372 * Side effects: 03373 * Memory freed. 03374 * 03375 *--------------------------------------------------------------------------- 03376 */ 03377 03378 static void 03379 EscapeFreeProc( 03380 ClientData clientData) /* EscapeEncodingData that specifies 03381 * encoding. */ 03382 { 03383 EscapeEncodingData *dataPtr; 03384 EscapeSubTable *subTablePtr; 03385 int i; 03386 03387 dataPtr = (EscapeEncodingData *) clientData; 03388 if (dataPtr == NULL) { 03389 return; 03390 } 03391 subTablePtr = dataPtr->subTables; 03392 for (i = 0; i < dataPtr->numSubTables; i++) { 03393 FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr); 03394 subTablePtr++; 03395 } 03396 ckfree((char *) dataPtr); 03397 } 03398 03399 /* 03400 *--------------------------------------------------------------------------- 03401 * 03402 * GetTableEncoding -- 03403 * 03404 * Helper function for the EscapeEncodingData conversions. Gets the 03405 * encoding (of type TextEncodingData) that represents the specified 03406 * state. 03407 * 03408 * Results: 03409 * The return value is the encoding. 03410 * 03411 * Side effects: 03412 * If the encoding that represents the specified state has not already 03413 * been used by this EscapeEncoding, it will be loaded and cached in the 03414 * dataPtr. 03415 * 03416 *--------------------------------------------------------------------------- 03417 */ 03418 03419 static Encoding * 03420 GetTableEncoding( 03421 EscapeEncodingData *dataPtr,/* Contains names of encodings. */ 03422 int state) /* Index in dataPtr of desired Encoding. */ 03423 { 03424 EscapeSubTable *subTablePtr; 03425 Encoding *encodingPtr; 03426 03427 subTablePtr = &dataPtr->subTables[state]; 03428 encodingPtr = subTablePtr->encodingPtr; 03429 03430 if (encodingPtr == NULL) { 03431 encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name); 03432 if ((encodingPtr == NULL) 03433 || (encodingPtr->toUtfProc != TableToUtfProc 03434 && encodingPtr->toUtfProc != Iso88591ToUtfProc)) { 03435 Tcl_Panic("EscapeToUtfProc: invalid sub table"); 03436 } 03437 subTablePtr->encodingPtr = encodingPtr; 03438 } 03439 03440 return encodingPtr; 03441 } 03442 03443 /* 03444 *--------------------------------------------------------------------------- 03445 * 03446 * unilen -- 03447 * 03448 * A helper function for the Tcl_ExternalToUtf functions. This function 03449 * is similar to strlen for double-byte characters: it returns the number 03450 * of bytes in a 0x0000 terminated string. 03451 * 03452 * Results: 03453 * As above. 03454 * 03455 * Side effects: 03456 * None. 03457 * 03458 *--------------------------------------------------------------------------- 03459 */ 03460 03461 static size_t 03462 unilen( 03463 CONST char *src) 03464 { 03465 unsigned short *p; 03466 03467 p = (unsigned short *) src; 03468 while (*p != 0x0000) { 03469 p++; 03470 } 03471 return (char *) p - src; 03472 } 03473 03474 /* 03475 *------------------------------------------------------------------------- 03476 * 03477 * InitializeEncodingSearchPath -- 03478 * 03479 * This is the fallback routine that sets the default value of the 03480 * encoding search path if the application has not set one via a call to 03481 * Tcl_SetEncodingSearchPath() by the first time the search path is needed 03482 * to load encoding data. 03483 * 03484 * The default encoding search path is produced by taking each directory 03485 * in the library path, appending a subdirectory named "encoding", and if 03486 * the resulting directory exists, adding it to the encoding search path. 03487 * 03488 * Results: 03489 * None. 03490 * 03491 * Side effects: 03492 * Sets the encoding search path to an initial value. 03493 * 03494 *------------------------------------------------------------------------- 03495 */ 03496 03497 static void 03498 InitializeEncodingSearchPath( 03499 char **valuePtr, 03500 int *lengthPtr, 03501 Tcl_Encoding *encodingPtr) 03502 { 03503 char *bytes; 03504 int i, numDirs, numBytes; 03505 Tcl_Obj *libPath, *encodingObj, *searchPath; 03506 03507 TclNewLiteralStringObj(encodingObj, "encoding"); 03508 TclNewObj(searchPath); 03509 Tcl_IncrRefCount(encodingObj); 03510 Tcl_IncrRefCount(searchPath); 03511 libPath = TclGetLibraryPath(); 03512 Tcl_IncrRefCount(libPath); 03513 Tcl_ListObjLength(NULL, libPath, &numDirs); 03514 03515 for (i = 0; i < numDirs; i++) { 03516 Tcl_Obj *directory, *path; 03517 Tcl_StatBuf stat; 03518 03519 Tcl_ListObjIndex(NULL, libPath, i, &directory); 03520 path = Tcl_FSJoinToPath(directory, 1, &encodingObj); 03521 Tcl_IncrRefCount(path); 03522 if ((0 == Tcl_FSStat(path, &stat)) && S_ISDIR(stat.st_mode)) { 03523 Tcl_ListObjAppendElement(NULL, searchPath, path); 03524 } 03525 Tcl_DecrRefCount(path); 03526 } 03527 03528 Tcl_DecrRefCount(libPath); 03529 Tcl_DecrRefCount(encodingObj); 03530 *encodingPtr = libraryPath.encoding; 03531 if (*encodingPtr) { 03532 ((Encoding *)(*encodingPtr))->refCount++; 03533 } 03534 bytes = Tcl_GetStringFromObj(searchPath, &numBytes); 03535 03536 *lengthPtr = numBytes; 03537 *valuePtr = ckalloc((unsigned int) numBytes + 1); 03538 memcpy(*valuePtr, bytes, (size_t) numBytes + 1); 03539 Tcl_DecrRefCount(searchPath); 03540 } 03541 03542 /* 03543 * Local Variables: 03544 * mode: c 03545 * c-basic-offset: 4 03546 * fill-column: 78 03547 * End: 03548 */ 03549
Generated on Wed Mar 12 12:18:15 2008 by 1.5.1 |