tclEncoding.c

Go to the documentation of this file.
00001 /*
00002  * tclEncoding.c --
00003  *
00004  *      Contains the implementation of the encoding conversion package.
00005  *
00006  * Copyright (c) 1996-1998 Sun Microsystems, Inc.
00007  *
00008  * See the file "license.terms" for information on usage and redistribution of
00009  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
00010  *
00011  * RCS: @(#) $Id: tclEncoding.c,v 1.58 2007/12/13 15:23:16 dgp Exp $
00012  */
00013 
00014 #include "tclInt.h"
00015 
00016 typedef size_t (LengthProc)(CONST char *src);
00017 
00018 /*
00019  * The following data structure represents an encoding, which describes how to
00020  * convert between various character sets and UTF-8.
00021  */
00022 
00023 typedef struct Encoding {
00024     char *name;                 /* Name of encoding. Malloced because (1) hash
00025                                  * table entry that owns this encoding may be
00026                                  * freed prior to this encoding being freed,
00027                                  * (2) string passed in the Tcl_EncodingType
00028                                  * structure may not be persistent. */
00029     Tcl_EncodingConvertProc *toUtfProc;
00030                                 /* Function to convert from external encoding
00031                                  * into UTF-8. */
00032     Tcl_EncodingConvertProc *fromUtfProc;
00033                                 /* Function to convert from UTF-8 into
00034                                  * external encoding. */
00035     Tcl_EncodingFreeProc *freeProc;
00036                                 /* If non-NULL, function to call when this
00037                                  * encoding is deleted. */
00038     int nullSize;               /* Number of 0x00 bytes that signify
00039                                  * end-of-string in this encoding. This number
00040                                  * is used to determine the source string
00041                                  * length when the srcLen argument is
00042                                  * negative. This number can be 1 or 2. */
00043     ClientData clientData;      /* Arbitrary value associated with encoding
00044                                  * type. Passed to conversion functions. */
00045     LengthProc *lengthProc;     /* Function to compute length of
00046                                  * null-terminated strings in this encoding.
00047                                  * If nullSize is 1, this is strlen; if
00048                                  * nullSize is 2, this is a function that
00049                                  * returns the number of bytes in a 0x0000
00050                                  * terminated string. */
00051     int refCount;               /* Number of uses of this structure. */
00052     Tcl_HashEntry *hPtr;        /* Hash table entry that owns this encoding. */
00053 } Encoding;
00054 
00055 /*
00056  * The following structure is the clientData for a dynamically-loaded,
00057  * table-driven encoding created by LoadTableEncoding(). It maps between
00058  * Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only)
00059  * encoding.
00060  */
00061 
typedef struct TableEncodingData {
    int fallback;               /* Substitute character (expressed in this
                                 * encoding) emitted when a UTF-8 character
                                 * has no representation here. */
    char prefixBytes[256];      /* Indexed by input byte: 1 when that byte
                                 * is the lead byte of a 2-byte sequence,
                                 * 0 otherwise. */
    unsigned short **toUnicode; /* Sparse two-level table mapping characters
                                 * of this encoding to Unicode. Every element
                                 * points at a page of 256 shorts; a page
                                 * value of 0x0000 means there is no Unicode
                                 * counterpart. malloc'd. */
    unsigned short **fromUnicode;
                                /* Sparse two-level table mapping Unicode
                                 * characters to this encoding. Every element
                                 * points at a page of 256 shorts; a page
                                 * value of 0x0000 means there is no
                                 * corresponding character in the encoding.
                                 * malloc'd. */
} TableEncodingData;
00086 
00087 /*
00088  * The following structure is the clientData for a dynamically-loaded,
00089  * escape-driven encoding that is itself comprised of other simpler encodings.
00090  * An example is "iso-2022-jp", which uses escape sequences to switch between
00091  * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven"
00092  * does not necessarily mean that the ESCAPE character is the character used
00093  * for switching character sets.
00094  */
00095 
00096 typedef struct EscapeSubTable {
00097     unsigned int sequenceLen;   /* Length of following string. */
00098     char sequence[16];          /* Escape code that marks this encoding. */
00099     char name[32];              /* Name for encoding. */
00100     Encoding *encodingPtr;      /* Encoding loaded using above name, or NULL
00101                                  * if this sub-encoding has not been needed
00102                                  * yet. */
00103 } EscapeSubTable;
00104 
00105 typedef struct EscapeEncodingData {
00106     int fallback;               /* Character (in this encoding) to substitute
00107                                  * when this encoding cannot represent a UTF-8
00108                                  * character. */
00109     unsigned int initLen;       /* Length of following string. */
00110     char init[16];              /* String to emit or expect before first char
00111                                  * in conversion. */
00112     unsigned int finalLen;      /* Length of following string. */
00113     char final[16];             /* String to emit or expect after last char in
00114                                  * conversion. */
00115     char prefixBytes[256];      /* If a byte in the input stream is the first
00116                                  * character of one of the escape sequences in
00117                                  * the following array, the corresponding
00118                                  * entry in this array is 1, otherwise it is
00119                                  * 0. */
00120     int numSubTables;           /* Length of following array. */
00121     EscapeSubTable subTables[1];/* Information about each EscapeSubTable used
00122                                  * by this encoding type. The actual size will
00123                                  * be as large as necessary to hold all
00124                                  * EscapeSubTables. */
00125 } EscapeEncodingData;
00126 
00127 /*
00128  * Constants used when loading an encoding file to identify the type of the
00129  * file.
00130  */
00131 
#define ENCODING_SINGLEBYTE 0   /* Encoding file type: single-byte. */
#define ENCODING_DOUBLEBYTE 1   /* Encoding file type: double-byte. */
#define ENCODING_MULTIBYTE  2   /* Encoding file type: multibyte. */
#define ENCODING_ESCAPE     3   /* Encoding file type: escape-driven. */
00136 
00137 /*
00138  * A list of directories in which Tcl should look for *.enc files. This list
00139  * is shared by all threads. Access is governed by a mutex lock.
00140  */
00141 
00142 static TclInitProcessGlobalValueProc InitializeEncodingSearchPath;
00143 static ProcessGlobalValue encodingSearchPath = {
00144     0, 0, NULL, NULL, InitializeEncodingSearchPath, NULL, NULL
00145 };
00146 
00147 /*
00148  * A map from encoding names to the directories in which their data files have
00149  * been seen. The string value of the map is shared by all threads. Access to
00150  * the shared string is governed by a mutex lock.
00151  */
00152 
00153 static ProcessGlobalValue encodingFileMap = {
00154     0, 0, NULL, NULL, NULL, NULL, NULL
00155 };
00156 
00157 /*
00158  * A list of directories making up the "library path". Historically this
00159  * search path has served many uses, but the only one remaining is a base for
00160  * the encodingSearchPath above. If the application does not explicitly set
00161  * the encodingSearchPath, then it will be initialized by appending /encoding
00162  * to each directory in this "libraryPath".
00163  */
00164 
00165 static ProcessGlobalValue libraryPath = {
00166     0, 0, NULL, NULL, TclpInitLibraryPath, NULL, NULL
00167 };
00168 
00169 static int encodingsInitialized = 0;
00170 
00171 /*
00172  * Hash table that keeps track of all loaded Encodings. Keys are the string
00173  * names that represent the encoding, values are (Encoding *).
00174  */
00175 
00176 static Tcl_HashTable encodingTable;
00177 TCL_DECLARE_MUTEX(encodingMutex)
00178 
00179 /*
00180  * The following are used to hold the default and current system encodings.
00181  * If NULL is passed to one of the conversion routines, the current setting of
00182  * the system encoding will be used to perform the conversion.
00183  */
00184 
00185 static Tcl_Encoding defaultEncoding;
00186 static Tcl_Encoding systemEncoding;
00187 
/*
 * Shared all-zero page. The sparse matrices of a TableEncoding point at it
 * for every page of the table that holds no entries.
 */

static unsigned short emptyPage[256];
00194 
00195 /*
00196  * Functions used only in this module.
00197  */
00198 
00199 static int              BinaryProc(ClientData clientData,
00200                             CONST char *src, int srcLen, int flags,
00201                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00202                             int *srcReadPtr, int *dstWrotePtr,
00203                             int *dstCharsPtr);
00204 static void             DupEncodingIntRep(Tcl_Obj *srcPtr, Tcl_Obj *dupPtr);
00205 static void             EscapeFreeProc(ClientData clientData);
00206 static int              EscapeFromUtfProc(ClientData clientData,
00207                             CONST char *src, int srcLen, int flags,
00208                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00209                             int *srcReadPtr, int *dstWrotePtr,
00210                             int *dstCharsPtr);
00211 static int              EscapeToUtfProc(ClientData clientData,
00212                             CONST char *src, int srcLen, int flags,
00213                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00214                             int *srcReadPtr, int *dstWrotePtr,
00215                             int *dstCharsPtr);
00216 static void             FillEncodingFileMap(void);
00217 static void             FreeEncoding(Tcl_Encoding encoding);
00218 static void             FreeEncodingIntRep(Tcl_Obj *objPtr);
00219 static Encoding *       GetTableEncoding(EscapeEncodingData *dataPtr,
00220                             int state);
00221 static Tcl_Encoding     LoadEncodingFile(Tcl_Interp *interp, CONST char *name);
00222 static Tcl_Encoding     LoadTableEncoding(CONST char *name, int type,
00223                             Tcl_Channel chan);
00224 static Tcl_Encoding     LoadEscapeEncoding(CONST char *name, Tcl_Channel chan);
00225 static Tcl_Channel      OpenEncodingFileChannel(Tcl_Interp *interp,
00226                             CONST char *name);
00227 static void             TableFreeProc(ClientData clientData);
00228 static int              TableFromUtfProc(ClientData clientData,
00229                             CONST char *src, int srcLen, int flags,
00230                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00231                             int *srcReadPtr, int *dstWrotePtr,
00232                             int *dstCharsPtr);
00233 static int              TableToUtfProc(ClientData clientData, CONST char *src,
00234                             int srcLen, int flags, Tcl_EncodingState *statePtr,
00235                             char *dst, int dstLen, int *srcReadPtr,
00236                             int *dstWrotePtr, int *dstCharsPtr);
00237 static size_t           unilen(CONST char *src);
00238 static int              UnicodeToUtfProc(ClientData clientData,
00239                             CONST char *src, int srcLen, int flags,
00240                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00241                             int *srcReadPtr, int *dstWrotePtr,
00242                             int *dstCharsPtr);
00243 static int              UtfToUnicodeProc(ClientData clientData,
00244                             CONST char *src, int srcLen, int flags,
00245                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00246                             int *srcReadPtr, int *dstWrotePtr,
00247                             int *dstCharsPtr);
00248 static int              UtfToUtfProc(ClientData clientData,
00249                             CONST char *src, int srcLen, int flags,
00250                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00251                             int *srcReadPtr, int *dstWrotePtr,
00252                             int *dstCharsPtr, int pureNullMode);
00253 static int              UtfIntToUtfExtProc(ClientData clientData,
00254                             CONST char *src, int srcLen, int flags,
00255                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00256                             int *srcReadPtr, int *dstWrotePtr,
00257                             int *dstCharsPtr);
00258 static int              UtfExtToUtfIntProc(ClientData clientData,
00259                             CONST char *src, int srcLen, int flags,
00260                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00261                             int *srcReadPtr, int *dstWrotePtr,
00262                             int *dstCharsPtr);
00263 static int              Iso88591FromUtfProc(ClientData clientData,
00264                             CONST char *src, int srcLen, int flags,
00265                             Tcl_EncodingState *statePtr, char *dst, int dstLen,
00266                             int *srcReadPtr, int *dstWrotePtr,
00267                             int *dstCharsPtr);
00268 static int              Iso88591ToUtfProc(ClientData clientData,
00269                             CONST char *src, int srcLen, int flags,
00270                             Tcl_EncodingState *statePtr, char *dst,
00271                             int dstLen, int *srcReadPtr, int *dstWrotePtr,
00272                             int *dstCharsPtr);
00273 
00274 /*
00275  * A Tcl_ObjType for holding a cached Tcl_Encoding in the otherValuePtr field
00276  * of the intrep. This should help the lifetime of encodings be more useful.
00277  * See concerns raised in [Bug 1077262].
00278  */
00279 
00280 static Tcl_ObjType encodingType = {
00281     "encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL
00282 };
00283 
00284 /*
00285  *----------------------------------------------------------------------
00286  *
00287  * Tcl_GetEncodingFromObj --
00288  *
00289  *      Writes to (*encodingPtr) the Tcl_Encoding value of (*objPtr), if
00290  *      possible, and returns TCL_OK. If no such encoding exists, TCL_ERROR is
00291  *      returned, and if interp is non-NULL, an error message is written
00292  *      there.
00293  *
00294  * Results:
00295  *      Standard Tcl return code.
00296  *
00297  * Side effects:
00298  *      Caches the Tcl_Encoding value as the internal rep of (*objPtr).
00299  *
00300  *----------------------------------------------------------------------
00301  */
00302 
00303 int
00304 Tcl_GetEncodingFromObj(
00305     Tcl_Interp *interp,
00306     Tcl_Obj *objPtr,
00307     Tcl_Encoding *encodingPtr)
00308 {
00309     CONST char *name = Tcl_GetString(objPtr);
00310     if (objPtr->typePtr != &encodingType) {
00311         Tcl_Encoding encoding = Tcl_GetEncoding(interp, name);
00312 
00313         if (encoding == NULL) {
00314             return TCL_ERROR;
00315         }
00316         TclFreeIntRep(objPtr);
00317         objPtr->internalRep.otherValuePtr = (VOID *) encoding;
00318         objPtr->typePtr = &encodingType;
00319     }
00320     *encodingPtr = Tcl_GetEncoding(NULL, name);
00321     return TCL_OK;
00322 }
00323 
00324 /*
00325  *----------------------------------------------------------------------
00326  *
00327  * FreeEncodingIntRep --
00328  *
00329  *      The Tcl_FreeInternalRepProc for the "encoding" Tcl_ObjType.
00330  *
00331  *----------------------------------------------------------------------
00332  */
00333 
00334 static void
00335 FreeEncodingIntRep(
00336     Tcl_Obj *objPtr)
00337 {
00338     Tcl_FreeEncoding((Tcl_Encoding) objPtr->internalRep.otherValuePtr);
00339 }
00340 
00341 /*
00342  *----------------------------------------------------------------------
00343  *
00344  * DupEncodingIntRep --
00345  *
00346  *      The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType.
00347  *
00348  *----------------------------------------------------------------------
00349  */
00350 
00351 static void
00352 DupEncodingIntRep(
00353     Tcl_Obj *srcPtr,
00354     Tcl_Obj *dupPtr)
00355 {
00356     dupPtr->internalRep.otherValuePtr = (VOID *)
00357             Tcl_GetEncoding(NULL, srcPtr->bytes);
00358 }
00359 
00360 /*
00361  *----------------------------------------------------------------------
00362  *
00363  * Tcl_GetEncodingSearchPath --
00364  *
00365  *      Keeps the per-thread copy of the encoding search path current with
00366  *      changes to the global copy.
00367  *
00368  * Results:
00369  *      Returns a "list" (Tcl_Obj *) that contains the encoding search path.
00370  *
00371  *----------------------------------------------------------------------
00372  */
00373 
00374 Tcl_Obj *
00375 Tcl_GetEncodingSearchPath(void)
00376 {
00377     return TclGetProcessGlobalValue(&encodingSearchPath);
00378 }
00379 
00380 /*
00381  *----------------------------------------------------------------------
00382  *
00383  * Tcl_SetEncodingSearchPath --
00384  *
00385  *      Keeps the per-thread copy of the encoding search path current with
00386  *      changes to the global copy.
00387  *
00388  *----------------------------------------------------------------------
00389  */
00390 
00391 int
00392 Tcl_SetEncodingSearchPath(
00393     Tcl_Obj *searchPath)
00394 {
00395     int dummy;
00396 
00397     if (TCL_ERROR == Tcl_ListObjLength(NULL, searchPath, &dummy)) {
00398         return TCL_ERROR;
00399     }
00400     TclSetProcessGlobalValue(&encodingSearchPath, searchPath, NULL);
00401     return TCL_OK;
00402 }
00403 
00404 /*
00405  *----------------------------------------------------------------------
00406  *
00407  * TclGetLibraryPath --
00408  *
00409  *      Keeps the per-thread copy of the library path current with changes to
00410  *      the global copy.
00411  *
00412  * Results:
00413  *      Returns a "list" (Tcl_Obj *) that contains the library path.
00414  *
00415  *----------------------------------------------------------------------
00416  */
00417 
00418 Tcl_Obj *
00419 TclGetLibraryPath(void)
00420 {
00421     return TclGetProcessGlobalValue(&libraryPath);
00422 }
00423 
00424 /*
00425  *----------------------------------------------------------------------
00426  *
00427  * TclSetLibraryPath --
00428  *
00429  *      Keeps the per-thread copy of the library path current with changes to
00430  *      the global copy.
00431  *
00432  *      NOTE: this routine returns void, so there's no way to report the error
00433  *      that searchPath is not a valid list. In that case, this routine will
00434  *      silently do nothing.
00435  *
00436  *----------------------------------------------------------------------
00437  */
00438 
00439 void
00440 TclSetLibraryPath(
00441     Tcl_Obj *path)
00442 {
00443     int dummy;
00444 
00445     if (TCL_ERROR == Tcl_ListObjLength(NULL, path, &dummy)) {
00446         return;
00447     }
00448     TclSetProcessGlobalValue(&libraryPath, path, NULL);
00449 }
00450 
00451 /*
00452  *---------------------------------------------------------------------------
00453  *
00454  * FillEncodingFileMap --
00455  *
00456  *      Called to bring the encoding file map in sync with the current value
00457  *      of the encoding search path.
00458  *
00459  *      Scan the directories on the encoding search path, find the *.enc
00460  *      files, and store the found pathnames in a map associated with the
00461  *      encoding name.
00462  *
00463  *      In particular, if $dir is on the encoding search path, and the file
00464  *      $dir/foo.enc is found, then store a "foo" -> $dir entry in the map.
00465  *      Later, any need for the "foo" encoding will quickly be able to
00466  *      construct the $dir/foo.enc pathname for reading the encoding data.
00467  *
00468  * Results:
00469  *      None.
00470  *
00471  * Side effects:
00472  *      Entries are added to the encoding file map.
00473  *
00474  *---------------------------------------------------------------------------
00475  */
00476 
00477 static void
00478 FillEncodingFileMap(void)
00479 {
00480     int i, numDirs = 0;
00481     Tcl_Obj *map, *searchPath;
00482 
00483     searchPath = Tcl_GetEncodingSearchPath();
00484     Tcl_IncrRefCount(searchPath);
00485     Tcl_ListObjLength(NULL, searchPath, &numDirs);
00486     map = Tcl_NewDictObj();
00487     Tcl_IncrRefCount(map);
00488 
00489     for (i = numDirs-1; i >= 0; i--) {
00490         /*
00491          * Iterate backwards through the search path so as we overwrite
00492          * entries found, we favor files earlier on the search path.
00493          */
00494 
00495         int j, numFiles;
00496         Tcl_Obj *directory, *matchFileList = Tcl_NewObj();
00497         Tcl_Obj **filev;
00498         Tcl_GlobTypeData readableFiles = {
00499             TCL_GLOB_TYPE_FILE, TCL_GLOB_PERM_R, NULL, NULL
00500         };
00501 
00502         Tcl_ListObjIndex(NULL, searchPath, i, &directory);
00503         Tcl_IncrRefCount(directory);
00504         Tcl_IncrRefCount(matchFileList);
00505         Tcl_FSMatchInDirectory(NULL, matchFileList, directory, "*.enc",
00506                 &readableFiles);
00507 
00508         Tcl_ListObjGetElements(NULL, matchFileList, &numFiles, &filev);
00509         for (j=0; j<numFiles; j++) {
00510             Tcl_Obj *encodingName, *file;
00511 
00512             file = TclPathPart(NULL, filev[j], TCL_PATH_TAIL);
00513             encodingName = TclPathPart(NULL, file, TCL_PATH_ROOT);
00514             Tcl_DictObjPut(NULL, map, encodingName, directory);
00515             Tcl_DecrRefCount(file);
00516             Tcl_DecrRefCount(encodingName);
00517         }
00518         Tcl_DecrRefCount(matchFileList);
00519         Tcl_DecrRefCount(directory);
00520     }
00521     Tcl_DecrRefCount(searchPath);
00522     TclSetProcessGlobalValue(&encodingFileMap, map, NULL);
00523     Tcl_DecrRefCount(map);
00524 }
00525 
00526 /*
00527  *---------------------------------------------------------------------------
00528  *
00529  * TclInitEncodingSubsystem --
00530  *
00531  *      Initialize all resources used by this subsystem on a per-process
00532  *      basis.
00533  *
00534  * Results:
00535  *      None.
00536  *
00537  * Side effects:
00538  *      Depends on the memory, object, and IO subsystems.
00539  *
00540  *---------------------------------------------------------------------------
00541  */
00542 
00543 void
00544 TclInitEncodingSubsystem(void)
00545 {
00546     Tcl_EncodingType type;
00547 
00548     if (encodingsInitialized) {
00549         return;
00550     }
00551 
00552     Tcl_MutexLock(&encodingMutex);
00553     Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
00554     Tcl_MutexUnlock(&encodingMutex);
00555 
00556     /*
00557      * Create a few initial encodings. Note that the UTF-8 to UTF-8
00558      * translation is not a no-op, because it will turn a stream of improperly
00559      * formed UTF-8 into a properly formed stream.
00560      */
00561 
00562     type.encodingName   = "identity";
00563     type.toUtfProc      = BinaryProc;
00564     type.fromUtfProc    = BinaryProc;
00565     type.freeProc       = NULL;
00566     type.nullSize       = 1;
00567     type.clientData     = NULL;
00568 
00569     defaultEncoding     = Tcl_CreateEncoding(&type);
00570     systemEncoding      = Tcl_GetEncoding(NULL, type.encodingName);
00571 
00572     type.encodingName   = "utf-8";
00573     type.toUtfProc      = UtfExtToUtfIntProc;
00574     type.fromUtfProc    = UtfIntToUtfExtProc;
00575     type.freeProc       = NULL;
00576     type.nullSize       = 1;
00577     type.clientData     = NULL;
00578     Tcl_CreateEncoding(&type);
00579 
00580     type.encodingName   = "unicode";
00581     type.toUtfProc      = UnicodeToUtfProc;
00582     type.fromUtfProc    = UtfToUnicodeProc;
00583     type.freeProc       = NULL;
00584     type.nullSize       = 2;
00585     type.clientData     = NULL;
00586     Tcl_CreateEncoding(&type);
00587 
00588     /*
00589      * Need the iso8859-1 encoding in order to process binary data, so force
00590      * it to always be embedded. Note that this encoding *must* be a proper
00591      * table encoding or some of the escape encodings crash! Hence the ugly
00592      * code to duplicate the structure of a table encoding here.
00593      */
00594 
00595     {
00596         TableEncodingData *dataPtr = (TableEncodingData *)
00597                 ckalloc(sizeof(TableEncodingData));
00598         unsigned size;
00599         unsigned short i;
00600 
00601         memset(dataPtr, 0, sizeof(TableEncodingData));
00602         dataPtr->fallback = '?';
00603 
00604         size = 256*(sizeof(unsigned short *) + sizeof(unsigned short));
00605         dataPtr->toUnicode = (unsigned short **) ckalloc(size);
00606         memset(dataPtr->toUnicode, 0, size);
00607         dataPtr->fromUnicode = (unsigned short **) ckalloc(size);
00608         memset(dataPtr->fromUnicode, 0, size);
00609 
00610         dataPtr->toUnicode[0] = (unsigned short *) (dataPtr->toUnicode + 256);
00611         dataPtr->fromUnicode[0] = (unsigned short *)
00612                 (dataPtr->fromUnicode + 256);
00613         for (i=1 ; i<256 ; i++) {
00614             dataPtr->toUnicode[i] = emptyPage;
00615             dataPtr->fromUnicode[i] = emptyPage;
00616         }
00617 
00618         for (i=0 ; i<256 ; i++) {
00619             dataPtr->toUnicode[0][i] = i;
00620             dataPtr->fromUnicode[0][i] = i;
00621         }
00622 
00623         type.encodingName       = "iso8859-1";
00624         type.toUtfProc          = Iso88591ToUtfProc;
00625         type.fromUtfProc        = Iso88591FromUtfProc;
00626         type.freeProc           = TableFreeProc;
00627         type.nullSize           = 1;
00628         type.clientData         = dataPtr;
00629         Tcl_CreateEncoding(&type);
00630     }
00631 
00632     encodingsInitialized = 1;
00633 }
00634 
00635 /*
00636  *----------------------------------------------------------------------
00637  *
00638  * TclFinalizeEncodingSubsystem --
00639  *
00640  *      Release the state associated with the encoding subsystem.
00641  *
00642  * Results:
00643  *      None.
00644  *
00645  * Side effects:
00646  *      Frees all of the encodings.
00647  *
00648  *----------------------------------------------------------------------
00649  */
00650 
00651 void
00652 TclFinalizeEncodingSubsystem(void)
00653 {
00654     Tcl_HashSearch search;
00655     Tcl_HashEntry *hPtr;
00656 
00657     Tcl_MutexLock(&encodingMutex);
00658     encodingsInitialized = 0;
00659     FreeEncoding(systemEncoding);
00660 
00661     hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
00662     while (hPtr != NULL) {
00663         /*
00664          * Call FreeEncoding instead of doing it directly to handle refcounts
00665          * like escape encodings use. [Bug 524674] Make sure to call
00666          * Tcl_FirstHashEntry repeatedly so that all encodings are eventually
00667          * cleaned up.
00668          */
00669 
00670         FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr));
00671         hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
00672     }
00673 
00674     Tcl_DeleteHashTable(&encodingTable);
00675     Tcl_MutexUnlock(&encodingMutex);
00676 }
00677 
00678 /*
00679  *-------------------------------------------------------------------------
00680  *
00681  * Tcl_GetDefaultEncodingDir --
00682  *
00683  *      Legacy public interface to retrieve first directory in the encoding
00684  *      searchPath.
00685  *
00686  * Results:
00687  *      The directory pathname, as a string, or NULL for an empty encoding
00688  *      search path.
00689  *
00690  * Side effects:
00691  *      None.
00692  *
00693  *-------------------------------------------------------------------------
00694  */
00695 
00696 CONST char *
00697 Tcl_GetDefaultEncodingDir(void)
00698 {
00699     int numDirs;
00700     Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath();
00701 
00702     Tcl_ListObjLength(NULL, searchPath, &numDirs);
00703     if (numDirs == 0) {
00704         return NULL;
00705     }
00706     Tcl_ListObjIndex(NULL, searchPath, 0, &first);
00707 
00708     return Tcl_GetString(first);
00709 }
00710 
00711 /*
00712  *-------------------------------------------------------------------------
00713  *
00714  * Tcl_SetDefaultEncodingDir --
00715  *
00716  *      Legacy public interface to set the first directory in the encoding
00717  *      search path.
00718  *
00719  * Results:
00720  *      None.
00721  *
00722  * Side effects:
00723  *      Modifies the encoding search path.
00724  *
00725  *-------------------------------------------------------------------------
00726  */
00727 
00728 void
00729 Tcl_SetDefaultEncodingDir(
00730     CONST char *path)
00731 {
00732     Tcl_Obj *searchPath = Tcl_GetEncodingSearchPath();
00733     Tcl_Obj *directory = Tcl_NewStringObj(path, -1);
00734 
00735     searchPath = Tcl_DuplicateObj(searchPath);
00736     Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory);
00737     Tcl_SetEncodingSearchPath(searchPath);
00738 }
00739 
00740 /*
00741  *-------------------------------------------------------------------------
00742  *
00743  * Tcl_GetEncoding --
00744  *
00745  *      Given the name of an encoding, find the corresponding Tcl_Encoding
00746  *      token. If the encoding did not already exist, Tcl attempts to
00747  *      dynamically load an encoding by that name.
00748  *
00749  * Results:
00750  *      Returns a token that represents the encoding. If the name didn't refer
00751  *      to any known or loadable encoding, NULL is returned. If NULL was
00752  *      returned, an error message is left in interp's result object, unless
00753  *      interp was NULL.
00754  *
00755  * Side effects:
00756  *      The new encoding type is entered into a table visible to all
00757  *      interpreters, keyed off the encoding's name. For each call to this
00758  *      function, there should eventually be a call to Tcl_FreeEncoding, so
00759  *      that the database can be cleaned up when encodings aren't needed
00760  *      anymore.
00761  *
00762  *-------------------------------------------------------------------------
00763  */
00764 
00765 Tcl_Encoding
00766 Tcl_GetEncoding(
00767     Tcl_Interp *interp,         /* Interp for error reporting, if not NULL. */
00768     CONST char *name)           /* The name of the desired encoding. */
00769 {
00770     Tcl_HashEntry *hPtr;
00771     Encoding *encodingPtr;
00772 
00773     Tcl_MutexLock(&encodingMutex);
00774     if (name == NULL) {
00775         encodingPtr = (Encoding *) systemEncoding;
00776         encodingPtr->refCount++;
00777         Tcl_MutexUnlock(&encodingMutex);
00778         return systemEncoding;
00779     }
00780 
00781     hPtr = Tcl_FindHashEntry(&encodingTable, name);
00782     if (hPtr != NULL) {
00783         encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
00784         encodingPtr->refCount++;
00785         Tcl_MutexUnlock(&encodingMutex);
00786         return (Tcl_Encoding) encodingPtr;
00787     }
00788     Tcl_MutexUnlock(&encodingMutex);
00789 
00790     return LoadEncodingFile(interp, name);
00791 }
00792 
00793 /*
00794  *---------------------------------------------------------------------------
00795  *
00796  * Tcl_FreeEncoding --
00797  *
00798  *      This function is called to release an encoding allocated by
00799  *      Tcl_CreateEncoding() or Tcl_GetEncoding().
00800  *
00801  * Results:
00802  *      None.
00803  *
00804  * Side effects:
00805  *      The reference count associated with the encoding is decremented and
00806  *      the encoding may be deleted if nothing is using it anymore.
00807  *
00808  *---------------------------------------------------------------------------
00809  */
00810 
00811 void
00812 Tcl_FreeEncoding(
00813     Tcl_Encoding encoding)
00814 {
00815     Tcl_MutexLock(&encodingMutex);
00816     FreeEncoding(encoding);
00817     Tcl_MutexUnlock(&encodingMutex);
00818 }
00819 
00820 /*
00821  *----------------------------------------------------------------------
00822  *
00823  * FreeEncoding --
00824  *
00825  *      This function is called to release an encoding by functions that
00826  *      already have the encodingMutex.
00827  *
00828  * Results:
00829  *      None.
00830  *
00831  * Side effects:
00832  *      The reference count associated with the encoding is decremented and
00833  *      the encoding may be deleted if nothing is using it anymore.
00834  *
00835  *----------------------------------------------------------------------
00836  */
00837 
00838 static void
00839 FreeEncoding(
00840     Tcl_Encoding encoding)
00841 {
00842     Encoding *encodingPtr;
00843 
00844     encodingPtr = (Encoding *) encoding;
00845     if (encodingPtr == NULL) {
00846         return;
00847     }
00848     encodingPtr->refCount--;
00849     if (encodingPtr->refCount == 0) {
00850         if (encodingPtr->freeProc != NULL) {
00851             (*encodingPtr->freeProc)(encodingPtr->clientData);
00852         }
00853         if (encodingPtr->hPtr != NULL) {
00854             Tcl_DeleteHashEntry(encodingPtr->hPtr);
00855         }
00856         ckfree((char *) encodingPtr->name);
00857         ckfree((char *) encodingPtr);
00858     }
00859 }
00860 
00861 /*
00862  *-------------------------------------------------------------------------
00863  *
00864  * Tcl_GetEncodingName --
00865  *
00866  *      Given an encoding, return the name that was used to constuct the
00867  *      encoding.
00868  *
00869  * Results:
00870  *      The name of the encoding.
00871  *
00872  * Side effects:
00873  *      None.
00874  *
00875  *---------------------------------------------------------------------------
00876  */
00877 
00878 CONST char *
00879 Tcl_GetEncodingName(
00880     Tcl_Encoding encoding)      /* The encoding whose name to fetch. */
00881 {
00882     if (encoding == NULL) {
00883         encoding = systemEncoding;
00884     }
00885 
00886     return ((Encoding *) encoding)->name;
00887 }
00888 
00889 /*
00890  *-------------------------------------------------------------------------
00891  *
00892  * Tcl_GetEncodingNames --
00893  *
00894  *      Get the list of all known encodings, including the ones stored as
00895  *      files on disk in the encoding path.
00896  *
00897  * Results:
00898  *      Modifies interp's result object to hold a list of all the available
00899  *      encodings.
00900  *
00901  * Side effects:
00902  *      None.
00903  *
00904  *-------------------------------------------------------------------------
00905  */
00906 
00907 void
00908 Tcl_GetEncodingNames(
00909     Tcl_Interp *interp)         /* Interp to hold result. */
00910 {
00911     Tcl_HashTable table;
00912     Tcl_HashSearch search;
00913     Tcl_HashEntry *hPtr;
00914     Tcl_Obj *map, *name, *result = Tcl_NewObj();
00915     Tcl_DictSearch mapSearch;
00916     int dummy, done = 0;
00917 
00918     Tcl_InitObjHashTable(&table);
00919 
00920     /*
00921      * Copy encoding names from loaded encoding table to table.
00922      */
00923 
00924     Tcl_MutexLock(&encodingMutex);
00925     for (hPtr = Tcl_FirstHashEntry(&encodingTable, &search); hPtr != NULL;
00926             hPtr = Tcl_NextHashEntry(&search)) {
00927         Encoding *encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
00928         Tcl_CreateHashEntry(&table,
00929                 (char *) Tcl_NewStringObj(encodingPtr->name, -1), &dummy);
00930     }
00931     Tcl_MutexUnlock(&encodingMutex);
00932 
00933     FillEncodingFileMap();
00934     map = TclGetProcessGlobalValue(&encodingFileMap);
00935 
00936     /*
00937      * Copy encoding names from encoding file map to table.
00938      */
00939 
00940     Tcl_DictObjFirst(NULL, map, &mapSearch, &name, NULL, &done);
00941     for (; !done; Tcl_DictObjNext(&mapSearch, &name, NULL, &done)) {
00942         Tcl_CreateHashEntry(&table, (char *) name, &dummy);
00943     }
00944 
00945     /*
00946      * Pull all encoding names from table into the result list.
00947      */
00948 
00949     for (hPtr = Tcl_FirstHashEntry(&table, &search); hPtr != NULL;
00950             hPtr = Tcl_NextHashEntry(&search)) {
00951         Tcl_ListObjAppendElement(NULL, result,
00952                 (Tcl_Obj *) Tcl_GetHashKey(&table, hPtr));
00953     }
00954     Tcl_SetObjResult(interp, result);
00955     Tcl_DeleteHashTable(&table);
00956 }
00957 
00958 /*
00959  *------------------------------------------------------------------------
00960  *
00961  * Tcl_SetSystemEncoding --
00962  *
00963  *      Sets the default encoding that should be used whenever the user passes
00964  *      a NULL value in to one of the conversion routines. If the supplied
00965  *      name is NULL, the system encoding is reset to the default system
00966  *      encoding.
00967  *
00968  * Results:
00969  *      The return value is TCL_OK if the system encoding was successfully set
00970  *      to the encoding specified by name, TCL_ERROR otherwise. If TCL_ERROR
00971  *      is returned, an error message is left in interp's result object,
00972  *      unless interp was NULL.
00973  *
00974  * Side effects:
00975  *      The reference count of the new system encoding is incremented. The
00976  *      reference count of the old system encoding is decremented and it may
00977  *      be freed.
00978  *
00979  *------------------------------------------------------------------------
00980  */
00981 
00982 int
00983 Tcl_SetSystemEncoding(
00984     Tcl_Interp *interp,         /* Interp for error reporting, if not NULL. */
00985     CONST char *name)           /* The name of the desired encoding, or NULL
00986                                  * to reset to default encoding. */
00987 {
00988     Tcl_Encoding encoding;
00989     Encoding *encodingPtr;
00990 
00991     if (name == NULL) {
00992         Tcl_MutexLock(&encodingMutex);
00993         encoding = defaultEncoding;
00994         encodingPtr = (Encoding *) encoding;
00995         encodingPtr->refCount++;
00996         Tcl_MutexUnlock(&encodingMutex);
00997     } else {
00998         encoding = Tcl_GetEncoding(interp, name);
00999         if (encoding == NULL) {
01000             return TCL_ERROR;
01001         }
01002     }
01003 
01004     Tcl_MutexLock(&encodingMutex);
01005     FreeEncoding(systemEncoding);
01006     systemEncoding = encoding;
01007     Tcl_MutexUnlock(&encodingMutex);
01008 
01009     return TCL_OK;
01010 }
01011 
01012 /*
01013  *---------------------------------------------------------------------------
01014  *
01015  * Tcl_CreateEncoding --
01016  *
01017  *      This function is called to define a new encoding and the functions
01018  *      that are used to convert between the specified encoding and Unicode.
01019  *
01020  * Results:
01021  *      Returns a token that represents the encoding. If an encoding with the
01022  *      same name already existed, the old encoding token remains valid and
01023  *      continues to behave as it used to, and will eventually be garbage
01024  *      collected when the last reference to it goes away. Any subsequent
01025  *      calls to Tcl_GetEncoding with the specified name will retrieve the
01026  *      most recent encoding token.
01027  *
01028  * Side effects:
01029  *      The new encoding type is entered into a table visible to all
01030  *      interpreters, keyed off the encoding's name. For each call to this
01031  *      function, there should eventually be a call to Tcl_FreeEncoding, so
01032  *      that the database can be cleaned up when encodings aren't needed
01033  *      anymore.
01034  *
01035  *---------------------------------------------------------------------------
01036  */
01037 
01038 Tcl_Encoding
01039 Tcl_CreateEncoding(
01040     const Tcl_EncodingType *typePtr)
01041                                 /* The encoding type. */
01042 {
01043     Tcl_HashEntry *hPtr;
01044     int isNew;
01045     Encoding *encodingPtr;
01046     char *name;
01047 
01048     Tcl_MutexLock(&encodingMutex);
01049     hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &isNew);
01050     if (isNew == 0) {
01051         /*
01052          * Remove old encoding from hash table, but don't delete it until last
01053          * reference goes away.
01054          */
01055 
01056         encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
01057         encodingPtr->hPtr = NULL;
01058     }
01059 
01060     name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1);
01061 
01062     encodingPtr = (Encoding *) ckalloc(sizeof(Encoding));
01063     encodingPtr->name           = strcpy(name, typePtr->encodingName);
01064     encodingPtr->toUtfProc      = typePtr->toUtfProc;
01065     encodingPtr->fromUtfProc    = typePtr->fromUtfProc;
01066     encodingPtr->freeProc       = typePtr->freeProc;
01067     encodingPtr->nullSize       = typePtr->nullSize;
01068     encodingPtr->clientData     = typePtr->clientData;
01069     if (typePtr->nullSize == 1) {
01070         encodingPtr->lengthProc = (LengthProc *) strlen;
01071     } else {
01072         encodingPtr->lengthProc = (LengthProc *) unilen;
01073     }
01074     encodingPtr->refCount       = 1;
01075     encodingPtr->hPtr           = hPtr;
01076     Tcl_SetHashValue(hPtr, encodingPtr);
01077 
01078     Tcl_MutexUnlock(&encodingMutex);
01079 
01080     return (Tcl_Encoding) encodingPtr;
01081 }
01082 
01083 /*
01084  *-------------------------------------------------------------------------
01085  *
01086  * Tcl_ExternalToUtfDString --
01087  *
01088  *      Convert a source buffer from the specified encoding into UTF-8. If any
01089  *      of the bytes in the source buffer are invalid or cannot be represented
01090  *      in the target encoding, a default fallback character will be
01091  *      substituted.
01092  *
01093  * Results:
01094  *      The converted bytes are stored in the DString, which is then NULL
01095  *      terminated. The return value is a pointer to the value stored in the
01096  *      DString.
01097  *
01098  * Side effects:
01099  *      None.
01100  *
01101  *-------------------------------------------------------------------------
01102  */
01103 
01104 char *
01105 Tcl_ExternalToUtfDString(
01106     Tcl_Encoding encoding,      /* The encoding for the source string, or NULL
01107                                  * for the default system encoding. */
01108     CONST char *src,            /* Source string in specified encoding. */
01109     int srcLen,                 /* Source string length in bytes, or < 0 for
01110                                  * encoding-specific string length. */
01111     Tcl_DString *dstPtr)        /* Uninitialized or free DString in which the
01112                                  * converted string is stored. */
01113 {
01114     char *dst;
01115     Tcl_EncodingState state;
01116     Encoding *encodingPtr;
01117     int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
01118 
01119     Tcl_DStringInit(dstPtr);
01120     dst = Tcl_DStringValue(dstPtr);
01121     dstLen = dstPtr->spaceAvl - 1;
01122 
01123     if (encoding == NULL) {
01124         encoding = systemEncoding;
01125     }
01126     encodingPtr = (Encoding *) encoding;
01127 
01128     if (src == NULL) {
01129         srcLen = 0;
01130     } else if (srcLen < 0) {
01131         srcLen = (*encodingPtr->lengthProc)(src);
01132     }
01133 
01134     flags = TCL_ENCODING_START | TCL_ENCODING_END;
01135 
01136     while (1) {
01137         result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src,
01138                 srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
01139                 &dstChars);
01140         soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
01141 
01142         if (result != TCL_CONVERT_NOSPACE) {
01143             Tcl_DStringSetLength(dstPtr, soFar);
01144             return Tcl_DStringValue(dstPtr);
01145         }
01146 
01147         flags &= ~TCL_ENCODING_START;
01148         src += srcRead;
01149         srcLen -= srcRead;
01150         if (Tcl_DStringLength(dstPtr) == 0) {
01151             Tcl_DStringSetLength(dstPtr, dstLen);
01152         }
01153         Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
01154         dst = Tcl_DStringValue(dstPtr) + soFar;
01155         dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
01156     }
01157 }
01158 
01159 /*
01160  *-------------------------------------------------------------------------
01161  *
01162  * Tcl_ExternalToUtf --
01163  *
01164  *      Convert a source buffer from the specified encoding into UTF-8.
01165  *
01166  * Results:
01167  *      The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
01168  *      TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, as
01169  *      documented in tcl.h.
01170  *
01171  * Side effects:
01172  *      The converted bytes are stored in the output buffer.
01173  *
01174  *-------------------------------------------------------------------------
01175  */
01176 
01177 int
01178 Tcl_ExternalToUtf(
01179     Tcl_Interp *interp,         /* Interp for error return, if not NULL. */
01180     Tcl_Encoding encoding,      /* The encoding for the source string, or NULL
01181                                  * for the default system encoding. */
01182     CONST char *src,            /* Source string in specified encoding. */
01183     int srcLen,                 /* Source string length in bytes, or < 0 for
01184                                  * encoding-specific string length. */
01185     int flags,                  /* Conversion control flags. */
01186     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
01187                                  * information used during a piecewise
01188                                  * conversion. Contents of statePtr are
01189                                  * initialized and/or reset by conversion
01190                                  * routine under control of flags argument. */
01191     char *dst,                  /* Output buffer in which converted string is
01192                                  * stored. */
01193     int dstLen,                 /* The maximum length of output buffer in
01194                                  * bytes. */
01195     int *srcReadPtr,            /* Filled with the number of bytes from the
01196                                  * source string that were converted. This may
01197                                  * be less than the original source length if
01198                                  * there was a problem converting some source
01199                                  * characters. */
01200     int *dstWrotePtr,           /* Filled with the number of bytes that were
01201                                  * stored in the output buffer as a result of
01202                                  * the conversion. */
01203     int *dstCharsPtr)           /* Filled with the number of characters that
01204                                  * correspond to the bytes stored in the
01205                                  * output buffer. */
01206 {
01207     Encoding *encodingPtr;
01208     int result, srcRead, dstWrote, dstChars;
01209     Tcl_EncodingState state;
01210 
01211     if (encoding == NULL) {
01212         encoding = systemEncoding;
01213     }
01214     encodingPtr = (Encoding *) encoding;
01215 
01216     if (src == NULL) {
01217         srcLen = 0;
01218     } else if (srcLen < 0) {
01219         srcLen = (*encodingPtr->lengthProc)(src);
01220     }
01221     if (statePtr == NULL) {
01222         flags |= TCL_ENCODING_START | TCL_ENCODING_END;
01223         statePtr = &state;
01224     }
01225     if (srcReadPtr == NULL) {
01226         srcReadPtr = &srcRead;
01227     }
01228     if (dstWrotePtr == NULL) {
01229         dstWrotePtr = &dstWrote;
01230     }
01231     if (dstCharsPtr == NULL) {
01232         dstCharsPtr = &dstChars;
01233     }
01234 
01235     /*
01236      * If there are any null characters in the middle of the buffer, they will
01237      * converted to the UTF-8 null character (\xC080). To get the actual \0 at
01238      * the end of the destination buffer, we need to append it manually.
01239      */
01240 
01241     dstLen--;
01242     result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, srcLen,
01243             flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
01244             dstCharsPtr);
01245     dst[*dstWrotePtr] = '\0';
01246 
01247     return result;
01248 }
01249 
01250 /*
01251  *-------------------------------------------------------------------------
01252  *
01253  * Tcl_UtfToExternalDString --
01254  *
01255  *      Convert a source buffer from UTF-8 into the specified encoding. If any
01256  *      of the bytes in the source buffer are invalid or cannot be represented
01257  *      in the target encoding, a default fallback character will be
01258  *      substituted.
01259  *
01260  * Results:
01261  *      The converted bytes are stored in the DString, which is then NULL
01262  *      terminated in an encoding-specific manner. The return value is a
01263  *      pointer to the value stored in the DString.
01264  *
01265  * Side effects:
01266  *      None.
01267  *
01268  *-------------------------------------------------------------------------
01269  */
01270 
01271 char *
01272 Tcl_UtfToExternalDString(
01273     Tcl_Encoding encoding,      /* The encoding for the converted string, or
01274                                  * NULL for the default system encoding. */
01275     CONST char *src,            /* Source string in UTF-8. */
01276     int srcLen,                 /* Source string length in bytes, or < 0 for
01277                                  * strlen(). */
01278     Tcl_DString *dstPtr)        /* Uninitialized or free DString in which the
01279                                  * converted string is stored. */
01280 {
01281     char *dst;
01282     Tcl_EncodingState state;
01283     Encoding *encodingPtr;
01284     int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
01285 
01286     Tcl_DStringInit(dstPtr);
01287     dst = Tcl_DStringValue(dstPtr);
01288     dstLen = dstPtr->spaceAvl - 1;
01289 
01290     if (encoding == NULL) {
01291         encoding = systemEncoding;
01292     }
01293     encodingPtr = (Encoding *) encoding;
01294 
01295     if (src == NULL) {
01296         srcLen = 0;
01297     } else if (srcLen < 0) {
01298         srcLen = strlen(src);
01299     }
01300     flags = TCL_ENCODING_START | TCL_ENCODING_END;
01301     while (1) {
01302         result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src,
01303                 srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
01304                 &dstChars);
01305         soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
01306 
01307         if (result != TCL_CONVERT_NOSPACE) {
01308             if (encodingPtr->nullSize == 2) {
01309                 Tcl_DStringSetLength(dstPtr, soFar + 1);
01310             }
01311             Tcl_DStringSetLength(dstPtr, soFar);
01312             return Tcl_DStringValue(dstPtr);
01313         }
01314 
01315         flags &= ~TCL_ENCODING_START;
01316         src += srcRead;
01317         srcLen -= srcRead;
01318         if (Tcl_DStringLength(dstPtr) == 0) {
01319             Tcl_DStringSetLength(dstPtr, dstLen);
01320         }
01321         Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
01322         dst = Tcl_DStringValue(dstPtr) + soFar;
01323         dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
01324     }
01325 }
01326 
01327 /*
01328  *-------------------------------------------------------------------------
01329  *
01330  * Tcl_UtfToExternal --
01331  *
01332  *      Convert a buffer from UTF-8 into the specified encoding.
01333  *
01334  * Results:
01335  *      The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
01336  *      TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, as
01337  *      documented in tcl.h.
01338  *
01339  * Side effects:
01340  *      The converted bytes are stored in the output buffer.
01341  *
01342  *-------------------------------------------------------------------------
01343  */
01344 
01345 int
01346 Tcl_UtfToExternal(
01347     Tcl_Interp *interp,         /* Interp for error return, if not NULL. */
01348     Tcl_Encoding encoding,      /* The encoding for the converted string, or
01349                                  * NULL for the default system encoding. */
01350     CONST char *src,            /* Source string in UTF-8. */
01351     int srcLen,                 /* Source string length in bytes, or < 0 for
01352                                  * strlen(). */
01353     int flags,                  /* Conversion control flags. */
01354     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
01355                                  * information used during a piecewise
01356                                  * conversion. Contents of statePtr are
01357                                  * initialized and/or reset by conversion
01358                                  * routine under control of flags argument. */
01359     char *dst,                  /* Output buffer in which converted string
01360                                  * is stored. */
01361     int dstLen,                 /* The maximum length of output buffer in
01362                                  * bytes. */
01363     int *srcReadPtr,            /* Filled with the number of bytes from the
01364                                  * source string that were converted. This may
01365                                  * be less than the original source length if
01366                                  * there was a problem converting some source
01367                                  * characters. */
01368     int *dstWrotePtr,           /* Filled with the number of bytes that were
01369                                  * stored in the output buffer as a result of
01370                                  * the conversion. */
01371     int *dstCharsPtr)           /* Filled with the number of characters that
01372                                  * correspond to the bytes stored in the
01373                                  * output buffer. */
01374 {
01375     Encoding *encodingPtr;
01376     int result, srcRead, dstWrote, dstChars;
01377     Tcl_EncodingState state;
01378 
01379     if (encoding == NULL) {
01380         encoding = systemEncoding;
01381     }
01382     encodingPtr = (Encoding *) encoding;
01383 
01384     if (src == NULL) {
01385         srcLen = 0;
01386     } else if (srcLen < 0) {
01387         srcLen = strlen(src);
01388     }
01389     if (statePtr == NULL) {
01390         flags |= TCL_ENCODING_START | TCL_ENCODING_END;
01391         statePtr = &state;
01392     }
01393     if (srcReadPtr == NULL) {
01394         srcReadPtr = &srcRead;
01395     }
01396     if (dstWrotePtr == NULL) {
01397         dstWrotePtr = &dstWrote;
01398     }
01399     if (dstCharsPtr == NULL) {
01400         dstCharsPtr = &dstChars;
01401     }
01402 
01403     dstLen -= encodingPtr->nullSize;
01404     result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, srcLen,
01405             flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
01406             dstCharsPtr);
01407     if (encodingPtr->nullSize == 2) {
01408         dst[*dstWrotePtr + 1] = '\0';
01409     }
01410     dst[*dstWrotePtr] = '\0';
01411 
01412     return result;
01413 }
01414 
01415 /*
01416  *---------------------------------------------------------------------------
01417  *
01418  * Tcl_FindExecutable --
01419  *
01420  *      This function computes the absolute path name of the current
01421  *      application, given its argv[0] value.
01422  *
01423  * Results:
01424  *      None.
01425  *
01426  * Side effects:
01427  *      The absolute pathname for the application is computed and stored to be
01428  *      returned later be [info nameofexecutable].
01429  *
01430  *---------------------------------------------------------------------------
01431  */
01432 
01433 void
01434 Tcl_FindExecutable(
01435     CONST char *argv0)          /* The value of the application's argv[0]
01436                                  * (native). */
01437 {
01438     TclInitSubsystems();
01439     TclpSetInitialEncodings();
01440     TclpFindExecutable(argv0);
01441 }
01442 
01443 /*
01444  *---------------------------------------------------------------------------
01445  *
01446  * OpenEncodingFileChannel --
01447  *
01448  *      Open the file believed to hold data for the encoding, "name".
01449  *
01450  * Results:
01451  *      Returns the readable Tcl_Channel from opening the file, or NULL if the
01452  *      file could not be successfully opened. If NULL was returned, an error
01453  *      message is left in interp's result object, unless interp was NULL.
01454  *
01455  * Side effects:
01456  *      Channel may be opened. Information about the filesystem may be cached
01457  *      to speed later calls.
01458  *
01459  *---------------------------------------------------------------------------
01460  */
01461 
01462 static Tcl_Channel
01463 OpenEncodingFileChannel(
01464     Tcl_Interp *interp,         /* Interp for error reporting, if not NULL. */
01465     CONST char *name)           /* The name of the encoding file on disk and
01466                                  * also the name for new encoding. */
01467 {
01468     Tcl_Obj *nameObj = Tcl_NewStringObj(name, -1);
01469     Tcl_Obj *fileNameObj = Tcl_DuplicateObj(nameObj);
01470     Tcl_Obj *searchPath = Tcl_DuplicateObj(Tcl_GetEncodingSearchPath());
01471     Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap);
01472     Tcl_Obj **dir, *path, *directory = NULL;
01473     Tcl_Channel chan = NULL;
01474     int i, numDirs;
01475 
01476     Tcl_ListObjGetElements(NULL, searchPath, &numDirs, &dir);
01477     Tcl_IncrRefCount(nameObj);
01478     Tcl_AppendToObj(fileNameObj, ".enc", -1);
01479     Tcl_IncrRefCount(fileNameObj);
01480     Tcl_DictObjGet(NULL, map, nameObj, &directory);
01481 
01482     /*
01483      * Check that any cached directory is still on the encoding search path.
01484      */
01485 
01486     if (NULL != directory) {
01487         int verified = 0;
01488 
01489         for (i=0; i<numDirs && !verified; i++) {
01490             if (dir[i] == directory) {
01491                 verified = 1;
01492             }
01493         }
01494         if (!verified) {
01495             CONST char *dirString = Tcl_GetString(directory);
01496             for (i=0; i<numDirs && !verified; i++) {
01497                 if (strcmp(dirString, Tcl_GetString(dir[i])) == 0) {
01498                     verified = 1;
01499                 }
01500             }
01501         }
01502         if (!verified) {
01503             /*
01504              * Directory no longer on the search path. Remove from cache.
01505              */
01506 
01507             map = Tcl_DuplicateObj(map);
01508             Tcl_DictObjRemove(NULL, map, nameObj);
01509             TclSetProcessGlobalValue(&encodingFileMap, map, NULL);
01510             directory = NULL;
01511         }
01512     }
01513 
01514     if (NULL != directory) {
01515         /*
01516          * Got a directory from the cache. Try to use it first.
01517          */
01518 
01519         Tcl_IncrRefCount(directory);
01520         path = Tcl_FSJoinToPath(directory, 1, &fileNameObj);
01521         Tcl_IncrRefCount(path);
01522         Tcl_DecrRefCount(directory);
01523         chan = Tcl_FSOpenFileChannel(NULL, path, "r", 0);
01524         Tcl_DecrRefCount(path);
01525     }
01526 
01527     /*
01528      * Scan the search path until we find it.
01529      */
01530 
01531     for (i=0; i<numDirs && (chan == NULL); i++) {
01532         path = Tcl_FSJoinToPath(dir[i], 1, &fileNameObj);
01533         Tcl_IncrRefCount(path);
01534         chan = Tcl_FSOpenFileChannel(NULL, path, "r", 0);
01535         Tcl_DecrRefCount(path);
01536         if (chan != NULL) {
01537             /*
01538              * Save directory in the cache.
01539              */
01540 
01541             map = Tcl_DuplicateObj(TclGetProcessGlobalValue(&encodingFileMap));
01542             Tcl_DictObjPut(NULL, map, nameObj, dir[i]);
01543             TclSetProcessGlobalValue(&encodingFileMap, map, NULL);
01544         }
01545     }
01546 
01547     if ((NULL == chan) && (interp != NULL)) {
01548         Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
01549         Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ENCODING", name, NULL);
01550     }
01551     Tcl_DecrRefCount(fileNameObj);
01552     Tcl_DecrRefCount(nameObj);
01553     Tcl_DecrRefCount(searchPath);
01554 
01555     return chan;
01556 }
01557 
01558 /*
01559  *---------------------------------------------------------------------------
01560  *
01561  * LoadEncodingFile --
01562  *
01563  *      Read a file that describes an encoding and create a new Encoding from
01564  *      the data.
01565  *
01566  * Results:
01567  *      The return value is the newly loaded Encoding, or NULL if the file
01568  *      didn't exist of was in the incorrect format. If NULL was returned, an
01569  *      error message is left in interp's result object, unless interp was
01570  *      NULL.
01571  *
01572  * Side effects:
01573  *      File read from disk.
01574  *
01575  *---------------------------------------------------------------------------
01576  */
01577 
01578 static Tcl_Encoding
01579 LoadEncodingFile(
01580     Tcl_Interp *interp,         /* Interp for error reporting, if not NULL. */
01581     CONST char *name)           /* The name of the encoding file on disk and
01582                                  * also the name for new encoding. */
01583 {
01584     Tcl_Channel chan = NULL;
01585     Tcl_Encoding encoding = NULL;
01586     int ch;
01587 
01588     chan = OpenEncodingFileChannel(interp, name);
01589     if (chan == NULL) {
01590         return NULL;
01591     }
01592 
01593     Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8");
01594 
01595     while (1) {
01596         Tcl_DString ds;
01597 
01598         Tcl_DStringInit(&ds);
01599         Tcl_Gets(chan, &ds);
01600         ch = Tcl_DStringValue(&ds)[0];
01601         Tcl_DStringFree(&ds);
01602         if (ch != '#') {
01603             break;
01604         }
01605     }
01606 
01607     switch (ch) {
01608     case 'S':
01609         encoding = LoadTableEncoding(name, ENCODING_SINGLEBYTE, chan);
01610         break;
01611     case 'D':
01612         encoding = LoadTableEncoding(name, ENCODING_DOUBLEBYTE, chan);
01613         break;
01614     case 'M':
01615         encoding = LoadTableEncoding(name, ENCODING_MULTIBYTE, chan);
01616         break;
01617     case 'E':
01618         encoding = LoadEscapeEncoding(name, chan);
01619         break;
01620     }
01621     if ((encoding == NULL) && (interp != NULL)) {
01622         Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL);
01623     }
01624     Tcl_Close(NULL, chan);
01625 
01626     return encoding;
01627 }
01628 
01629 /*
01630  *-------------------------------------------------------------------------
01631  *
01632  * LoadTableEncoding --
01633  *
01634  *      Helper function for LoadEncodingTable(). Loads a table to that
01635  *      converts between Unicode and some other encoding and creates an
01636  *      encoding (using a TableEncoding structure) from that information.
01637  *
01638  *      File contains binary data, but begins with a marker to indicate
01639  *      byte-ordering, so that same binary file can be read on either endian
01640  *      platforms.
01641  *
01642  * Results:
01643  *      The return value is the new encoding, or NULL if the encoding could
01644  *      not be created (because the file contained invalid data).
01645  *
01646  * Side effects:
01647  *      None.
01648  *
01649  *-------------------------------------------------------------------------
01650  */
01651 
01652 static Tcl_Encoding
01653 LoadTableEncoding(
01654     CONST char *name,           /* Name for new encoding. */
01655     int type,                   /* Type of encoding (ENCODING_?????). */
01656     Tcl_Channel chan)           /* File containing new encoding. */
01657 {
01658     Tcl_DString lineString;
01659     Tcl_Obj *objPtr;
01660     char *line;
01661     int i, hi, lo, numPages, symbol, fallback;
01662     unsigned char used[256];
01663     unsigned int size;
01664     TableEncodingData *dataPtr;
01665     unsigned short *pageMemPtr;
01666     Tcl_EncodingType encType;
01667 
01668     /*
01669      * Speed over memory. Use a full 256 character table to decode hex
01670      * sequences in the encoding files.
01671      */
01672 
01673     static char staticHex[] = {
01674       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*   0 ...  15 */
01675       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  16 ...  31 */
01676       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  32 ...  47 */
01677       0,  1,  2,  3,  4,  5,  6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /*  48 ...  63 */
01678       0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  64 ...  79 */
01679       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  80 ...  95 */
01680       0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  96 ... 111 */
01681       0,  1,  2,  3,  4,  5,  6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 112 ... 127 */
01682       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128 ... 143 */
01683       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144 ... 159 */
01684       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 160 ... 175 */
01685       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 176 ... 191 */
01686       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 192 ... 207 */
01687       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 208 ... 223 */
01688       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 224 ... 239 */
01689       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 240 ... 255 */
01690     };
01691 
01692     Tcl_DStringInit(&lineString);
01693     Tcl_Gets(chan, &lineString);
01694     line = Tcl_DStringValue(&lineString);
01695 
01696     fallback = (int) strtol(line, &line, 16);
01697     symbol = (int) strtol(line, &line, 10);
01698     numPages = (int) strtol(line, &line, 10);
01699     Tcl_DStringFree(&lineString);
01700 
01701     if (numPages < 0) {
01702         numPages = 0;
01703     } else if (numPages > 256) {
01704         numPages = 256;
01705     }
01706 
01707     memset(used, 0, sizeof(used));
01708 
01709 #undef PAGESIZE
01710 #define PAGESIZE    (256 * sizeof(unsigned short))
01711 
01712     dataPtr = (TableEncodingData *) ckalloc(sizeof(TableEncodingData));
01713     memset(dataPtr, 0, sizeof(TableEncodingData));
01714 
01715     dataPtr->fallback = fallback;
01716 
01717     /*
01718      * Read the table that maps characters to Unicode. Performs a single
01719      * malloc to get the memory for the array and all the pages needed by the
01720      * array.
01721      */
01722 
01723     size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
01724     dataPtr->toUnicode = (unsigned short **) ckalloc(size);
01725     memset(dataPtr->toUnicode, 0, size);
01726     pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256);
01727 
01728     TclNewObj(objPtr);
01729     Tcl_IncrRefCount(objPtr);
01730     for (i = 0; i < numPages; i++) {
01731         int ch;
01732         char *p;
01733 
01734         Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0);
01735         p = Tcl_GetString(objPtr);
01736         hi = (staticHex[UCHAR(p[0])] << 4) + staticHex[UCHAR(p[1])];
01737         dataPtr->toUnicode[hi] = pageMemPtr;
01738         p += 2;
01739         for (lo = 0; lo < 256; lo++) {
01740             if ((lo & 0x0f) == 0) {
01741                 p++;
01742             }
01743             ch = (staticHex[UCHAR(p[0])] << 12) + (staticHex[UCHAR(p[1])] << 8)
01744                     + (staticHex[UCHAR(p[2])] << 4) + staticHex[UCHAR(p[3])];
01745             if (ch != 0) {
01746                 used[ch >> 8] = 1;
01747             }
01748             *pageMemPtr = (unsigned short) ch;
01749             pageMemPtr++;
01750             p += 4;
01751         }
01752     }
01753     TclDecrRefCount(objPtr);
01754 
01755     if (type == ENCODING_DOUBLEBYTE) {
01756         memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes));
01757     } else {
01758         for (hi = 1; hi < 256; hi++) {
01759             if (dataPtr->toUnicode[hi] != NULL) {
01760                 dataPtr->prefixBytes[hi] = 1;
01761             }
01762         }
01763     }
01764 
01765     /*
01766      * Invert toUnicode array to produce the fromUnicode array. Performs a
01767      * single malloc to get the memory for the array and all the pages needed
01768      * by the array. While reading in the toUnicode array, we remembered what
01769      * pages that would be needed for the fromUnicode array.
01770      */
01771 
01772     if (symbol) {
01773         used[0] = 1;
01774     }
01775     numPages = 0;
01776     for (hi = 0; hi < 256; hi++) {
01777         if (used[hi]) {
01778             numPages++;
01779         }
01780     }
01781     size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
01782     dataPtr->fromUnicode = (unsigned short **) ckalloc(size);
01783     memset(dataPtr->fromUnicode, 0, size);
01784     pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256);
01785 
01786     for (hi = 0; hi < 256; hi++) {
01787         if (dataPtr->toUnicode[hi] == NULL) {
01788             dataPtr->toUnicode[hi] = emptyPage;
01789         } else {
01790             for (lo = 0; lo < 256; lo++) {
01791                 int ch;
01792 
01793                 ch = dataPtr->toUnicode[hi][lo];
01794                 if (ch != 0) {
01795                     unsigned short *page;
01796 
01797                     page = dataPtr->fromUnicode[ch >> 8];
01798                     if (page == NULL) {
01799                         page = pageMemPtr;
01800                         pageMemPtr += 256;
01801                         dataPtr->fromUnicode[ch >> 8] = page;
01802                     }
01803                     page[ch & 0xff] = (unsigned short) ((hi << 8) + lo);
01804                 }
01805             }
01806         }
01807     }
01808     if (type == ENCODING_MULTIBYTE) {
01809         /*
01810          * If multibyte encodings don't have a backslash character, define
01811          * one. Otherwise, on Windows, native file names won't work because
01812          * the backslash in the file name will map to the unknown character
01813          * (question mark) when converting from UTF-8 to external encoding.
01814          */
01815 
01816         if (dataPtr->fromUnicode[0] != NULL) {
01817             if (dataPtr->fromUnicode[0]['\\'] == '\0') {
01818                 dataPtr->fromUnicode[0]['\\'] = '\\';
01819             }
01820         }
01821     }
01822     if (symbol) {
01823         unsigned short *page;
01824 
01825         /*
01826          * Make a special symbol encoding that not only maps the symbol
01827          * characters from their Unicode code points down into page 0, but
01828          * also ensure that the characters on page 0 map to themselves. This
01829          * is so that a symbol font can be used to display a simple string
01830          * like "abcd" and have alpha, beta, chi, delta show up, rather than
01831          * have "unknown" chars show up because strictly speaking the symbol
01832          * font doesn't have glyphs for those low ascii chars.
01833          */
01834 
01835         page = dataPtr->fromUnicode[0];
01836         if (page == NULL) {
01837             page = pageMemPtr;
01838             dataPtr->fromUnicode[0] = page;
01839         }
01840         for (lo = 0; lo < 256; lo++) {
01841             if (dataPtr->toUnicode[0][lo] != 0) {
01842                 page[lo] = (unsigned short) lo;
01843             }
01844         }
01845     }
01846     for (hi = 0; hi < 256; hi++) {
01847         if (dataPtr->fromUnicode[hi] == NULL) {
01848             dataPtr->fromUnicode[hi] = emptyPage;
01849         }
01850     }
01851 
01852     /*
01853      * For trailing 'R'everse encoding, see [Patch 689341]
01854      */
01855 
01856     Tcl_DStringInit(&lineString);
01857     do {
01858         int len;
01859 
01860         /*
01861          * Skip leading empty lines.
01862          */
01863 
01864         while ((len = Tcl_Gets(chan, &lineString)) == 0) {
01865             /* empty body */
01866         }
01867 
01868         if (len < 0) {
01869             break;
01870         }
01871         line = Tcl_DStringValue(&lineString);
01872         if (line[0] != 'R') {
01873             break;
01874         }
01875         for (Tcl_DStringSetLength(&lineString, 0);
01876                 (len = Tcl_Gets(chan, &lineString)) >= 0;
01877                 Tcl_DStringSetLength(&lineString, 0)) {
01878             unsigned char* p;
01879             int to, from;
01880 
01881             if (len < 5) {
01882                 continue;
01883             }
01884             p = (unsigned char*) Tcl_DStringValue(&lineString);
01885             to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
01886                     + (staticHex[p[2]] << 4) + staticHex[p[3]];
01887             if (to == 0) {
01888                 continue;
01889             }
01890             for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) {
01891                 from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
01892                         + (staticHex[p[2]] << 4) + staticHex[p[3]];
01893                 if (from == 0) {
01894                     continue;
01895                 }
01896                 dataPtr->fromUnicode[from >> 8][from & 0xff] = to;
01897             }
01898         }
01899     } while (0);
01900     Tcl_DStringFree(&lineString);
01901 
01902     encType.encodingName    = name;
01903     encType.toUtfProc       = TableToUtfProc;
01904     encType.fromUtfProc     = TableFromUtfProc;
01905     encType.freeProc        = TableFreeProc;
01906     encType.nullSize        = (type == ENCODING_DOUBLEBYTE) ? 2 : 1;
01907     encType.clientData      = (ClientData) dataPtr;
01908 
01909     return Tcl_CreateEncoding(&encType);
01910 }
01911 
01912 /*
01913  *-------------------------------------------------------------------------
01914  *
01915  * LoadEscapeEncoding --
01916  *
01917  *      Helper function for LoadEncodingTable(). Loads a state machine that
01918  *      converts between Unicode and some other encoding.
01919  *
01920  *      File contains text data that describes the escape sequences that are
01921  *      used to choose an encoding and the associated names for the
01922  *      sub-encodings.
01923  *
01924  * Results:
01925  *      The return value is the new encoding, or NULL if the encoding could
01926  *      not be created (because the file contained invalid data).
01927  *
01928  * Side effects:
01929  *      None.
01930  *
01931  *-------------------------------------------------------------------------
01932  */
01933 
01934 static Tcl_Encoding
01935 LoadEscapeEncoding(
01936     CONST char *name,           /* Name for new encoding. */
01937     Tcl_Channel chan)           /* File containing new encoding. */
01938 {
01939     int i;
01940     unsigned int size;
01941     Tcl_DString escapeData;
01942     char init[16], final[16];
01943     EscapeEncodingData *dataPtr;
01944     Tcl_EncodingType type;
01945 
01946     init[0] = '\0';
01947     final[0] = '\0';
01948     Tcl_DStringInit(&escapeData);
01949 
01950     while (1) {
01951         int argc;
01952         CONST char **argv;
01953         char *line;
01954         Tcl_DString lineString;
01955 
01956         Tcl_DStringInit(&lineString);
01957         if (Tcl_Gets(chan, &lineString) < 0) {
01958             break;
01959         }
01960         line = Tcl_DStringValue(&lineString);
01961         if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) {
01962             continue;
01963         }
01964         if (argc >= 2) {
01965             if (strcmp(argv[0], "name") == 0) {
01966                 /* do nothing */
01967             } else if (strcmp(argv[0], "init") == 0) {
01968                 strncpy(init, argv[1], sizeof(init));
01969                 init[sizeof(init) - 1] = '\0';
01970             } else if (strcmp(argv[0], "final") == 0) {
01971                 strncpy(final, argv[1], sizeof(final));
01972                 final[sizeof(final) - 1] = '\0';
01973             } else {
01974                 EscapeSubTable est;
01975 
01976                 strncpy(est.sequence, argv[1], sizeof(est.sequence));
01977                 est.sequence[sizeof(est.sequence) - 1] = '\0';
01978                 est.sequenceLen = strlen(est.sequence);
01979 
01980                 strncpy(est.name, argv[0], sizeof(est.name));
01981                 est.name[sizeof(est.name) - 1] = '\0';
01982 
01983                 /*
01984                  * To avoid infinite recursion in [encoding system iso2022-*]
01985                  */
01986 
01987                 Tcl_GetEncoding(NULL, est.name);
01988 
01989                 est.encodingPtr = NULL;
01990                 Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est));
01991             }
01992         }
01993         ckfree((char *) argv);
01994         Tcl_DStringFree(&lineString);
01995     }
01996 
01997     size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable)
01998             + Tcl_DStringLength(&escapeData);
01999     dataPtr = (EscapeEncodingData *) ckalloc(size);
02000     dataPtr->initLen = strlen(init);
02001     strcpy(dataPtr->init, init);
02002     dataPtr->finalLen = strlen(final);
02003     strcpy(dataPtr->final, final);
02004     dataPtr->numSubTables =
02005             Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable);
02006     memcpy(dataPtr->subTables, Tcl_DStringValue(&escapeData),
02007             (size_t) Tcl_DStringLength(&escapeData));
02008     Tcl_DStringFree(&escapeData);
02009 
02010     memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes));
02011     for (i = 0; i < dataPtr->numSubTables; i++) {
02012         dataPtr->prefixBytes[UCHAR(dataPtr->subTables[i].sequence[0])] = 1;
02013     }
02014     if (dataPtr->init[0] != '\0') {
02015         dataPtr->prefixBytes[UCHAR(dataPtr->init[0])] = 1;
02016     }
02017     if (dataPtr->final[0] != '\0') {
02018         dataPtr->prefixBytes[UCHAR(dataPtr->final[0])] = 1;
02019     }
02020 
02021     type.encodingName   = name;
02022     type.toUtfProc      = EscapeToUtfProc;
02023     type.fromUtfProc    = EscapeFromUtfProc;
02024     type.freeProc       = EscapeFreeProc;
02025     type.nullSize       = 1;
02026     type.clientData     = (ClientData) dataPtr;
02027 
02028     return Tcl_CreateEncoding(&type);
02029 }
02030 
02031 /*
02032  *-------------------------------------------------------------------------
02033  *
02034  * BinaryProc --
02035  *
02036  *      The default conversion when no other conversion is specified. No
02037  *      translation is done; source bytes are copied directly to destination
02038  *      bytes.
02039  *
02040  * Results:
02041  *      Returns TCL_OK if conversion was successful.
02042  *
02043  * Side effects:
02044  *      None.
02045  *
02046  *-------------------------------------------------------------------------
02047  */
02048 
02049 static int
02050 BinaryProc(
02051     ClientData clientData,      /* Not used. */
02052     CONST char *src,            /* Source string (unknown encoding). */
02053     int srcLen,                 /* Source string length in bytes. */
02054     int flags,                  /* Conversion control flags. */
02055     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02056                                  * information used during a piecewise
02057                                  * conversion. Contents of statePtr are
02058                                  * initialized and/or reset by conversion
02059                                  * routine under control of flags argument. */
02060     char *dst,                  /* Output buffer in which converted string is
02061                                  * stored. */
02062     int dstLen,                 /* The maximum length of output buffer in
02063                                  * bytes. */
02064     int *srcReadPtr,            /* Filled with the number of bytes from the
02065                                  * source string that were converted. */
02066     int *dstWrotePtr,           /* Filled with the number of bytes that were
02067                                  * stored in the output buffer as a result of
02068                                  * the conversion. */
02069     int *dstCharsPtr)           /* Filled with the number of characters that
02070                                  * correspond to the bytes stored in the
02071                                  * output buffer. */
02072 {
02073     int result;
02074 
02075     result = TCL_OK;
02076     dstLen -= TCL_UTF_MAX - 1;
02077     if (dstLen < 0) {
02078         dstLen = 0;
02079     }
02080     if (srcLen > dstLen) {
02081         srcLen = dstLen;
02082         result = TCL_CONVERT_NOSPACE;
02083     }
02084 
02085     *srcReadPtr = srcLen;
02086     *dstWrotePtr = srcLen;
02087     *dstCharsPtr = srcLen;
02088     memcpy(dst, src, (size_t) srcLen);
02089     return result;
02090 }
02091 
02092 /*
02093  *-------------------------------------------------------------------------
02094  *
02095  * UtfExtToUtfIntProc --
02096  *
02097  *      Convert from UTF-8 to UTF-8. While converting null-bytes from the
02098  *      Tcl's internal representation (0xc0, 0x80) to the official
02099  *      representation (0x00). See UtfToUtfProc for details.
02100  *
02101  * Results:
02102  *      Returns TCL_OK if conversion was successful.
02103  *
02104  * Side effects:
02105  *      None.
02106  *
02107  *-------------------------------------------------------------------------
02108  */
02109 
02110 static int
02111 UtfIntToUtfExtProc(
02112     ClientData clientData,      /* Not used. */
02113     CONST char *src,            /* Source string in UTF-8. */
02114     int srcLen,                 /* Source string length in bytes. */
02115     int flags,                  /* Conversion control flags. */
02116     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02117                                  * information used during a piecewise
02118                                  * conversion. Contents of statePtr are
02119                                  * initialized and/or reset by conversion
02120                                  * routine under control of flags argument. */
02121     char *dst,                  /* Output buffer in which converted string
02122                                  * is stored. */
02123     int dstLen,                 /* The maximum length of output buffer in
02124                                  * bytes. */
02125     int *srcReadPtr,            /* Filled with the number of bytes from the
02126                                  * source string that were converted. This may
02127                                  * be less than the original source length if
02128                                  * there was a problem converting some source
02129                                  * characters. */
02130     int *dstWrotePtr,           /* Filled with the number of bytes that were
02131                                  * stored in the output buffer as a result of
02132                                  * the conversion. */
02133     int *dstCharsPtr)           /* Filled with the number of characters that
02134                                  * correspond to the bytes stored in the
02135                                  * output buffer. */
02136 {
02137     return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
02138             srcReadPtr, dstWrotePtr, dstCharsPtr, 1);
02139 }
02140 
02141 /*
02142  *-------------------------------------------------------------------------
02143  *
02144  * UtfExtToUtfIntProc --
02145  *
02146  *      Convert from UTF-8 to UTF-8 while converting null-bytes from the
02147  *      official representation (0x00) to Tcl's internal representation (0xc0,
02148  *      0x80). See UtfToUtfProc for details.
02149  *
02150  * Results:
02151  *      Returns TCL_OK if conversion was successful.
02152  *
02153  * Side effects:
02154  *      None.
02155  *
02156  *-------------------------------------------------------------------------
02157  */
02158 static int
02159 UtfExtToUtfIntProc(
02160     ClientData clientData,      /* Not used. */
02161     CONST char *src,            /* Source string in UTF-8. */
02162     int srcLen,                 /* Source string length in bytes. */
02163     int flags,                  /* Conversion control flags. */
02164     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02165                                  * information used during a piecewise
02166                                  * conversion. Contents of statePtr are
02167                                  * initialized and/or reset by conversion
02168                                  * routine under control of flags argument. */
02169     char *dst,                  /* Output buffer in which converted string is
02170                                  * stored. */
02171     int dstLen,                 /* The maximum length of output buffer in
02172                                  * bytes. */
02173     int *srcReadPtr,            /* Filled with the number of bytes from the
02174                                  * source string that were converted. This may
02175                                  * be less than the original source length if
02176                                  * there was a problem converting some source
02177                                  * characters. */
02178     int *dstWrotePtr,           /* Filled with the number of bytes that were
02179                                  * stored in the output buffer as a result of
02180                                  * the conversion. */
02181     int *dstCharsPtr)           /* Filled with the number of characters that
02182                                  * correspond to the bytes stored in the
02183                                  * output buffer. */
02184 {
02185     return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
02186             srcReadPtr, dstWrotePtr, dstCharsPtr, 0);
02187 }
02188 
02189 /*
02190  *-------------------------------------------------------------------------
02191  *
02192  * UtfToUtfProc --
02193  *
02194  *      Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation
02195  *      is not a no-op, because it will turn a stream of improperly formed
02196  *      UTF-8 into a properly formed stream.
02197  *
02198  * Results:
02199  *      Returns TCL_OK if conversion was successful.
02200  *
02201  * Side effects:
02202  *      None.
02203  *
02204  *-------------------------------------------------------------------------
02205  */
02206 
02207 static int
02208 UtfToUtfProc(
02209     ClientData clientData,      /* Not used. */
02210     CONST char *src,            /* Source string in UTF-8. */
02211     int srcLen,                 /* Source string length in bytes. */
02212     int flags,                  /* Conversion control flags. */
02213     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02214                                  * information used during a piecewise
02215                                  * conversion. Contents of statePtr are
02216                                  * initialized and/or reset by conversion
02217                                  * routine under control of flags argument. */
02218     char *dst,                  /* Output buffer in which converted string is
02219                                  * stored. */
02220     int dstLen,                 /* The maximum length of output buffer in
02221                                  * bytes. */
02222     int *srcReadPtr,            /* Filled with the number of bytes from the
02223                                  * source string that were converted. This may
02224                                  * be less than the original source length if
02225                                  * there was a problem converting some source
02226                                  * characters. */
02227     int *dstWrotePtr,           /* Filled with the number of bytes that were
02228                                  * stored in the output buffer as a result of
02229                                  * the conversion. */
02230     int *dstCharsPtr,           /* Filled with the number of characters that
02231                                  * correspond to the bytes stored in the
02232                                  * output buffer. */
02233     int pureNullMode)           /* Convert embedded nulls from internal
02234                                  * representation to real null-bytes or vice
02235                                  * versa. */
02236 {
02237     CONST char *srcStart, *srcEnd, *srcClose;
02238     char *dstStart, *dstEnd;
02239     int result, numChars;
02240     Tcl_UniChar ch;
02241 
02242     result = TCL_OK;
02243 
02244     srcStart = src;
02245     srcEnd = src + srcLen;
02246     srcClose = srcEnd;
02247     if ((flags & TCL_ENCODING_END) == 0) {
02248         srcClose -= TCL_UTF_MAX;
02249     }
02250 
02251     dstStart = dst;
02252     dstEnd = dst + dstLen - TCL_UTF_MAX;
02253 
02254     for (numChars = 0; src < srcEnd; numChars++) {
02255         if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
02256             /*
02257              * If there is more string to follow, this will ensure that the
02258              * last UTF-8 character in the source buffer hasn't been cut off.
02259              */
02260 
02261             result = TCL_CONVERT_MULTIBYTE;
02262             break;
02263         }
02264         if (dst > dstEnd) {
02265             result = TCL_CONVERT_NOSPACE;
02266             break;
02267         }
02268         if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) {
02269             /*
02270              * Copy 7bit chatacters, but skip null-bytes when we are in input
02271              * mode, so that they get converted to 0xc080.
02272              */
02273 
02274             *dst++ = *src++;
02275         } else if (pureNullMode == 1 && UCHAR(*src) == 0xc0 &&
02276                 UCHAR(*(src+1)) == 0x80) {
02277             /*
02278              * Convert 0xc080 to real nulls when we are in output mode.
02279              */
02280 
02281             *dst++ = 0;
02282             src += 2;
02283         } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
02284             /*
02285              * Always check before using Tcl_UtfToUniChar. Not doing can so
02286              * cause it run beyond the endof the buffer! If we happen such an
02287              * incomplete char its byts are made to represent themselves.
02288              */
02289 
02290             ch = (Tcl_UniChar) *src;
02291             src += 1;
02292             dst += Tcl_UniCharToUtf(ch, dst);
02293         } else {
02294             src += Tcl_UtfToUniChar(src, &ch);
02295             dst += Tcl_UniCharToUtf(ch, dst);
02296         }
02297     }
02298 
02299     *srcReadPtr = src - srcStart;
02300     *dstWrotePtr = dst - dstStart;
02301     *dstCharsPtr = numChars;
02302     return result;
02303 }
02304 
02305 /*
02306  *-------------------------------------------------------------------------
02307  *
02308  * UnicodeToUtfProc --
02309  *
02310  *      Convert from Unicode to UTF-8.
02311  *
02312  * Results:
02313  *      Returns TCL_OK if conversion was successful.
02314  *
02315  * Side effects:
02316  *      None.
02317  *
02318  *-------------------------------------------------------------------------
02319  */
02320 
02321 static int
02322 UnicodeToUtfProc(
02323     ClientData clientData,      /* Not used. */
02324     CONST char *src,            /* Source string in Unicode. */
02325     int srcLen,                 /* Source string length in bytes. */
02326     int flags,                  /* Conversion control flags. */
02327     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02328                                  * information used during a piecewise
02329                                  * conversion. Contents of statePtr are
02330                                  * initialized and/or reset by conversion
02331                                  * routine under control of flags argument. */
02332     char *dst,                  /* Output buffer in which converted string is
02333                                  * stored. */
02334     int dstLen,                 /* The maximum length of output buffer in
02335                                  * bytes. */
02336     int *srcReadPtr,            /* Filled with the number of bytes from the
02337                                  * source string that were converted. This may
02338                                  * be less than the original source length if
02339                                  * there was a problem converting some source
02340                                  * characters. */
02341     int *dstWrotePtr,           /* Filled with the number of bytes that were
02342                                  * stored in the output buffer as a result of
02343                                  * the conversion. */
02344     int *dstCharsPtr)           /* Filled with the number of characters that
02345                                  * correspond to the bytes stored in the
02346                                  * output buffer. */
02347 {
02348     CONST char *srcStart, *srcEnd;
02349     char *dstEnd, *dstStart;
02350     int result, numChars;
02351     Tcl_UniChar ch;
02352 
02353     result = TCL_OK;
02354     if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
02355         result = TCL_CONVERT_MULTIBYTE;
02356         srcLen /= sizeof(Tcl_UniChar);
02357         srcLen *= sizeof(Tcl_UniChar);
02358     }
02359 
02360     srcStart = src;
02361     srcEnd = src + srcLen;
02362 
02363     dstStart = dst;
02364     dstEnd = dst + dstLen - TCL_UTF_MAX;
02365 
02366     for (numChars = 0; src < srcEnd; numChars++) {
02367         if (dst > dstEnd) {
02368             result = TCL_CONVERT_NOSPACE;
02369             break;
02370         }
02371         /*
02372          * Special case for 1-byte utf chars for speed.  Make sure we
02373          * work with Tcl_UniChar-size data.
02374          */
02375         ch = *(Tcl_UniChar *)src;
02376         if (ch && ch < 0x80) {
02377             *dst++ = (ch & 0xFF);
02378         } else {
02379             dst += Tcl_UniCharToUtf(ch, dst);
02380         }
02381         src += sizeof(Tcl_UniChar);
02382     }
02383 
02384     *srcReadPtr = src - srcStart;
02385     *dstWrotePtr = dst - dstStart;
02386     *dstCharsPtr = numChars;
02387     return result;
02388 }
02389 
02390 /*
02391  *-------------------------------------------------------------------------
02392  *
02393  * UtfToUnicodeProc --
02394  *
02395  *      Convert from UTF-8 to Unicode.
02396  *
02397  * Results:
02398  *      Returns TCL_OK if conversion was successful.
02399  *
02400  * Side effects:
02401  *      None.
02402  *
02403  *-------------------------------------------------------------------------
02404  */
02405 
02406 static int
02407 UtfToUnicodeProc(
02408     ClientData clientData,      /* TableEncodingData that specifies
02409                                  * encoding. */
02410     CONST char *src,            /* Source string in UTF-8. */
02411     int srcLen,                 /* Source string length in bytes. */
02412     int flags,                  /* Conversion control flags. */
02413     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02414                                  * information used during a piecewise
02415                                  * conversion. Contents of statePtr are
02416                                  * initialized and/or reset by conversion
02417                                  * routine under control of flags argument. */
02418     char *dst,                  /* Output buffer in which converted string is
02419                                  * stored. */
02420     int dstLen,                 /* The maximum length of output buffer in
02421                                  * bytes. */
02422     int *srcReadPtr,            /* Filled with the number of bytes from the
02423                                  * source string that were converted. This may
02424                                  * be less than the original source length if
02425                                  * there was a problem converting some source
02426                                  * characters. */
02427     int *dstWrotePtr,           /* Filled with the number of bytes that were
02428                                  * stored in the output buffer as a result of
02429                                  * the conversion. */
02430     int *dstCharsPtr)           /* Filled with the number of characters that
02431                                  * correspond to the bytes stored in the
02432                                  * output buffer. */
02433 {
02434     CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
02435     int result, numChars;
02436     Tcl_UniChar ch;
02437 
02438     srcStart = src;
02439     srcEnd = src + srcLen;
02440     srcClose = srcEnd;
02441     if ((flags & TCL_ENCODING_END) == 0) {
02442         srcClose -= TCL_UTF_MAX;
02443     }
02444 
02445     dstStart = dst;
02446     dstEnd   = dst + dstLen - sizeof(Tcl_UniChar);
02447 
02448     result = TCL_OK;
02449     for (numChars = 0; src < srcEnd; numChars++) {
02450         if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
02451             /*
02452              * If there is more string to follow, this will ensure that the
02453              * last UTF-8 character in the source buffer hasn't been cut off.
02454              */
02455 
02456             result = TCL_CONVERT_MULTIBYTE;
02457             break;
02458         }
02459         if (dst > dstEnd) {
02460             result = TCL_CONVERT_NOSPACE;
02461             break;
02462         }
02463         src += TclUtfToUniChar(src, &ch);
02464         /*
02465          * Need to handle this in a way that won't cause misalignment
02466          * by casting dst to a Tcl_UniChar. [Bug 1122671]
02467          * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
02468          */
02469 #ifdef WORDS_BIGENDIAN
02470         *dst++ = (ch >> 8);
02471         *dst++ = (ch & 0xFF);
02472 #else
02473         *dst++ = (ch & 0xFF);
02474         *dst++ = (ch >> 8);
02475 #endif
02476     }
02477     *srcReadPtr = src - srcStart;
02478     *dstWrotePtr = dst - dstStart;
02479     *dstCharsPtr = numChars;
02480     return result;
02481 }
02482 
02483 /*
02484  *-------------------------------------------------------------------------
02485  *
02486  * TableToUtfProc --
02487  *
02488  *      Convert from the encoding specified by the TableEncodingData into
02489  *      UTF-8.
02490  *
02491  * Results:
02492  *      Returns TCL_OK if conversion was successful.
02493  *
02494  * Side effects:
02495  *      None.
02496  *
02497  *-------------------------------------------------------------------------
02498  */
02499 
02500 static int
02501 TableToUtfProc(
02502     ClientData clientData,      /* TableEncodingData that specifies
02503                                  * encoding. */
02504     CONST char *src,            /* Source string in specified encoding. */
02505     int srcLen,                 /* Source string length in bytes. */
02506     int flags,                  /* Conversion control flags. */
02507     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02508                                  * information used during a piecewise
02509                                  * conversion. Contents of statePtr are
02510                                  * initialized and/or reset by conversion
02511                                  * routine under control of flags argument. */
02512     char *dst,                  /* Output buffer in which converted string is
02513                                  * stored. */
02514     int dstLen,                 /* The maximum length of output buffer in
02515                                  * bytes. */
02516     int *srcReadPtr,            /* Filled with the number of bytes from the
02517                                  * source string that were converted. This may
02518                                  * be less than the original source length if
02519                                  * there was a problem converting some source
02520                                  * characters. */
02521     int *dstWrotePtr,           /* Filled with the number of bytes that were
02522                                  * stored in the output buffer as a result of
02523                                  * the conversion. */
02524     int *dstCharsPtr)           /* Filled with the number of characters that
02525                                  * correspond to the bytes stored in the
02526                                  * output buffer. */
02527 {
02528     CONST char *srcStart, *srcEnd;
02529     char *dstEnd, *dstStart, *prefixBytes;
02530     int result, byte, numChars;
02531     Tcl_UniChar ch;
02532     unsigned short **toUnicode;
02533     unsigned short *pageZero;
02534     TableEncodingData *dataPtr;
02535 
02536     srcStart = src;
02537     srcEnd = src + srcLen;
02538 
02539     dstStart = dst;
02540     dstEnd = dst + dstLen - TCL_UTF_MAX;
02541 
02542     dataPtr = (TableEncodingData *) clientData;
02543     toUnicode = dataPtr->toUnicode;
02544     prefixBytes = dataPtr->prefixBytes;
02545     pageZero = toUnicode[0];
02546 
02547     result = TCL_OK;
02548     for (numChars = 0; src < srcEnd; numChars++) {
02549         if (dst > dstEnd) {
02550             result = TCL_CONVERT_NOSPACE;
02551             break;
02552         }
02553         byte = *((unsigned char *) src);
02554         if (prefixBytes[byte]) {
02555             src++;
02556             if (src >= srcEnd) {
02557                 src--;
02558                 result = TCL_CONVERT_MULTIBYTE;
02559                 break;
02560             }
02561             ch = toUnicode[byte][*((unsigned char *) src)];
02562         } else {
02563             ch = pageZero[byte];
02564         }
02565         if ((ch == 0) && (byte != 0)) {
02566             if (flags & TCL_ENCODING_STOPONERROR) {
02567                 result = TCL_CONVERT_SYNTAX;
02568                 break;
02569             }
02570             if (prefixBytes[byte]) {
02571                 src--;
02572             }
02573             ch = (Tcl_UniChar) byte;
02574         }
02575         /*
02576          * Special case for 1-byte utf chars for speed.
02577          */
02578         if (ch && ch < 0x80) {
02579             *dst++ = (char) ch;
02580         } else {
02581             dst += Tcl_UniCharToUtf(ch, dst);
02582         }
02583         src++;
02584     }
02585 
02586     *srcReadPtr = src - srcStart;
02587     *dstWrotePtr = dst - dstStart;
02588     *dstCharsPtr = numChars;
02589     return result;
02590 }
02591 
02592 /*
02593  *-------------------------------------------------------------------------
02594  *
02595  * TableFromUtfProc --
02596  *
02597  *      Convert from UTF-8 into the encoding specified by the
02598  *      TableEncodingData.
02599  *
02600  * Results:
02601  *      Returns TCL_OK if conversion was successful.
02602  *
02603  * Side effects:
02604  *      None.
02605  *
02606  *-------------------------------------------------------------------------
02607  */
02608 
02609 static int
02610 TableFromUtfProc(
02611     ClientData clientData,      /* TableEncodingData that specifies
02612                                  * encoding. */
02613     CONST char *src,            /* Source string in UTF-8. */
02614     int srcLen,                 /* Source string length in bytes. */
02615     int flags,                  /* Conversion control flags. */
02616     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02617                                  * information used during a piecewise
02618                                  * conversion. Contents of statePtr are
02619                                  * initialized and/or reset by conversion
02620                                  * routine under control of flags argument. */
02621     char *dst,                  /* Output buffer in which converted string is
02622                                  * stored. */
02623     int dstLen,                 /* The maximum length of output buffer in
02624                                  * bytes. */
02625     int *srcReadPtr,            /* Filled with the number of bytes from the
02626                                  * source string that were converted. This may
02627                                  * be less than the original source length if
02628                                  * there was a problem converting some source
02629                                  * characters. */
02630     int *dstWrotePtr,           /* Filled with the number of bytes that were
02631                                  * stored in the output buffer as a result of
02632                                  * the conversion. */
02633     int *dstCharsPtr)           /* Filled with the number of characters that
02634                                  * correspond to the bytes stored in the
02635                                  * output buffer. */
02636 {
02637     CONST char *srcStart, *srcEnd, *srcClose;
02638     char *dstStart, *dstEnd, *prefixBytes;
02639     Tcl_UniChar ch;
02640     int result, len, word, numChars;
02641     TableEncodingData *dataPtr;
02642     unsigned short **fromUnicode;
02643 
02644     result = TCL_OK;
02645 
02646     dataPtr = (TableEncodingData *) clientData;
02647     prefixBytes = dataPtr->prefixBytes;
02648     fromUnicode = dataPtr->fromUnicode;
02649 
02650     srcStart = src;
02651     srcEnd = src + srcLen;
02652     srcClose = srcEnd;
02653     if ((flags & TCL_ENCODING_END) == 0) {
02654         srcClose -= TCL_UTF_MAX;
02655     }
02656 
02657     dstStart = dst;
02658     dstEnd = dst + dstLen - 1;
02659 
02660     for (numChars = 0; src < srcEnd; numChars++) {
02661         if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
02662             /*
02663              * If there is more string to follow, this will ensure that the
02664              * last UTF-8 character in the source buffer hasn't been cut off.
02665              */
02666 
02667             result = TCL_CONVERT_MULTIBYTE;
02668             break;
02669         }
02670         len = TclUtfToUniChar(src, &ch);
02671 
02672 #if TCL_UTF_MAX > 3
02673         /*
02674          * This prevents a crash condition. More evaluation is required for
02675          * full support of int Tcl_UniChar. [Bug 1004065]
02676          */
02677 
02678         if (ch & 0xffff0000) {
02679             word = 0;
02680         } else
02681 #endif
02682             word = fromUnicode[(ch >> 8)][ch & 0xff];
02683 
02684         if ((word == 0) && (ch != 0)) {
02685             if (flags & TCL_ENCODING_STOPONERROR) {
02686                 result = TCL_CONVERT_UNKNOWN;
02687                 break;
02688             }
02689             word = dataPtr->fallback;
02690         }
02691         if (prefixBytes[(word >> 8)] != 0) {
02692             if (dst + 1 > dstEnd) {
02693                 result = TCL_CONVERT_NOSPACE;
02694                 break;
02695             }
02696             dst[0] = (char) (word >> 8);
02697             dst[1] = (char) word;
02698             dst += 2;
02699         } else {
02700             if (dst > dstEnd) {
02701                 result = TCL_CONVERT_NOSPACE;
02702                 break;
02703             }
02704             dst[0] = (char) word;
02705             dst++;
02706         }
02707         src += len;
02708     }
02709 
02710     *srcReadPtr = src - srcStart;
02711     *dstWrotePtr = dst - dstStart;
02712     *dstCharsPtr = numChars;
02713     return result;
02714 }
02715 
02716 /*
02717  *-------------------------------------------------------------------------
02718  *
02719  * Iso88591ToUtfProc --
02720  *
02721  *      Convert from the "iso8859-1" encoding into UTF-8.
02722  *
02723  * Results:
02724  *      Returns TCL_OK if conversion was successful.
02725  *
02726  * Side effects:
02727  *      None.
02728  *
02729  *-------------------------------------------------------------------------
02730  */
02731 
02732 static int
02733 Iso88591ToUtfProc(
02734     ClientData clientData,      /* Ignored. */
02735     CONST char *src,            /* Source string in specified encoding. */
02736     int srcLen,                 /* Source string length in bytes. */
02737     int flags,                  /* Conversion control flags. */
02738     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02739                                  * information used during a piecewise
02740                                  * conversion. Contents of statePtr are
02741                                  * initialized and/or reset by conversion
02742                                  * routine under control of flags argument. */
02743     char *dst,                  /* Output buffer in which converted string is
02744                                  * stored. */
02745     int dstLen,                 /* The maximum length of output buffer in
02746                                  * bytes. */
02747     int *srcReadPtr,            /* Filled with the number of bytes from the
02748                                  * source string that were converted. This may
02749                                  * be less than the original source length if
02750                                  * there was a problem converting some source
02751                                  * characters. */
02752     int *dstWrotePtr,           /* Filled with the number of bytes that were
02753                                  * stored in the output buffer as a result of
02754                                  * the conversion. */
02755     int *dstCharsPtr)           /* Filled with the number of characters that
02756                                  * correspond to the bytes stored in the
02757                                  * output buffer. */
02758 {
02759     CONST char *srcStart, *srcEnd;
02760     char *dstEnd, *dstStart;
02761     int result, numChars;
02762 
02763     srcStart = src;
02764     srcEnd = src + srcLen;
02765 
02766     dstStart = dst;
02767     dstEnd = dst + dstLen - TCL_UTF_MAX;
02768 
02769     result = TCL_OK;
02770     for (numChars = 0; src < srcEnd; numChars++) {
02771         Tcl_UniChar ch;
02772 
02773         if (dst > dstEnd) {
02774             result = TCL_CONVERT_NOSPACE;
02775             break;
02776         }
02777         ch = (Tcl_UniChar) *((unsigned char *) src);
02778         /*
02779          * Special case for 1-byte utf chars for speed.
02780          */
02781         if (ch && ch < 0x80) {
02782             *dst++ = (char) ch;
02783         } else {
02784             dst += Tcl_UniCharToUtf(ch, dst);
02785         }
02786         src++;
02787     }
02788 
02789     *srcReadPtr = src - srcStart;
02790     *dstWrotePtr = dst - dstStart;
02791     *dstCharsPtr = numChars;
02792     return result;
02793 }
02794 
02795 /*
02796  *-------------------------------------------------------------------------
02797  *
02798  * Iso88591FromUtfProc --
02799  *
02800  *      Convert from UTF-8 into the encoding "iso8859-1".
02801  *
02802  * Results:
02803  *      Returns TCL_OK if conversion was successful.
02804  *
02805  * Side effects:
02806  *      None.
02807  *
02808  *-------------------------------------------------------------------------
02809  */
02810 
02811 static int
02812 Iso88591FromUtfProc(
02813     ClientData clientData,      /* Ignored. */
02814     CONST char *src,            /* Source string in UTF-8. */
02815     int srcLen,                 /* Source string length in bytes. */
02816     int flags,                  /* Conversion control flags. */
02817     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02818                                  * information used during a piecewise
02819                                  * conversion. Contents of statePtr are
02820                                  * initialized and/or reset by conversion
02821                                  * routine under control of flags argument. */
02822     char *dst,                  /* Output buffer in which converted string is
02823                                  * stored. */
02824     int dstLen,                 /* The maximum length of output buffer in
02825                                  * bytes. */
02826     int *srcReadPtr,            /* Filled with the number of bytes from the
02827                                  * source string that were converted. This may
02828                                  * be less than the original source length if
02829                                  * there was a problem converting some source
02830                                  * characters. */
02831     int *dstWrotePtr,           /* Filled with the number of bytes that were
02832                                  * stored in the output buffer as a result of
02833                                  * the conversion. */
02834     int *dstCharsPtr)           /* Filled with the number of characters that
02835                                  * correspond to the bytes stored in the
02836                                  * output buffer. */
02837 {
02838     CONST char *srcStart, *srcEnd, *srcClose;
02839     char *dstStart, *dstEnd;
02840     int result, numChars;
02841 
02842     result = TCL_OK;
02843 
02844     srcStart = src;
02845     srcEnd = src + srcLen;
02846     srcClose = srcEnd;
02847     if ((flags & TCL_ENCODING_END) == 0) {
02848         srcClose -= TCL_UTF_MAX;
02849     }
02850 
02851     dstStart = dst;
02852     dstEnd = dst + dstLen - 1;
02853 
02854     for (numChars = 0; src < srcEnd; numChars++) {
02855         Tcl_UniChar ch;
02856         int len;
02857 
02858         if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
02859             /*
02860              * If there is more string to follow, this will ensure that the
02861              * last UTF-8 character in the source buffer hasn't been cut off.
02862              */
02863 
02864             result = TCL_CONVERT_MULTIBYTE;
02865             break;
02866         }
02867         len = TclUtfToUniChar(src, &ch);
02868 
02869         /*
02870          * Check for illegal characters.
02871          */
02872 
02873         if (ch > 0xff) {
02874             if (flags & TCL_ENCODING_STOPONERROR) {
02875                 result = TCL_CONVERT_UNKNOWN;
02876                 break;
02877             }
02878 
02879             /*
02880              * Plunge on, using '?' as a fallback character.
02881              */
02882 
02883             ch = (Tcl_UniChar) '?';
02884         }
02885 
02886         if (dst > dstEnd) {
02887             result = TCL_CONVERT_NOSPACE;
02888             break;
02889         }
02890         *(dst++) = (char) ch;
02891         src += len;
02892     }
02893 
02894     *srcReadPtr = src - srcStart;
02895     *dstWrotePtr = dst - dstStart;
02896     *dstCharsPtr = numChars;
02897     return result;
02898 }
02899 
02900 /*
02901  *---------------------------------------------------------------------------
02902  *
02903  * TableFreeProc --
02904  *
02905  *      This function is invoked when an encoding is deleted. It deletes the
02906  *      memory used by the TableEncodingData.
02907  *
02908  * Results:
02909  *      None.
02910  *
02911  * Side effects:
02912  *      Memory freed.
02913  *
02914  *---------------------------------------------------------------------------
02915  */
02916 
02917 static void
02918 TableFreeProc(
02919     ClientData clientData)      /* TableEncodingData that specifies
02920                                  * encoding. */
02921 {
02922     TableEncodingData *dataPtr;
02923 
02924     /*
02925      * Make sure we aren't freeing twice on shutdown. [Bug 219314]
02926      */
02927 
02928     dataPtr = (TableEncodingData *) clientData;
02929     ckfree((char *) dataPtr->toUnicode);
02930     ckfree((char *) dataPtr->fromUnicode);
02931     ckfree((char *) dataPtr);
02932 }
02933 
02934 /*
02935  *-------------------------------------------------------------------------
02936  *
02937  * EscapeToUtfProc --
02938  *
02939  *      Convert from the encoding specified by the EscapeEncodingData into
02940  *      UTF-8.
02941  *
02942  * Results:
02943  *      Returns TCL_OK if conversion was successful.
02944  *
02945  * Side effects:
02946  *      None.
02947  *
02948  *-------------------------------------------------------------------------
02949  */
02950 
02951 static int
02952 EscapeToUtfProc(
02953     ClientData clientData,      /* EscapeEncodingData that specifies
02954                                  * encoding. */
02955     CONST char *src,            /* Source string in specified encoding. */
02956     int srcLen,                 /* Source string length in bytes. */
02957     int flags,                  /* Conversion control flags. */
02958     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
02959                                  * information used during a piecewise
02960                                  * conversion. Contents of statePtr are
02961                                  * initialized and/or reset by conversion
02962                                  * routine under control of flags argument. */
02963     char *dst,                  /* Output buffer in which converted string is
02964                                  * stored. */
02965     int dstLen,                 /* The maximum length of output buffer in
02966                                  * bytes. */
02967     int *srcReadPtr,            /* Filled with the number of bytes from the
02968                                  * source string that were converted. This may
02969                                  * be less than the original source length if
02970                                  * there was a problem converting some source
02971                                  * characters. */
02972     int *dstWrotePtr,           /* Filled with the number of bytes that were
02973                                  * stored in the output buffer as a result of
02974                                  * the conversion. */
02975     int *dstCharsPtr)           /* Filled with the number of characters that
02976                                  * correspond to the bytes stored in the
02977                                  * output buffer. */
02978 {
02979     EscapeEncodingData *dataPtr;
02980     char *prefixBytes, *tablePrefixBytes;
02981     unsigned short **tableToUnicode;
02982     Encoding *encodingPtr;
02983     int state, result, numChars;
02984     CONST char *srcStart, *srcEnd;
02985     char *dstStart, *dstEnd;
02986 
02987     result = TCL_OK;
02988 
02989     tablePrefixBytes = NULL;    /* lint. */
02990     tableToUnicode = NULL;      /* lint. */
02991 
02992     dataPtr = (EscapeEncodingData *) clientData;
02993     prefixBytes = dataPtr->prefixBytes;
02994     encodingPtr = NULL;
02995 
02996     srcStart = src;
02997     srcEnd = src + srcLen;
02998 
02999     dstStart = dst;
03000     dstEnd = dst + dstLen - TCL_UTF_MAX;
03001 
03002     state = PTR2INT(*statePtr);
03003     if (flags & TCL_ENCODING_START) {
03004         state = 0;
03005     }
03006 
03007     for (numChars = 0; src < srcEnd; ) {
03008         int byte, hi, lo, ch;
03009 
03010         if (dst > dstEnd) {
03011             result = TCL_CONVERT_NOSPACE;
03012             break;
03013         }
03014         byte = *((unsigned char *) src);
03015         if (prefixBytes[byte]) {
03016             unsigned int left, len, longest;
03017             int checked, i;
03018             EscapeSubTable *subTablePtr;
03019 
03020             /*
03021              * Saw the beginning of an escape sequence.
03022              */
03023 
03024             left = srcEnd - src;
03025             len = dataPtr->initLen;
03026             longest = len;
03027             checked = 0;
03028 
03029             if (len <= left) {
03030                 checked++;
03031                 if ((len > 0) && (memcmp(src, dataPtr->init, len) == 0)) {
03032                     /*
03033                      * If we see initialization string, skip it, even if we're
03034                      * not at the beginning of the buffer.
03035                      */
03036 
03037                     src += len;
03038                     continue;
03039                 }
03040             }
03041 
03042             len = dataPtr->finalLen;
03043             if (len > longest) {
03044                 longest = len;
03045             }
03046 
03047             if (len <= left) {
03048                 checked++;
03049                 if ((len > 0) && (memcmp(src, dataPtr->final, len) == 0)) {
03050                     /*
03051                      * If we see finalization string, skip it, even if we're
03052                      * not at the end of the buffer.
03053                      */
03054 
03055                     src += len;
03056                     continue;
03057                 }
03058             }
03059 
03060             subTablePtr = dataPtr->subTables;
03061             for (i = 0; i < dataPtr->numSubTables; i++) {
03062                 len = subTablePtr->sequenceLen;
03063                 if (len > longest) {
03064                     longest = len;
03065                 }
03066                 if (len <= left) {
03067                     checked++;
03068                     if ((len > 0) &&
03069                             (memcmp(src, subTablePtr->sequence, len) == 0)) {
03070                         state = i;
03071                         encodingPtr = NULL;
03072                         subTablePtr = NULL;
03073                         src += len;
03074                         break;
03075                     }
03076                 }
03077                 subTablePtr++;
03078             }
03079 
03080             if (subTablePtr == NULL) {
03081                 /*
03082                  * A match was found, the escape sequence was consumed, and
03083                  * the state was updated.
03084                  */
03085 
03086                 continue;
03087             }
03088 
03089             /*
03090              * We have a split-up or unrecognized escape sequence. If we
03091              * checked all the sequences, then it's a syntax error, otherwise
03092              * we need more bytes to determine a match.
03093              */
03094 
03095             if ((checked == dataPtr->numSubTables + 2)
03096                     || (flags & TCL_ENCODING_END)) {
03097                 if ((flags & TCL_ENCODING_STOPONERROR) == 0) {
03098                     /*
03099                      * Skip the unknown escape sequence.
03100                      */
03101 
03102                     src += longest;
03103                     continue;
03104                 }
03105                 result = TCL_CONVERT_SYNTAX;
03106             } else {
03107                 result = TCL_CONVERT_MULTIBYTE;
03108             }
03109             break;
03110         }
03111 
03112         if (encodingPtr == NULL) {
03113             TableEncodingData *tableDataPtr;
03114 
03115             encodingPtr = GetTableEncoding(dataPtr, state);
03116             tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
03117             tablePrefixBytes = tableDataPtr->prefixBytes;
03118             tableToUnicode = tableDataPtr->toUnicode;
03119         }
03120 
03121         if (tablePrefixBytes[byte]) {
03122             src++;
03123             if (src >= srcEnd) {
03124                 src--;
03125                 result = TCL_CONVERT_MULTIBYTE;
03126                 break;
03127             }
03128             hi = byte;
03129             lo = *((unsigned char *) src);
03130         } else {
03131             hi = 0;
03132             lo = byte;
03133         }
03134 
03135         ch = tableToUnicode[hi][lo];
03136         dst += Tcl_UniCharToUtf(ch, dst);
03137         src++;
03138         numChars++;
03139     }
03140 
03141     *statePtr = (Tcl_EncodingState) INT2PTR(state);
03142     *srcReadPtr = src - srcStart;
03143     *dstWrotePtr = dst - dstStart;
03144     *dstCharsPtr = numChars;
03145     return result;
03146 }
03147 
03148 /*
03149  *-------------------------------------------------------------------------
03150  *
03151  * EscapeFromUtfProc --
03152  *
03153  *      Convert from UTF-8 into the encoding specified by the
03154  *      EscapeEncodingData.
03155  *
03156  * Results:
03157  *      Returns TCL_OK if conversion was successful.
03158  *
03159  * Side effects:
03160  *      None.
03161  *
03162  *-------------------------------------------------------------------------
03163  */
03164 
03165 static int
03166 EscapeFromUtfProc(
03167     ClientData clientData,      /* EscapeEncodingData that specifies
03168                                  * encoding. */
03169     CONST char *src,            /* Source string in UTF-8. */
03170     int srcLen,                 /* Source string length in bytes. */
03171     int flags,                  /* Conversion control flags. */
03172     Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
03173                                  * information used during a piecewise
03174                                  * conversion. Contents of statePtr are
03175                                  * initialized and/or reset by conversion
03176                                  * routine under control of flags argument. */
03177     char *dst,                  /* Output buffer in which converted string is
03178                                  * stored. */
03179     int dstLen,                 /* The maximum length of output buffer in
03180                                  * bytes. */
03181     int *srcReadPtr,            /* Filled with the number of bytes from the
03182                                  * source string that were converted. This may
03183                                  * be less than the original source length if
03184                                  * there was a problem converting some source
03185                                  * characters. */
03186     int *dstWrotePtr,           /* Filled with the number of bytes that were
03187                                  * stored in the output buffer as a result of
03188                                  * the conversion. */
03189     int *dstCharsPtr)           /* Filled with the number of characters that
03190                                  * correspond to the bytes stored in the
03191                                  * output buffer. */
03192 {
03193     EscapeEncodingData *dataPtr;
03194     Encoding *encodingPtr;
03195     CONST char *srcStart, *srcEnd, *srcClose;
03196     char *dstStart, *dstEnd;
03197     int state, result, numChars;
03198     TableEncodingData *tableDataPtr;
03199     char *tablePrefixBytes;
03200     unsigned short **tableFromUnicode;
03201 
03202     result = TCL_OK;
03203 
03204     dataPtr = (EscapeEncodingData *) clientData;
03205 
03206     srcStart = src;
03207     srcEnd = src + srcLen;
03208     srcClose = srcEnd;
03209     if ((flags & TCL_ENCODING_END) == 0) {
03210         srcClose -= TCL_UTF_MAX;
03211     }
03212 
03213     dstStart = dst;
03214     dstEnd = dst + dstLen - 1;
03215 
03216     /*
03217      * RFC1468 states that the text starts in ASCII, and switches to Japanese
03218      * characters, and that the text must end in ASCII. [Patch 474358]
03219      */
03220 
03221     if (flags & TCL_ENCODING_START) {
03222         state = 0;
03223         if ((dst + dataPtr->initLen) > dstEnd) {
03224             *srcReadPtr = 0;
03225             *dstWrotePtr = 0;
03226             return TCL_CONVERT_NOSPACE;
03227         }
03228         memcpy(dst, dataPtr->init, (size_t)dataPtr->initLen);
03229         dst += dataPtr->initLen;
03230     } else {
03231         state = PTR2INT(*statePtr);
03232     }
03233 
03234     encodingPtr = GetTableEncoding(dataPtr, state);
03235     tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
03236     tablePrefixBytes = tableDataPtr->prefixBytes;
03237     tableFromUnicode = tableDataPtr->fromUnicode;
03238 
03239     for (numChars = 0; src < srcEnd; numChars++) {
03240         unsigned int len;
03241         int word;
03242         Tcl_UniChar ch;
03243 
03244         if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
03245             /*
03246              * If there is more string to follow, this will ensure that the
03247              * last UTF-8 character in the source buffer hasn't been cut off.
03248              */
03249 
03250             result = TCL_CONVERT_MULTIBYTE;
03251             break;
03252         }
03253         len = TclUtfToUniChar(src, &ch);
03254         word = tableFromUnicode[(ch >> 8)][ch & 0xff];
03255 
03256         if ((word == 0) && (ch != 0)) {
03257             int oldState;
03258             EscapeSubTable *subTablePtr;
03259 
03260             oldState = state;
03261             for (state = 0; state < dataPtr->numSubTables; state++) {
03262                 encodingPtr = GetTableEncoding(dataPtr, state);
03263                 tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
03264                 word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xff];
03265                 if (word != 0) {
03266                     break;
03267                 }
03268             }
03269 
03270             if (word == 0) {
03271                 state = oldState;
03272                 if (flags & TCL_ENCODING_STOPONERROR) {
03273                     result = TCL_CONVERT_UNKNOWN;
03274                     break;
03275                 }
03276                 encodingPtr = GetTableEncoding(dataPtr, state);
03277                 tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
03278                 word = tableDataPtr->fallback;
03279             }
03280 
03281             tablePrefixBytes = tableDataPtr->prefixBytes;
03282             tableFromUnicode = tableDataPtr->fromUnicode;
03283 
03284             /*
03285              * The state variable has the value of oldState when word is 0.
03286              * In this case, the escape sequense should not be copied to dst
03287              * because the current character set is not changed.
03288              */
03289 
03290             if (state != oldState) {
03291                 subTablePtr = &dataPtr->subTables[state];
03292                 if ((dst + subTablePtr->sequenceLen) > dstEnd) {
03293                     /*
03294                      * If there is no space to write the escape sequence, the
03295                      * state variable must be changed to the value of oldState
03296                      * variable because this escape sequence must be written
03297                      * in the next conversion.
03298                      */
03299 
03300                     state = oldState;
03301                     result = TCL_CONVERT_NOSPACE;
03302                     break;
03303                 }
03304                 memcpy(dst, subTablePtr->sequence,
03305                         (size_t) subTablePtr->sequenceLen);
03306                 dst += subTablePtr->sequenceLen;
03307             }
03308         }
03309 
03310         if (tablePrefixBytes[(word >> 8)] != 0) {
03311             if (dst + 1 > dstEnd) {
03312                 result = TCL_CONVERT_NOSPACE;
03313                 break;
03314             }
03315             dst[0] = (char) (word >> 8);
03316             dst[1] = (char) word;
03317             dst += 2;
03318         } else {
03319             if (dst > dstEnd) {
03320                 result = TCL_CONVERT_NOSPACE;
03321                 break;
03322             }
03323             dst[0] = (char) word;
03324             dst++;
03325         }
03326         src += len;
03327     }
03328 
03329     if ((result == TCL_OK) && (flags & TCL_ENCODING_END)) {
03330         unsigned int len = dataPtr->subTables[0].sequenceLen;
03331         /*
03332          * Certain encodings like iso2022-jp need to write
03333          * an escape sequence after all characters have
03334          * been converted. This logic checks that enough
03335          * room is available in the buffer for the escape bytes.
03336          * The TCL_ENCODING_END flag is cleared after a final
03337          * escape sequence has been added to the buffer so
03338          * that another call to this method does not attempt
03339          * to append escape bytes a second time.
03340          */
03341         if ((dst + dataPtr->finalLen + (state?len:0)) > dstEnd) {
03342             result = TCL_CONVERT_NOSPACE;
03343         } else {
03344             if (state) {
03345                 memcpy(dst, dataPtr->subTables[0].sequence, (size_t) len);
03346                 dst += len;
03347             }
03348             memcpy(dst, dataPtr->final, (size_t) dataPtr->finalLen);
03349             dst += dataPtr->finalLen;
03350             state &= ~TCL_ENCODING_END;
03351         }
03352     }
03353 
03354     *statePtr = (Tcl_EncodingState) INT2PTR(state);
03355     *srcReadPtr = src - srcStart;
03356     *dstWrotePtr = dst - dstStart;
03357     *dstCharsPtr = numChars;
03358     return result;
03359 }
03360 
03361 /*
03362  *---------------------------------------------------------------------------
03363  *
03364  * EscapeFreeProc --
03365  *
03366  *      This function is invoked when an EscapeEncodingData encoding is
03367  *      deleted. It deletes the memory used by the encoding.
03368  *
03369  * Results:
03370  *      None.
03371  *
03372  * Side effects:
03373  *      Memory freed.
03374  *
03375  *---------------------------------------------------------------------------
03376  */
03377 
03378 static void
03379 EscapeFreeProc(
03380     ClientData clientData)      /* EscapeEncodingData that specifies
03381                                  * encoding. */
03382 {
03383     EscapeEncodingData *dataPtr;
03384     EscapeSubTable *subTablePtr;
03385     int i;
03386 
03387     dataPtr = (EscapeEncodingData *) clientData;
03388     if (dataPtr == NULL) {
03389         return;
03390     }
03391     subTablePtr = dataPtr->subTables;
03392     for (i = 0; i < dataPtr->numSubTables; i++) {
03393         FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr);
03394         subTablePtr++;
03395     }
03396     ckfree((char *) dataPtr);
03397 }
03398 
03399 /*
03400  *---------------------------------------------------------------------------
03401  *
03402  * GetTableEncoding --
03403  *
03404  *      Helper function for the EscapeEncodingData conversions. Gets the
03405  *      encoding (of type TextEncodingData) that represents the specified
03406  *      state.
03407  *
03408  * Results:
03409  *      The return value is the encoding.
03410  *
03411  * Side effects:
03412  *      If the encoding that represents the specified state has not already
03413  *      been used by this EscapeEncoding, it will be loaded and cached in the
03414  *      dataPtr.
03415  *
03416  *---------------------------------------------------------------------------
03417  */
03418 
03419 static Encoding *
03420 GetTableEncoding(
03421     EscapeEncodingData *dataPtr,/* Contains names of encodings. */
03422     int state)                  /* Index in dataPtr of desired Encoding. */
03423 {
03424     EscapeSubTable *subTablePtr;
03425     Encoding *encodingPtr;
03426 
03427     subTablePtr = &dataPtr->subTables[state];
03428     encodingPtr = subTablePtr->encodingPtr;
03429 
03430     if (encodingPtr == NULL) {
03431         encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name);
03432         if ((encodingPtr == NULL)
03433                 || (encodingPtr->toUtfProc != TableToUtfProc
03434                 && encodingPtr->toUtfProc != Iso88591ToUtfProc)) {
03435             Tcl_Panic("EscapeToUtfProc: invalid sub table");
03436         }
03437         subTablePtr->encodingPtr = encodingPtr;
03438     }
03439 
03440     return encodingPtr;
03441 }
03442 
/*
 *---------------------------------------------------------------------------
 *
 * unilen --
 *
 *      A helper function for the Tcl_ExternalToUtf functions. This function
 *      is similar to strlen for double-byte characters: it returns the number
 *      of bytes that precede the terminating 0x0000 unit of the string.
 *
 *      The string is scanned one "unsigned short" at a time, starting at
 *      src. Each unit is copied out of the buffer with memcpy() rather than
 *      read through a casted pointer, so src need not be aligned for
 *      unsigned short and its const qualification is kept.
 *
 * Results:
 *      As above. The terminator itself is not counted.
 *
 * Side effects:
 *      None.
 *
 *---------------------------------------------------------------------------
 */

static size_t
unilen(
    CONST char *src)            /* String terminated by an all-zero unsigned
                                 * short. */
{
    CONST char *p = src;
    unsigned short unit;

    for (;;) {
        memcpy(&unit, p, sizeof(unit));
        if (unit == 0x0000) {
            break;
        }
        p += sizeof(unit);
    }
    return (size_t) (p - src);
}
03473 
03474 /*
03475  *-------------------------------------------------------------------------
03476  *
03477  * InitializeEncodingSearchPath --
03478  *
03479  *      This is the fallback routine that sets the default value of the
03480  *      encoding search path if the application has not set one via a call to
03481  *      Tcl_SetEncodingSearchPath() by the first time the search path is needed
03482  *      to load encoding data.
03483  *
03484  *      The default encoding search path is produced by taking each directory
03485  *      in the library path, appending a subdirectory named "encoding", and if
03486  *      the resulting directory exists, adding it to the encoding search path.
03487  *
03488  * Results:
03489  *      None.
03490  *
03491  * Side effects:
03492  *      Sets the encoding search path to an initial value.
03493  *
03494  *-------------------------------------------------------------------------
03495  */
03496 
03497 static void
03498 InitializeEncodingSearchPath(
03499     char **valuePtr,
03500     int *lengthPtr,
03501     Tcl_Encoding *encodingPtr)
03502 {
03503     char *bytes;
03504     int i, numDirs, numBytes;
03505     Tcl_Obj *libPath, *encodingObj, *searchPath;
03506 
03507     TclNewLiteralStringObj(encodingObj, "encoding");
03508     TclNewObj(searchPath);
03509     Tcl_IncrRefCount(encodingObj);
03510     Tcl_IncrRefCount(searchPath);
03511     libPath = TclGetLibraryPath();
03512     Tcl_IncrRefCount(libPath);
03513     Tcl_ListObjLength(NULL, libPath, &numDirs);
03514 
03515     for (i = 0; i < numDirs; i++) {
03516         Tcl_Obj *directory, *path;
03517         Tcl_StatBuf stat;
03518 
03519         Tcl_ListObjIndex(NULL, libPath, i, &directory);
03520         path = Tcl_FSJoinToPath(directory, 1, &encodingObj);
03521         Tcl_IncrRefCount(path);
03522         if ((0 == Tcl_FSStat(path, &stat)) && S_ISDIR(stat.st_mode)) {
03523             Tcl_ListObjAppendElement(NULL, searchPath, path);
03524         }
03525         Tcl_DecrRefCount(path);
03526     }
03527 
03528     Tcl_DecrRefCount(libPath);
03529     Tcl_DecrRefCount(encodingObj);
03530     *encodingPtr = libraryPath.encoding;
03531     if (*encodingPtr) {
03532         ((Encoding *)(*encodingPtr))->refCount++;
03533     }
03534     bytes = Tcl_GetStringFromObj(searchPath, &numBytes);
03535 
03536     *lengthPtr = numBytes;
03537     *valuePtr = ckalloc((unsigned int) numBytes + 1);
03538     memcpy(*valuePtr, bytes, (size_t) numBytes + 1);
03539     Tcl_DecrRefCount(searchPath);
03540 }
03541 
03542 /*
03543  * Local Variables:
03544  * mode: c
03545  * c-basic-offset: 4
03546  * fill-column: 78
03547  * End:
03548  */
03549 



Generated on Wed Mar 12 12:18:15 2008 by  doxygen 1.5.1