src/dbapi/driver/ftds64/freetds/tds/iconv.c

Go to the documentation of this file.
00001 /* FreeTDS - Library of routines accessing Sybase and Microsoft databases
00002  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005  Brian Bruns
00003  *
00004  * This library is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU Library General Public
00006  * License as published by the Free Software Foundation; either
00007  * version 2 of the License, or (at your option) any later version.
00008  *
00009  * This library is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * Library General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU Library General Public
00015  * License along with this library; if not, write to the
00016  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00017  * Boston, MA 02111-1307, USA.
00018  */
00019 
00020 /*
00021  * iconv.c, handle all the conversion stuff without spreading #if HAVE_ICONV_ALWAYS 
00022  * all over the other code
00023  */
00024 
00025 #include <assert.h>
00026 
00027 #if HAVE_CONFIG_H
00028 #include <config.h>
00029 #endif
00030 
00031 #if HAVE_STRING_H
00032 #include <string.h>
00033 #endif /* HAVE_STRING_H */
00034 #if HAVE_ERRNO_H
00035 #include <errno.h>
00036 #endif
00037 
00038 #include "tds.h"
00039 #include "tdsiconv.h"
00040 #if HAVE_ICONV
00041 #include <iconv.h>
00042 #endif
00043 #ifdef DMALLOC
00044 #include <dmalloc.h>
00045 #endif
00046 
00047 /* define this for now; remove when done testing */
00048 #define HAVE_ICONV_ALWAYS 1
00049 
00050 TDS_RCSID(var, "$Id: iconv.c 91553 2006-10-12 15:14:13Z ssikorsk $");
00051 
00052 #define CHARSIZE(charset) ( ((charset)->min_bytes_per_char == (charset)->max_bytes_per_char )? \
00053                 (charset)->min_bytes_per_char : 0 )
00054 
00055 
00056 #if !HAVE_ICONV_ALWAYS
00057 static int bytes_per_char(TDS_ENCODING * charset);
00058 #endif
00059 static const char *collate2charset(int sql_collate, int lcid);
00060 static int skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size);
00061 static int tds_iconv_info_init(TDSICONV * char_conv, const char *client_name, const char *server_name);
00062 static int tds_iconv_init(void);
00063 static int tds_canonical_charset(const char *charset_name);
00064 static void _iconv_close(iconv_t * cd);
00065 static void tds_iconv_info_close(TDSICONV * char_conv);
00066 
00067 
00068 /**
00069  * \ingroup libtds
00070  * \defgroup conv Charset conversion
00071  * Convert between different charsets.
00072  */
00073 
00074 
00075 #include "encodings.h"
00076 
00077 /* this will contain real iconv names */
00078 static const char *iconv_names[sizeof(canonic_charsets) / sizeof(canonic_charsets[0])];
00079 static int iconv_initialized = 0;
00080 static const char *ucs2name;
00081 
00082 enum
00083 { POS_ISO1, POS_UTF8, POS_UCS2LE, POS_UCS2BE };
00084 
00085 /**
00086  * Initialize charset searching for UTF-8, UCS-2 and ISO8859-1
00087  */
00088 static int
00089 tds_iconv_init(void)
00090 {
00091     int i;
00092     iconv_t cd;
00093 
00094     /* first entries should be constants */
00095     assert(strcmp(canonic_charsets[POS_ISO1].name, "ISO-8859-1") == 0);
00096     assert(strcmp(canonic_charsets[POS_UTF8].name, "UTF-8") == 0);
00097     assert(strcmp(canonic_charsets[POS_UCS2LE].name, "UCS-2LE") == 0);
00098     assert(strcmp(canonic_charsets[POS_UCS2BE].name, "UCS-2BE") == 0);
00099 
00100     /* fast tests for GNU-iconv */
00101     cd = tds_sys_iconv_open("ISO-8859-1", "UTF-8");
00102     if (cd != (iconv_t) - 1) {
00103         iconv_names[POS_ISO1] = "ISO-8859-1";
00104         iconv_names[POS_UTF8] = "UTF-8";
00105         tds_sys_iconv_close(cd);
00106     } else {
00107 
00108         /* search names for ISO8859-1 and UTF-8 */
00109         for (i = 0; iconv_aliases[i].alias; ++i) {
00110             int j;
00111 
00112             if (iconv_aliases[i].canonic != POS_ISO1)
00113                 continue;
00114             for (j = 0; iconv_aliases[j].alias; ++j) {
00115                 if (iconv_aliases[j].canonic != POS_UTF8)
00116                     continue;
00117 
00118                 cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_aliases[j].alias);
00119                 if (cd != (iconv_t) - 1) {
00120                     iconv_names[POS_ISO1] = iconv_aliases[i].alias;
00121                     iconv_names[POS_UTF8] = iconv_aliases[j].alias;
00122                     tds_sys_iconv_close(cd);
00123                     break;
00124                 }
00125             }
00126             if (iconv_names[POS_ISO1])
00127                 break;
00128         }
00129         /* required characters not found !!! */
00130         if (!iconv_names[POS_ISO1])
00131             return 1;
00132     }
00133 
00134     /* now search for UCS-2 */
00135     cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2LE");
00136     if (cd != (iconv_t) - 1) {
00137         iconv_names[POS_UCS2LE] = "UCS-2LE";
00138         tds_sys_iconv_close(cd);
00139     }
00140     cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2BE");
00141     if (cd != (iconv_t) - 1) {
00142         iconv_names[POS_UCS2BE] = "UCS-2BE";
00143         tds_sys_iconv_close(cd);
00144     }
00145 
00146     /* long search needed ?? */
00147     if (!iconv_names[POS_UCS2LE] || !iconv_names[POS_UCS2BE]) {
00148         for (i = 0; iconv_aliases[i].alias; ++i) {
00149             if (strncmp(canonic_charsets[iconv_aliases[i].canonic].name, "UCS-2", 5) != 0)
00150                 continue;
00151 
00152             cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_names[POS_ISO1]);
00153             if (cd != (iconv_t) - 1) {
00154                 char ib[1];
00155                 char ob[4];
00156                 size_t il, ol;
00157                 ICONV_CONST char *pib;
00158                 char *pob;
00159                 int byte_sequence = 0;
00160 
00161                 /* try to convert 'A' and check result */
00162                 ib[0] = 0x41;
00163                 pib = ib;
00164                 pob = ob;
00165                 il = 1;
00166                 ol = 4;
00167                 ob[0] = ob[1] = 0;
00168                 if (tds_sys_iconv(cd, &pib, &il, &pob, &ol) != (size_t) - 1) {
00169                     /* byte order sequence ?? */
00170                     if (ol == 0) {
00171                         ob[0] = ob[2];
00172                         byte_sequence = 1;
00173                         /* TODO save somewhere */
00174                     }
00175 
00176                     /* save name without sequence (if present) */
00177                     if (ob[0])
00178                         il = POS_UCS2LE;
00179                     else
00180                         il = POS_UCS2BE;
00181                     if (!iconv_names[il] || !byte_sequence)
00182                         iconv_names[il] = iconv_aliases[i].alias;
00183                 }
00184                 tds_sys_iconv_close(cd);
00185             }
00186         }
00187     }
00188     /* we need a UCS-2 (big endian or little endian) */
00189     if (!iconv_names[POS_UCS2LE] && !iconv_names[POS_UCS2BE])
00190         return 2;
00191 
00192     ucs2name = iconv_names[POS_UCS2LE] ? iconv_names[POS_UCS2LE] : iconv_names[POS_UCS2BE];
00193 
00194     for (i = 0; i < 4; ++i)
00195         tdsdump_log(TDS_DBG_INFO1, "names for %s: %s\n", canonic_charsets[i].name,
00196                 iconv_names[i] ? iconv_names[i] : "(null)");
00197 
00198     /* success (it should always occurs) */
00199     return 0;
00200 }
00201 
00202 /**
00203  * Get iconv name given canonic
00204  */
00205 static void
00206 tds_get_iconv_name(int charset)
00207 {
00208     int i;
00209     iconv_t cd;
00210 
00211     assert(iconv_initialized);
00212 
00213     /* try using canonic name and UTF-8 and UCS2 */
00214     cd = tds_sys_iconv_open(iconv_names[POS_UTF8], canonic_charsets[charset].name);
00215     if (cd != (iconv_t) - 1) {
00216         iconv_names[charset] = canonic_charsets[charset].name;
00217         tds_sys_iconv_close(cd);
00218         return;
00219     }
00220     cd = tds_sys_iconv_open(ucs2name, canonic_charsets[charset].name);
00221     if (cd != (iconv_t) - 1) {
00222         iconv_names[charset] = canonic_charsets[charset].name;
00223         tds_sys_iconv_close(cd);
00224         return;
00225     }
00226 
00227     /* try all alternatives */
00228     for (i = 0; iconv_aliases[i].alias; ++i) {
00229         if (iconv_aliases[i].canonic != charset)
00230             continue;
00231 
00232         cd = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_aliases[i].alias);
00233         if (cd != (iconv_t) - 1) {
00234             iconv_names[charset] = iconv_aliases[i].alias;
00235             tds_sys_iconv_close(cd);
00236             return;
00237         }
00238 
00239         cd = tds_sys_iconv_open(ucs2name, iconv_aliases[i].alias);
00240         if (cd != (iconv_t) - 1) {
00241             iconv_names[charset] = iconv_aliases[i].alias;
00242             tds_sys_iconv_close(cd);
00243             return;
00244         }
00245     }
00246 
00247     /* charset not found, use memcpy */
00248     iconv_names[charset] = "";
00249 }
00250 
00251 static void
00252 tds_iconv_reset(TDSICONV *conv)
00253 {
00254     /*
00255      * (min|max)_bytes_per_char can be used to divide
00256      * so init to safe values
00257      */
00258     conv->server_charset.min_bytes_per_char = 1;
00259     conv->server_charset.max_bytes_per_char = 1;
00260     conv->client_charset.min_bytes_per_char = 1;
00261     conv->client_charset.max_bytes_per_char = 1;
00262 
00263     conv->server_charset.name = conv->client_charset.name = "";
00264     conv->to_wire = (iconv_t) - 1;
00265     conv->to_wire2 = (iconv_t) - 1;
00266     conv->from_wire = (iconv_t) - 1;
00267     conv->from_wire2 = (iconv_t) - 1;
00268 }
00269 
00270 /**
00271  * Allocate iconv stuff
00272  * \return 0 for success
00273  */
00274 int
00275 tds_iconv_alloc(TDSSOCKET * tds)
00276 {
00277     int i;
00278     TDSICONV *char_conv;
00279 
00280     assert(!tds->char_convs);
00281     if (!(tds->char_convs = (TDSICONV **) malloc(sizeof(TDSICONV *) * (initial_char_conv_count + 1))))
00282     return 1;
00283     char_conv = (TDSICONV *) malloc(sizeof(TDSICONV) * initial_char_conv_count);
00284     if (!char_conv) {
00285         TDS_ZERO_FREE(tds->char_convs);
00286         return 1;
00287     }
00288     memset(char_conv, 0, sizeof(TDSICONV) * initial_char_conv_count);
00289     tds->char_conv_count = initial_char_conv_count + 1;
00290 
00291     for (i = 0; i < initial_char_conv_count; ++i) {
00292         tds->char_convs[i] = &char_conv[i];
00293         tds_iconv_reset(&char_conv[i]);
00294     }
00295 
00296     /* chardata is just a pointer to another iconv info */
00297     tds->char_convs[initial_char_conv_count] = tds->char_convs[client2server_chardata];
00298 
00299     return 0;
00300 }
00301 
00302 /**
00303  * \addtogroup conv
00304  * \@{ 
00305  * Set up the initial iconv conversion descriptors.
00306  * When the socket is allocated, three TDSICONV structures are attached to iconv.  
00307  * They have fixed meanings:
00308  *  \li 0. Client <-> UCS-2 (client2ucs2)
00309  *  \li 1. Client <-> server single-byte charset (client2server_chardata)
00310  *  \li 2. ISO8859-1  <-> server meta data  (iso2server_metadata)
00311  *
00312  * Other designs that use less data are possible, but these three conversion needs are 
00313  * very often needed.  By reserving them, we avoid searching the array for our most common purposes.
00314  *
00315  * To solve different iconv names and portability problems FreeTDS maintains 
00316  * a list of aliases each charset.  
00317  * 
00318  * First we discover the names of our minimum required charsets (UTF-8, ISO8859-1 and UCS2).  
00319  * Later, as and when it's needed, we try to discover others.
00320  *
00321  * There is one list of canonic names (GNU iconv names) and two sets of aliases
00322  * (one for other iconv implementations and another for Sybase). For every
00323  * canonic charset name we cache the iconv name found during discovery. 
00324  */
00325 void
00326 tds_iconv_open(TDSSOCKET * tds, const char *charset)
00327 {
00328     static const char UCS_2LE[] = "UCS-2LE";
00329     const char *name;
00330     int fOK, ret;
00331 
00332     TDS_ENCODING *client = &tds->char_convs[client2ucs2]->client_charset;
00333     TDS_ENCODING *server = &tds->char_convs[client2ucs2]->server_charset;
00334 
00335 #if !HAVE_ICONV_ALWAYS
00336 
00337     strcpy(client->name, "ISO-8859-1");
00338     strcpy(server->name, UCS_2LE);
00339 
00340     bytes_per_char(client);
00341     bytes_per_char(server);
00342     return;
00343 #else
00344     /* initialize */
00345     if (!iconv_initialized) {
00346         if ((ret = tds_iconv_init()) > 0) {
00347             static const char names[][12] = { "ISO 8859-1", "UTF-8" };
00348             assert(ret < 3);
00349             tdsdump_log(TDS_DBG_FUNC, "error: tds_iconv_init() returned %d; "
00350                           "could not find a name for %s that your iconv accepts.\n"
00351                           "use: \"configure --disable-libiconv\"", ret, names[ret-1]);
00352             assert(ret == 0);
00353             return;
00354         }
00355         iconv_initialized = 1;
00356     }
00357 
00358     /* 
00359      * Client <-> UCS-2 (client2ucs2)
00360      */
00361     tdsdump_log(TDS_DBG_FUNC, "iconv to convert client-side data to the \"%s\" character set\n", charset);
00362 
00363     fOK = tds_iconv_info_init(tds->char_convs[client2ucs2], charset, UCS_2LE);
00364     if (!fOK)
00365         return;
00366 
00367     /* 
00368      * How many UTF-8 bytes we need is a function of what the input character set is.
00369      * TODO This could definitely be more sophisticated, but it deals with the common case.
00370      */
00371     if (client->min_bytes_per_char == 1 && client->max_bytes_per_char == 4 && server->max_bytes_per_char == 1) {
00372         /* ie client is UTF-8 and server is ISO-8859-1 or variant. */
00373         client->max_bytes_per_char = 3;
00374     }
00375 
00376     /* 
00377      * Client <-> server single-byte charset
00378      * TODO: the server hasn't reported its charset yet, so this logic can't work here.  
00379      *       not sure what to do about that yet.  
00380      */
00381     tds->char_convs[client2server_chardata]->flags = TDS_ENCODING_MEMCPY;
00382     if (tds->env.charset) {
00383         fOK = tds_iconv_info_init(tds->char_convs[client2server_chardata], charset, tds->env.charset);
00384         if (!fOK)
00385             return;
00386     }
00387 
00388     /* 
00389      * ISO8859-1 <-> server meta data
00390      */
00391     name = UCS_2LE;
00392     if (tds->major_version < 7) {
00393         name = "ISO-8859-1";
00394         if (tds->env.charset)
00395             name = tds->env.charset;
00396     }
00397     fOK = tds_iconv_info_init(tds->char_convs[iso2server_metadata], "ISO-8859-1", name);
00398 
00399 #endif
00400 }
00401 
00402 /**
00403  * Open iconv descriptors to convert between character sets (both directions).
00404  * 1.  Look up the canonical names of the character sets.
00405  * 2.  Look up their widths.
00406  * 3.  Ask iconv to open a conversion descriptor.
00407  * 4.  Fail if any of the above offer any resistance.  
00408  * \remarks The charset names written to \a iconv will be the canonical names, 
00409  *          not necessarily the names passed in. 
00410  */
00411 static int
00412 tds_iconv_info_init(TDSICONV * char_conv, const char *client_name, const char *server_name)
00413 {
00414     TDS_ENCODING *client = &char_conv->client_charset;
00415     TDS_ENCODING *server = &char_conv->server_charset;
00416 
00417     int server_canonical, client_canonical;
00418 
00419     assert(client_name && server_name);
00420 
00421     assert(char_conv->to_wire == (iconv_t) - 1);
00422     assert(char_conv->to_wire2 == (iconv_t) - 1);
00423     assert(char_conv->from_wire == (iconv_t) - 1);
00424     assert(char_conv->from_wire2 == (iconv_t) - 1);
00425 
00426     client_canonical = tds_canonical_charset(client_name);
00427     server_canonical = tds_canonical_charset(server_name);
00428 
00429     if (client_canonical < 0) {
00430         tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: client charset name \"%s\" unrecognized\n", client->name);
00431         return 0;
00432     }
00433 
00434     if (server_canonical < 0) {
00435         tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: server charset name \"%s\" unrecognized\n", client->name);
00436         return 0;
00437     }
00438 
00439     *client = canonic_charsets[client_canonical];
00440     *server = canonic_charsets[server_canonical];
00441 
00442     /* special case, same charset, no conversion */
00443     if (client_canonical == server_canonical) {
00444         char_conv->to_wire = (iconv_t) - 1;
00445         char_conv->from_wire = (iconv_t) - 1;
00446         char_conv->flags = TDS_ENCODING_MEMCPY;
00447         return 1;
00448     }
00449 
00450     char_conv->flags = 0;
00451     if (!iconv_names[server_canonical]) {
00452         switch (server_canonical) {
00453         case POS_UCS2LE:
00454             server_canonical = POS_UCS2BE;
00455             char_conv->flags = TDS_ENCODING_SWAPBYTE;
00456             break;
00457         case POS_UCS2BE:
00458             server_canonical = POS_UCS2LE;
00459             char_conv->flags = TDS_ENCODING_SWAPBYTE;
00460             break;
00461         }
00462     }
00463 
00464     /* get iconv names */
00465     if (!iconv_names[client_canonical])
00466         tds_get_iconv_name(client_canonical);
00467     if (!iconv_names[server_canonical])
00468         tds_get_iconv_name(server_canonical);
00469 
00470     /* names available ?? */
00471     if (!iconv_names[client_canonical][0] || !iconv_names[server_canonical][0]) {
00472         char_conv->to_wire = (iconv_t) - 1;
00473         char_conv->from_wire = (iconv_t) - 1;
00474         char_conv->flags = TDS_ENCODING_MEMCPY;
00475         tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: use memcpy to convert \"%s\"->\"%s\"\n", client->name,
00476                 server->name);
00477         return 0;
00478     }
00479 
00480     char_conv->to_wire = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[client_canonical]);
00481     if (char_conv->to_wire == (iconv_t) - 1) {
00482         tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", client->name, server->name);
00483     }
00484 
00485     char_conv->from_wire = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]);
00486     if (char_conv->from_wire == (iconv_t) - 1) {
00487         tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", server->name, client->name);
00488     }
00489 
00490     /* try indirect conversions */
00491     if (char_conv->to_wire == (iconv_t) - 1 || char_conv->from_wire == (iconv_t) - 1) {
00492         tds_iconv_info_close(char_conv);
00493 
00494         /* TODO reuse some conversion, client charset is usually constant in all connection (or ISO8859-1) */
00495         char_conv->to_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[client_canonical]);
00496         char_conv->to_wire2 = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[POS_UTF8]);
00497         char_conv->from_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[server_canonical]);
00498         char_conv->from_wire2 = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[POS_UTF8]);
00499 
00500         if (char_conv->to_wire == (iconv_t) - 1 || char_conv->to_wire2 == (iconv_t) - 1
00501             || char_conv->from_wire == (iconv_t) - 1 || char_conv->from_wire2 == (iconv_t) - 1) {
00502 
00503             tds_iconv_info_close(char_conv);
00504             tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\" indirectly\n",
00505                     server->name, client->name);
00506             return 0;
00507         }
00508 
00509         char_conv->flags |= TDS_ENCODING_INDIRECT;
00510     }
00511     
00512     /* TODO, do some optimizations like UCS2 -> UTF8 min,max = 2,2 (UCS2) and 1,4 (UTF8) */
00513 
00514     tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: converting \"%s\"->\"%s\"\n", client->name, server->name);
00515 
00516     return 1;
00517 }
00518 
00519 
00520 #if HAVE_ICONV_ALWAYS
00521 static void
00522 _iconv_close(iconv_t * cd)
00523 {
00524     static const iconv_t invalid = (iconv_t) - 1;
00525 
00526     if (*cd != invalid) {
00527         tds_sys_iconv_close(*cd);
00528         *cd = invalid;
00529     }
00530 }
00531 
00532 static void
00533 tds_iconv_info_close(TDSICONV * char_conv)
00534 {
00535     _iconv_close(&char_conv->to_wire);
00536     _iconv_close(&char_conv->to_wire2);
00537     _iconv_close(&char_conv->from_wire);
00538     _iconv_close(&char_conv->from_wire2);
00539 }
00540 #endif
00541 
00542 void
00543 tds_iconv_close(TDSSOCKET * tds)
00544 {
00545 #if HAVE_ICONV_ALWAYS
00546     int i;
00547 
00548     for (i = 0; i < tds->char_conv_count; ++i) {
00549         tds_iconv_info_close(tds->char_convs[i]);
00550     }
00551 #endif
00552 }
00553 
00554 #define CHUNK_ALLOC 4
00555 
00556 void
00557 tds_iconv_free(TDSSOCKET * tds)
00558 {
00559     int i;
00560 
00561     if (!tds->char_convs)
00562         return;
00563     tds_iconv_close(tds);
00564 
00565     free(tds->char_convs[0]);
00566     for (i = initial_char_conv_count + 1; i < tds->char_conv_count; i += CHUNK_ALLOC)
00567         free(tds->char_convs[i]);
00568     TDS_ZERO_FREE(tds->char_convs);
00569     tds->char_conv_count = 0;
00570 }
00571 
00572 /** 
00573  * Wrapper around iconv(3).  Same parameters, with slightly different behavior.
00574  * \param tds state information for the socket and the TDS protocol
00575  * \param io Enumerated value indicating whether the data are being sent to or received from the server. 
00576  * \param conv information about the encodings involved, including the iconv(3) conversion descriptors. 
00577  * \param inbuf address of pointer to the input buffer of data to be converted.  
00578  * \param inbytesleft address of count of bytes in \a inbuf.
00579  * \param outbuf address of pointer to the output buffer.  
00580  * \param outbytesleft address of count of bytes in \a outbuf.
00581  * \retval number of irreversible conversions performed.  -1 on error, see iconv(3) documentation for 
00582  * a description of the possible values of \e errno.  
00583  * \remarks Unlike iconv(3), none of the arguments can be nor point to NULL.  Like iconv(3), all pointers will 
00584  *      be updated.  Success is signified by a nonnegative return code and \a *inbytesleft == 0.  
00585  *  If the conversion descriptor in \a iconv is -1 or NULL, \a inbuf is copied to \a outbuf, 
00586  *  and all parameters updated accordingly. 
00587  * 
00588  *  If a character in \a inbuf cannot be converted because no such character exists in the
00589  *  \a outbuf character set, we emit messages similar to the ones Sybase emits when it fails such a conversion. 
00590  *  The message varies depending on the direction of the data.  
00591  *  On a read error, we emit Msg 2403, Severity 16 (EX_INFO):
00592  *      "WARNING! Some character(s) could not be converted into client's character set. 
00593  *          Unconverted bytes were changed to question marks ('?')."
00594  *  On a write error we emit Msg 2402, Severity 16 (EX_USER):
00595  *      "Error converting client characters into server's character set. Some character(s) could not be converted."
00596  *        and return an error code.  Client libraries relying on this routine should reflect an error back to the application.  
00597  *  
00598  * \todo Check for variable multibyte non-UTF-8 input character set.  
00599  * \todo Use more robust error message generation.  
00600  * \todo For reads, cope with \a outbuf encodings that don't have the equivalent of an ASCII '?'.  
00601  * \todo Support alternative to '?' for the replacement character.  
00602  */
00603 size_t
00604 tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io,
00605       const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft)
00606 {
00607     static const iconv_t invalid = (iconv_t) - 1;
00608     const TDS_ENCODING *input_charset = NULL;
00609     const char *output_charset_name = NULL;
00610 
00611     iconv_t cd = invalid, cd2 = invalid;
00612     iconv_t error_cd = invalid;
00613 
00614     char quest_mark[] = "?";    /* best to leave non-const; implementations vary */
00615     ICONV_CONST char *pquest_mark = quest_mark;
00616     size_t lquest_mark;
00617     size_t irreversible;
00618     char one_character;
00619     char *p;
00620     int eilseq_raised = 0;
00621     /* cast away const-ness */
00622     TDS_ERRNO_MESSAGE_FLAGS *suppress = (TDS_ERRNO_MESSAGE_FLAGS*) &conv->suppress;
00623 
00624     assert(inbuf && inbytesleft && outbuf && outbytesleft);
00625 
00626     switch (io) {
00627     case to_server:
00628         cd = conv->to_wire;
00629         cd2 = conv->to_wire2;
00630         input_charset = &conv->client_charset;
00631         output_charset_name = conv->server_charset.name;
00632         break;
00633     case to_client:
00634         cd = conv->from_wire;
00635         cd2 = conv->from_wire2;
00636         input_charset = &conv->server_charset;
00637         output_charset_name = conv->client_charset.name;
00638         break;
00639     default:
00640         tdsdump_log(TDS_DBG_FUNC, "tds_iconv: unable to determine if %d means in or out.  \n", io);
00641         assert(io == to_server || io == to_client);
00642         break;
00643     }
00644 
00645     /* silly case, memcpy */
00646     if (conv->flags & TDS_ENCODING_MEMCPY || cd == invalid) {
00647         size_t len = *inbytesleft < *outbytesleft ? *inbytesleft : *outbytesleft;
00648 
00649         memcpy(*outbuf, *inbuf, len);
00650         errno = *inbytesleft > *outbytesleft ? E2BIG : 0;
00651         *inbytesleft -= len;
00652         *outbytesleft -= len;
00653         *inbuf += len;
00654         *outbuf += len;
00655         return 0;
00656     }
00657 
00658     /*
00659      * Call iconv() as many times as necessary, until we reach the end of input or exhaust output.  
00660      */
00661     errno = 0;
00662     p = *outbuf;
00663     for (;;) {
00664         if (conv->flags & TDS_ENCODING_INDIRECT) {
00665 #if ENABLE_EXTRA_CHECKS
00666             char tmp[8];
00667 #else
00668             char tmp[128];
00669 #endif
00670             char *pb = tmp;
00671             size_t l = sizeof(tmp);
00672             int temp_errno;
00673             size_t temp_irreversible;
00674 
00675             temp_irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, &pb, &l);
00676             temp_errno = errno;
00677 
00678             /* convert partial */
00679             pb = tmp;
00680             l = sizeof(tmp) - l;
00681             for (;;) {
00682                 errno = 0;
00683                 irreversible = tds_sys_iconv(cd2, (ICONV_CONST char **) &pb, &l, outbuf, outbytesleft);
00684                 if (irreversible != (size_t) - 1) {
00685                     if (*inbytesleft)
00686                         break;
00687                     goto end_loop;
00688                 }
00689                 /* EINVAL should be impossible, all characters came from previous iconv... */
00690                 if (errno == E2BIG || errno == EINVAL)
00691                     goto end_loop;
00692 
00693                 /*
00694                  * error should be EILSEQ, not convertible sequence 
00695                  * skip UTF-8 sequence 
00696                  */
00697                 /* avoid infinite recursion */
00698                 eilseq_raised = 1;
00699                 if (*pb == '?')
00700                     goto end_loop;
00701                 *pb = (char) 0x80;
00702                 while(l && (*pb & 0xC0) == 0x80)
00703                     ++pb, --l;
00704                 --pb;
00705                 ++l;
00706                 *pb = '?';
00707             }
00708             if (temp_errno == E2BIG) {
00709                 errno = 0;
00710                 continue;
00711             }
00712             errno = temp_errno;
00713             irreversible = temp_irreversible;
00714             break;
00715         } else if (io == to_client && conv->flags & TDS_ENCODING_SWAPBYTE) {
00716             /* swap bytes if necessary */
00717 #if ENABLE_EXTRA_CHECKS
00718             char tmp[8];
00719 #else
00720             char tmp[128];
00721 #endif
00722             char *pib = tmp;
00723             size_t il = *inbytesleft > sizeof(tmp) ? sizeof(tmp) : *inbytesleft;
00724             size_t n;
00725 
00726             for (n = 0; n < il; n += 2) {
00727                 tmp[n] = (*inbuf)[n + 1];
00728                 tmp[n + 1] = (*inbuf)[n];
00729             }
00730             irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) &pib, &il, outbuf, outbytesleft);
00731             il = pib - tmp;
00732             *inbuf += il;
00733             *inbytesleft -= il;
00734             if (irreversible != (size_t) - 1 && *inbytesleft)
00735                 continue;
00736         } else {
00737             irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft);
00738         }
00739         if (irreversible != (size_t) - 1)
00740             break;
00741 
00742         if (errno == EILSEQ)
00743             eilseq_raised = 1;
00744 
00745         if (errno != EILSEQ || io != to_client)
00746             break;
00747         /* 
00748          * Invalid input sequence encountered reading from server. 
00749          * Skip one input sequence, adjusting pointers. 
00750          */
00751         one_character = skip_one_input_sequence(cd, input_charset, inbuf, inbytesleft);
00752 
00753         if (!one_character)
00754             break;
00755 
00756         /* 
00757          * To replace invalid input with '?', we have to convert a UTF-8 '?' into the output character set.  
00758          * In unimaginably weird circumstances, this might be impossible.
00759          * We use UTF-8 instead of ASCII because some implementations 
00760          * do not convert singlebyte <-> singlebyte.
00761          */
00762         if (error_cd == invalid) {
00763             error_cd = tds_sys_iconv_open(output_charset_name, iconv_names[POS_UTF8]);
00764             if (error_cd == invalid) {
00765                 break;  /* what to do? */
00766             }
00767         }
00768 
00769         lquest_mark = 1;
00770         pquest_mark = quest_mark;
00771 
00772         p = *outbuf;
00773         irreversible = tds_sys_iconv(error_cd, &pquest_mark, &lquest_mark, outbuf, outbytesleft);
00774 
00775         if (irreversible == (size_t) - 1)
00776             break;
00777 
00778         if (!*inbytesleft)
00779             break;
00780     }
00781 end_loop:
00782     
00783     /* swap bytes if necessary */
00784     if (io == to_server && conv->flags & TDS_ENCODING_SWAPBYTE) {
00785         assert((*outbuf - p) % 2 == 0);
00786         for (; p < *outbuf; p += 2) {
00787             char tmp = p[0];
00788 
00789             p[0] = p[1];
00790             p[1] = tmp;
00791         }
00792     }
00793 
00794     if (eilseq_raised && !suppress->eilseq) {
00795         /* invalid multibyte input sequence encountered */
00796         if (io == to_client) {
00797             if (irreversible == (size_t) - 1) {
00798                 tds_client_msg(tds->tds_ctx, tds, 2404, 16, 0, 0,
00799                            "WARNING! Some character(s) could not be converted into client's character set. ");
00800             } else {
00801                 tds_client_msg(tds->tds_ctx, tds, 2403, 16, 0, 0,
00802                            "WARNING! Some character(s) could not be converted into client's character set. "
00803                            "Unconverted bytes were changed to question marks ('?').");
00804                 errno = 0;
00805             }
00806         } else {
00807             tds_client_msg(tds->tds_ctx, tds, 2402, 16, 0, 0,
00808                        "Error converting client characters into server's character set. "
00809                        "Some character(s) could not be converted.");
00810         }
00811         suppress->eilseq = 1;
00812     }
00813 
00814     switch (errno) {
00815     case EINVAL:        /* incomplete multibyte sequence is encountered */
00816         if (suppress->einval)
00817             break;
00818         /* in chunk conversion this can mean we end a chunk inside a character */
00819         tds_client_msg(tds->tds_ctx, tds, 2401, 16, *inbytesleft, 0,
00820                    "iconv EINVAL: Error converting between character sets. "
00821                    "Conversion abandoned at offset indicated by the \"state\" value of this message.");
00822         suppress->einval = 1;
00823         break;
00824     case E2BIG:     /* output buffer has no more room */
00825         if (suppress->e2big)
00826             break;
00827         tds_client_msg(tds->tds_ctx, tds, 2400, 16, *inbytesleft, 0,
00828                    "iconv E2BIG: Error converting between character sets. " "Output buffer exhausted.");
00829         suppress->e2big = 1;
00830         break;
00831     default:
00832         break;
00833     }
00834 
00835     if (error_cd != invalid) {
00836         tds_sys_iconv_close(error_cd);
00837     }
00838 
00839     return irreversible;
00840 }
00841 
00842 /**
00843  * Read a data file, passing the data through iconv().
00844  * \return Count of bytes either not read, or read but not converted.  Returns zero on success.  
00845  */
00846 size_t
00847 tds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft)
00848 {
00849 #ifdef ENABLE_EXTRA_CHECKS
00850     char buffer[16];
00851 #else
00852     char buffer[16000];
00853 #endif
00854     char *ib;
00855     size_t isize = 0, nonreversible_conversions = 0;
00856 
00857     /*
00858      * If cd isn't valid, it's just an indication that this column needs no conversion.  
00859      */
00860     if (cd == (iconv_t) - 1) {
00861         assert(field_len <= *outbytesleft);
00862         if (field_len > 0) {
00863             if (1 != fread(outbuf, field_len, 1, stream)) {
00864                 return field_len + term_len;    /* unable to read */
00865             }
00866         }
00867 
00868         /* prepare to read the terminator and return */
00869         *outbytesleft -= field_len; /* as iconv would have done */
00870         isize = 0;          /* as iconv would have done */
00871         field_len = 0;          /* as the loop would have done */
00872 
00873         goto READ_TERMINATOR;
00874     }
00875     
00876     /*
00877      * Read in chunks.  
00878      *  field_len  is the total size to read
00879      *  isize      is the size of the current chunk (which might be the whole thing).
00880      * They are decremented as they are successfully processed.  
00881      * On success, we exit the loop with both equal to zero, indicating nothing we
00882      * were asked to read remains unread.
00883      */
00884     isize = (sizeof(buffer) < field_len) ? sizeof(buffer) : field_len;
00885 
00886     for (ib = buffer; isize && (isize = fread(ib, 1, isize, stream)) > 0;) {
00887 
00888         tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: read %u of %u bytes; outbuf has %u left.\n", (unsigned int) isize,
00889                 (unsigned int) field_len, (unsigned int) *outbytesleft);
00890         field_len -= isize;
00891 
00892         isize += ib - buffer;
00893         ib = buffer;
00894         nonreversible_conversions += tds_sys_iconv(cd, (ICONV_CONST char **) &ib, &isize, &outbuf, outbytesleft);
00895 
00896         if (isize != 0) {
00897             memmove(buffer, ib, isize);
00898             switch (errno) {
00899             case EINVAL:    /* incomplete multibyte sequence encountered in input */
00900                 break;
00901             case E2BIG: /* insufficient room in output buffer */
00902             case EILSEQ:    /* invalid multibyte sequence encountered in input */
00903             default:
00904                 /* FIXME: emit message */
00905                 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: error %d: %s.\n", errno, strerror(errno));
00906                 break;
00907             }
00908         }
00909         ib = buffer + isize;
00910         isize = sizeof(buffer) - isize;
00911         if (isize > field_len)
00912             isize = field_len;
00913     }
00914     
00915     READ_TERMINATOR:
00916 
00917     if (term_len > 0 && !feof(stream)) {
00918         isize += term_len;
00919         if (term_len && 1 == fread(buffer, term_len, 1, stream)) {
00920             isize -= term_len;
00921         } else {
00922             tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: cannot read %u-byte terminator\n", (unsigned int) term_len);
00923         }
00924     }
00925 
00926     return field_len + isize;
00927 }
00928 
00929 /**
00930  * Get a iconv info structure, allocate and initialize if needed
00931  */
00932 static TDSICONV *
00933 tds_iconv_get_info(TDSSOCKET * tds, const char *canonic_charset)
00934 {
00935     TDSICONV *info;
00936     int i;
00937 
00938     /* search a charset from already allocated charsets */
00939     for (i = tds->char_conv_count; --i >= initial_char_conv_count;)
00940         if (strcmp(canonic_charset, tds->char_convs[i]->server_charset.name) == 0)
00941             return tds->char_convs[i];
00942 
00943     /* allocate a new iconv structure */
00944     if (tds->char_conv_count % CHUNK_ALLOC == ((initial_char_conv_count + 1) % CHUNK_ALLOC)) {
00945         TDSICONV **p;
00946         TDSICONV *infos;
00947 
00948         infos = (TDSICONV *) malloc(sizeof(TDSICONV) * CHUNK_ALLOC);
00949         if (!infos)
00950             return NULL;
00951         p = (TDSICONV **) realloc(tds->char_convs, sizeof(TDSICONV *) * (tds->char_conv_count + CHUNK_ALLOC));
00952         if (!p) {
00953             free(infos);
00954             return NULL;
00955         }
00956         tds->char_convs = p;
00957         memset(infos, 0, sizeof(TDSICONV) * CHUNK_ALLOC);
00958         for (i = 0; i < CHUNK_ALLOC; ++i) {
00959             tds->char_convs[i + tds->char_conv_count] = &infos[i];
00960             tds_iconv_reset(&infos[i]);
00961         }
00962     }
00963     info = tds->char_convs[tds->char_conv_count++];
00964 
00965     /* init */
00966     /* TODO test allocation */
00967     tds_iconv_info_init(info, tds->char_convs[client2ucs2]->client_charset.name, canonic_charset);
00968     return info;
00969 }
00970 
00971 /* change singlebyte conversions according to server */
00972 void
00973 tds_srv_charset_changed(TDSSOCKET * tds, const char *charset)
00974 {
00975 #if HAVE_ICONV_ALWAYS
00976     TDSICONV *char_conv = tds->char_convs[client2server_chardata];
00977 
00978     const char *canonic_charset = tds_canonical_charset_name(charset);
00979 
00980     /* ignore request to change to unknown charset */
00981     if (!canonic_charset) {
00982         tdsdump_log(TDS_DBG_FUNC, "tds_srv_charset_changed: what is charset \"%s\"?\n", charset);
00983         return;
00984     }
00985 
00986     if (strcmp(canonic_charset, char_conv->server_charset.name) == 0)
00987         return;
00988 
00989     /* find and set conversion */
00990     char_conv = tds_iconv_get_info(tds, canonic_charset);
00991     if (char_conv)
00992         tds->char_convs[client2server_chardata] = char_conv;
00993 
00994     /* if sybase change also server conversions */
00995     if (tds->major_version >= 7)
00996         return;
00997 
00998     char_conv = tds->char_convs[iso2server_metadata];
00999 
01000     tds_iconv_info_close(char_conv);
01001 
01002     tds_iconv_info_init(char_conv, "ISO-8859-1", charset);
01003 #endif
01004 }
01005 
01006 /* change singlebyte conversions according to server */
01007 void
01008 tds7_srv_charset_changed(TDSSOCKET * tds, int sql_collate, int lcid)
01009 {
01010     tds_srv_charset_changed(tds, collate2charset(sql_collate, lcid));
01011 }
01012 
01013 #if !HAVE_ICONV_ALWAYS
01014 /**
01015  * Determine byte/char for an iconv character set.  
01016  * \retval 0 failed, no such charset.
01017  * \retval 1 succeeded, fixed byte/char.
01018  * \retval 2 succeeded, variable byte/char.
01019  */
01020 static int
01021 bytes_per_char(TDS_ENCODING * charset)
01022 {
01023     int i;
01024 
01025     assert(charset && strlen(charset->name) < sizeof(charset->name));
01026 
01027     for (i = 0; i < sizeof(canonic_charsets) / sizeof(TDS_ENCODING); i++) {
01028         if (canonic_charsets[i].min_bytes_per_char == 0)
01029             break;
01030 
01031         if (0 == strcmp(charset->name, canonic_charsets[i].name)) {
01032             charset->min_bytes_per_char = canonic_charsets[i].min_bytes_per_char;
01033             charset->max_bytes_per_char = canonic_charsets[i].max_bytes_per_char;
01034 
01035             return (charset->max_bytes_per_char == charset->min_bytes_per_char) ? 1 : 2;
01036         }
01037     }
01038 
01039     return 0;
01040 }
01041 #endif
01042 
01043 /**
01044  * Move the input sequence pointer to the next valid position.
01045  * Used when an input character cannot be converted.  
01046  * \returns number of bytes to skip.
01047  */
01048 /* FIXME possible buffer reading overflow ?? */
01049 static int
01050 skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size)
01051 {
01052     int charsize = CHARSIZE(charset);
01053     char ib[16];
01054     char ob[16];
01055     ICONV_CONST char *pib;
01056     char *pob;
01057     size_t il, ol, l;
01058     iconv_t cd2;
01059 
01060 
01061     /* usually fixed size and UTF-8 do not have state, so do not reset it */
01062     if (charsize) {
01063         *input += charsize;
01064         *input_size -= charsize;
01065         return charsize;
01066     }
01067 
01068     if (0 == strcmp(charset->name, "UTF-8")) {
01069         /*
01070          * Deal with UTF-8.  
01071          * bytes | bits | representation
01072          *     1 |    7 | 0vvvvvvv
01073          *     2 |   11 | 110vvvvv 10vvvvvv
01074          *     3 |   16 | 1110vvvv 10vvvvvv 10vvvvvv
01075          *     4 |   21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
01076          */
01077         int c = **input;
01078 
01079         c = c & (c >> 1);
01080         do {
01081             ++charsize;
01082         } while ((c <<= 1) & 0x80);
01083         *input += charsize;
01084         *input_size -= charsize;
01085         return charsize;
01086     }
01087 
01088     /* handle state encoding */
01089 
01090     /* extract state from iconv */
01091     pob = ib;
01092     ol = sizeof(ib);
01093     tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
01094 
01095     /* init destination conversion */
01096     /* TODO use largest fixed size for this platform */
01097     cd2 = tds_sys_iconv_open("UCS-4", charset->name);
01098     if (cd2 == (iconv_t) - 1)
01099         return 0;
01100 
01101     /* add part of input */
01102     il = ol;
01103     if (il > *input_size)
01104         il = *input_size;
01105     l = sizeof(ib) - ol;
01106     memcpy(ib + l, *input, il);
01107     il += l;
01108 
01109     /* translate a single character */
01110     pib = ib;
01111     pob = ob;
01112     /* TODO use size of largest fixed charset */
01113     ol = 4;
01114     tds_sys_iconv(cd2, &pib, &il, &pob, &ol);
01115 
01116     /* adjust input */
01117     l = (pib - ib) - l;
01118     *input += l;
01119     *input_size -= l;
01120 
01121     /* extract state */
01122     pob = ib;
01123     ol = sizeof(ib);
01124     tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
01125 
01126     /* set input state */
01127     pib = ib;
01128     il = sizeof(ib) - ol;
01129     pob = ob;
01130     ol = sizeof(ob);
01131     tds_sys_iconv(cd, &pib, &il, &pob, &ol);
01132 
01133     tds_sys_iconv_close(cd2);
01134 
01135     return l;
01136 }
01137 
01138 static int
01139 lookup_canonic(const CHARACTER_SET_ALIAS aliases[], const char *charset_name)
01140 {
01141     int i;
01142 
01143     for (i = 0; aliases[i].alias; ++i) {
01144         if (0 == strcmp(charset_name, aliases[i].alias))
01145             return aliases[i].canonic;
01146     }
01147 
01148     return -1;
01149 }
01150 
01151 /**
01152  * Determine canonical iconv character set.
01153  * \returns canonical position, or -1 if lookup failed.
01154  * \remarks Returned name can be used in bytes_per_char(), above.
01155  */
01156 static int
01157 tds_canonical_charset(const char *charset_name)
01158 {
01159     int res;
01160 
01161     /* search in alternative */
01162     res = lookup_canonic(iconv_aliases, charset_name);
01163     if (res >= 0)
01164         return res;
01165 
01166     /* search in sybase */
01167     return lookup_canonic(sybase_aliases, charset_name);
01168 }
01169 
01170 /**
01171  * Determine canonical iconv character set name.  
01172  * \returns canonical name, or NULL if lookup failed.
01173  * \remarks Returned name can be used in bytes_per_char(), above.
01174  */
01175 const char *
01176 tds_canonical_charset_name(const char *charset_name)
01177 {
01178     int res;
01179 
01180     /* get numeric pos */
01181     res = tds_canonical_charset(charset_name);
01182     if (res >= 0)
01183         return canonic_charsets[res].name;
01184 
01185     return NULL;
01186 }
01187 
01188 /**
01189  * Determine the name Sybase uses for a character set, given a canonical iconv name.  
01190  * \returns Sybase name, or NULL if lookup failed.
01191  * \remarks Returned name can be sent to Sybase a server.
01192  */
01193 const char *
01194 tds_sybase_charset_name(const char *charset_name)
01195 {
01196     int res, i;
01197 
01198     /* search in sybase */
01199     res = lookup_canonic(iconv_aliases, charset_name);
01200     if (res < 0)
01201         return NULL;
01202 
01203     /* special case, ignore ascii_8, take iso_1 instead, note index start from 1 */
01204     assert(strcmp(sybase_aliases[0].alias, "ascii_8") == 0);
01205 
01206     for (i = 1; sybase_aliases[i].alias; ++i) {
01207         if (sybase_aliases[i].canonic == res)
01208             return sybase_aliases[i].alias;
01209     }
01210 
01211     return NULL;
01212 }
01213 
/**
 * Map a SQL Server collation to the name of the code page it uses.
 *
 * The SQL collation id is examined first; if it is not one of the ids listed
 * below, the low 16 bits of the LCID decide.  Anything unrecognised maps to
 * "CP1252", so the result is never NULL.
 *
 * The tables come from the MSQLServer reference "Windows Collation Designators"
 * and from " NLS Information for Microsoft Windows XP".
 */
static const char *
collate2charset(int sql_collate, int lcid)
{
    /* collations that name their code page directly */
    switch (sql_collate) {
    case 30:        /* SQL_Latin1_General_CP437_BIN */
    case 31:        /* SQL_Latin1_General_CP437_CS_AS */
    case 32:        /* SQL_Latin1_General_CP437_CI_AS */
    case 33:        /* SQL_Latin1_General_Pref_CP437_CI_AS */
    case 34:        /* SQL_Latin1_General_CP437_CI_AI */
        return "CP437";
    case 40:        /* SQL_Latin1_General_CP850_BIN */
    case 41:        /* SQL_Latin1_General_CP850_CS_AS */
    case 42:        /* SQL_Latin1_General_CP850_CI_AS */
    case 43:        /* SQL_Latin1_General_Pref_CP850_CI_AS */
    case 44:        /* SQL_Latin1_General_CP850_CI_AI */
    case 49:        /* SQL_1xCompat_CP850_CI_AS */
    case 55:        /* SQL_AltDiction_CP850_CS_AS */
    case 56:        /* SQL_AltDiction_Pref_CP850_CI_AS */
    case 57:        /* SQL_AltDiction_CP850_CI_AI */
    case 58:        /* SQL_Scandinavian_Pref_CP850_CI_AS */
    case 59:        /* SQL_Scandinavian_CP850_CS_AS */
    case 60:        /* SQL_Scandinavian_CP850_CI_AS */
    case 61:        /* SQL_AltDiction_CP850_CI_AS */
        return "CP850";
    case 81:        /* SQL_Latin1_General_CP1250_CS_AS */
    case 82:        /* SQL_Latin1_General_CP1250_CI_AS */
        return "CP1250";
    case 105:       /* SQL_Latin1_General_CP1251_CS_AS */
    case 106:       /* SQL_Latin1_General_CP1251_CI_AS */
        return "CP1251";
    case 113:       /* SQL_Latin1_General_CP1253_CS_AS */
    case 114:       /* SQL_Latin1_General_CP1253_CI_AS */
    case 120:       /* SQL_MixDiction_CP1253_CS_AS */
    case 121:       /* SQL_AltDiction_CP1253_CS_AS */
    case 124:       /* SQL_Latin1_General_CP1253_CI_AI */
        return "CP1253";
    case 137:       /* SQL_Latin1_General_CP1255_CS_AS */
    case 138:       /* SQL_Latin1_General_CP1255_CI_AS */
        return "CP1255";
    case 145:       /* SQL_Latin1_General_CP1256_CS_AS */
    case 146:       /* SQL_Latin1_General_CP1256_CI_AS */
        return "CP1256";
    case 153:       /* SQL_Latin1_General_CP1257_CS_AS */
    case 154:       /* SQL_Latin1_General_CP1257_CI_AS */
        return "CP1257";
    }

    /* otherwise decide by locale id; bits above the low 16 are ignored */
    switch (lcid & 0xffff) {
    case 0x405:
    case 0x40e:     /* 0x1040e */
    case 0x415:
    case 0x418:
    case 0x41a:
    case 0x41b:
    case 0x41c:
    case 0x424:
        /* case 0x81a: seem wrong in XP table TODO check */
    case 0x104e:        /* ?? */
        return "CP1250";

    case 0x402:
    case 0x419:
    case 0x422:
    case 0x423:
    case 0x42f:
    case 0x43f:
    case 0x440:
    case 0x444:
    case 0x450:
    case 0x81a:     /* ?? */
    case 0x82c:
    case 0x843:
    case 0xc1a:
        return "CP1251";

    case 0x1007:
    case 0x1009:
    case 0x100a:
    case 0x100c:
    case 0x1407:
    case 0x1409:
    case 0x140a:
    case 0x140c:
    case 0x1809:
    case 0x180a:
    case 0x180c:
    case 0x1c09:
    case 0x1c0a:
    case 0x2009:
    case 0x200a:
    case 0x2409:
    case 0x240a:
    case 0x2809:
    case 0x280a:
    case 0x2c09:
    case 0x2c0a:
    case 0x3009:
    case 0x300a:
    case 0x3409:
    case 0x340a:
    case 0x380a:
    case 0x3c0a:
    case 0x400a:
    case 0x403:
    case 0x406:
    case 0x407:     /* 0x10407 */
    case 0x409:
    case 0x40a:
    case 0x40b:
    case 0x40c:
    case 0x40f:
    case 0x410:
    case 0x413:
    case 0x414:
    case 0x416:
    case 0x41d:
    case 0x421:
    case 0x42d:
    case 0x436:
    case 0x437:     /* 0x10437 */
    case 0x438:
        /*case 0x439:  ??? Unicode only */
    case 0x43e:
    case 0x440a:
    case 0x441:
    case 0x456:
    case 0x480a:
    case 0x4c0a:
    case 0x500a:
    case 0x807:
    case 0x809:
    case 0x80a:
    case 0x80c:
    case 0x810:
    case 0x813:
    case 0x814:
    case 0x816:
    case 0x81d:
    case 0x83e:
    case 0xc07:
    case 0xc09:
    case 0xc0a:
    case 0xc0c:
        return "CP1252";

    case 0x408:
        return "CP1253";

    case 0x41f:
    case 0x42c:
    case 0x443:
        return "CP1254";

    case 0x40d:
        return "CP1255";

    case 0x1001:
    case 0x1401:
    case 0x1801:
    case 0x1c01:
    case 0x2001:
    case 0x2401:
    case 0x2801:
    case 0x2c01:
    case 0x3001:
    case 0x3401:
    case 0x3801:
    case 0x3c01:
    case 0x4001:
    case 0x401:
    case 0x420:
    case 0x429:
    case 0x801:
    case 0xc01:
        return "CP1256";

    case 0x425:
    case 0x426:
    case 0x427:
    case 0x827:     /* ?? */
        return "CP1257";

    case 0x42a:
        return "CP1258";

    case 0x41e:
        return "CP874";

    case 0x411:     /* 0x10411 */
        return "CP932";

    case 0x1004:
    case 0x804:     /* 0x20804 */
        return "CP936";

    case 0x412:     /* 0x10412 */
        return "CP949";

    case 0x1404:
    case 0x404:     /* 0x30404 */
    case 0xc04:
        return "CP950";

    default:
        break;
    }

    /* unknown locale: fall back to the Western European code page */
    return "CP1252";
}
01431 
01432 /**
01433  * Get iconv information from a LCID (to support different column encoding under MSSQL2K)
01434  */
01435 TDSICONV *
01436 tds_iconv_from_collate(TDSSOCKET * tds, int sql_collate, int lcid)
01437 {
01438     const char *charset = collate2charset(sql_collate, lcid);
01439 
01440 #if ENABLE_EXTRA_CHECKS
01441     assert(strcmp(tds_canonical_charset_name(charset), charset) == 0);
01442 #endif
01443 
01444     /* same as client (usually this is true, so this improve performance) ? */
01445     if (strcmp(tds->char_convs[client2server_chardata]->server_charset.name, charset) == 0)
01446         return tds->char_convs[client2server_chardata];
01447 
01448     return tds_iconv_get_info(tds, charset);
01449 }
01450 
01451 /** \@} */
01452 
01453 

Generated on Wed Dec 9 04:15:52 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:57 2009 by modify_doxy.py rev. 173732