00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <assert.h>
00026
00027 #if HAVE_CONFIG_H
00028 #include <config.h>
00029 #endif
00030
00031 #if HAVE_STRING_H
00032 #include <string.h>
00033 #endif
00034 #if HAVE_ERRNO_H
00035 #include <errno.h>
00036 #endif
00037
00038 #include "tds.h"
00039 #include "tdsiconv.h"
00040 #if HAVE_ICONV
00041 #include <iconv.h>
00042 #endif
00043 #ifdef DMALLOC
00044 #include <dmalloc.h>
00045 #endif
00046
00047
/*
 * When non-zero, the blocks in this file guarded by `#if HAVE_ICONV_ALWAYS`
 * are compiled and the ones guarded by `#if !HAVE_ICONV_ALWAYS` are left out.
 */
00048 #define HAVE_ICONV_ALWAYS 1
00049
/* Revision identifier handed to the project's TDS_RCSID macro. */
00050 TDS_RCSID(var, "$Id: iconv.c 91553 2006-10-12 15:14:13Z ssikorsk $");
00051
/*
 * Width in bytes of one character of `charset` when the encoding is
 * fixed-width (min_bytes_per_char == max_bytes_per_char), otherwise 0.
 * Note that the argument is evaluated more than once.
 */
00052 #define CHARSIZE(charset) ( ((charset)->min_bytes_per_char == (charset)->max_bytes_per_char )? \
00053 (charset)->min_bytes_per_char : 0 )
00054
00055
/* Forward declarations of the file-local helpers defined further down. */
00056 #if !HAVE_ICONV_ALWAYS
00057 static int bytes_per_char(TDS_ENCODING * charset);
00058 #endif
00059 static const char *collate2charset(int sql_collate, int lcid);
00060 static int skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size);
00061 static int tds_iconv_info_init(TDSICONV * char_conv, const char *client_name, const char *server_name);
00062 static int tds_iconv_init(void);
00063 static int tds_canonical_charset(const char *charset_name);
00064 static void _iconv_close(iconv_t * cd);
00065 static void tds_iconv_info_close(TDSICONV * char_conv);
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075 #include "encodings.h"
00076
00077
/*
 * For every entry of canonic_charsets, the spelling that the system iconv
 * accepted for it.  NULL means "not probed yet"; tds_get_iconv_name() stores
 * an empty string when no spelling could be opened.
 */
00078 static const char *iconv_names[sizeof(canonic_charsets) / sizeof(canonic_charsets[0])];
/* Set to 1 by tds_iconv_open() once tds_iconv_init() has succeeded. */
00079 static int iconv_initialized = 0;
/* The UCS-2 name chosen by tds_iconv_init(): the little-endian one if known, else the big-endian one. */
00080 static const char *ucs2name;
00081
/*
 * Indexes into canonic_charsets / iconv_names of the four charsets that are
 * probed at start-up; tds_iconv_init() asserts that they line up with the
 * table in encodings.h.
 */
00082 enum
00083 { POS_ISO1, POS_UTF8, POS_UCS2LE, POS_UCS2BE };
00084
00085
00086
00087
00088 static int
00089 tds_iconv_init(void)
00090 {
00091 int i;
00092 iconv_t cd;
00093
00094
00095 assert(strcmp(canonic_charsets[POS_ISO1].name, "ISO-8859-1") == 0);
00096 assert(strcmp(canonic_charsets[POS_UTF8].name, "UTF-8") == 0);
00097 assert(strcmp(canonic_charsets[POS_UCS2LE].name, "UCS-2LE") == 0);
00098 assert(strcmp(canonic_charsets[POS_UCS2BE].name, "UCS-2BE") == 0);
00099
00100
00101 cd = tds_sys_iconv_open("ISO-8859-1", "UTF-8");
00102 if (cd != (iconv_t) - 1) {
00103 iconv_names[POS_ISO1] = "ISO-8859-1";
00104 iconv_names[POS_UTF8] = "UTF-8";
00105 tds_sys_iconv_close(cd);
00106 } else {
00107
00108
00109 for (i = 0; iconv_aliases[i].alias; ++i) {
00110 int j;
00111
00112 if (iconv_aliases[i].canonic != POS_ISO1)
00113 continue;
00114 for (j = 0; iconv_aliases[j].alias; ++j) {
00115 if (iconv_aliases[j].canonic != POS_UTF8)
00116 continue;
00117
00118 cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_aliases[j].alias);
00119 if (cd != (iconv_t) - 1) {
00120 iconv_names[POS_ISO1] = iconv_aliases[i].alias;
00121 iconv_names[POS_UTF8] = iconv_aliases[j].alias;
00122 tds_sys_iconv_close(cd);
00123 break;
00124 }
00125 }
00126 if (iconv_names[POS_ISO1])
00127 break;
00128 }
00129
00130 if (!iconv_names[POS_ISO1])
00131 return 1;
00132 }
00133
00134
00135 cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2LE");
00136 if (cd != (iconv_t) - 1) {
00137 iconv_names[POS_UCS2LE] = "UCS-2LE";
00138 tds_sys_iconv_close(cd);
00139 }
00140 cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2BE");
00141 if (cd != (iconv_t) - 1) {
00142 iconv_names[POS_UCS2BE] = "UCS-2BE";
00143 tds_sys_iconv_close(cd);
00144 }
00145
00146
00147 if (!iconv_names[POS_UCS2LE] || !iconv_names[POS_UCS2BE]) {
00148 for (i = 0; iconv_aliases[i].alias; ++i) {
00149 if (strncmp(canonic_charsets[iconv_aliases[i].canonic].name, "UCS-2", 5) != 0)
00150 continue;
00151
00152 cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_names[POS_ISO1]);
00153 if (cd != (iconv_t) - 1) {
00154 char ib[1];
00155 char ob[4];
00156 size_t il, ol;
00157 ICONV_CONST char *pib;
00158 char *pob;
00159 int byte_sequence = 0;
00160
00161
00162 ib[0] = 0x41;
00163 pib = ib;
00164 pob = ob;
00165 il = 1;
00166 ol = 4;
00167 ob[0] = ob[1] = 0;
00168 if (tds_sys_iconv(cd, &pib, &il, &pob, &ol) != (size_t) - 1) {
00169
00170 if (ol == 0) {
00171 ob[0] = ob[2];
00172 byte_sequence = 1;
00173
00174 }
00175
00176
00177 if (ob[0])
00178 il = POS_UCS2LE;
00179 else
00180 il = POS_UCS2BE;
00181 if (!iconv_names[il] || !byte_sequence)
00182 iconv_names[il] = iconv_aliases[i].alias;
00183 }
00184 tds_sys_iconv_close(cd);
00185 }
00186 }
00187 }
00188
00189 if (!iconv_names[POS_UCS2LE] && !iconv_names[POS_UCS2BE])
00190 return 2;
00191
00192 ucs2name = iconv_names[POS_UCS2LE] ? iconv_names[POS_UCS2LE] : iconv_names[POS_UCS2BE];
00193
00194 for (i = 0; i < 4; ++i)
00195 tdsdump_log(TDS_DBG_INFO1, "names for %s: %s\n", canonic_charsets[i].name,
00196 iconv_names[i] ? iconv_names[i] : "(null)");
00197
00198
00199 return 0;
00200 }
00201
00202
00203
00204
00205 static void
00206 tds_get_iconv_name(int charset)
00207 {
00208 int i;
00209 iconv_t cd;
00210
00211 assert(iconv_initialized);
00212
00213
00214 cd = tds_sys_iconv_open(iconv_names[POS_UTF8], canonic_charsets[charset].name);
00215 if (cd != (iconv_t) - 1) {
00216 iconv_names[charset] = canonic_charsets[charset].name;
00217 tds_sys_iconv_close(cd);
00218 return;
00219 }
00220 cd = tds_sys_iconv_open(ucs2name, canonic_charsets[charset].name);
00221 if (cd != (iconv_t) - 1) {
00222 iconv_names[charset] = canonic_charsets[charset].name;
00223 tds_sys_iconv_close(cd);
00224 return;
00225 }
00226
00227
00228 for (i = 0; iconv_aliases[i].alias; ++i) {
00229 if (iconv_aliases[i].canonic != charset)
00230 continue;
00231
00232 cd = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_aliases[i].alias);
00233 if (cd != (iconv_t) - 1) {
00234 iconv_names[charset] = iconv_aliases[i].alias;
00235 tds_sys_iconv_close(cd);
00236 return;
00237 }
00238
00239 cd = tds_sys_iconv_open(ucs2name, iconv_aliases[i].alias);
00240 if (cd != (iconv_t) - 1) {
00241 iconv_names[charset] = iconv_aliases[i].alias;
00242 tds_sys_iconv_close(cd);
00243 return;
00244 }
00245 }
00246
00247
00248 iconv_names[charset] = "";
00249 }
00250
00251 static void
00252 tds_iconv_reset(TDSICONV *conv)
00253 {
00254
00255
00256
00257
00258 conv->server_charset.min_bytes_per_char = 1;
00259 conv->server_charset.max_bytes_per_char = 1;
00260 conv->client_charset.min_bytes_per_char = 1;
00261 conv->client_charset.max_bytes_per_char = 1;
00262
00263 conv->server_charset.name = conv->client_charset.name = "";
00264 conv->to_wire = (iconv_t) - 1;
00265 conv->to_wire2 = (iconv_t) - 1;
00266 conv->from_wire = (iconv_t) - 1;
00267 conv->from_wire2 = (iconv_t) - 1;
00268 }
00269
00270
00271
00272
00273
00274 int
00275 tds_iconv_alloc(TDSSOCKET * tds)
00276 {
00277 int i;
00278 TDSICONV *char_conv;
00279
00280 assert(!tds->char_convs);
00281 if (!(tds->char_convs = (TDSICONV **) malloc(sizeof(TDSICONV *) * (initial_char_conv_count + 1))))
00282 return 1;
00283 char_conv = (TDSICONV *) malloc(sizeof(TDSICONV) * initial_char_conv_count);
00284 if (!char_conv) {
00285 TDS_ZERO_FREE(tds->char_convs);
00286 return 1;
00287 }
00288 memset(char_conv, 0, sizeof(TDSICONV) * initial_char_conv_count);
00289 tds->char_conv_count = initial_char_conv_count + 1;
00290
00291 for (i = 0; i < initial_char_conv_count; ++i) {
00292 tds->char_convs[i] = &char_conv[i];
00293 tds_iconv_reset(&char_conv[i]);
00294 }
00295
00296
00297 tds->char_convs[initial_char_conv_count] = tds->char_convs[client2server_chardata];
00298
00299 return 0;
00300 }
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325 void
00326 tds_iconv_open(TDSSOCKET * tds, const char *charset)
00327 {
00328 static const char UCS_2LE[] = "UCS-2LE";
00329 const char *name;
00330 int fOK, ret;
00331
00332 TDS_ENCODING *client = &tds->char_convs[client2ucs2]->client_charset;
00333 TDS_ENCODING *server = &tds->char_convs[client2ucs2]->server_charset;
00334
00335 #if !HAVE_ICONV_ALWAYS
00336
00337 strcpy(client->name, "ISO-8859-1");
00338 strcpy(server->name, UCS_2LE);
00339
00340 bytes_per_char(client);
00341 bytes_per_char(server);
00342 return;
00343 #else
00344
00345 if (!iconv_initialized) {
00346 if ((ret = tds_iconv_init()) > 0) {
00347 static const char names[][12] = { "ISO 8859-1", "UTF-8" };
00348 assert(ret < 3);
00349 tdsdump_log(TDS_DBG_FUNC, "error: tds_iconv_init() returned %d; "
00350 "could not find a name for %s that your iconv accepts.\n"
00351 "use: \"configure --disable-libiconv\"", ret, names[ret-1]);
00352 assert(ret == 0);
00353 return;
00354 }
00355 iconv_initialized = 1;
00356 }
00357
00358
00359
00360
00361 tdsdump_log(TDS_DBG_FUNC, "iconv to convert client-side data to the \"%s\" character set\n", charset);
00362
00363 fOK = tds_iconv_info_init(tds->char_convs[client2ucs2], charset, UCS_2LE);
00364 if (!fOK)
00365 return;
00366
00367
00368
00369
00370
00371 if (client->min_bytes_per_char == 1 && client->max_bytes_per_char == 4 && server->max_bytes_per_char == 1) {
00372
00373 client->max_bytes_per_char = 3;
00374 }
00375
00376
00377
00378
00379
00380
00381 tds->char_convs[client2server_chardata]->flags = TDS_ENCODING_MEMCPY;
00382 if (tds->env.charset) {
00383 fOK = tds_iconv_info_init(tds->char_convs[client2server_chardata], charset, tds->env.charset);
00384 if (!fOK)
00385 return;
00386 }
00387
00388
00389
00390
00391 name = UCS_2LE;
00392 if (tds->major_version < 7) {
00393 name = "ISO-8859-1";
00394 if (tds->env.charset)
00395 name = tds->env.charset;
00396 }
00397 fOK = tds_iconv_info_init(tds->char_convs[iso2server_metadata], "ISO-8859-1", name);
00398
00399 #endif
00400 }
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411 static int
00412 tds_iconv_info_init(TDSICONV * char_conv, const char *client_name, const char *server_name)
00413 {
00414 TDS_ENCODING *client = &char_conv->client_charset;
00415 TDS_ENCODING *server = &char_conv->server_charset;
00416
00417 int server_canonical, client_canonical;
00418
00419 assert(client_name && server_name);
00420
00421 assert(char_conv->to_wire == (iconv_t) - 1);
00422 assert(char_conv->to_wire2 == (iconv_t) - 1);
00423 assert(char_conv->from_wire == (iconv_t) - 1);
00424 assert(char_conv->from_wire2 == (iconv_t) - 1);
00425
00426 client_canonical = tds_canonical_charset(client_name);
00427 server_canonical = tds_canonical_charset(server_name);
00428
00429 if (client_canonical < 0) {
00430 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: client charset name \"%s\" unrecognized\n", client->name);
00431 return 0;
00432 }
00433
00434 if (server_canonical < 0) {
00435 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: server charset name \"%s\" unrecognized\n", client->name);
00436 return 0;
00437 }
00438
00439 *client = canonic_charsets[client_canonical];
00440 *server = canonic_charsets[server_canonical];
00441
00442
00443 if (client_canonical == server_canonical) {
00444 char_conv->to_wire = (iconv_t) - 1;
00445 char_conv->from_wire = (iconv_t) - 1;
00446 char_conv->flags = TDS_ENCODING_MEMCPY;
00447 return 1;
00448 }
00449
00450 char_conv->flags = 0;
00451 if (!iconv_names[server_canonical]) {
00452 switch (server_canonical) {
00453 case POS_UCS2LE:
00454 server_canonical = POS_UCS2BE;
00455 char_conv->flags = TDS_ENCODING_SWAPBYTE;
00456 break;
00457 case POS_UCS2BE:
00458 server_canonical = POS_UCS2LE;
00459 char_conv->flags = TDS_ENCODING_SWAPBYTE;
00460 break;
00461 }
00462 }
00463
00464
00465 if (!iconv_names[client_canonical])
00466 tds_get_iconv_name(client_canonical);
00467 if (!iconv_names[server_canonical])
00468 tds_get_iconv_name(server_canonical);
00469
00470
00471 if (!iconv_names[client_canonical][0] || !iconv_names[server_canonical][0]) {
00472 char_conv->to_wire = (iconv_t) - 1;
00473 char_conv->from_wire = (iconv_t) - 1;
00474 char_conv->flags = TDS_ENCODING_MEMCPY;
00475 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: use memcpy to convert \"%s\"->\"%s\"\n", client->name,
00476 server->name);
00477 return 0;
00478 }
00479
00480 char_conv->to_wire = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[client_canonical]);
00481 if (char_conv->to_wire == (iconv_t) - 1) {
00482 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", client->name, server->name);
00483 }
00484
00485 char_conv->from_wire = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]);
00486 if (char_conv->from_wire == (iconv_t) - 1) {
00487 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", server->name, client->name);
00488 }
00489
00490
00491 if (char_conv->to_wire == (iconv_t) - 1 || char_conv->from_wire == (iconv_t) - 1) {
00492 tds_iconv_info_close(char_conv);
00493
00494
00495 char_conv->to_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[client_canonical]);
00496 char_conv->to_wire2 = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[POS_UTF8]);
00497 char_conv->from_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[server_canonical]);
00498 char_conv->from_wire2 = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[POS_UTF8]);
00499
00500 if (char_conv->to_wire == (iconv_t) - 1 || char_conv->to_wire2 == (iconv_t) - 1
00501 || char_conv->from_wire == (iconv_t) - 1 || char_conv->from_wire2 == (iconv_t) - 1) {
00502
00503 tds_iconv_info_close(char_conv);
00504 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\" indirectly\n",
00505 server->name, client->name);
00506 return 0;
00507 }
00508
00509 char_conv->flags |= TDS_ENCODING_INDIRECT;
00510 }
00511
00512
00513
00514 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: converting \"%s\"->\"%s\"\n", client->name, server->name);
00515
00516 return 1;
00517 }
00518
00519
00520 #if HAVE_ICONV_ALWAYS
/**
 * Close the descriptor \a cd points to unless it is already marked closed,
 * then mark it closed with (iconv_t) -1.
 */
static void
_iconv_close(iconv_t * cd)
{
	if (*cd == (iconv_t) - 1)
		return;

	tds_sys_iconv_close(*cd);
	*cd = (iconv_t) - 1;
}
00531
00532 static void
00533 tds_iconv_info_close(TDSICONV * char_conv)
00534 {
00535 _iconv_close(&char_conv->to_wire);
00536 _iconv_close(&char_conv->to_wire2);
00537 _iconv_close(&char_conv->from_wire);
00538 _iconv_close(&char_conv->from_wire2);
00539 }
00540 #endif
00541
00542 void
00543 tds_iconv_close(TDSSOCKET * tds)
00544 {
00545 #if HAVE_ICONV_ALWAYS
00546 int i;
00547
00548 for (i = 0; i < tds->char_conv_count; ++i) {
00549 tds_iconv_info_close(tds->char_convs[i]);
00550 }
00551 #endif
00552 }
00553
00554 #define CHUNK_ALLOC 4
00555
00556 void
00557 tds_iconv_free(TDSSOCKET * tds)
00558 {
00559 int i;
00560
00561 if (!tds->char_convs)
00562 return;
00563 tds_iconv_close(tds);
00564
00565 free(tds->char_convs[0]);
00566 for (i = initial_char_conv_count + 1; i < tds->char_conv_count; i += CHUNK_ALLOC)
00567 free(tds->char_convs[i]);
00568 TDS_ZERO_FREE(tds->char_convs);
00569 tds->char_conv_count = 0;
00570 }
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603 size_t
00604 tds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io,
00605 const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft)
00606 {
00607 static const iconv_t invalid = (iconv_t) - 1;
00608 const TDS_ENCODING *input_charset = NULL;
00609 const char *output_charset_name = NULL;
00610
00611 iconv_t cd = invalid, cd2 = invalid;
00612 iconv_t error_cd = invalid;
00613
00614 char quest_mark[] = "?";
00615 ICONV_CONST char *pquest_mark = quest_mark;
00616 size_t lquest_mark;
00617 size_t irreversible;
00618 char one_character;
00619 char *p;
00620 int eilseq_raised = 0;
00621
00622 TDS_ERRNO_MESSAGE_FLAGS *suppress = (TDS_ERRNO_MESSAGE_FLAGS*) &conv->suppress;
00623
00624 assert(inbuf && inbytesleft && outbuf && outbytesleft);
00625
00626 switch (io) {
00627 case to_server:
00628 cd = conv->to_wire;
00629 cd2 = conv->to_wire2;
00630 input_charset = &conv->client_charset;
00631 output_charset_name = conv->server_charset.name;
00632 break;
00633 case to_client:
00634 cd = conv->from_wire;
00635 cd2 = conv->from_wire2;
00636 input_charset = &conv->server_charset;
00637 output_charset_name = conv->client_charset.name;
00638 break;
00639 default:
00640 tdsdump_log(TDS_DBG_FUNC, "tds_iconv: unable to determine if %d means in or out. \n", io);
00641 assert(io == to_server || io == to_client);
00642 break;
00643 }
00644
00645
00646 if (conv->flags & TDS_ENCODING_MEMCPY || cd == invalid) {
00647 size_t len = *inbytesleft < *outbytesleft ? *inbytesleft : *outbytesleft;
00648
00649 memcpy(*outbuf, *inbuf, len);
00650 errno = *inbytesleft > *outbytesleft ? E2BIG : 0;
00651 *inbytesleft -= len;
00652 *outbytesleft -= len;
00653 *inbuf += len;
00654 *outbuf += len;
00655 return 0;
00656 }
00657
00658
00659
00660
00661 errno = 0;
00662 p = *outbuf;
00663 for (;;) {
00664 if (conv->flags & TDS_ENCODING_INDIRECT) {
00665 #if ENABLE_EXTRA_CHECKS
00666 char tmp[8];
00667 #else
00668 char tmp[128];
00669 #endif
00670 char *pb = tmp;
00671 size_t l = sizeof(tmp);
00672 int temp_errno;
00673 size_t temp_irreversible;
00674
00675 temp_irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, &pb, &l);
00676 temp_errno = errno;
00677
00678
00679 pb = tmp;
00680 l = sizeof(tmp) - l;
00681 for (;;) {
00682 errno = 0;
00683 irreversible = tds_sys_iconv(cd2, (ICONV_CONST char **) &pb, &l, outbuf, outbytesleft);
00684 if (irreversible != (size_t) - 1) {
00685 if (*inbytesleft)
00686 break;
00687 goto end_loop;
00688 }
00689
00690 if (errno == E2BIG || errno == EINVAL)
00691 goto end_loop;
00692
00693
00694
00695
00696
00697
00698 eilseq_raised = 1;
00699 if (*pb == '?')
00700 goto end_loop;
00701 *pb = (char) 0x80;
00702 while(l && (*pb & 0xC0) == 0x80)
00703 ++pb, --l;
00704 --pb;
00705 ++l;
00706 *pb = '?';
00707 }
00708 if (temp_errno == E2BIG) {
00709 errno = 0;
00710 continue;
00711 }
00712 errno = temp_errno;
00713 irreversible = temp_irreversible;
00714 break;
00715 } else if (io == to_client && conv->flags & TDS_ENCODING_SWAPBYTE) {
00716
00717 #if ENABLE_EXTRA_CHECKS
00718 char tmp[8];
00719 #else
00720 char tmp[128];
00721 #endif
00722 char *pib = tmp;
00723 size_t il = *inbytesleft > sizeof(tmp) ? sizeof(tmp) : *inbytesleft;
00724 size_t n;
00725
00726 for (n = 0; n < il; n += 2) {
00727 tmp[n] = (*inbuf)[n + 1];
00728 tmp[n + 1] = (*inbuf)[n];
00729 }
00730 irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) &pib, &il, outbuf, outbytesleft);
00731 il = pib - tmp;
00732 *inbuf += il;
00733 *inbytesleft -= il;
00734 if (irreversible != (size_t) - 1 && *inbytesleft)
00735 continue;
00736 } else {
00737 irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft);
00738 }
00739 if (irreversible != (size_t) - 1)
00740 break;
00741
00742 if (errno == EILSEQ)
00743 eilseq_raised = 1;
00744
00745 if (errno != EILSEQ || io != to_client)
00746 break;
00747
00748
00749
00750
00751 one_character = skip_one_input_sequence(cd, input_charset, inbuf, inbytesleft);
00752
00753 if (!one_character)
00754 break;
00755
00756
00757
00758
00759
00760
00761
00762 if (error_cd == invalid) {
00763 error_cd = tds_sys_iconv_open(output_charset_name, iconv_names[POS_UTF8]);
00764 if (error_cd == invalid) {
00765 break;
00766 }
00767 }
00768
00769 lquest_mark = 1;
00770 pquest_mark = quest_mark;
00771
00772 p = *outbuf;
00773 irreversible = tds_sys_iconv(error_cd, &pquest_mark, &lquest_mark, outbuf, outbytesleft);
00774
00775 if (irreversible == (size_t) - 1)
00776 break;
00777
00778 if (!*inbytesleft)
00779 break;
00780 }
00781 end_loop:
00782
00783
00784 if (io == to_server && conv->flags & TDS_ENCODING_SWAPBYTE) {
00785 assert((*outbuf - p) % 2 == 0);
00786 for (; p < *outbuf; p += 2) {
00787 char tmp = p[0];
00788
00789 p[0] = p[1];
00790 p[1] = tmp;
00791 }
00792 }
00793
00794 if (eilseq_raised && !suppress->eilseq) {
00795
00796 if (io == to_client) {
00797 if (irreversible == (size_t) - 1) {
00798 tds_client_msg(tds->tds_ctx, tds, 2404, 16, 0, 0,
00799 "WARNING! Some character(s) could not be converted into client's character set. ");
00800 } else {
00801 tds_client_msg(tds->tds_ctx, tds, 2403, 16, 0, 0,
00802 "WARNING! Some character(s) could not be converted into client's character set. "
00803 "Unconverted bytes were changed to question marks ('?').");
00804 errno = 0;
00805 }
00806 } else {
00807 tds_client_msg(tds->tds_ctx, tds, 2402, 16, 0, 0,
00808 "Error converting client characters into server's character set. "
00809 "Some character(s) could not be converted.");
00810 }
00811 suppress->eilseq = 1;
00812 }
00813
00814 switch (errno) {
00815 case EINVAL:
00816 if (suppress->einval)
00817 break;
00818
00819 tds_client_msg(tds->tds_ctx, tds, 2401, 16, *inbytesleft, 0,
00820 "iconv EINVAL: Error converting between character sets. "
00821 "Conversion abandoned at offset indicated by the \"state\" value of this message.");
00822 suppress->einval = 1;
00823 break;
00824 case E2BIG:
00825 if (suppress->e2big)
00826 break;
00827 tds_client_msg(tds->tds_ctx, tds, 2400, 16, *inbytesleft, 0,
00828 "iconv E2BIG: Error converting between character sets. " "Output buffer exhausted.");
00829 suppress->e2big = 1;
00830 break;
00831 default:
00832 break;
00833 }
00834
00835 if (error_cd != invalid) {
00836 tds_sys_iconv_close(error_cd);
00837 }
00838
00839 return irreversible;
00840 }
00841
00842
00843
00844
00845
00846 size_t
00847 tds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft)
00848 {
00849 #ifdef ENABLE_EXTRA_CHECKS
00850 char buffer[16];
00851 #else
00852 char buffer[16000];
00853 #endif
00854 char *ib;
00855 size_t isize = 0, nonreversible_conversions = 0;
00856
00857
00858
00859
00860 if (cd == (iconv_t) - 1) {
00861 assert(field_len <= *outbytesleft);
00862 if (field_len > 0) {
00863 if (1 != fread(outbuf, field_len, 1, stream)) {
00864 return field_len + term_len;
00865 }
00866 }
00867
00868
00869 *outbytesleft -= field_len;
00870 isize = 0;
00871 field_len = 0;
00872
00873 goto READ_TERMINATOR;
00874 }
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884 isize = (sizeof(buffer) < field_len) ? sizeof(buffer) : field_len;
00885
00886 for (ib = buffer; isize && (isize = fread(ib, 1, isize, stream)) > 0;) {
00887
00888 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: read %u of %u bytes; outbuf has %u left.\n", (unsigned int) isize,
00889 (unsigned int) field_len, (unsigned int) *outbytesleft);
00890 field_len -= isize;
00891
00892 isize += ib - buffer;
00893 ib = buffer;
00894 nonreversible_conversions += tds_sys_iconv(cd, (ICONV_CONST char **) &ib, &isize, &outbuf, outbytesleft);
00895
00896 if (isize != 0) {
00897 memmove(buffer, ib, isize);
00898 switch (errno) {
00899 case EINVAL:
00900 break;
00901 case E2BIG:
00902 case EILSEQ:
00903 default:
00904
00905 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: error %d: %s.\n", errno, strerror(errno));
00906 break;
00907 }
00908 }
00909 ib = buffer + isize;
00910 isize = sizeof(buffer) - isize;
00911 if (isize > field_len)
00912 isize = field_len;
00913 }
00914
00915 READ_TERMINATOR:
00916
00917 if (term_len > 0 && !feof(stream)) {
00918 isize += term_len;
00919 if (term_len && 1 == fread(buffer, term_len, 1, stream)) {
00920 isize -= term_len;
00921 } else {
00922 tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: cannot read %u-byte terminator\n", (unsigned int) term_len);
00923 }
00924 }
00925
00926 return field_len + isize;
00927 }
00928
00929
00930
00931
00932 static TDSICONV *
00933 tds_iconv_get_info(TDSSOCKET * tds, const char *canonic_charset)
00934 {
00935 TDSICONV *info;
00936 int i;
00937
00938
00939 for (i = tds->char_conv_count; --i >= initial_char_conv_count;)
00940 if (strcmp(canonic_charset, tds->char_convs[i]->server_charset.name) == 0)
00941 return tds->char_convs[i];
00942
00943
00944 if (tds->char_conv_count % CHUNK_ALLOC == ((initial_char_conv_count + 1) % CHUNK_ALLOC)) {
00945 TDSICONV **p;
00946 TDSICONV *infos;
00947
00948 infos = (TDSICONV *) malloc(sizeof(TDSICONV) * CHUNK_ALLOC);
00949 if (!infos)
00950 return NULL;
00951 p = (TDSICONV **) realloc(tds->char_convs, sizeof(TDSICONV *) * (tds->char_conv_count + CHUNK_ALLOC));
00952 if (!p) {
00953 free(infos);
00954 return NULL;
00955 }
00956 tds->char_convs = p;
00957 memset(infos, 0, sizeof(TDSICONV) * CHUNK_ALLOC);
00958 for (i = 0; i < CHUNK_ALLOC; ++i) {
00959 tds->char_convs[i + tds->char_conv_count] = &infos[i];
00960 tds_iconv_reset(&infos[i]);
00961 }
00962 }
00963 info = tds->char_convs[tds->char_conv_count++];
00964
00965
00966
00967 tds_iconv_info_init(info, tds->char_convs[client2ucs2]->client_charset.name, canonic_charset);
00968 return info;
00969 }
00970
00971
00972 void
00973 tds_srv_charset_changed(TDSSOCKET * tds, const char *charset)
00974 {
00975 #if HAVE_ICONV_ALWAYS
00976 TDSICONV *char_conv = tds->char_convs[client2server_chardata];
00977
00978 const char *canonic_charset = tds_canonical_charset_name(charset);
00979
00980
00981 if (!canonic_charset) {
00982 tdsdump_log(TDS_DBG_FUNC, "tds_srv_charset_changed: what is charset \"%s\"?\n", charset);
00983 return;
00984 }
00985
00986 if (strcmp(canonic_charset, char_conv->server_charset.name) == 0)
00987 return;
00988
00989
00990 char_conv = tds_iconv_get_info(tds, canonic_charset);
00991 if (char_conv)
00992 tds->char_convs[client2server_chardata] = char_conv;
00993
00994
00995 if (tds->major_version >= 7)
00996 return;
00997
00998 char_conv = tds->char_convs[iso2server_metadata];
00999
01000 tds_iconv_info_close(char_conv);
01001
01002 tds_iconv_info_init(char_conv, "ISO-8859-1", charset);
01003 #endif
01004 }
01005
01006
01007 void
01008 tds7_srv_charset_changed(TDSSOCKET * tds, int sql_collate, int lcid)
01009 {
01010 tds_srv_charset_changed(tds, collate2charset(sql_collate, lcid));
01011 }
01012
01013 #if !HAVE_ICONV_ALWAYS
01014
01015
01016
01017
01018
01019
01020 static int
01021 bytes_per_char(TDS_ENCODING * charset)
01022 {
01023 int i;
01024
01025 assert(charset && strlen(charset->name) < sizeof(charset->name));
01026
01027 for (i = 0; i < sizeof(canonic_charsets) / sizeof(TDS_ENCODING); i++) {
01028 if (canonic_charsets[i].min_bytes_per_char == 0)
01029 break;
01030
01031 if (0 == strcmp(charset->name, canonic_charsets[i].name)) {
01032 charset->min_bytes_per_char = canonic_charsets[i].min_bytes_per_char;
01033 charset->max_bytes_per_char = canonic_charsets[i].max_bytes_per_char;
01034
01035 return (charset->max_bytes_per_char == charset->min_bytes_per_char) ? 1 : 2;
01036 }
01037 }
01038
01039 return 0;
01040 }
01041 #endif
01042
01043
01044
01045
01046
01047
01048
01049 static int
01050 skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size)
01051 {
01052 int charsize = CHARSIZE(charset);
01053 char ib[16];
01054 char ob[16];
01055 ICONV_CONST char *pib;
01056 char *pob;
01057 size_t il, ol, l;
01058 iconv_t cd2;
01059
01060
01061
01062 if (charsize) {
01063 *input += charsize;
01064 *input_size -= charsize;
01065 return charsize;
01066 }
01067
01068 if (0 == strcmp(charset->name, "UTF-8")) {
01069
01070
01071
01072
01073
01074
01075
01076
01077 int c = **input;
01078
01079 c = c & (c >> 1);
01080 do {
01081 ++charsize;
01082 } while ((c <<= 1) & 0x80);
01083 *input += charsize;
01084 *input_size -= charsize;
01085 return charsize;
01086 }
01087
01088
01089
01090
01091 pob = ib;
01092 ol = sizeof(ib);
01093 tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
01094
01095
01096
01097 cd2 = tds_sys_iconv_open("UCS-4", charset->name);
01098 if (cd2 == (iconv_t) - 1)
01099 return 0;
01100
01101
01102 il = ol;
01103 if (il > *input_size)
01104 il = *input_size;
01105 l = sizeof(ib) - ol;
01106 memcpy(ib + l, *input, il);
01107 il += l;
01108
01109
01110 pib = ib;
01111 pob = ob;
01112
01113 ol = 4;
01114 tds_sys_iconv(cd2, &pib, &il, &pob, &ol);
01115
01116
01117 l = (pib - ib) - l;
01118 *input += l;
01119 *input_size -= l;
01120
01121
01122 pob = ib;
01123 ol = sizeof(ib);
01124 tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
01125
01126
01127 pib = ib;
01128 il = sizeof(ib) - ol;
01129 pob = ob;
01130 ol = sizeof(ob);
01131 tds_sys_iconv(cd, &pib, &il, &pob, &ol);
01132
01133 tds_sys_iconv_close(cd2);
01134
01135 return l;
01136 }
01137
01138 static int
01139 lookup_canonic(const CHARACTER_SET_ALIAS aliases[], const char *charset_name)
01140 {
01141 int i;
01142
01143 for (i = 0; aliases[i].alias; ++i) {
01144 if (0 == strcmp(charset_name, aliases[i].alias))
01145 return aliases[i].canonic;
01146 }
01147
01148 return -1;
01149 }
01150
01151
01152
01153
01154
01155
01156 static int
01157 tds_canonical_charset(const char *charset_name)
01158 {
01159 int res;
01160
01161
01162 res = lookup_canonic(iconv_aliases, charset_name);
01163 if (res >= 0)
01164 return res;
01165
01166
01167 return lookup_canonic(sybase_aliases, charset_name);
01168 }
01169
01170
01171
01172
01173
01174
01175 const char *
01176 tds_canonical_charset_name(const char *charset_name)
01177 {
01178 int res;
01179
01180
01181 res = tds_canonical_charset(charset_name);
01182 if (res >= 0)
01183 return canonic_charsets[res].name;
01184
01185 return NULL;
01186 }
01187
01188
01189
01190
01191
01192
01193 const char *
01194 tds_sybase_charset_name(const char *charset_name)
01195 {
01196 int res, i;
01197
01198
01199 res = lookup_canonic(iconv_aliases, charset_name);
01200 if (res < 0)
01201 return NULL;
01202
01203
01204 assert(strcmp(sybase_aliases[0].alias, "ascii_8") == 0);
01205
01206 for (i = 1; sybase_aliases[i].alias; ++i) {
01207 if (sybase_aliases[i].canonic == res)
01208 return sybase_aliases[i].alias;
01209 }
01210
01211 return NULL;
01212 }
01213
/**
 * Pick the code page name for a collation.
 *
 * \a sql_collate is examined first; when it is one of the listed values it
 * alone decides.  Otherwise the low 16 bits of \a lcid select the code
 * page, with "CP1252" for every value not listed.
 *
 * \param sql_collate collation id
 * \param lcid        locale id; only the low 16 bits are used
 * \return code page name, never NULL
 */
static const char *
collate2charset(int sql_collate, int lcid)
{
	switch (sql_collate) {
	case 30: case 31: case 32: case 33: case 34:
		return "CP437";
	case 40: case 41: case 42: case 43: case 44:
	case 49:
	case 55: case 56: case 57: case 58: case 59:
	case 60: case 61:
		return "CP850";
	case 81: case 82:
		return "CP1250";
	case 105: case 106:
		return "CP1251";
	case 113: case 114:
	case 120: case 121:
	case 124:
		return "CP1253";
	case 137: case 138:
		return "CP1255";
	case 145: case 146:
		return "CP1256";
	case 153: case 154:
		return "CP1257";
	}

	/* not decided by sql_collate: go by the locale id */
	switch (lcid & 0xffff) {
	case 0x405: case 0x40e: case 0x415: case 0x418:
	case 0x41a: case 0x41b: case 0x41c: case 0x424:
	case 0x104e:
		return "CP1250";

	case 0x402: case 0x419: case 0x422: case 0x423:
	case 0x42f: case 0x43f: case 0x440: case 0x444:
	case 0x450: case 0x81a: case 0x82c: case 0x843:
	case 0xc1a:
		return "CP1251";

	case 0x1007: case 0x1009: case 0x100a: case 0x100c:
	case 0x1407: case 0x1409: case 0x140a: case 0x140c:
	case 0x1809: case 0x180a: case 0x180c:
	case 0x1c09: case 0x1c0a:
	case 0x2009: case 0x200a:
	case 0x2409: case 0x240a:
	case 0x2809: case 0x280a:
	case 0x2c09: case 0x2c0a:
	case 0x3009: case 0x300a:
	case 0x3409: case 0x340a:
	case 0x380a: case 0x3c0a: case 0x400a:
	case 0x403: case 0x406: case 0x407: case 0x409:
	case 0x40a: case 0x40b: case 0x40c: case 0x40f:
	case 0x410: case 0x413: case 0x414: case 0x416:
	case 0x41d: case 0x421: case 0x42d: case 0x436:
	case 0x437: case 0x438: case 0x43e: case 0x440a:
	case 0x441: case 0x456:
	case 0x480a: case 0x4c0a: case 0x500a:
	case 0x807: case 0x809: case 0x80a: case 0x80c:
	case 0x810: case 0x813: case 0x814: case 0x816:
	case 0x81d: case 0x83e:
	case 0xc07: case 0xc09: case 0xc0a: case 0xc0c:
		return "CP1252";

	case 0x408:
		return "CP1253";

	case 0x41f: case 0x42c: case 0x443:
		return "CP1254";

	case 0x40d:
		return "CP1255";

	case 0x1001: case 0x1401: case 0x1801: case 0x1c01:
	case 0x2001: case 0x2401: case 0x2801: case 0x2c01:
	case 0x3001: case 0x3401: case 0x3801: case 0x3c01:
	case 0x4001:
	case 0x401: case 0x420: case 0x429:
	case 0x801: case 0xc01:
		return "CP1256";

	case 0x425: case 0x426: case 0x427: case 0x827:
		return "CP1257";

	case 0x42a:
		return "CP1258";

	case 0x41e:
		return "CP874";

	case 0x411:
		return "CP932";

	case 0x1004: case 0x804:
		return "CP936";

	case 0x412:
		return "CP949";

	case 0x1404: case 0x404: case 0xc04:
		return "CP950";

	default:
		break;
	}

	/* every locale not listed above */
	return "CP1252";
}
01431
01432
01433
01434
01435 TDSICONV *
01436 tds_iconv_from_collate(TDSSOCKET * tds, int sql_collate, int lcid)
01437 {
01438 const char *charset = collate2charset(sql_collate, lcid);
01439
01440 #if ENABLE_EXTRA_CHECKS
01441 assert(strcmp(tds_canonical_charset_name(charset), charset) == 0);
01442 #endif
01443
01444
01445 if (strcmp(tds->char_convs[client2server_chardata]->server_charset.name, charset) == 0)
01446 return tds->char_convs[client2server_chardata];
01447
01448 return tds_iconv_get_info(tds, charset);
01449 }
01450
01451
01452
01453