00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <ncbi_pch.hpp>
00034 #include <common/ncbi_source_ver.h>
00035 #include <corelib/ncbistr.hpp>
00036 #include <corelib/tempstr.hpp>
00037 #include <corelib/ncbi_limits.hpp>
00038 #include <corelib/ncbistr_util.hpp>
00039 #include <corelib/error_codes.hpp>
00040 #include <memory>
00041 #include <algorithm>
00042 #include <errno.h>
00043 #include <stdio.h>
00044
00045
00046 #define NCBI_USE_ERRCODE_X Corelib_Util
00047
00048
00049 BEGIN_NCBI_SCOPE
00050
00051
00052
/// Digit characters used by the number-to-string converters below,
/// indexed by digit value (covers bases up to 36).
static const char s_Hex[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
00054
00055
00056 inline SIZE_TYPE s_DiffPtr(const char* end, const char* start)
00057 {
00058 return end ? (SIZE_TYPE)(end - start) : (SIZE_TYPE) 0;
00059 }
00060
/// Empty C-string constant.
const char *const kEmptyCStr = "";


/// Marker string whose text is built from the NCBI_DEVELOPMENT_VER macro.
extern const char* const kNcbiDevelopmentVersionString;
const char* const kNcbiDevelopmentVersionString
    = "NCBI_DEVELOPMENT_VER_" NCBI_AS_STRING(NCBI_DEVELOPMENT_VER);

#ifdef NCBI_PRODUCTION_VER
/// Marker string whose text is built from the NCBI_PRODUCTION_VER macro;
/// defined only when that macro is set.
extern const char* const kNcbiProductionVersionString;
const char* const kNcbiProductionVersionString
    = "NCBI_PRODUCTION_VER_" NCBI_AS_STRING(NCBI_PRODUCTION_VER);
#endif
00073
00074
#if !defined(NCBI_OS_MSWIN) && !( defined(NCBI_OS_LINUX) && defined(NCBI_COMPILER_GCC) )
// Pointer to the shared empty string; null until FirstGet() has run.
const string* CNcbiEmptyString::m_Str = 0;
/// Create the function-local empty string, store its address in m_Str
/// and return a reference to it.
const string& CNcbiEmptyString::FirstGet(void) {
    static const string s_Str = "";
    m_Str = &s_Str;
    return s_Str;
}
#endif
00083
00084
00085 bool NStr::IsBlank(const string& str, SIZE_TYPE pos)
00086 {
00087 SIZE_TYPE len = str.length();
00088 for (SIZE_TYPE idx = pos; idx < len; ++idx) {
00089 if (!isspace((unsigned char) str[idx])) {
00090 return false;
00091 }
00092 }
00093 return true;
00094 }
00095
00096
00097 int NStr::CompareCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00098 const char* pattern)
00099 {
00100 if (pos == NPOS || !n || str.length() <= pos) {
00101 return *pattern ? -1 : 0;
00102 }
00103 if ( !*pattern ) {
00104 return 1;
00105 }
00106
00107 if (n == NPOS || n > str.length() - pos) {
00108 n = str.length() - pos;
00109 }
00110
00111 const char* s = str.data() + pos;
00112 while (n && *pattern && *s == *pattern) {
00113 s++; pattern++; n--;
00114 }
00115
00116 if (n == 0) {
00117 return *pattern ? -1 : 0;
00118 }
00119
00120 return *s - *pattern;
00121 }
00122
00123
00124 int NStr::CompareNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00125 const char* pattern)
00126 {
00127 if (pos == NPOS || !n || str.length() <= pos) {
00128 return *pattern ? -1 : 0;
00129 }
00130 if ( !*pattern ) {
00131 return 1;
00132 }
00133
00134 if (n == NPOS || n > str.length() - pos) {
00135 n = str.length() - pos;
00136 }
00137
00138 const char* s = str.data() + pos;
00139 while (n && *pattern &&
00140 tolower((unsigned char)(*s)) ==
00141 tolower((unsigned char)(*pattern))) {
00142 s++; pattern++; n--;
00143 }
00144
00145 if (n == 0) {
00146 return *pattern ? -1 : 0;
00147 }
00148
00149 return tolower((unsigned char)(*s)) - tolower((unsigned char)(*pattern));
00150 }
00151
00152
00153 int NStr::CompareCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00154 const string& pattern)
00155 {
00156 if (pos == NPOS || !n || str.length() <= pos) {
00157 return pattern.empty() ? 0 : -1;
00158 }
00159 if (pattern.empty()) {
00160 return 1;
00161 }
00162
00163 if (n == NPOS || n > str.length() - pos) {
00164 n = str.length() - pos;
00165 }
00166
00167 SIZE_TYPE n_cmp = n;
00168 if (n_cmp > pattern.length()) {
00169 n_cmp = pattern.length();
00170 }
00171 const char* s = str.data() + pos;
00172 const char* p = pattern.data();
00173 while (n_cmp && *s == *p) {
00174 s++; p++; n_cmp--;
00175 }
00176
00177 if (n_cmp == 0) {
00178 if (n == pattern.length())
00179 return 0;
00180 return n > pattern.length() ? 1 : -1;
00181 }
00182
00183 return *s - *p;
00184 }
00185
00186
00187 int NStr::CompareNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00188 const string& pattern)
00189 {
00190 if (pos == NPOS || !n || str.length() <= pos) {
00191 return pattern.empty() ? 0 : -1;
00192 }
00193 if (pattern.empty()) {
00194 return 1;
00195 }
00196
00197 if (n == NPOS || n > str.length() - pos) {
00198 n = str.length() - pos;
00199 }
00200
00201 SIZE_TYPE n_cmp = n;
00202 if (n_cmp > pattern.length()) {
00203 n_cmp = pattern.length();
00204 }
00205 const char* s = str.data() + pos;
00206 const char* p = pattern.data();
00207 while (n_cmp &&
00208 tolower((unsigned char)(*s)) == tolower((unsigned char)(*p))) {
00209 s++; p++; n_cmp--;
00210 }
00211
00212 if (n_cmp == 0) {
00213 if (n == pattern.length())
00214 return 0;
00215 return n > pattern.length() ? 1 : -1;
00216 }
00217
00218 return tolower((unsigned char)(*s)) - tolower((unsigned char)(*p));
00219 }
00220
00221
00222
00223
00224 bool NStr::MatchesMask(const char* str, const char* mask, ECase use_case)
00225 {
00226 char c;
00227 bool infinite = true;
00228
00229 while (infinite) {
00230
00231 switch ( c = *mask++ ) {
00232
00233 case '\0':
00234 return *str == '\0';
00235
00236 case '?':
00237 if (*str == '\0') {
00238 return false;
00239 }
00240 ++str;
00241 break;
00242
00243 case '*':
00244 c = *mask;
00245
00246 while ( c == '*' ) {
00247 c = *++mask;
00248 }
00249 if (c == '\0') {
00250 return true;
00251 }
00252
00253 while ( *str ) {
00254 if (MatchesMask(str, mask, use_case)) {
00255 return true;
00256 }
00257 ++str;
00258 }
00259 return false;
00260
00261 default:
00262
00263 char s = *str++;
00264 if (use_case == eNocase) {
00265 c = tolower((unsigned char) c);
00266 s = tolower((unsigned char) s);
00267 }
00268 if (c != s) {
00269 return false;
00270 }
00271 break;
00272 }
00273 }
00274 return false;
00275 }
00276
00277
00278 char* NStr::ToLower(char* str)
00279 {
00280 char* s;
00281 for (s = str; *str; str++) {
00282 *str = tolower((unsigned char)(*str));
00283 }
00284 return s;
00285 }
00286
00287
00288 string& NStr::ToLower(string& str)
00289 {
00290 NON_CONST_ITERATE (string, it, str) {
00291 *it = tolower((unsigned char)(*it));
00292 }
00293 return str;
00294 }
00295
00296
00297 char* NStr::ToUpper(char* str)
00298 {
00299 char* s;
00300 for (s = str; *str; str++) {
00301 *str = toupper((unsigned char)(*str));
00302 }
00303 return s;
00304 }
00305
00306
00307 string& NStr::ToUpper(string& str)
00308 {
00309 NON_CONST_ITERATE (string, it, str) {
00310 *it = toupper((unsigned char)(*it));
00311 }
00312 return str;
00313 }
00314
00315
00316 int NStr::StringToNumeric(const string& str)
00317 {
00318 if ( str.empty() ||
00319 (!isdigit((unsigned char)(*str.begin())) & (*str.begin() != '+')) ) {
00320 errno = EINVAL;
00321 return -1;
00322 }
00323 char* endptr = 0;
00324 const char* begptr = str.c_str();
00325 errno = 0;
00326 unsigned long value = strtoul(begptr, &endptr, 10);
00327 if ( errno || !endptr || endptr == begptr ||
00328 value > (unsigned long) kMax_Int || *endptr ) {
00329 if ( !errno ) {
00330 errno = !endptr || endptr == begptr || *endptr ? EINVAL : ERANGE;
00331 }
00332 return -1;
00333 }
00334 return (int) value;
00335 }
00336
00337
// Report a failed string-to-number conversion from inside a conversion
// function.  Relies on 'flags' and 'str' being visible at the point of use.
//  - With NStr::fConvErr_NoThrow set: store 'errcode' in errno (always when
//    'force_errno' is true, otherwise only if errno is still 0) and return 0
//    from the enclosing function.
//  - Otherwise: build "Cannot convert string '<str>' to <to_type>[, <msg>]"
//    and pass it, together with 'delta', to NCBI_THROW2 as a
//    CStringException / eConvert.
#define S2N_CONVERT_ERROR(to_type, msg, errcode, force_errno, delta)        \
    if (flags & NStr::fConvErr_NoThrow)  {                                  \
        if ( force_errno )                                                  \
            errno = 0;                                                      \
        if ( !errno )                                                       \
            errno = errcode;                                                \
                                                                            \
        return 0;                                                           \
    } else {                                                                \
        CTempString str_tmp(str);                                           \
        CTempString msg_tmp(msg);                                           \
        string smsg;                                                        \
        smsg.reserve(str_tmp.length() + msg_tmp.length() + 50);             \
        smsg += "Cannot convert string '";                                  \
        smsg += str;                                                        \
        smsg += "' to " #to_type;                                           \
        if ( !msg_tmp.empty() ) {                                           \
            smsg += ", ";                                                   \
            smsg += msg;                                                    \
        }                                                                   \
        NCBI_THROW2(CStringException, eConvert, smsg, delta);               \
    }                                                                       \

// Invalid-input error (EINVAL) at the current 'pos', with no extra message.
#define S2N_CONVERT_ERROR_INVAL(to_type)                                    \
    S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, true, pos)

// Invalid-input error (EINVAL) at the current 'pos' with a caller-supplied
// message; used below for an unsupported numeric base.
#define S2N_CONVERT_ERROR_RADIX(to_type, msg)                               \
    S2N_CONVERT_ERROR(to_type, msg, EINVAL, true, pos)

// Overflow error (ERANGE) at the current 'pos'.
#define S2N_CONVERT_ERROR_OVERFLOW(to_type)                                 \
    S2N_CONVERT_ERROR(to_type, "overflow",ERANGE, true, pos)

// Fail with EINVAL unless the character at 'pos' is the terminating zero,
// i.e. unless the whole string has been consumed.
#define CHECK_ENDPTR(to_type)                                               \
    if ( str[pos] ) {                                                       \
        S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, true, pos);           \
    }

// Fail with ERANGE if errno is already set or the local 'value' lies outside
// [nmin, nmax].  An errno that is already non-zero is not overwritten.
#define CHECK_RANGE(nmin, nmax, to_type)                                    \
    if ( errno  ||  value < nmin  ||  value > nmax ) {                      \
        S2N_CONVERT_ERROR(to_type, "overflow", ERANGE, false, 0);           \
    }

// Unsigned flavor of CHECK_RANGE: only the upper bound is checked.
#define CHECK_RANGE_U(nmax, to_type)                                        \
    if ( errno  ||  value > nmax ) {                                        \
        S2N_CONVERT_ERROR(to_type, "overflow", ERANGE, false, 0);           \
    }

// Thousands-separator handling for the digit loops below (active only with
// NStr::fAllowCommas).  Uses the locals 'ch', 'pos', 'numpos' and 'comma'
// of the enclosing loop:
//  - 'comma' is -1 until the first comma is seen, afterwards it counts the
//    characters consumed since the last comma;
//  - a comma at the very start of the number, or one not preceded by exactly
//    3 characters since the previous comma, stops the loop ('break');
//  - a valid comma is skipped and the loop goes on ('continue').
#define CHECK_COMMAS                                                        \
                                                                            \
    if (flags & NStr::fAllowCommas) {                                       \
        if (ch == ',') {                                                    \
            if ((numpos == pos)  ||                                         \
                ((comma >= 0)  &&  (comma != 3)) ) {                        \
                                                                            \
                break;                                                      \
            }                                                               \
                                                                            \
            comma = 0;                                                      \
            pos++;                                                          \
            continue;                                                       \
        } else {                                                            \
            if (comma >= 0) {                                               \
                                                                            \
                comma++;                                                    \
            }                                                               \
        }                                                                   \
    }
00405
00406
00407 int NStr::StringToInt(const CTempString& str, TStringToNumFlags flags,int base)
00408 {
00409 errno = 0;
00410 Int8 value = StringToInt8(str, flags, base);
00411 CHECK_RANGE(kMin_Int, kMax_Int, int);
00412 return (int) value;
00413 }
00414
00415
00416 unsigned int
00417 NStr::StringToUInt(const CTempString& str, TStringToNumFlags flags, int base)
00418 {
00419 errno = 0;
00420 Uint8 value = StringToUInt8(str, flags, base);
00421 CHECK_RANGE_U(kMax_UInt, unsigned int);
00422 return (unsigned int) value;
00423 }
00424
00425
00426 long NStr::StringToLong(const CTempString& str, TStringToNumFlags flags,
00427 int base)
00428 {
00429 errno = 0;
00430 Int8 value = StringToInt8(str, flags, base);
00431 CHECK_RANGE(kMin_Long, kMax_Long, long);
00432 return (long) value;
00433 }
00434
00435
00436 unsigned long
00437 NStr::StringToULong(const CTempString& str, TStringToNumFlags flags, int base)
00438 {
00439 errno = 0;
00440 Uint8 value = StringToUInt8(str, flags, base);
00441 CHECK_RANGE_U(kMax_ULong, long);
00442 return (unsigned long) value;
00443 }
00444
00445
00446
00447
/// Check whether 'ch' is a valid digit in the given numeric base.
///
/// Digits '0'-'9' map to 0-9 and letters (either case) map to 10 and up.
///
/// @param ch
///   Character to examine.
/// @param base
///   Numeric base the digit must fit in.
/// @param value
///   If not null, receives the computed digit value for any alphanumeric
///   character, even when the function returns false.
/// @return
///   true if 'ch' is alphanumeric and its digit value is in [0, base).
bool s_IsGoodCharForRadix(char ch, int base, int* value = 0)
{
    if ( !isalnum((unsigned char) ch) ) {
        return false;
    }

    int delta;
    if ( isdigit((unsigned char) ch) ) {
        delta = ch - '0';
    } else {
        // Keep the arithmetic in 'int': going back through a (possibly
        // signed) char would make locale-specific letters above 0x7F
        // come out negative.
        delta = tolower((unsigned char) ch) - 'a' + 10;
    }
    if ( value ) {
        *value = delta;
    }
    // The lower bound guards against characters that isalnum() accepts
    // in the current locale but that sort before 'a'.
    return delta >= 0  &&  delta < base;
}
00466
00467
00468
00469
/// How s_SkipAllowedSymbols() advances the position.
enum ESkipMode {
    eSkipAll,          // jump straight to the end of the string
    eSkipAllAllowed,   // skip everything up to a digit, '+', '-' or '.'
    eSkipSpacesOnly    // skip whitespace only
};
00475
00476 void s_SkipAllowedSymbols(const CTempString& str,
00477 SIZE_TYPE& pos,
00478 ESkipMode skip_mode)
00479 {
00480 if (skip_mode == eSkipAll) {
00481 pos = str.length();
00482 return;
00483 }
00484 for ( SIZE_TYPE len = str.length(); pos < len; ++pos ) {
00485 unsigned char ch = str[pos];
00486 if ( isdigit(ch) || ch == '+' || ch == '-' || ch == '.' ) {
00487 break;
00488 }
00489 if ( (skip_mode == eSkipSpacesOnly) && !isspace(ch) ) {
00490 break;
00491 }
00492 }
00493 }
00494
00495
00496
00497
00498
00499
00500 bool s_CheckRadix(const CTempString& str, SIZE_TYPE& pos, int& base)
00501 {
00502
00503 if ( base < 0 || base == 1 || base > 36 ) {
00504 return false;
00505 }
00506
00507 unsigned char ch = str[pos];
00508 unsigned char next = str[pos+1];
00509 if ( base == 0 ) {
00510 if ( ch != '0' ) {
00511 base = 10;
00512 } else if (next == 'x' || next == 'X') {
00513 base = 16;
00514 } else {
00515 base = 8;
00516 }
00517 }
00518
00519 if ( base == 16 ) {
00520 if (ch == '0' && (next == 'x' || next == 'X')) {
00521 pos += 2;
00522 }
00523 }
00524 return true;
00525 }
00526
00527
00528 Int8 NStr::StringToInt8(const CTempString& str, TStringToNumFlags flags,
00529 int base)
00530 {
00531 _ASSERT(flags == 0 || flags > 32);
00532
00533
00534 SIZE_TYPE pos = 0;
00535
00536
00537 if (flags & fAllowLeadingSymbols) {
00538 bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces);
00539 s_SkipAllowedSymbols(str, pos,
00540 spaces ? eSkipSpacesOnly : eSkipAllAllowed);
00541 }
00542
00543 bool sign = false;
00544 switch (str[pos]) {
00545 case '-':
00546 sign = true;
00547
00548 case '+':
00549 pos++;
00550 break;
00551 default:
00552 if (flags & fMandatorySign) {
00553 S2N_CONVERT_ERROR_INVAL(Int8);
00554 }
00555 break;
00556 }
00557
00558 if ( !s_CheckRadix(str, pos, base) ) {
00559 S2N_CONVERT_ERROR_RADIX(Int8, "bad numeric base '" +
00560 NStr::IntToString(base)+ "'");
00561 }
00562
00563
00564 Int8 n = 0;
00565 Int8 limdiv = kMax_I8 / base;
00566 Int8 limoff = kMax_I8 % base + (sign ? 1 : 0);
00567
00568
00569 int comma = -1;
00570 SIZE_TYPE numpos = pos;
00571
00572 errno = 0;
00573 while (str[pos]) {
00574 char ch = str[pos];
00575 int delta;
00576
00577
00578 CHECK_COMMAS;
00579
00580 if ( !s_IsGoodCharForRadix(ch, base, &delta) ) {
00581 break;
00582 }
00583
00584 if ( n > limdiv || (n == limdiv && delta > limoff) ) {
00585 S2N_CONVERT_ERROR_OVERFLOW(Int8);
00586 }
00587 n *= base;
00588 n += delta;
00589 pos++;
00590 }
00591
00592
00593 if ( !pos || ((comma >= 0) && (comma != 3)) ) {
00594 S2N_CONVERT_ERROR_INVAL(Int8);
00595 }
00596
00597 if (flags & fAllowTrailingSymbols) {
00598 bool spaces = ((flags & fAllowTrailingSymbols) ==
00599 fAllowTrailingSpaces);
00600 s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll);
00601 }
00602
00603 n = sign ? -n : n;
00604 CHECK_ENDPTR(Int8);
00605 return n;
00606 }
00607
00608
00609 Uint8 NStr::StringToUInt8(const CTempString& str,
00610 TStringToNumFlags flags, int base)
00611 {
00612 _ASSERT(flags == 0 || flags > 32);
00613
00614
00615 SIZE_TYPE pos = 0;
00616
00617
00618 if (flags & fAllowLeadingSymbols) {
00619 bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces);
00620 s_SkipAllowedSymbols(str, pos,
00621 spaces ? eSkipSpacesOnly : eSkipAllAllowed);
00622 }
00623
00624 if (str[pos] == '+') {
00625 pos++;
00626 } else {
00627 if (flags & fMandatorySign) {
00628 S2N_CONVERT_ERROR_INVAL(Uint8);
00629 }
00630 }
00631
00632 if ( !s_CheckRadix(str, pos, base) ) {
00633 S2N_CONVERT_ERROR_RADIX(Uint8, "bad numeric base '" +
00634 NStr::IntToString(base) + "'");
00635 }
00636
00637
00638 Uint8 n = 0;
00639 Uint8 limdiv = kMax_UI8 / base;
00640 int limoff = int(kMax_UI8 % base);
00641
00642
00643 int comma = -1;
00644 SIZE_TYPE numpos = pos;
00645
00646 errno = 0;
00647 while (str[pos]) {
00648 char ch = str[pos];
00649 int delta;
00650
00651
00652 CHECK_COMMAS;
00653
00654 if ( !s_IsGoodCharForRadix(ch, base, &delta) ) {
00655 break;
00656 }
00657
00658 if (n > limdiv || (n == limdiv && delta > limoff)) {
00659 S2N_CONVERT_ERROR_OVERFLOW(Uint8);
00660 }
00661 n *= base;
00662 n += delta;
00663 pos++;
00664 }
00665
00666
00667 if ( !pos || ((comma >= 0) && (comma != 3)) ) {
00668 S2N_CONVERT_ERROR_INVAL(Uint8);
00669 }
00670
00671 if (flags & fAllowTrailingSymbols) {
00672 bool spaces = ((flags & fAllowTrailingSymbols) ==
00673 fAllowTrailingSpaces);
00674 s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll);
00675 }
00676 CHECK_ENDPTR(Uint8);
00677 return n;
00678 }
00679
00680
00681 double NStr::StringToDoubleEx(const char* str, size_t size,
00682 TStringToNumFlags flags)
00683 {
00684 _ASSERT(flags == 0 || flags > 32);
00685 _ASSERT(str[size] == '\0');
00686
00687
00688 SIZE_TYPE pos = 0;
00689
00690
00691 if (flags & fAllowLeadingSymbols) {
00692 bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces);
00693 s_SkipAllowedSymbols(CTempString(str, size), pos,
00694 spaces ? eSkipSpacesOnly : eSkipAllAllowed);
00695 }
00696
00697 if (flags & fMandatorySign) {
00698 switch (str[pos]) {
00699 case '-':
00700 case '+':
00701 break;
00702 default:
00703 S2N_CONVERT_ERROR_INVAL(double);
00704 }
00705 }
00706
00707
00708 if (!(flags & fAllowLeadingSymbols)) {
00709 char c = str[pos];
00710 if ( !isdigit((unsigned int)c) && c != '.' && c != '-' && c != '+') {
00711 S2N_CONVERT_ERROR_INVAL(double);
00712 }
00713 }
00714
00715
00716 char* endptr = 0;
00717 const char* begptr = str + pos;
00718
00719 errno = 0;
00720 double n = strtod(begptr, &endptr);
00721 if ( errno || !endptr || endptr == begptr ) {
00722 S2N_CONVERT_ERROR(double, kEmptyStr, EINVAL, false,
00723 s_DiffPtr(endptr, begptr) + pos);
00724 }
00725 if ( *(endptr - 1) != '.' && *endptr == '.' ) {
00726
00727 if (endptr == strchr(begptr, '.')) {
00728 endptr++;
00729 }
00730 }
00731 pos += s_DiffPtr(endptr, begptr);
00732
00733
00734 if (flags & fAllowTrailingSymbols) {
00735 bool spaces = ((flags & fAllowTrailingSymbols) ==
00736 fAllowTrailingSpaces);
00737 s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll);
00738 }
00739 CHECK_ENDPTR(double);
00740 return n;
00741 }
00742
00743
00744 double NStr::StringToDouble(const CTempStringEx& str, TStringToNumFlags flags)
00745 {
00746 size_t size = str.size();
00747 if ( str.HasZeroAtEnd() ) {
00748
00749 return StringToDoubleEx(str.data(), size, flags);
00750 }
00751 char buf[256];
00752 if ( size < sizeof(buf) ) {
00753 memcpy(buf, str.data(), size);
00754 buf[size] = '\0';
00755 return StringToDoubleEx(buf, size, flags);
00756 }
00757 else {
00758
00759 return StringToDoubleEx(string(str).c_str(), size, flags);
00760 }
00761 }
00762
00763
00764 static Uint8 s_DataSizeConvertQual(const CTempString& str,
00765 SIZE_TYPE& pos,
00766 Uint8 value,
00767 NStr::TStringToNumFlags flags)
00768 {
00769 unsigned char ch = str[pos];
00770 if ( !ch ) {
00771 return value;
00772 }
00773
00774 ch = toupper(ch);
00775 Uint8 v = value;
00776 bool err = false;
00777
00778 switch(ch) {
00779 case 'K':
00780 pos++;
00781 if ((kMax_UI8 / 1024) < v) {
00782 err = true;
00783 }
00784 v *= 1024;
00785 break;
00786 case 'M':
00787 pos++;
00788 if ((kMax_UI8 / 1024 / 1024) < v) {
00789 err = true;
00790 }
00791 v *= 1024 * 1024;
00792 break;
00793 case 'G':
00794 pos++;
00795 if ((kMax_UI8 / 1024 / 1024 / 1024) < v) {
00796 err = true;
00797 }
00798 v *= 1024 * 1024 * 1024;
00799 break;
00800 default:
00801
00802 S2N_CONVERT_ERROR_INVAL(Uint8);
00803 }
00804 if ( err ) {
00805 S2N_CONVERT_ERROR_OVERFLOW(DataSize);
00806 }
00807
00808 ch = str[pos];
00809 if ( ch && toupper(ch) == 'B' ) {
00810 pos++;
00811 }
00812 return v;
00813 }
00814
00815
00816 Uint8 NStr::StringToUInt8_DataSize(const CTempString& str,
00817 TStringToNumFlags flags,
00818 int base)
00819 {
00820
00821 _ASSERT(flags == 0 || flags > 20);
00822
00823
00824 SIZE_TYPE pos = 0;
00825
00826
00827 {{
00828
00829 if (flags & fAllowLeadingSymbols) {
00830 bool spaces = ((flags & fAllowLeadingSymbols) ==
00831 fAllowLeadingSpaces);
00832 s_SkipAllowedSymbols(str, pos,
00833 spaces ? eSkipSpacesOnly : eSkipAllAllowed);
00834 }
00835
00836 if (str[pos] == '+') {
00837 pos++;
00838
00839 flags &= ~fMandatorySign;
00840 } else {
00841 if (flags & fMandatorySign) {
00842 S2N_CONVERT_ERROR_INVAL(Uint8);
00843 }
00844 }
00845
00846 if ( !s_CheckRadix(str, pos, base) ) {
00847 S2N_CONVERT_ERROR_RADIX(Uint8, "bad numeric base '" +
00848 NStr::IntToString(base) + "'");
00849 }
00850 }}
00851
00852 SIZE_TYPE numpos = pos;
00853 char ch = str[pos];
00854 while (ch) {
00855 if ( !s_IsGoodCharForRadix(ch, base) &&
00856 ((ch != ',') || !(flags & fAllowCommas)) ) {
00857 break;
00858 }
00859 ch = str[++pos];
00860 }
00861
00862
00863 if (pos-numpos == 0) {
00864 pos = str.length();
00865 }
00866
00867
00868 Uint8 n = StringToUInt8(CTempString(str.data()+numpos, pos-numpos),
00869 flags, base);
00870 if ( errno ) {
00871
00872
00873 return 0;
00874 }
00875
00876 if ( ch ) {
00877 n = s_DataSizeConvertQual(str, pos, n, flags);
00878 }
00879
00880 if (flags & fAllowTrailingSymbols) {
00881 bool spaces = ((flags & fAllowTrailingSymbols) ==
00882 fAllowTrailingSpaces);
00883 s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll);
00884 }
00885 CHECK_ENDPTR(Uint8);
00886 return n;
00887 }
00888
00889
00890 void NStr::IntToString(string& out_str, long svalue,
00891 TNumToStringFlags flags, int base)
00892 {
00893 _ASSERT(flags == 0 || flags > 32);
00894 if ( base < 2 || base > 36 ) {
00895 return;
00896 }
00897
00898 unsigned long value = static_cast<unsigned long>(svalue);
00899
00900 const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
00901 char buffer[kBufSize];
00902 char* pos = buffer + kBufSize;
00903
00904 if ( base == 10 ) {
00905 if ( svalue < 0 ) {
00906 value = static_cast<unsigned long>(-svalue);
00907 }
00908
00909 if ( (flags & fWithCommas) ) {
00910 int cnt = -1;
00911 do {
00912 if (++cnt == 3) {
00913 *--pos = ',';
00914 cnt = 0;
00915 }
00916 unsigned long a = '0'+value;
00917 value /= 10;
00918 *--pos = char(a - value*10);
00919 } while ( value );
00920 }
00921 else {
00922 do {
00923 unsigned long a = '0'+value;
00924 value /= 10;
00925 *--pos = char(a - value*10);
00926 } while ( value );
00927 }
00928
00929 if (svalue < 0)
00930 *--pos = '-';
00931 else if (flags & fWithSign)
00932 *--pos = '+';
00933 }
00934 else if ( base == 16 ) {
00935 do {
00936 *--pos = s_Hex[value % 16];
00937 value /= 16;
00938 } while ( value );
00939 }
00940 else {
00941 do {
00942 *--pos = s_Hex[value % base];
00943 value /= base;
00944 } while ( value );
00945 }
00946
00947 out_str.assign(pos, buffer + kBufSize - pos);
00948 }
00949
00950
00951 void NStr::UIntToString(string& out_str,
00952 unsigned long value,
00953 TNumToStringFlags flags,
00954 int base)
00955 {
00956 _ASSERT(flags == 0 || flags > 32);
00957 if ( base < 2 || base > 36 ) {
00958 return;
00959 }
00960
00961 const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
00962 char buffer[kBufSize];
00963 char* pos = buffer + kBufSize;
00964
00965 if ( base == 10 ) {
00966 if ( (flags & fWithCommas) ) {
00967 int cnt = -1;
00968 do {
00969 if (++cnt == 3) {
00970 *--pos = ',';
00971 cnt = 0;
00972 }
00973 unsigned long a = '0'+value;
00974 value /= 10;
00975 *--pos = char(a - value*10);
00976 } while ( value );
00977 }
00978 else {
00979 do {
00980 unsigned long a = '0'+value;
00981 value /= 10;
00982 *--pos = char(a - value*10);
00983 } while ( value );
00984 }
00985
00986 if ( (flags & fWithSign) ) {
00987 *--pos = '+';
00988 }
00989 }
00990 else if ( base == 16 ) {
00991 do {
00992 *--pos = s_Hex[value % 16];
00993 value /= 16;
00994 } while ( value );
00995 }
00996 else {
00997 do {
00998 *--pos = s_Hex[value % base];
00999 value /= base;
01000 } while ( value );
01001 }
01002
01003 out_str.assign(pos, buffer + kBufSize - pos);
01004 }
01005
01006
01007 string NStr::Int8ToString(Int8 value, TNumToStringFlags flags, int base)
01008 {
01009 string ret;
01010 NStr::Int8ToString(ret, value, flags, base);
01011 return ret;
01012 }
01013
01014
01015
01016
01017
01018
01019 #define PRINT_INT8_CHUNK 1000000000
01020 #define PRINT_INT8_CHUNK_SIZE 9
01021
01022
01023 static char* s_PrintUint8(char* pos,
01024 Uint8 value,
01025 NStr::TNumToStringFlags flags,
01026 int base)
01027 {
01028 if ( base == 10 ) {
01029 if ( (flags & NStr::fWithCommas) ) {
01030 int cnt = -1;
01031 #ifdef PRINT_INT8_CHUNK
01032
01033
01034 while ( value & ~Uint8(Uint4(~0)) ) {
01035 Uint4 chunk = Uint4(value);
01036 value /= PRINT_INT8_CHUNK;
01037 chunk -= PRINT_INT8_CHUNK*Uint4(value);
01038 char* end = pos - PRINT_INT8_CHUNK_SIZE - 2;
01039 do {
01040 if (++cnt == 3) {
01041 *--pos = ',';
01042 cnt = 0;
01043 }
01044 Uint4 a = '0'+chunk;
01045 chunk /= 10;
01046 *--pos = char(a-10*chunk);
01047 } while ( pos != end );
01048 }
01049
01050 Uint4 chunk = Uint4(value);
01051 do {
01052 if (++cnt == 3) {
01053 *--pos = ',';
01054 cnt = 0;
01055 }
01056 Uint4 a = '0'+chunk;
01057 chunk /= 10;
01058 *--pos = char(a-10*chunk);
01059 } while ( chunk );
01060 #else
01061 do {
01062 if (++cnt == 3) {
01063 *--pos = ',';
01064 cnt = 0;
01065 }
01066 Uint8 a = '0'+value;
01067 value /= 10;
01068 *--pos = char(a - 10*value);
01069 } while ( value );
01070 #endif
01071 }
01072 else {
01073 #ifdef PRINT_INT8_CHUNK
01074
01075
01076 while ( value & ~Uint8(Uint4(~0)) ) {
01077 Uint4 chunk = Uint4(value);
01078 value /= PRINT_INT8_CHUNK;
01079 chunk -= PRINT_INT8_CHUNK*Uint4(value);
01080 char* end = pos - PRINT_INT8_CHUNK_SIZE;
01081 do {
01082 Uint4 a = '0'+chunk;
01083 chunk /= 10;
01084 *--pos = char(a-10*chunk);
01085 } while ( pos != end );
01086 }
01087
01088 Uint4 chunk = Uint4(value);
01089 do {
01090 Uint4 a = '0'+chunk;
01091 chunk /= 10;
01092 *--pos = char(a-10*chunk);
01093 } while ( chunk );
01094 #else
01095 do {
01096 Uint8 a = '0'+value;
01097 value /= 10;
01098 *--pos = char(a-10*value);
01099 } while ( value );
01100 #endif
01101 }
01102 }
01103 else if ( base == 16 ) {
01104 do {
01105 *--pos = s_Hex[value % 16];
01106 value /= 16;
01107 } while ( value );
01108 }
01109 else {
01110 do {
01111 *--pos = s_Hex[value % base];
01112 value /= base;
01113 } while ( value );
01114 }
01115 return pos;
01116 }
01117
01118
01119 void NStr::Int8ToString(string& out_str, Int8 svalue,
01120 TNumToStringFlags flags, int base)
01121 {
01122 _ASSERT(flags == 0 || flags > 32);
01123 if ( base < 2 || base > 36 ) {
01124 return;
01125 }
01126
01127 Uint8 value;
01128 if (base == 10) {
01129 value = static_cast<Uint8>(svalue<0?-svalue:svalue);
01130 } else {
01131 value = static_cast<Uint8>(svalue);
01132 }
01133
01134 const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
01135 char buffer[kBufSize];
01136
01137 char* pos = s_PrintUint8(buffer + kBufSize, value, flags, base);
01138
01139 if (base == 10) {
01140 if (svalue < 0)
01141 *--pos = '-';
01142 else if (flags & fWithSign)
01143 *--pos = '+';
01144 }
01145 out_str.assign(pos, buffer + kBufSize - pos);
01146 }
01147
01148
01149 string NStr::UInt8ToString(Uint8 value, TNumToStringFlags flags, int base)
01150 {
01151 string ret;
01152 NStr::UInt8ToString(ret, value, flags, base);
01153 return ret;
01154 }
01155
01156
01157 void NStr::UInt8ToString(string& out_str, Uint8 value,
01158 TNumToStringFlags flags, int base)
01159 {
01160 _ASSERT(flags == 0 || flags > 32);
01161 if ( base < 2 || base > 36 ) {
01162 return;
01163 }
01164
01165 const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
01166 char buffer[kBufSize];
01167
01168 char* pos = s_PrintUint8(buffer + kBufSize, value, flags, base);
01169
01170 if ( (base == 10) && (flags & fWithSign) ) {
01171 *--pos = '+';
01172 }
01173 out_str.assign(pos, buffer + kBufSize - pos);
01174 }
01175
01176
01177
// Upper limit applied to the 'precision' argument of DoubleToString();
// the value is platform-dependent.
#if defined(NCBI_OS_MSWIN)
const int kMaxDoublePrecision = 200;
#else
const int kMaxDoublePrecision = 308;
#endif

// Size of the scratch buffers used by DoubleToString().
// NOTE(review): presumably 308 integer digits + sign, point and terminator
// + the fractional digits -- confirm that this covers the longest output.
const int kMaxDoubleStringSize = 308 + 3 + kMaxDoublePrecision;
01186
01187
01188 string NStr::DoubleToString(double value, int precision,
01189 TNumToStringFlags flags)
01190 {
01191 string str;
01192 DoubleToString(str, value, precision, flags);
01193 return str;
01194 }
01195
01196
01197 void NStr::DoubleToString(string& out_str, double value,
01198 int precision, TNumToStringFlags flags)
01199 {
01200 char buffer[kMaxDoubleStringSize];
01201 if (precision >= 0) {
01202 SIZE_TYPE n = DoubleToString(value, precision, buffer,
01203 kMaxDoubleStringSize, flags);
01204 buffer[n] = '\0';
01205 } else {
01206 const char* format;
01207 switch (flags & fDoubleGeneral) {
01208 case fDoubleFixed:
01209 format = "%f";
01210 break;
01211 case fDoubleScientific:
01212 format = "%e";
01213 break;
01214 case fDoubleGeneral:
01215 default:
01216 format = "%g";
01217 break;
01218 }
01219 ::sprintf(buffer, format, value);
01220 }
01221 out_str = buffer;
01222 }
01223
01224
01225
01226 SIZE_TYPE NStr::DoubleToString(double value, unsigned int precision,
01227 char* buf, SIZE_TYPE buf_size,
01228 TNumToStringFlags flags)
01229 {
01230 char buffer[kMaxDoubleStringSize];
01231 if (precision > (unsigned int)kMaxDoublePrecision) {
01232 precision = (unsigned int)kMaxDoublePrecision;
01233 }
01234 const char* format;
01235 switch (flags & fDoubleGeneral) {
01236 case fDoubleScientific:
01237 format = "%.*e";
01238 break;
01239 case fDoubleGeneral:
01240 format = "%.*g";
01241 break;
01242 case fDoubleFixed:
01243 default:
01244 format = "%.*f";
01245 break;
01246 }
01247 int n = ::sprintf(buffer, format, (int)precision, value);
01248 SIZE_TYPE n_copy = min((SIZE_TYPE) n, buf_size);
01249 memcpy(buf, buffer, n_copy);
01250 return n_copy;
01251 }
01252
01253
01254 string NStr::PtrToString(const void* value)
01255 {
01256 char buffer[64];
01257 ::sprintf(buffer, "%p", value);
01258 return buffer;
01259 }
01260
01261
01262 void NStr::PtrToString(string& out_str, const void* value)
01263 {
01264 char buffer[64];
01265 ::sprintf(buffer, "%p", value);
01266 out_str = buffer;
01267 }
01268
01269
01270 const void* NStr::StringToPtr(const string& str)
01271 {
01272 void *ptr = NULL;
01273 ::sscanf(str.c_str(), "%p", &ptr);
01274 return ptr;
01275 }
01276
01277
// Spellings recognized by StringToBool() (compared without regard to case).
// BoolToString() produces the first pair.
static const char* s_kTrueString  = "true";
static const char* s_kFalseString = "false";
static const char* s_kTString     = "t";
static const char* s_kFString     = "f";
static const char* s_kYesString   = "yes";
static const char* s_kNoString    = "no";
static const char* s_kYString     = "y";
static const char* s_kNString     = "n";
01286
01287
01288 const string NStr::BoolToString(bool value)
01289 {
01290 return value ? s_kTrueString : s_kFalseString;
01291 }
01292
01293
01294 bool NStr::StringToBool(const string& str)
01295 {
01296 if ( AStrEquiv(str, s_kTrueString, PNocase()) ||
01297 AStrEquiv(str, s_kTString, PNocase()) ||
01298 AStrEquiv(str, s_kYesString, PNocase()) ||
01299 AStrEquiv(str, s_kYString, PNocase()) )
01300 return true;
01301
01302 if ( AStrEquiv(str, s_kFalseString, PNocase()) ||
01303 AStrEquiv(str, s_kFString, PNocase()) ||
01304 AStrEquiv(str, s_kNoString, PNocase()) ||
01305 AStrEquiv(str, s_kNString, PNocase()) )
01306 return false;
01307
01308 NCBI_THROW2(CStringException, eConvert,
01309 "String cannot be converted to bool", 0);
01310 }
01311
01312
/// Format a printf-style argument list into a string.
///
/// The implementation is selected at compile time, in this order:
/// vasprintf(), the GCC stream vform() extension, vsnprintf() with a
/// growing heap buffer, and finally vsprintf() into a fixed 1024-byte
/// buffer.
string NStr::FormatVarargs(const char* format, va_list args)
{
#ifdef HAVE_VASPRINTF
    // vasprintf() allocates a buffer of the right size itself
    char* s;
    int   n = vasprintf(&s, format, args);
    if (n >= 0) {
        string str(s, n);
        free(s);
        return str;
    } else {
        return kEmptyStr;
    }

#elif defined(NCBI_COMPILER_GCC) && defined(NO_PUBSYNC)
    CNcbiOstrstream oss;
    oss.vform(format, args);
    return CNcbiOstrstreamToString(oss);

#elif defined(HAVE_VSNPRINTF)
    // Start with 1024 bytes and retry with a larger buffer for as long as
    // the output does not fit.  The last two bytes are zeroed beforehand so
    // that truncation (and an overrun by a faulty vsnprintf) can be detected
    // even if the return value is unreliable.
    // NOTE(review): 'args' is passed to vsnprintf() again on every retry
    // without va_copy(); the C standard leaves the va_list indeterminate
    // after the first call -- confirm on the platforms that use this branch.
    // NOTE(review): 'n' is unsigned, so a negative vsnprintf() result turns
    // into a very large size here -- confirm that this is intended.
    SIZE_TYPE size = 1024;
    AutoPtr<char, ArrayDeleter<char> > buf(new char[size]);
    buf.get()[size-1] = buf.get()[size-2] = 0;
    SIZE_TYPE n = vsnprintf(buf.get(), size, format, args);
    while (n >= size  ||  buf.get()[size-2]) {
        if (buf.get()[size-1]) {
            ERR_POST_X(1, Warning << "Buffer overrun by buggy vsnprintf");
        }
        size = max(size << 1, n);
        buf.reset(new char[size]);
        buf.get()[size-1] = buf.get()[size-2] = 0;
        n = vsnprintf(buf.get(), size, format, args);
    }
    return (n > 0) ? string(buf.get(), n) : kEmptyStr;

#elif defined(HAVE_VPRINTF)
    // Fixed-size buffer; an overrun can only be detected after the fact
    char buf[1024];
    buf[sizeof(buf) - 1] = 0;
    vsprintf(buf, format, args);
    if (buf[sizeof(buf) - 1]) {
        ERR_POST_X(2, Warning << "Buffer overrun by vsprintf");
    }
    return buf;

#else
#  error Please port this code to your system.
#endif
}
01361
01362
01363 SIZE_TYPE NStr::FindNoCase(const string& str, const string& pattern,
01364 SIZE_TYPE start, SIZE_TYPE end, EOccurrence where)
01365 {
01366 string pat(pattern, 0, 1);
01367 SIZE_TYPE l = pattern.size();
01368 if (isupper((unsigned char) pat[0])) {
01369 pat += (char) tolower((unsigned char) pat[0]);
01370 } else if (islower((unsigned char) pat[0])) {
01371 pat += (char) toupper((unsigned char) pat[0]);
01372 }
01373 if (where == eFirst) {
01374 SIZE_TYPE pos = str.find_first_of(pat, start);
01375 while (pos != NPOS && pos <= end
01376 && CompareNocase(str, pos, l, pattern) != 0) {
01377 pos = str.find_first_of(pat, pos + 1);
01378 }
01379 return pos > end ? NPOS : pos;
01380 } else {
01381 SIZE_TYPE pos = str.find_last_of(pat, end);
01382 while (pos != NPOS && pos >= start
01383 && CompareNocase(str, pos, l, pattern) != 0) {
01384 if (pos == 0) {
01385 return NPOS;
01386 }
01387 pos = str.find_last_of(pat, pos - 1);
01388 }
01389 return pos < start ? NPOS : pos;
01390 }
01391 }
01392
01393
01394 const string* NStr::Find(const list <string>& lst, const string& val,
01395 ECase use_case)
01396 {
01397 if (lst.empty()) return NULL;
01398
01399 ITERATE (list<string>, st_itr, lst) {
01400 if (Equal(*st_itr, val, use_case)) {
01401 return &*st_itr;
01402 }
01403 }
01404
01405 return NULL;
01406 }
01407
01408 const string* NStr::Find(const vector <string>& vec, const string& val,
01409 ECase use_case)
01410 {
01411 if (vec.empty()) return NULL;
01412
01413 ITERATE (vector<string>, st_itr, vec) {
01414 if (Equal(*st_itr, val, use_case)) {
01415 return &*st_itr;
01416 }
01417 }
01418
01419 return NULL;
01420 }
01421
01422
01423 template <class TStr>
01424 TStr s_TruncateSpaces(const TStr& str, NStr::ETrunc where,
01425 const TStr& empty_str)
01426 {
01427 SIZE_TYPE length = str.length();
01428 if (length == 0) {
01429 return empty_str;
01430 }
01431 SIZE_TYPE beg = 0;
01432 if (where == NStr::eTrunc_Begin || where == NStr::eTrunc_Both) {
01433 _ASSERT(beg < length);
01434 while ( isspace((unsigned char) str[beg]) ) {
01435 if (++beg == length) {
01436 return empty_str;
01437 }
01438 }
01439 }
01440 SIZE_TYPE end = length;
01441 if ( where == NStr::eTrunc_End || where == NStr::eTrunc_Both ) {
01442 _ASSERT(end > beg);
01443 for (--end; isspace((unsigned char)str[end]); --end) {
01444 if (end == beg) {
01445 return empty_str;
01446 }
01447 }
01448 _ASSERT(end >= beg && !isspace((unsigned char) str[end]));
01449 ++end;
01450 }
01451 _ASSERT(beg <= end);
01452 if (beg == end) {
01453 return empty_str;
01454 }
01455 else if ( beg || (end - length) ) {
01456
01457 return str.substr(beg, end - beg);
01458 }
01459 else {
01460 return str;
01461 }
01462 }
01463
01464
01465 string NStr::TruncateSpaces(const string& str, ETrunc where)
01466 {
01467 return s_TruncateSpaces(str, where, kEmptyStr);
01468 }
01469
01470 CTempString NStr::TruncateSpaces(const CTempString& str, ETrunc where)
01471 {
01472 return s_TruncateSpaces(str, where, CTempString());
01473 }
01474
01475 CTempString NStr::TruncateSpaces(const char* str, ETrunc where)
01476 {
01477 return s_TruncateSpaces(CTempString(str), where, CTempString());
01478 }
01479
01480
01481 void NStr::TruncateSpacesInPlace(string& str, ETrunc where)
01482 {
01483 SIZE_TYPE length = str.length();
01484 if (length == 0) {
01485 return;
01486 }
01487 SIZE_TYPE beg = 0;
01488 if ( where == eTrunc_Begin || where == eTrunc_Both ) {
01489
01490
01491 _ASSERT(beg < length);
01492 while ( isspace((unsigned char) str.data()[beg]) ) {
01493 if (++beg == length) {
01494 str.erase();
01495 return;
01496 }
01497 }
01498 }
01499
01500 SIZE_TYPE end = length;
01501 if ( where == eTrunc_End || where == eTrunc_Both ) {
01502
01503
01504 _ASSERT(end > beg);
01505 while (isspace((unsigned char) str.data()[--end])) {
01506 if (end == beg) {
01507 str.erase();
01508 return;
01509 }
01510 }
01511 _ASSERT(end >= beg && !isspace((unsigned char) str.data()[end]));
01512 ++end;
01513 }
01514 _ASSERT(beg < end);
01515
01516 #if defined(NCBI_COMPILER_GCC) && (NCBI_COMPILER_VERSION == 304)
01517
01518 str.replace(end, length, kEmptyStr);
01519 str.replace(0, beg, kEmptyStr);
01520 #else
01521 if ( (beg - 0) | (end - length) ) {
01522 str.replace(0, length, str, beg, end - beg);
01523 }
01524 #endif
01525 }
01526
01527
01528 string& NStr::Replace(const string& src,
01529 const string& search, const string& replace,
01530 string& dst, SIZE_TYPE start_pos, SIZE_TYPE max_replace)
01531 {
01532
01533 if (&src == &dst) {
01534 NCBI_THROW2(CStringException, eBadArgs,
01535 "NStr::Replace(): source and destination are the same",0);
01536 }
01537
01538 dst = src;
01539
01540 if ( start_pos + search.size() > src.size() ||
01541 search == replace )
01542 return dst;
01543
01544 for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){
01545 start_pos = dst.find(search, start_pos);
01546 if (start_pos == NPOS)
01547 break;
01548 dst.replace(start_pos, search.size(), replace);
01549 start_pos += replace.size();
01550 }
01551 return dst;
01552 }
01553
01554
01555 string NStr::Replace(const string& src,
01556 const string& search, const string& replace,
01557 SIZE_TYPE start_pos, SIZE_TYPE max_replace)
01558 {
01559 string dst;
01560 Replace(src, search, replace, dst, start_pos, max_replace);
01561 return dst;
01562 }
01563
01564
01565 string& NStr::ReplaceInPlace(string& src,
01566 const string& search, const string& replace,
01567 SIZE_TYPE start_pos, SIZE_TYPE max_replace)
01568 {
01569 if ( start_pos + search.size() > src.size() ||
01570 search == replace )
01571 return src;
01572
01573 bool equal_len = (search.size() == replace.size());
01574 for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){
01575 start_pos = src.find(search, start_pos);
01576 if (start_pos == NPOS)
01577 break;
01578
01579
01580 if ( equal_len ) {
01581 copy(replace.begin(), replace.end(), src.begin() + start_pos);
01582 } else {
01583 src.replace(start_pos, search.size(), replace);
01584 }
01585 start_pos += replace.size();
01586 }
01587 return src;
01588 }
01589
01590
01591 list<string>& NStr::Split(const string& str, const string& delim,
01592 list<string>& arr, EMergeDelims merge,
01593 vector<SIZE_TYPE>* token_pos)
01594 {
01595
01596 typedef list<string> TContainer;
01597 typedef CStrTokenPosAdapter<vector<SIZE_TYPE> > TPosArray;
01598 typedef CStrDummyTargetReserve<string, TContainer,
01599 TPosArray, CStrDummyTokenCount<string > > TReserve;
01600 typedef CStrTokenize<string, TContainer,
01601 TPosArray,
01602 CStrDummyTokenCount<string>,
01603 TReserve> TSplitter;
01604 TPosArray token_pos_proxy(token_pos);
01605 TSplitter::Do(str, delim, arr,
01606 (CStrTokenizeBase::EMergeDelims)merge,
01607 token_pos_proxy,
01608 kEmptyStr);
01609 return arr;
01610
01611
01612
01613
01614
01615
01616
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631
01632
01633
01634
01635
01636
01637
01638
01639
01640
01641
01642
01643
01644
01645
01646
01647
01648
01649 }
01650
01651
01652 vector<string>& NStr::Tokenize(const string& str, const string& delim,
01653 vector<string>& arr, EMergeDelims merge,
01654 vector<SIZE_TYPE>* token_pos)
01655 {
01656 typedef vector<string> TContainer;
01657 typedef CStrTokenPosAdapter<vector<SIZE_TYPE> > TPosArray;
01658 typedef CStrTargetReserve<string, TContainer,
01659 TPosArray, CStringTokenCount> TReserve;
01660 typedef CStrTokenize<string, TContainer,
01661 TPosArray,
01662 CStringTokenCount,
01663 TReserve> TSplitter;
01664 TPosArray token_pos_proxy(token_pos);
01665 TSplitter::Do(str, delim, arr,
01666 (CStrTokenizeBase::EMergeDelims)merge,
01667 token_pos_proxy,
01668 kEmptyStr);
01669 return arr;
01670
01671
01672
01673
01674
01675
01676
01677
01678
01679
01680
01681
01682
01683
01684
01685
01686
01687
01688
01689
01690
01691
01692
01693
01694
01695
01696
01697
01698
01699
01700
01701
01702
01703
01704
01705
01706
01707
01708
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719
01720
01721
01722
01723
01724
01725
01726
01727
01728
01729
01730
01731
01732
01733
01734
01735
01736 }
01737
01738
01739 vector<string>& NStr::TokenizePattern(const string& str,
01740 const string& pattern,
01741 vector<string>& arr, EMergeDelims merge,
01742 vector<SIZE_TYPE>* token_pos)
01743 {
01744
01745 if (str.empty()) {
01746 return arr;
01747 } else if (pattern.empty()) {
01748 arr.push_back(str);
01749 if (token_pos)
01750 token_pos->push_back(0);
01751 return arr;
01752 }
01753
01754 SIZE_TYPE pos, prev_pos;
01755
01756
01757
01758 if ( !arr.size() ) {
01759
01760 size_t tokens = 0;
01761 for (pos = 0, prev_pos = 0; ; ) {
01762 pos = str.find(pattern, prev_pos);
01763 if ( merge != eMergeDelims || pos > prev_pos ) {
01764 if (pos == NPOS) {
01765 if (merge != eMergeDelims ||
01766 prev_pos < str.length() ) {
01767 ++tokens;
01768 }
01769 break;
01770 }
01771 ++tokens;
01772 }
01773 prev_pos = pos + pattern.length();
01774 }
01775 arr.reserve(tokens);
01776 if (token_pos)
01777 token_pos->reserve(tokens);
01778 }
01779
01780
01781 for (pos = 0, prev_pos = 0; ; ) {
01782 pos = str.find(pattern, prev_pos);
01783 if ( merge != eMergeDelims || pos > prev_pos ) {
01784 if (pos == NPOS) {
01785 if (merge != eMergeDelims ||
01786 prev_pos < str.length() ) {
01787
01788
01789 arr.push_back(kEmptyStr);
01790 arr.back().assign(str, prev_pos,
01791 str.length() - prev_pos);
01792 if (token_pos)
01793 token_pos->push_back(prev_pos);
01794 }
01795 break;
01796 }
01797
01798
01799 arr.push_back(kEmptyStr);
01800 arr.back().assign(str, prev_pos, pos - prev_pos);
01801 if (token_pos)
01802 token_pos->push_back(prev_pos);
01803 }
01804 prev_pos = pos + pattern.length();
01805 }
01806 return arr;
01807 }
01808
01809
01810 bool NStr::SplitInTwo(const string& str, const string& delim,
01811 string& str1, string& str2)
01812 {
01813 SIZE_TYPE delim_pos = str.find_first_of(delim);
01814 if (NPOS == delim_pos) {
01815 str1 = str;
01816 str2 = kEmptyStr;
01817 return false;
01818 }
01819 str1.assign(str, 0, delim_pos);
01820
01821 str2.assign(str, delim_pos + 1, str.length() - delim_pos - 1);
01822
01823 return true;
01824 }
01825
01826
01827 template <typename T>
01828 string s_NStr_Join(const T& arr, const string& delim)
01829 {
01830 if (arr.empty()) {
01831 return kEmptyStr;
01832 }
01833
01834 string result = arr.front();
01835 typename T::const_iterator it = arr.begin();
01836 SIZE_TYPE needed = result.size();
01837
01838 while (++it != arr.end()) {
01839 needed += delim.size() + it->size();
01840 }
01841 result.reserve(needed);
01842 it = arr.begin();
01843 while (++it != arr.end()) {
01844 result += delim;
01845 result += *it;
01846 }
01847 return result;
01848 }
01849
01850
01851 string NStr::Join(const list<string>& arr, const string& delim)
01852 {
01853 return s_NStr_Join(arr, delim);
01854 }
01855
01856
01857 string NStr::Join(const vector<string>& arr, const string& delim)
01858 {
01859 return s_NStr_Join(arr, delim);
01860 }
01861
01862
// Target language for the escaping helpers below; it decides whether '&'
// has to be escaped as well (JavaScript only).
enum ELanguage {
    eLanguage_C          = 0,
    eLanguage_Javascript = 1
};
01867
01868
01869 static inline bool s_IsQuoted(char c, ELanguage lang)
01870 {
01871 return (c == '\t' || c == '\v' || c == '\b' ||
01872 c == '\r' || c == '\f' || c == '\a' ||
01873 c == '\n' || c == '\\' || c == '\'' ||
01874 c == '"' || (c == '&' && lang == eLanguage_Javascript) ||
01875 !isprint((unsigned char) c) ? true : false);
01876 }
01877
01878
01879 static string s_PrintableString(const string& str,
01880 NStr::TPrintableMode mode,
01881 ELanguage lang)
01882 {
01883 auto_ptr<CNcbiOstrstream> out;
01884 SIZE_TYPE i, j = 0;
01885
01886 for (i = 0; i < str.size(); i++) {
01887 char c = str[i];
01888 switch (c) {
01889 case '\t':
01890 c = 't';
01891 break;
01892 case '\v':
01893 c = 'v';
01894 break;
01895 case '\b':
01896 c = 'b';
01897 break;
01898 case '\r':
01899 c = 'r';
01900 break;
01901 case '\f':
01902 c = 'f';
01903 break;
01904 case '\a':
01905 c = 'a';
01906 break;
01907 case '\n':
01908 if (!(mode & NStr::fNewLine_Passthru))
01909 c = 'n';
01910
01911 case '\\':
01912 case '\'':
01913 case '"':
01914 break;
01915 case '&':
01916 if (lang != eLanguage_Javascript)
01917 continue;
01918 break;
01919 default:
01920 if (isprint((unsigned char) c))
01921 continue;
01922 break;
01923 }
01924 if (!out.get()) {
01925 out.reset(new CNcbiOstrstream);
01926 }
01927 if (i > j) {
01928 out->write(str.data() + j, i - j);
01929 }
01930 out->put('\\');
01931 if (c == '\n') {
01932 out->write("n\\\n", 3);
01933 } else if (!isprint((unsigned char) c)) {
01934 bool reduce;
01935 if (!(mode & NStr::fPrintable_Full)) {
01936 reduce = (i == str.size() - 1 || s_IsQuoted(str[i + 1], lang)
01937 || str[i + 1] < '0' || str[i + 1] > '7');
01938 } else {
01939 reduce = false;
01940 }
01941 unsigned char v;
01942 char octal[3];
01943 int k = 0;
01944 v = (unsigned char) c >> 6;
01945 if (v || !reduce) {
01946 octal[k++] = '0' + v;
01947 reduce = false;
01948 }
01949 v = ((unsigned char) c >> 3) & 7;
01950 if (v || !reduce) {
01951 octal[k++] = '0' + v;
01952 }
01953 v = (unsigned char) c & 7;
01954 octal [k++] = '0' + v;
01955 out->write(octal, k);
01956 } else {
01957 out->put(c);
01958 }
01959 j = i + 1;
01960 }
01961 if (j && i > j) {
01962 _ASSERT(out.get());
01963 out->write(str.data() + j, i - j);
01964 }
01965 if (out.get()) {
01966
01967 return CNcbiOstrstreamToString(*out);
01968 }
01969
01970
01971 return str;
01972 }
01973
01974
01975 string NStr::PrintableString(const string& str,
01976 NStr::TPrintableMode mode)
01977 {
01978 return s_PrintableString(str, mode, eLanguage_C);
01979 }
01980
01981
01982 string NStr::JavaScriptEncode(const string& str)
01983 {
01984 return s_PrintableString(str, eNewLine_Quote, eLanguage_Javascript);
01985 }
01986
01987 string NStr::XmlEncode(const string& str)
01988
01989 {
01990 string result;
01991 SIZE_TYPE i;
01992 for (i = 0; i < str.size(); i++) {
01993 char c = str[i];
01994 switch ( c ) {
01995 case '&':
01996 result.append("&");
01997 break;
01998 case '<':
01999 result.append("<");
02000 break;
02001 case '>':
02002 result.append(">");
02003 break;
02004 case '\'':
02005 result.append("'");
02006 break;
02007 case '"':
02008 result.append(""");
02009 break;
02010 default:
02011 if ((unsigned int)(c) < 0x20) {
02012 const char* charmap = "0123456789abcdef";
02013 result.append("&#x");
02014 Uint1 ch = c;
02015 unsigned hi = ch >> 4;
02016 unsigned lo = ch & 0xF;
02017 if ( hi ) {
02018 result.append(1, charmap[hi]);
02019 }
02020 result.append(1, charmap[lo]).append(1, ';');
02021 } else {
02022 result.append(1, c);
02023 }
02024 break;
02025 }
02026 }
02027 return result;
02028 }
02029
02030 string NStr::JsonEncode(const string& str)
02031
02032 {
02033 string result;
02034 SIZE_TYPE i;
02035 for (i = 0; i < str.size(); i++) {
02036 char c = str[i];
02037 switch ( c ) {
02038 case '"':
02039 result.append("\\\"");
02040 break;
02041 case '\\':
02042 result.append("\\\\");
02043 break;
02044 default:
02045 if ((unsigned int)c < 0x20 || (unsigned int)c >= 0x80) {
02046 const char* charmap = "0123456789abcdef";
02047 result.append("\\u00");
02048 Uint1 ch = c;
02049 unsigned hi = ch >> 4;
02050 unsigned lo = ch & 0xF;
02051 result.append(1, charmap[hi]);
02052 result.append(1, charmap[lo]);
02053 } else {
02054 result.append(1, c);
02055 }
02056 break;
02057 }
02058 }
02059 return result;
02060 }
02061
02062
02063 string NStr::ParseEscapes(const string& str)
02064 {
02065 string out;
02066 out.reserve(str.size());
02067 SIZE_TYPE pos = 0;
02068
02069 while (pos < str.size()) {
02070 SIZE_TYPE pos2 = str.find('\\', pos);
02071 if (pos2 == NPOS) {
02072 out += str.substr(pos);
02073 break;
02074 }
02075 out += str.substr(pos, pos2 - pos);
02076 if (++pos2 == str.size()) {
02077 NCBI_THROW2(CStringException, eFormat,
02078 "Unterminated escape sequence", pos2);
02079 }
02080 switch (str[pos2]) {
02081 case 'a': out += '\a'; break;
02082 case 'b': out += '\b'; break;
02083 case 'f': out += '\f'; break;
02084 case 'n': out += '\n'; break;
02085 case 'r': out += '\r'; break;
02086 case 't': out += '\t'; break;
02087 case 'v': out += '\v'; break;
02088 case 'x':
02089 {{
02090 pos = ++pos2;
02091 while (pos < str.size()
02092 && isxdigit((unsigned char) str[pos])) {
02093 pos++;
02094 }
02095 if (pos > pos2) {
02096 out += static_cast<char>
02097 (StringToUInt(str.substr(pos2, pos - pos2), 0, 16));
02098 } else {
02099 NCBI_THROW2(CStringException, eFormat,
02100 "\\x followed by no hexadecimal digits", pos);
02101 }
02102 }}
02103 continue;
02104 case '0': case '1': case '2': case '3':
02105 case '4': case '5': case '6': case '7':
02106 {{
02107 pos = pos2;
02108 unsigned char c = str[pos++] - '0';
02109 while (pos < pos2 + 3 && pos < str.size()
02110 && str[pos] >= '0' && str[pos] <= '7') {
02111 c = (c << 3) | (str[pos++] - '0');
02112 }
02113 out += c;
02114 }}
02115 continue;
02116 case '\n':
02117
02118 break;
02119 default:
02120 out += str[pos2];
02121 break;
02122 }
02123 pos = pos2 + 1;
02124 }
02125 return out;
02126 }
02127
02128
02129
02130
02131 static SIZE_TYPE s_EndOfTag(const string& str, SIZE_TYPE start)
02132 {
02133 _ASSERT(start < str.size() && str[start] == '<');
02134 bool comments_ok = (start + 1 < str.size() && str[start + 1] == '!');
02135 for (SIZE_TYPE pos = start + 1; pos < str.size(); ++pos) {
02136 switch (str[pos]) {
02137 case '>':
02138 return pos;
02139
02140 case '\"':
02141 pos = str.find('\"', pos + 1);
02142 if (pos == NPOS) {
02143 NCBI_THROW2(CStringException, eFormat,
02144 "Unclosed string in HTML tag", start);
02145
02146 }
02147 break;
02148
02149 case '-':
02150 if (comments_ok && pos + 1 < str.size()
02151 && str[pos + 1] == '-') {
02152 pos = str.find("--", pos + 2);
02153 if (pos == NPOS) {
02154 NCBI_THROW2(CStringException, eFormat,
02155 "Unclosed comment in HTML tag", start);
02156
02157 } else {
02158 ++pos;
02159 }
02160 }
02161 }
02162 }
02163 NCBI_THROW2(CStringException, eFormat, "Unclosed HTML tag", start);
02164
02165 }
02166
02167
02168
02169
02170 static SIZE_TYPE s_EndOfReference(const string& str, SIZE_TYPE start)
02171 {
02172 _ASSERT(start < str.size() && str[start] == '&');
02173 #ifdef NCBI_STRICT_HTML_REFS
02174 return str.find(';', start + 1);
02175 #else
02176 SIZE_TYPE pos = str.find_first_not_of
02177 ("#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
02178 start + 1);
02179 if (pos == NPOS || str[pos] == ';') {
02180 return pos;
02181 } else {
02182 return pos - 1;
02183 }
02184 #endif
02185 }
02186
02187
02188 static SIZE_TYPE s_VisibleWidth(const string& str, bool is_html)
02189 {
02190 if (is_html) {
02191 SIZE_TYPE width = 0, pos = 0;
02192 for (;;) {
02193 SIZE_TYPE pos2 = str.find_first_of("<&", pos);
02194 if (pos2 == NPOS) {
02195 width += str.size() - pos;
02196 break;
02197 } else {
02198 width += pos2 - pos;
02199 if (str[pos2] == '&') {
02200 ++width;
02201 pos = s_EndOfReference(str, pos);
02202 } else {
02203 pos = s_EndOfTag(str, pos);
02204 }
02205 if (pos == NPOS) {
02206 break;
02207 } else {
02208 ++pos;
02209 }
02210 }
02211 }
02212 return width;
02213 } else {
02214 return str.size();
02215 }
02216 }
02217
02218
// Break `str` into lines aimed at `width` visible columns and append them
// to `arr` (which is also the return value).
//
// The first line starts with *prefix1 when prefix1 is non-null, otherwise
// with *prefix; every following line starts with *prefix.  A null `prefix`
// is treated as an empty string.  Prefix widths count towards `width`.
//
// Flags read here:
//   fWrap_HTMLPre   - tags and character references are stepped over with
//                     s_EndOfTag() / s_EndOfReference() when measuring;
//   fWrap_FlatFile  - alternate break rules (only '-' counts as breakable
//                     punctuation; runs of blanks are skipped after a break);
//   fWrap_Hyphenate - a line that had to be cut at an arbitrary place gets
//                     a trailing '-'.
// Backspace characters ('\b') in `str` erase the preceding character and
// are not copied to the output.
02219 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width,
02220 list<string>& arr, NStr::TWrapFlags flags,
02221 const string* prefix, const string* prefix1)
02222 {
02223 if (prefix == 0) {
02224 prefix = &kEmptyStr;
02225 }
02226
02227 const string* pfx = prefix1 ? prefix1 : prefix;
02228 SIZE_TYPE pos = 0, len = str.size(), nl_pos = 0;
02229
02230 bool is_html = flags & fWrap_HTMLPre ? true : false;
02231 bool do_flat = (flags & fWrap_FlatFile) != 0;
02232 // Quality of a candidate break; compared with >=, so later enumerators win.
02233 enum EScore {
02234 eForced,
02235 ePunct,
02236 eComma,
02237 eSpace,
02238 eNewline
02239 };
02240 // Each iteration emits one output line starting at `pos`.
02241 while (pos < len) {
02242 bool hyphen = false;
02243 SIZE_TYPE column = s_VisibleWidth(*pfx, is_html);
02244 SIZE_TYPE column0 = column;
02245 // Best break found so far for the current line.
02246 SIZE_TYPE best_pos = NPOS;
02247 EScore best_score = eForced;
02248 SIZE_TYPE pos0 = pos;
      // Refresh the cached position of the next newline once `pos` has
      // reached or passed it (end of string if there is none).
02249 if (nl_pos <= pos) {
02250 nl_pos = str.find('\n', pos);
02251 if (nl_pos == NPOS) {
02252 nl_pos = len;
02253 }
02254 }
      // Everything up to that newline fits by raw character count:
      // start the scan right at the newline.
02255 if (column + (nl_pos-pos) <= width) {
02256 pos0 = nl_pos;
02257 }
02258 for (SIZE_TYPE pos2 = pos0; pos2 < len && column <= width;
02259 ++pos2, ++column) {
02260 EScore score = eForced;
02261 SIZE_TYPE score_pos = pos2;
02262 char c = str[pos2];
02263
02264 if (c == '\n') {
02265 best_pos = pos2;
02266 best_score = eNewline;
02267 break;
02268 } else if (isspace((unsigned char) c)) {
02269 if ( !do_flat && pos2 > 0 &&
02270 isspace((unsigned char) str[pos2 - 1])) {
02271 continue;
02272 }
02273 score = eSpace;
02274 } else if (is_html && c == '<') {
02275 // Jump to the end of the tag; it occupies no column (undoes the loop's ++column).
02276 pos2 = s_EndOfTag(str, pos2);
02277 --column;
02278 } else if (is_html && c == '&') {
02279 // Jump to the end of the character reference; it counts as one column.
02280 pos2 = s_EndOfReference(str, pos2);
02281 } else if (c == ',' && score_pos < len - 1 && column < width) {
02282 score = eComma;
02283 ++score_pos;
02284 } else if (do_flat ? c == '-' : ispunct((unsigned char) c)) {
02285 // Flat-file mode: only '-' qualifies; otherwise any punctuation does.
02286 // Opening brackets/backquote: break in front of them.
02287 if (c == '(' || c == '[' || c == '{' || c == '<'
02288 || c == '`') {
02289 score = ePunct;
02290 } else if (score_pos < len - 1 && column < width) {
02291 // Other punctuation: break after it, if there is room for it.
02292 score = ePunct;
02293 ++score_pos;
02294 }
02295 }
02296 // s_EndOfReference() may have returned NPOS.
02297 if (pos2 == NPOS) {
02298 break;
02299 }
02300 // A candidate at the very start of the scan is never accepted.
02301 if (score >= best_score && score_pos > pos0) {
02302 best_pos = score_pos;
02303 best_score = score;
02304 }
02305
02306 while (pos2 < len - 1 && str[pos2 + 1] == '\b') {
02307 // Each following backspace takes back one column, but never below the prefix width.
02308 ++pos2;
02309 if (column > column0) {
02310 --column;
02311 }
02312 }
02313 }
02314
02315 if ( best_score != eNewline && column <= width ) {
02316 // The scan ended without exceeding `width`: take the rest of the string.
02317 best_pos = len;
02318 } else if ( best_score == eForced && (flags & fWrap_Hyphenate) ) {
02319 hyphen = true;
02320 --best_pos;
02321 }
      // Start the output line with the prefix, then copy str[pos, best_pos)
      // while applying backspaces.
02322 arr.push_back(*pfx);
02323 {{
02324 string::const_iterator begin = str.begin() + pos;
02325 string::const_iterator end = str.begin() + best_pos;
02326 string::const_iterator bs;
02327 while ((bs = find(begin, end, '\b')) != end) {
02328 if (bs != begin) {
02329 // Copy up to, but not including, the character the backspace erases.
02330 arr.back().append(begin, bs - 1);
02331 }
02332 else {
02333 // Backspace right at the start of the segment: drop the last
02334 // character already in the line, but never cut into the prefix.
02335 SIZE_TYPE size = arr.back().size();
02336 if (size > pfx->size()) {
02337 arr.back().resize(size - 1);
02338 }
02339 }
02340 // Continue after the backspace.
02341 begin = bs + 1;
02342 }
02343 if (begin != end) {
02344 // Remainder after the last backspace.
02345 arr.back().append(begin, end);
02346 }
02347 }}
02348 if ( hyphen ) {
02349 arr.back() += '-';
02350 }
02351 pos = best_pos;
02352 pfx = prefix;
02353 // Step over the separator that ended the line.
02354 if (do_flat) {
02355 if (best_score == eSpace) {
02356 while (str[pos] == ' ') {
02357 ++pos;
02358 }
02359 if (str[pos] == '\n') {
02360 ++pos;
02361 }
02362 }
02363 if (best_score == eNewline) {
02364 ++pos;
02365 }
02366 }
02367 else {
02368 if ( best_score == eSpace || best_score == eNewline ) {
02369 ++pos;
02370 }
02371 }
      // Backspaces at the start of the next line have nothing to erase.
02372 while (pos < len && str[pos] == '\b') {
02373 ++pos;
02374 }
02375 }
02376
02377 return arr;
02378 }
02379
02380
02381 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,
02382 const string& delim, list<string>& arr,
02383 NStr::TWrapFlags flags, const string* prefix,
02384 const string* prefix1)
02385 {
02386 if (l.empty()) {
02387 return arr;
02388 }
02389
02390 const string* pfx = prefix1 ? prefix1 : prefix;
02391 string s = *pfx;
02392 bool is_html = flags & fWrap_HTMLPre ? true : false;
02393 SIZE_TYPE column = s_VisibleWidth(s, is_html);
02394 SIZE_TYPE delwidth = s_VisibleWidth(delim, is_html);
02395 bool at_start = true;
02396
02397 ITERATE (list<string>, it, l) {
02398 SIZE_TYPE term_width = s_VisibleWidth(*it, is_html);
02399 if ( at_start ) {
02400 if (column + term_width <= width) {
02401 s += *it;
02402 column += term_width;
02403 at_start = false;
02404 } else {
02405
02406 Wrap(*it, width, arr, flags, prefix, pfx);
02407 pfx = prefix;
02408 s = *prefix;
02409 column = s_VisibleWidth(s, is_html);
02410 at_start = true;
02411 }
02412 } else if (column + delwidth + term_width <= width) {
02413 s += delim;
02414 s += *it;
02415 column += delwidth + term_width;
02416 at_start = false;
02417 } else {
02418
02419 arr.push_back(s);
02420 pfx = prefix;
02421 s = *prefix;
02422 column = s_VisibleWidth(s, is_html);
02423 at_start = true;
02424 --it;
02425 }
02426 }
02427
02428 arr.push_back(s);
02429 return arr;
02430 }
02431
02432
#if !defined(HAVE_STRDUP)
// Replacement for platforms without strdup(): returns a malloc()-ed copy
// of `str` (to be released with free()), or 0 if `str` is null or the
// allocation fails.
extern char* strdup(const char* str)
{
    if (str == 0) {
        return 0;
    }
    const size_t bytes = strlen(str) + 1;   // include the terminating NUL
    char* copy = (char*) malloc(bytes);
    if (copy != 0) {
        memcpy(copy, str, bytes);
    }
    return copy;
}
#endif
02444
02445
// URL-encoding table indexed by byte value (16 entries per row; the row
// comment gives the index of its first entry).  Letters, digits and
// ! $ ' ( ) * , - . _ map to themselves, space maps to "+", every other
// byte to its "%XX" form.
static const char s_Encode[256][4] = {
    /* 0x00 */ "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    /* 0x10 */ "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    /* 0x20 */ "+",   "!",   "%22", "%23", "$",   "%25", "%26", "'",   "(",   ")",   "*",   "%2B", ",",   "-",   ".",   "%2F",
    /* 0x30 */ "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    /* 0x40 */ "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",   "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    /* 0x50 */ "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",   "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "_",
    /* 0x60 */ "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",   "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    /* 0x70 */ "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",   "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    /* 0x80 */ "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    /* 0x90 */ "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    /* 0xA0 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    /* 0xB0 */ "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    /* 0xC0 */ "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    /* 0xD0 */ "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    /* 0xE0 */ "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    /* 0xF0 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02480
// URL-encoding table indexed by byte value (16 entries per row; the row
// comment gives the index of its first entry).  Only letters and digits
// map to themselves, space maps to "+", every other byte - punctuation
// marks included - to its "%XX" form.
static const char s_EncodeMarkChars[256][4] = {
    /* 0x00 */ "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    /* 0x10 */ "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    /* 0x20 */ "+",   "%21", "%22", "%23", "%24", "%25", "%26", "%27", "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
    /* 0x30 */ "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    /* 0x40 */ "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",   "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    /* 0x50 */ "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",   "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "%5F",
    /* 0x60 */ "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",   "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    /* 0x70 */ "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",   "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    /* 0x80 */ "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    /* 0x90 */ "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    /* 0xA0 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    /* 0xB0 */ "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    /* 0xC0 */ "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    /* 0xD0 */ "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    /* 0xE0 */ "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    /* 0xF0 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02515
// URL-encoding table indexed by byte value (16 entries per row; the row
// comment gives the index of its first entry).  Only letters and digits
// map to themselves; every other byte, space included, maps to its "%XX"
// form (no "+" for space).
static const char s_EncodePercentOnly[256][4] = {
    /* 0x00 */ "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    /* 0x10 */ "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    /* 0x20 */ "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
    /* 0x30 */ "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    /* 0x40 */ "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",   "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    /* 0x50 */ "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",   "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "%5F",
    /* 0x60 */ "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",   "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    /* 0x70 */ "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",   "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    /* 0x80 */ "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    /* 0x90 */ "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    /* 0xA0 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    /* 0xB0 */ "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    /* 0xC0 */ "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    /* 0xD0 */ "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    /* 0xE0 */ "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    /* 0xF0 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02550
// URL-encoding table for paths, indexed by byte value (16 entries per
// row; the row comment gives the index of its first entry).  Letters,
// digits and . / _ map to themselves, space maps to "+", every other byte
// to its "%XX" form.
static const char s_EncodePath[256][4] = {
    /* 0x00 */ "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    /* 0x10 */ "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    /* 0x20 */ "+",   "%21", "%22", "%23", "%24", "%25", "%26", "%27", "%28", "%29", "%2A", "%2B", "%2C", "%2D", ".",   "/",
    /* 0x30 */ "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    /* 0x40 */ "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",   "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    /* 0x50 */ "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",   "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "_",
    /* 0x60 */ "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",   "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    /* 0x70 */ "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",   "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    /* 0x80 */ "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    /* 0x90 */ "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    /* 0xA0 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    /* 0xB0 */ "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    /* 0xC0 */ "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    /* 0xD0 */ "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    /* 0xE0 */ "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    /* 0xF0 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02585
// Encoding table for the scheme part of a URI, indexed by byte value
// (16 entries per row; the row comment gives the index of its first
// entry).  Letters, digits and + - . map to themselves; every other byte,
// space included, maps to its "%XX" form.
static const char s_EncodeURIScheme[256][4] = {
    /* 0x00 */ "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    /* 0x10 */ "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    /* 0x20 */ "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", "%28", "%29", "%2A", "+",   "%2C", "-",   ".",   "%2F",
    /* 0x30 */ "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    /* 0x40 */ "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",   "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    /* 0x50 */ "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",   "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "%5F",
    /* 0x60 */ "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",   "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    /* 0x70 */ "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",   "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    /* 0x80 */ "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    /* 0x90 */ "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    /* 0xA0 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    /* 0xB0 */ "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    /* 0xC0 */ "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    /* 0xD0 */ "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    /* 0xE0 */ "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    /* 0xF0 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02620
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIUserinfo.
// Indexed by the unsigned byte value; each entry is either the byte itself
// (passed through) or its "%XX" escape.
// Passed through unchanged: 0-9, A-Z, a-z and  ! $ & ' ( ) - . : _ ~
static const char s_EncodeURIUserinfo[256][4] = {
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    "%20", "!", "%22", "%23", "$", "%25", "&", "'",
    "(", ")", "%2A", "%2B", "%2C", "-", ".", "%2F",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", "%3B", "%3C", "%3D", "%3E", "%3F",
    "%40", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
    "%60", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02655
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIHost.
// Indexed by the unsigned byte value; each entry is either the byte itself
// (passed through) or its "%XX" escape.
// Passed through unchanged: 0-9, A-Z, a-z and  ! $ & ' ( ) - . _ ~
// (unlike the userinfo table, ':' is escaped here).
static const char s_EncodeURIHost[256][4] = {
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    "%20", "!", "%22", "%23", "$", "%25", "&", "'",
    "(", ")", "%2A", "%2B", "%2C", "-", ".", "%2F",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    "%40", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
    "%60", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02690
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIPath.
// Indexed by the unsigned byte value; each entry is either the byte itself
// (passed through) or its "%XX" escape.
// Passed through unchanged: 0-9, A-Z, a-z and  ! $ & ' ( ) - . / : @ _ ~
static const char s_EncodeURIPath[256][4] = {
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    "%20", "!", "%22", "%23", "$", "%25", "&", "'",
    "(", ")", "%2A", "%2B", "%2C", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", "%3B", "%3C", "%3D", "%3E", "%3F",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
    "%60", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02725
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIQueryName.
// Indexed by the unsigned byte value; each entry is either the byte itself
// (passed through) or its "%XX" escape.
// Passed through unchanged: 0-9, A-Z, a-z and  ! $ & ' ( ) - . / : ? @ _ ~
// NOTE(review): '&' is passed through while '=' is escaped -- confirm this
// is intended for query names, where '&' commonly separates parameters.
static const char s_EncodeURIQueryName[256][4] = {
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    "%20", "!", "%22", "%23", "$", "%25", "&", "'",
    "(", ")", "%2A", "%2B", "%2C", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", "%3B", "%3C", "%3D", "%3E", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
    "%60", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02760
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIQueryValue.
// Indexed by the unsigned byte value; each entry is either the byte itself
// (passed through) or its "%XX" escape.
// Passed through unchanged: 0-9, A-Z, a-z and  ! $ & ' ( ) - . / : ? @ _ ~
// NOTE(review): '&' is passed through unescaped -- confirm this is intended
// for query values, where '&' commonly separates parameters.
static const char s_EncodeURIQueryValue[256][4] = {
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    "%20", "!", "%22", "%23", "$", "%25", "&", "'",
    "(", ")", "%2A", "%2B", "%2C", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", "%3B", "%3C", "%3D", "%3E", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
    "%60", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02795
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIFragment.
// Indexed by the unsigned byte value; each entry is either the byte itself
// (passed through) or its "%XX" escape.
// Passed through unchanged: 0-9, A-Z, a-z and  ! $ & ' ( ) - . / : ? @ _ ~
static const char s_EncodeURIFragment[256][4] = {
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    "%20", "!", "%22", "%23", "$", "%25", "&", "'",
    "(", ")", "%2A", "%2B", "%2C", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", "%3B", "%3C", "%3D", "%3E", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
    "%60", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
02830
02831 string NStr::URLEncode(const string& str, EUrlEncode flag)
02832 {
02833 SIZE_TYPE len = str.length();
02834 if ( !len ) {
02835 return kEmptyStr;
02836 }
02837
02838 const char (*encode_table)[4];
02839 switch (flag) {
02840 case eUrlEnc_SkipMarkChars:
02841 encode_table = s_Encode;
02842 break;
02843 case eUrlEnc_ProcessMarkChars:
02844 encode_table = s_EncodeMarkChars;
02845 break;
02846 case eUrlEnc_PercentOnly:
02847 encode_table = s_EncodePercentOnly;
02848 break;
02849 case eUrlEnc_Path:
02850 encode_table = s_EncodePath;
02851 break;
02852 case eUrlEnc_URIScheme:
02853 encode_table = s_EncodeURIScheme;
02854 break;
02855 case eUrlEnc_URIUserinfo:
02856 encode_table = s_EncodeURIUserinfo;
02857 break;
02858 case eUrlEnc_URIHost:
02859 encode_table = s_EncodeURIHost;
02860 break;
02861 case eUrlEnc_URIPath:
02862 encode_table = s_EncodeURIPath;
02863 break;
02864 case eUrlEnc_URIQueryName:
02865 encode_table = s_EncodeURIQueryName;
02866 break;
02867 case eUrlEnc_URIQueryValue:
02868 encode_table = s_EncodeURIQueryValue;
02869 break;
02870 case eUrlEnc_URIFragment:
02871 encode_table = s_EncodeURIFragment;
02872 break;
02873 case eUrlEnc_None:
02874 return str;
02875 default:
02876 _TROUBLE;
02877
02878 encode_table = 0;
02879 }
02880
02881 string dst;
02882 SIZE_TYPE pos;
02883 SIZE_TYPE dst_len = len;
02884 const unsigned char* cstr = (const unsigned char*)str.c_str();
02885 for (pos = 0; pos < len; pos++) {
02886 if (encode_table[cstr[pos]][0] == '%')
02887 dst_len += 2;
02888 }
02889 dst.reserve(dst_len + 1);
02890 dst.resize(dst_len);
02891
02892 SIZE_TYPE p = 0;
02893 for (pos = 0; pos < len; pos++, p++) {
02894 const char* subst = encode_table[cstr[pos]];
02895 if (*subst != '%') {
02896 dst[p] = *subst;
02897 } else {
02898 dst[p] = '%';
02899 dst[++p] = *(++subst);
02900 dst[++p] = *(++subst);
02901 }
02902 }
02903
02904 _ASSERT( p == dst_len );
02905 dst[dst_len] = '\0';
02906 return dst;
02907 }
02908
02909
02910 CStringUTF8 NStr::SQLEncode(const CStringUTF8& str) {
02911 SIZE_TYPE stringSize = str.size();
02912 CStringUTF8 result;
02913
02914 result.reserve(stringSize + 6);
02915 result.append(1, '\'');
02916 for (SIZE_TYPE i = 0; i < stringSize; i++) {
02917 char c = str[i];
02918 if (c == '\'')
02919 result.append(1, '\'');
02920 result.append(1, c);
02921 }
02922 result.append(1, '\'');
02923
02924 return result;
02925 }
02926
02927
02928 void s_URLDecode(const string& src, string& dst, NStr::EUrlDecode flag)
02929 {
02930 SIZE_TYPE len = src.length();
02931 if ( !len ) {
02932 dst.clear();
02933 return;
02934 }
02935 if (dst.length() < src.length()) {
02936 dst.resize(len);
02937 }
02938
02939 SIZE_TYPE pdst = 0;
02940 for (SIZE_TYPE psrc = 0; psrc < len; pdst++) {
02941 switch ( src[psrc] ) {
02942 case '%': {
02943
02944
02945
02946 if (psrc + 2 > len) {
02947 dst[pdst] = src[psrc++];
02948 } else {
02949 int n1 = NStr::HexChar(src[psrc+1]);
02950 int n2 = NStr::HexChar(src[psrc+2]);
02951 if (n1 < 0 || n1 > 15 || n2 < 0 || n2 > 15) {
02952 dst[pdst] = src[psrc++];
02953 } else {
02954 dst[pdst] = (n1 << 4) | n2;
02955 psrc += 3;
02956 }
02957 }
02958 break;
02959 }
02960 case '+': {
02961 dst[pdst] = (flag == NStr::eUrlDec_All) ? ' ' : '+';
02962 psrc++;
02963 break;
02964 }
02965 default:
02966 dst[pdst] = src[psrc++];
02967 }
02968 }
02969 if (pdst < len) {
02970 dst[pdst] = '\0';
02971 dst.resize(pdst);
02972 }
02973 }
02974
02975
02976 string NStr::URLDecode(const string& str, EUrlDecode flag)
02977 {
02978 string dst;
02979 s_URLDecode(str, dst, flag);
02980 return dst;
02981 }
02982
02983
// URL-decode 'str' in place by passing it to s_URLDecode() as both the
// source and the destination.
void NStr::URLDecodeInPlace(string& str, EUrlDecode flag)
{
    s_URLDecode(str, str, flag);
}
02988
02989
02990 bool NStr::NeedsURLEncoding(const string& str, EUrlEncode flag)
02991 {
02992 SIZE_TYPE len = str.length();
02993 if ( !len ) {
02994 return false;
02995 }
02996
02997 const char (*encode_table)[4];
02998 switch (flag) {
02999 case eUrlEnc_SkipMarkChars:
03000 encode_table = s_Encode;
03001 break;
03002 case eUrlEnc_ProcessMarkChars:
03003 encode_table = s_EncodeMarkChars;
03004 break;
03005 case eUrlEnc_PercentOnly:
03006 encode_table = s_EncodePercentOnly;
03007 break;
03008 case eUrlEnc_Path:
03009 encode_table = s_EncodePath;
03010 break;
03011 case eUrlEnc_None:
03012 return false;
03013 default:
03014 _TROUBLE;
03015
03016 encode_table = 0;
03017 }
03018
03019 const unsigned char* cstr = (const unsigned char*)str.c_str();
03020 for (SIZE_TYPE pos = 0; pos < len; pos++) {
03021 const char* subst = encode_table[cstr[pos]];
03022 if (*subst != cstr[pos]) {
03023 return true;
03024 }
03025 }
03026
03027 return false;
03028 }
03029
03030
03031 bool NStr::IsIPAddress(const string& ip)
03032 {
03033 const char* start = ip.c_str();
03034 const char* c = start;
03035 unsigned long val;
03036 int dots = 0;
03037
03038 for (;;) {
03039 char* e;
03040 if ( !isdigit((unsigned char)(*c)) )
03041 return false;
03042 errno = 0;
03043 val = strtoul(c, &e, 10);
03044 if (c == e || errno)
03045 return false;
03046 c = e;
03047 if (*c != '.')
03048 break;
03049 if (++dots > 3)
03050 return false;
03051 if (val > 255)
03052 return false;
03053 c++;
03054 }
03055
03056
03057
03058 if ((size_t)(c - start) != ip.size()) {
03059 return false;
03060 }
03061 return !*c && dots == 3 && val < 256;
03062 }
03063
03064
03065 namespace {
03066
// Predicate telling whether a character is a delimiter.
//
// TDelimiter is the delimiter description; operator() is only declared
// here and is provided by explicit specializations for the supported
// TDelimiter types.
// The object stores a reference, so the delimiter passed to the
// constructor must outlive the predicate.
template <typename TDelimiter>
class PDelimiter
{
private:
    const TDelimiter& delimiter;

public:
    // 'explicit': a delimiter value must not silently convert to a predicate.
    explicit PDelimiter(const TDelimiter& delim)
        : delimiter(delim)
    {}

    // Return true if 'tested_symbol' is a delimiter.
    bool operator()(char tested_symbol) const;
};
03080
03081
03082
03083
03084
03085
03086
03087
03088
03089
03090
03091
03092
03093 template <typename TComparator, typename TResult>
03094 TResult s_GetField(const CTempString& str,
03095 size_t field_no,
03096 const TComparator& delimiter,
03097 NStr::EMergeDelims merge)
03098 {
03099 const char* current_ptr = str.data();
03100 const char* end_ptr = current_ptr + str.length();
03101 size_t current_field = 0;
03102
03103
03104 for ( ; current_field != field_no; current_field++) {
03105 while (current_ptr < end_ptr && !delimiter(*current_ptr))
03106 current_ptr++;
03107
03108 if (merge == NStr::eMergeDelims) {
03109 while (current_ptr < end_ptr && delimiter(*current_ptr))
03110 current_ptr++;
03111 }
03112 else
03113 current_ptr++;
03114
03115 if (current_ptr >= end_ptr)
03116 return TResult();
03117 }
03118
03119 if (current_field != field_no)
03120 return TResult();
03121
03122
03123 const char* field_start = current_ptr;
03124 while (current_ptr < end_ptr && !delimiter(*current_ptr))
03125 current_ptr++;
03126
03127 return TResult(field_start, current_ptr - field_start);
03128 }
03129
03130
03131
// Single-character delimiter: 'c' is a delimiter when it equals the
// stored character.
template <>
bool PDelimiter<char>::operator() (char c) const
{
    return delimiter == c;
}
03137
// Delimiter set: 'c' is a delimiter when it occurs anywhere in the
// stored string.
template <>
bool PDelimiter<CTempString>::operator() (char c) const
{
    return delimiter.find(c) != NPOS;
}
03143 }
03144
03145
03146 string NStr::GetField(const CTempString& str,
03147 size_t field_no,
03148 const CTempString& delimiters,
03149 EMergeDelims merge)
03150 {
03151 return s_GetField<PDelimiter<CTempString>, string>
03152 (str,
03153 field_no,
03154 PDelimiter<CTempString>(delimiters),
03155 merge);
03156 }
03157
03158
03159 string NStr::GetField(const CTempString& str,
03160 size_t field_no,
03161 char delimiter,
03162 EMergeDelims merge)
03163 {
03164 return s_GetField<PDelimiter<char>, string>
03165 (str,
03166 field_no,
03167 PDelimiter<char>(delimiter),
03168 merge);
03169 }
03170
03171
03172 CTempString NStr::GetField_Unsafe(const CTempString& str,
03173 size_t field_no,
03174 const CTempString& delimiters,
03175 EMergeDelims merge)
03176 {
03177 return s_GetField<PDelimiter<CTempString>, CTempString>
03178 (str,
03179 field_no,
03180 PDelimiter<CTempString>(delimiters),
03181 merge);
03182 }
03183
03184
03185 CTempString NStr::GetField_Unsafe(const CTempString& str,
03186 size_t field_no,
03187 char delimiter,
03188 EMergeDelims merge)
03189 {
03190 return s_GetField<PDelimiter<char>, CTempString>
03191 (str,
03192 field_no,
03193 PDelimiter<char>(delimiter),
03194 merge);
03195 }
03196
03197
03198
03199
03200
03201
03202 SIZE_TYPE CStringUTF8::GetSymbolCount(void) const
03203 {
03204 SIZE_TYPE count = 0;
03205 for (const char* src = c_str(); *src; ++src, ++count) {
03206 SIZE_TYPE more = 0;
03207 bool good = x_EvalFirst(*src, more);
03208 while (more-- && good) {
03209 good = x_EvalNext(*(++src));
03210 }
03211 if ( !good ) {
03212 NCBI_THROW2(CStringException, eFormat,
03213 "String is not in UTF8 format",
03214 s_DiffPtr(src,c_str()));
03215 }
03216 }
03217 return count;
03218 }
03219
03220
03221 SIZE_TYPE CStringUTF8::GetValidSymbolCount(const char* src, SIZE_TYPE buf_size)
03222 {
03223 SIZE_TYPE count = 0, cur_size=0;
03224 for (; cur_size < buf_size && src && *src; ++src, ++count, ++cur_size) {
03225 SIZE_TYPE more = 0;
03226 bool good = x_EvalFirst(*src, more);
03227 while (more-- && good && ++cur_size < buf_size) {
03228 good = x_EvalNext(*(++src));
03229 }
03230 if ( !good ) {
03231 return count;
03232 }
03233 }
03234 return count;
03235 }
03236
03237
03238 SIZE_TYPE CStringUTF8::GetValidBytesCount(const char* src, SIZE_TYPE buf_size)
03239 {
03240 SIZE_TYPE count = 0;
03241 SIZE_TYPE cur_size = 0;
03242
03243 for (; cur_size < buf_size && src && *src; ++src, ++count, ++cur_size) {
03244 SIZE_TYPE more = 0;
03245 bool good = x_EvalFirst(*src, more);
03246 while (more-- && good && cur_size < buf_size) {
03247 good = x_EvalNext(*(++src));
03248 if (good) {
03249 ++cur_size;
03250 }
03251 }
03252 if ( !good ) {
03253 return cur_size;
03254 }
03255 }
03256 return cur_size;
03257 }
03258
03259
03260 string CStringUTF8::AsSingleByteString(EEncoding encoding,
03261 const char* substitute_on_error) const
03262 {
03263 string result;
03264 result.reserve( GetSymbolCount()+1 );
03265 for ( const char* src = c_str(); *src; ++src ) {
03266 TUnicodeSymbol sym = Decode( src );
03267 if (substitute_on_error) {
03268 try {
03269 result.append(1, SymbolToChar( sym, encoding));
03270 }
03271 catch (CStringException&) {
03272 result.append(substitute_on_error);
03273 }
03274 } else {
03275 result.append(1, SymbolToChar( sym, encoding));
03276 }
03277 }
03278 return result;
03279 }
03280
03281
// Guess the encoding of the NUL-terminated string 'src'.
//
// All four candidates start as possible and are ruled out while scanning:
//  - ascii:  any byte above 0x7F;
//  - iso1:   any byte in 0x80..0x9F;
//  - cp1252: any of the bytes 0x81, 0x8D, 0x8F, 0x90, 0x9D;
//  - utf8:   a byte rejected by x_EvalFirst() / x_EvalNext(), or a
//            multi-byte sequence still open at the end of the string.
// Completing a multi-byte UTF-8 sequence rules out all three single-byte
// candidates.
// The result is chosen in the order: ASCII; ISO 8859-1 or Windows-1252;
// UTF-8; otherwise eEncoding_Unknown.
EEncoding CStringUTF8::GuessEncoding( const char* src)
{
    // Continuation bytes still expected for the current UTF-8 sequence.
    SIZE_TYPE more = 0;
    bool cp1252, iso1, ascii, utf8;
    for (cp1252 = iso1 = ascii = utf8 = true; *src; ++src) {
        Uint1 ch = *src;
        // Set when 'ch' was accepted as a continuation byte, so that it is
        // not evaluated as a lead byte below.
        bool skip = false;
        if (more != 0) {
            if (x_EvalNext(ch)) {
                --more;
                if (more == 0) {
                    // A complete multi-byte sequence has been seen.
                    ascii = cp1252 = iso1 = false;
                }
                skip = true;
            } else {
                // Expected a continuation byte and got something else.
                more = 0;
                utf8 = false;
            }
        }
        if (ch > 0x7F) {
            ascii = false;
            if (ch < 0xA0) {
                iso1 = false;
                if (ch == 0x81 || ch == 0x8D || ch == 0x8F ||
                    ch == 0x90 || ch == 0x9D) {
                    cp1252 = false;
                }
            }
            // Evaluate as a UTF-8 lead byte; sets 'more' on success.
            if (!skip && utf8 && !x_EvalFirst(ch, more)) {
                utf8 = false;
            }
        }
    }
    // The string ended in the middle of a multi-byte sequence.
    if (more != 0) {
        utf8 = false;
    }
    if (ascii) {
        return eEncoding_Ascii;
    } else if (cp1252) {
        return iso1 ? eEncoding_ISO8859_1 : eEncoding_Windows_1252;
    } else if (utf8) {
        return eEncoding_UTF8;
    }
    return eEncoding_Unknown;
}
03327
03328
03329 bool CStringUTF8::MatchEncoding( const char* src, EEncoding encoding)
03330 {
03331 bool matches = false;
03332 EEncoding enc_src = GuessEncoding(src);
03333 switch ( enc_src ) {
03334 default:
03335 case eEncoding_Unknown:
03336 matches = false;
03337 break;
03338 case eEncoding_Ascii:
03339 matches = true;
03340 break;
03341 case eEncoding_UTF8:
03342 case eEncoding_Windows_1252:
03343 matches = (encoding == enc_src);
03344 break;
03345 case eEncoding_ISO8859_1:
03346 matches = (encoding == enc_src || encoding == eEncoding_Windows_1252);
03347 break;
03348 }
03349 return matches;
03350 }
03351
03352
03353
// Symbols for the Windows-1252 bytes 0x80..0x9F; entry [i] belongs to
// byte 0x80 + i (see CharToSymbol() and SymbolToChar()).
// The entries for bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D hold 0x003F ('?').
static const TUnicodeSymbol s_cp1252_table[] = {
    0x20AC, 0x003F, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x003F, 0x017D, 0x003F,
    0x003F, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x003F, 0x017E, 0x0178
};
03360
03361
03362 TUnicodeSymbol CStringUTF8::CharToSymbol(char c, EEncoding encoding)
03363 {
03364 Uint1 ch = c;
03365 switch (encoding)
03366 {
03367 case eEncoding_Unknown:
03368 case eEncoding_UTF8:
03369 NCBI_THROW2(CStringException, eBadArgs,
03370 "Unacceptable character encoding", 0);
03371 case eEncoding_Ascii:
03372 case eEncoding_ISO8859_1:
03373 break;
03374 case eEncoding_Windows_1252:
03375 if (ch > 0x7F && ch < 0xA0) {
03376 return s_cp1252_table[ ch - 0x80 ];
03377 }
03378 break;
03379 default:
03380 NCBI_THROW2(CStringException, eBadArgs,
03381 "Unsupported character encoding", 0);
03382 }
03383 return (TUnicodeSymbol)ch;
03384 }
03385
03386
03387 char CStringUTF8::SymbolToChar(TUnicodeSymbol cp, EEncoding encoding)
03388 {
03389 if( encoding == eEncoding_UTF8 || encoding == eEncoding_Unknown) {
03390 NCBI_THROW2(CStringException, eBadArgs,
03391 "Unacceptable character encoding", 0);
03392 }
03393 if ( cp <= 0xFF) {
03394 return (char)cp;
03395 }
03396 if ( encoding == eEncoding_Windows_1252 ) {
03397 for (Uint1 ch = 0x80; ch <= 0x9F; ++ch) {
03398 if (s_cp1252_table[ ch - 0x80 ] == cp) {
03399 return (char)ch;
03400 }
03401 }
03402 }
03403 if (cp > 0xFF) {
03404 NCBI_THROW2(CStringException, eConvert,
03405 "Failed to convert symbol to requested encoding", 0);
03406 }
03407 return (char)cp;
03408 }
03409
03410
03411 void CStringUTF8::x_Validate(void) const
03412 {
03413 if (!IsValid()) {
03414 NCBI_THROW2(CStringException, eBadArgs,
03415 "Source string is not in UTF8 format", 0);
03416 }
03417 }
03418
03419
03420 void CStringUTF8::x_AppendChar(TUnicodeSymbol c)
03421 {
03422 Uint4 ch = c;
03423 if (ch < 0x80) {
03424 append(1, Uint1(ch));
03425 }
03426 else if (ch < 0x800) {
03427 append(1, Uint1( (ch >> 6) | 0xC0));
03428 append(1, Uint1( (ch & 0x3F) | 0x80));
03429 } else if (ch < 0x10000) {
03430 append(1, Uint1( (ch >> 12) | 0xE0));
03431 append(1, Uint1(((ch >> 6) & 0x3F) | 0x80));
03432 append(1, Uint1(( ch & 0x3F) | 0x80));
03433 } else {
03434 append(1, Uint1( (ch >> 18) | 0xF0));
03435 append(1, Uint1(((ch >> 12) & 0x3F) | 0x80));
03436 append(1, Uint1(((ch >> 6) & 0x3F) | 0x80));
03437 append(1, Uint1( (ch & 0x3F) | 0x80));
03438 }
03439 }
03440
03441
03442 void CStringUTF8::x_Append(const char* src,
03443 EEncoding encoding, EValidate validate)
03444 {
03445 if (encoding == eEncoding_Unknown) {
03446 encoding = GuessEncoding(src);
03447 if (encoding == eEncoding_Unknown) {
03448 NCBI_THROW2(CStringException, eBadArgs,
03449 "Unable to guess the source string encoding", 0);
03450 }
03451 } else if (validate == eValidate) {
03452 if ( !MatchEncoding( src,encoding ) ) {
03453 NCBI_THROW2(CStringException, eBadArgs,
03454 "Source string does not match the declared encoding", 0);
03455 }
03456 }
03457 if (encoding == eEncoding_UTF8 || encoding == eEncoding_Ascii) {
03458 append(src);
03459 return;
03460 }
03461
03462 const char* srcBuf;
03463 SIZE_TYPE needed = 0;
03464 for (srcBuf = src; *srcBuf; ++srcBuf) {
03465 needed += x_BytesNeeded( CharToSymbol( *srcBuf,encoding ) );
03466 }
03467 if ( !needed ) {
03468 return;
03469 }
03470 reserve(max(capacity(),length()+needed+1));
03471 for (srcBuf = src; *srcBuf; ++srcBuf) {
03472 x_AppendChar( CharToSymbol( *srcBuf, encoding ) );
03473 }
03474 }
03475
03476
03477 SIZE_TYPE CStringUTF8::x_BytesNeeded(TUnicodeSymbol c)
03478 {
03479 Uint4 ch = c;
03480 if (ch < 0x80) {
03481 return 1;
03482 } else if (ch < 0x800) {
03483 return 2;
03484 } else if (ch < 0x10000) {
03485 return 3;
03486 }
03487 return 4;
03488 }
03489
03490
03491 bool CStringUTF8::x_EvalFirst(char ch, SIZE_TYPE& more)
03492 {
03493 more = 0;
03494 if ((ch & 0x80) != 0) {
03495 if ((ch & 0xE0) == 0xC0) {
03496 if ((ch & 0xFE) == 0xC0) {
03497
03498 return false;
03499 }
03500 more = 1;
03501 } else if ((ch & 0xF0) == 0xE0) {
03502 more = 2;
03503 } else if ((ch & 0xF8) == 0xF0) {
03504 if ((unsigned char)ch > (unsigned char)0xF4) {
03505
03506 return false;
03507 }
03508 more = 3;
03509 } else {
03510 return false;
03511 }
03512 }
03513 return true;
03514 }
03515
03516
03517 bool CStringUTF8::x_EvalNext(char ch)
03518 {
03519 return (ch & 0xC0) == 0x80;
03520 }
03521
03522
03523 TUnicodeSymbol CStringUTF8::Decode(const char*& src)
03524 {
03525 TUnicodeSymbol chRes;
03526 SIZE_TYPE more;
03527 Uint1 ch = *src;
03528 if ((ch & 0x80) == 0) {
03529 chRes = ch;
03530 more = 0;
03531 } else if ((ch & 0xE0) == 0xC0) {
03532 chRes = (ch & 0x1F);
03533 more = 1;
03534 } else if ((ch & 0xF0) == 0xE0) {
03535 chRes = (ch & 0x0F);
03536 more = 2;
03537 } else if ((ch & 0xF8) == 0xF0) {
03538 chRes = (ch & 0x07);
03539 more = 3;
03540 } else {
03541 NCBI_THROW2(CStringException, eBadArgs,
03542 "Source string is not in UTF8 format", 0);
03543 }
03544 while (more--) {
03545 ch = *(++src);
03546 if ((ch & 0xC0) != 0x80) {
03547 NCBI_THROW2(CStringException, eBadArgs,
03548 "Source string is not in UTF8 format", 0);
03549 }
03550 chRes = (chRes << 6) | (ch & 0x3F);
03551 }
03552 return chRes;
03553 }
03554
03555
03556 TUnicodeSymbol CStringUTF8::DecodeFirst(char ch, SIZE_TYPE& more)
03557 {
03558 TUnicodeSymbol chRes = 0;
03559 more = 0;
03560 if ((ch & 0x80) == 0) {
03561 chRes = ch;
03562 } else if ((ch & 0xE0) == 0xC0) {
03563 chRes = (ch & 0x1F);
03564 more = 1;
03565 } else if ((ch & 0xF0) == 0xE0) {
03566 chRes = (ch & 0x0F);
03567 more = 2;
03568 } else if ((ch & 0xF8) == 0xF0) {
03569 chRes = (ch & 0x07);
03570 more = 3;
03571 }
03572 return chRes;
03573 }
03574
03575
03576 TUnicodeSymbol CStringUTF8::DecodeNext(TUnicodeSymbol chU, char ch)
03577 {
03578 if ((ch & 0xC0) == 0x80) {
03579 return (chU << 6) | (ch & 0x3F);
03580 }
03581 return 0;
03582 }
03583
03584
03585 const char* CStringException::GetErrCodeString(void) const
03586 {
03587 switch (GetErrCode()) {
03588 case eConvert: return "eConvert";
03589 case eBadArgs: return "eBadArgs";
03590 case eFormat: return "eFormat";
03591 default: return CException::GetErrCodeString();
03592 }
03593 }
03594
03595
03596
03597
03598
03599
// Remember the URL-decoding flag that Decode() passes to NStr::URLDecode().
CStringDecoder_Url::CStringDecoder_Url(NStr::EUrlDecode flag)
    : m_Flag(flag)
{
}
03604
03605
// Return 'src' URL-decoded with the flag given at construction.
// The EStringType argument is not used.
string CStringDecoder_Url::Decode(const string& src,
                                  EStringType ) const
{
    return NStr::URLDecode(src, m_Flag);
}
03611
03612
// Remember the URL-encoding flag that Encode() passes to NStr::URLEncode().
CStringEncoder_Url::CStringEncoder_Url(NStr::EUrlEncode flag)
    : m_Flag(flag)
{
}
03617
03618
// Return 'src' URL-encoded with the flag given at construction.
// The EStringType argument is not used.
string CStringEncoder_Url::Encode(const string& src,
                                  EStringType ) const
{
    return NStr::URLEncode(src, m_Flag);
}
03624
03625
03626
03627
03628
// Construct from the original (unencoded) string 's'; all the work is
// done by SetString().
CEncodedString::CEncodedString(const string& s,
                               NStr::EUrlEncode flag)
{
    SetString(s, flag);
}
03634
03635
03636 void CEncodedString::SetString(const string& s,
03637 NStr::EUrlEncode flag)
03638 {
03639 m_Original = s;
03640 if ( NStr::NeedsURLEncoding(s, flag) ) {
03641 if ( m_Encoded.get() ) {
03642
03643 *m_Encoded = NStr::URLEncode(s, flag);
03644 }
03645 else {
03646 m_Encoded.reset(new string(NStr::URLEncode(s, flag)));
03647 }
03648 }
03649 else {
03650 m_Encoded.reset();
03651 }
03652 }
03653
03654
03655
03656
03657
03658
03659
03660
// Refer to 'len' characters starting at 'str + pos'.
// Only the pointer and the length are stored; 'pos' and 'len' are not
// checked here.
CTempString::CTempString(const char* str, size_type pos, size_type len)
    : m_String(str+pos), m_Length(len)
{
}
03665
03666
// Refer to the first 'len' characters of 'str'; 'len' is clamped to
// str.size().  Only the pointer and the length are stored.
CTempString::CTempString(const string& str, size_type len)
    : m_String(str.data()), m_Length(min(len, str.size()))
{
}
03671
03672
03673 END_NCBI_SCOPE
03674
03675