| 31 |
*/ |
*/ |
| 32 |
|
|
| 33 |
#include <ncbi_pch.hpp> |
#include <ncbi_pch.hpp> |
| 34 |
|
#include <common/ncbi_source_ver.h> |
| 35 |
#include <corelib/ncbistr.hpp> |
#include <corelib/ncbistr.hpp> |
| 36 |
#include <corelib/tempstr.hpp> |
#include <corelib/tempstr.hpp> |
|
#include <corelib/ncbi_limits.hpp> |
|
| 37 |
#include <corelib/ncbistr_util.hpp> |
#include <corelib/ncbistr_util.hpp> |
| 38 |
#include <corelib/error_codes.hpp> |
#include <corelib/error_codes.hpp> |
| 39 |
|
#include <corelib/ncbierror.hpp> |
| 40 |
|
#include <corelib/ncbifloat.h> |
| 41 |
#include <memory> |
#include <memory> |
| 42 |
|
#include <functional> |
| 43 |
#include <algorithm> |
#include <algorithm> |
| 44 |
#include <errno.h> |
#include <iterator> |
| 45 |
#include <stdio.h> |
#include <stdio.h> |
| 46 |
|
#include <locale.h> |
| 47 |
|
#include <math.h> |
| 48 |
|
|
| 49 |
|
|
| 50 |
#define NCBI_USE_ERRCODE_X Corelib_Util |
#define NCBI_USE_ERRCODE_X Corelib_Util |
| 51 |
|
|
| 52 |
|
|
| 53 |
BEGIN_NCBI_SCOPE |
BEGIN_NCBI_NAMESPACE; |
| 54 |
|
|
| 55 |
|
|
| 56 |
// Hex symbols (upt ot base 36) |
// Digits (up to base 36) |
| 57 |
static const char s_Hex[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
static const char kDigit[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
| 58 |
|
|
| 59 |
|
|
| 60 |
inline SIZE_TYPE s_DiffPtr(const char* end, const char* start) |
static inline |
| 61 |
|
SIZE_TYPE s_DiffPtr(const char* end, const char* start) |
| 62 |
{ |
{ |
| 63 |
return end ? (SIZE_TYPE)(end - start) : (SIZE_TYPE) 0; |
return end ? (SIZE_TYPE)(end - start) : (SIZE_TYPE) 0; |
| 64 |
} |
} |
| 65 |
|
|
| 66 |
const char *const kEmptyCStr = ""; |
const char *const kEmptyCStr = ""; |
| 67 |
|
|
| 68 |
|
#if defined(HAVE_WSTRING) |
| 69 |
|
const wchar_t *const kEmptyWCStr = L""; |
| 70 |
|
#endif |
| 71 |
|
|
| 72 |
|
|
| 73 |
|
extern const char* const kNcbiDevelopmentVersionString; |
| 74 |
|
const char* const kNcbiDevelopmentVersionString |
| 75 |
|
= "NCBI_DEVELOPMENT_VER_" NCBI_AS_STRING(NCBI_DEVELOPMENT_VER); |
| 76 |
|
|
| 77 |
|
#ifdef NCBI_PRODUCTION_VER |
| 78 |
|
extern const char* const kNcbiProductionVersionString; |
| 79 |
|
const char* const kNcbiProductionVersionString |
| 80 |
|
= "NCBI_PRODUCTION_VER_" NCBI_AS_STRING(NCBI_PRODUCTION_VER); |
| 81 |
|
#endif |
| 82 |
|
|
| 83 |
|
|
| 84 |
#if !defined(NCBI_OS_MSWIN) && !( defined(NCBI_OS_LINUX) && defined(NCBI_COMPILER_GCC) ) |
#if !defined(NCBI_OS_MSWIN) && !( defined(NCBI_OS_LINUX) && defined(NCBI_COMPILER_GCC) ) |
| 85 |
const string* CNcbiEmptyString::m_Str = 0; |
const string* CNcbiEmptyString::m_Str = 0; |
| 88 |
m_Str = &s_Str; |
m_Str = &s_Str; |
| 89 |
return s_Str; |
return s_Str; |
| 90 |
} |
} |
| 91 |
|
# ifdef HAVE_WSTRING |
| 92 |
|
const wstring* CNcbiEmptyWString::m_Str = 0; |
| 93 |
|
const wstring& CNcbiEmptyWString::FirstGet(void) { |
| 94 |
|
static const wstring s_Str = L""; |
| 95 |
|
m_Str = &s_Str; |
| 96 |
|
return s_Str; |
| 97 |
|
} |
| 98 |
|
# endif |
| 99 |
#endif |
#endif |
| 100 |
|
|
| 101 |
|
|
| 102 |
bool NStr::IsBlank(const string& str, SIZE_TYPE pos) |
bool NStr::IsBlank(const CTempString& str, SIZE_TYPE pos) |
| 103 |
{ |
{ |
| 104 |
SIZE_TYPE len = str.length(); |
SIZE_TYPE len = str.length(); |
| 105 |
for (SIZE_TYPE idx = pos; idx < len; ++idx) { |
for (SIZE_TYPE idx = pos; idx < len; ++idx) { |
| 111 |
} |
} |
| 112 |
|
|
| 113 |
|
|
| 114 |
int NStr::CompareCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, |
int NStr::CompareCase(const CTempString& str, SIZE_TYPE pos, SIZE_TYPE n, |
| 115 |
const char* pattern) |
const char* pattern) |
| 116 |
{ |
{ |
| 117 |
if (pos == NPOS || !n || str.length() <= pos) { |
if (pos == NPOS || !n || str.length() <= pos) { |
| 120 |
if ( !*pattern ) { |
if ( !*pattern ) { |
| 121 |
return 1; |
return 1; |
| 122 |
} |
} |
|
|
|
| 123 |
if (n == NPOS || n > str.length() - pos) { |
if (n == NPOS || n > str.length() - pos) { |
| 124 |
n = str.length() - pos; |
n = str.length() - pos; |
| 125 |
} |
} |
|
|
|
| 126 |
const char* s = str.data() + pos; |
const char* s = str.data() + pos; |
| 127 |
while (n && *pattern && *s == *pattern) { |
while (n && *pattern && *s == *pattern) { |
| 128 |
s++; pattern++; n--; |
s++; pattern++; n--; |
| 129 |
} |
} |
|
|
|
| 130 |
if (n == 0) { |
if (n == 0) { |
| 131 |
return *pattern ? -1 : 0; |
return *pattern ? -1 : 0; |
| 132 |
} |
} |
|
|
|
| 133 |
return *s - *pattern; |
return *s - *pattern; |
| 134 |
} |
} |
| 135 |
|
|
| 136 |
|
|
| 137 |
int NStr::CompareNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, |
|
| 138 |
const char* pattern) |
int NStr::CompareCase(const CTempString& str, SIZE_TYPE pos, SIZE_TYPE n, |
| 139 |
|
const CTempString& pattern) |
| 140 |
{ |
{ |
| 141 |
if (pos == NPOS || !n || str.length() <= pos) { |
if (pos == NPOS || !n || str.length() <= pos) { |
| 142 |
return *pattern ? -1 : 0; |
return pattern.empty() ? 0 : -1; |
| 143 |
} |
} |
| 144 |
if ( !*pattern ) { |
if (pattern.empty()) { |
| 145 |
return 1; |
return 1; |
| 146 |
} |
} |
|
|
|
| 147 |
if (n == NPOS || n > str.length() - pos) { |
if (n == NPOS || n > str.length() - pos) { |
| 148 |
n = str.length() - pos; |
n = str.length() - pos; |
| 149 |
} |
} |
| 150 |
|
SIZE_TYPE n_cmp = n; |
| 151 |
|
if (n_cmp > pattern.length()) { |
| 152 |
|
n_cmp = pattern.length(); |
| 153 |
|
} |
| 154 |
const char* s = str.data() + pos; |
const char* s = str.data() + pos; |
| 155 |
while (n && *pattern && |
const char* p = pattern.data(); |
| 156 |
tolower((unsigned char)(*s)) == |
while (n_cmp && *s == *p) { |
| 157 |
tolower((unsigned char)(*pattern))) { |
s++; p++; n_cmp--; |
|
s++; pattern++; n--; |
|
| 158 |
} |
} |
| 159 |
|
|
| 160 |
if (n == 0) { |
if (n_cmp == 0) { |
| 161 |
return *pattern ? -1 : 0; |
if (n == pattern.length()) |
| 162 |
|
return 0; |
| 163 |
|
return n > pattern.length() ? 1 : -1; |
| 164 |
} |
} |
| 165 |
|
|
| 166 |
return tolower((unsigned char)(*s)) - tolower((unsigned char)(*pattern)); |
return *s - *p; |
| 167 |
} |
} |
| 168 |
|
|
| 169 |
|
|
| 170 |
int NStr::CompareCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, |
int NStr::CompareNocase(const CTempString& str, SIZE_TYPE pos, SIZE_TYPE n, |
| 171 |
const string& pattern) |
const char* pattern) |
| 172 |
{ |
{ |
| 173 |
if (pos == NPOS || !n || str.length() <= pos) { |
if (pos == NPOS || !n || str.length() <= pos) { |
| 174 |
return pattern.empty() ? 0 : -1; |
return *pattern ? -1 : 0; |
| 175 |
} |
} |
| 176 |
if (pattern.empty()) { |
if ( !*pattern ) { |
| 177 |
return 1; |
return 1; |
| 178 |
} |
} |
| 179 |
|
|
| 181 |
n = str.length() - pos; |
n = str.length() - pos; |
| 182 |
} |
} |
| 183 |
|
|
|
SIZE_TYPE n_cmp = n; |
|
|
if (n_cmp > pattern.length()) { |
|
|
n_cmp = pattern.length(); |
|
|
} |
|
| 184 |
const char* s = str.data() + pos; |
const char* s = str.data() + pos; |
| 185 |
const char* p = pattern.data(); |
while (n && *pattern && |
| 186 |
while (n_cmp && *s == *p) { |
tolower((unsigned char)(*s)) == |
| 187 |
s++; p++; n_cmp--; |
tolower((unsigned char)(*pattern))) { |
| 188 |
|
s++; pattern++; n--; |
| 189 |
} |
} |
| 190 |
|
|
| 191 |
if (n_cmp == 0) { |
if (n == 0) { |
| 192 |
if (n == pattern.length()) |
return *pattern ? -1 : 0; |
|
return 0; |
|
|
return n > pattern.length() ? 1 : -1; |
|
| 193 |
} |
} |
| 194 |
|
|
| 195 |
return *s - *p; |
return tolower((unsigned char)(*s)) - tolower((unsigned char)(*pattern)); |
| 196 |
} |
} |
| 197 |
|
|
| 198 |
|
|
| 199 |
int NStr::CompareNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, |
int NStr::CompareNocase(const CTempString& str, SIZE_TYPE pos, SIZE_TYPE n, |
| 200 |
const string& pattern) |
const CTempString& pattern) |
| 201 |
{ |
{ |
| 202 |
if (pos == NPOS || !n || str.length() <= pos) { |
if (pos == NPOS || !n || str.length() <= pos) { |
| 203 |
return pattern.empty() ? 0 : -1; |
return pattern.empty() ? 0 : -1; |
| 232 |
|
|
| 233 |
|
|
| 234 |
// NOTE: This code is used also in the CDirEntry::MatchesMask. |
// NOTE: This code is used also in the CDirEntry::MatchesMask. |
| 235 |
|
bool NStr::MatchesMask(CTempString str, CTempString mask, ECase use_case) |
|
bool NStr::MatchesMask(const char* str, const char* mask, ECase use_case) |
|
| 236 |
{ |
{ |
| 237 |
char c; |
char c; |
| 238 |
bool infinite = true; |
for ( size_t str_pos = 0, mask_pos = 0; ; ) { |
|
|
|
|
while (infinite) { |
|
| 239 |
// Analyze symbol in mask |
// Analyze symbol in mask |
| 240 |
switch ( c = *mask++ ) { |
switch ( c = mask[mask_pos++] ) { |
|
|
|
| 241 |
case '\0': |
case '\0': |
| 242 |
return *str == '\0'; |
return str[str_pos] == '\0'; |
| 243 |
|
|
| 244 |
case '?': |
case '?': |
| 245 |
if (*str == '\0') { |
if (str[str_pos] == '\0') { |
| 246 |
return false; |
return false; |
| 247 |
} |
} |
| 248 |
++str; |
++str_pos; |
| 249 |
break; |
break; |
| 250 |
|
|
| 251 |
case '*': |
case '*': |
| 252 |
c = *mask; |
c = mask[mask_pos]; |
| 253 |
// Collapse multiple stars |
// Collapse multiple stars |
| 254 |
while ( c == '*' ) { |
while ( c == '*' ) { |
| 255 |
c = *++mask; |
c = mask[++mask_pos]; |
| 256 |
} |
} |
| 257 |
if (c == '\0') { |
if (c == '\0') { |
| 258 |
return true; |
return true; |
| 259 |
} |
} |
| 260 |
// General case, use recursion |
// General case, use recursion |
| 261 |
while ( *str ) { |
while ( str[str_pos] ) { |
| 262 |
if (MatchesMask(str, mask, use_case)) { |
if ( MatchesMask(str.substr(str_pos), |
| 263 |
|
mask.substr(mask_pos), |
| 264 |
|
use_case) ) { |
| 265 |
return true; |
return true; |
| 266 |
} |
} |
| 267 |
++str; |
++str_pos; |
| 268 |
} |
} |
| 269 |
return false; |
return false; |
| 270 |
|
|
| 271 |
default: |
default: |
| 272 |
// Compare nonpattern character in mask and name |
// Compare nonpattern character in mask and name |
| 273 |
char s = *str++; |
char s = str[str_pos++]; |
| 274 |
if (use_case == eNocase) { |
if (use_case == NStr::eNocase) { |
| 275 |
c = tolower((unsigned char) c); |
c = tolower((unsigned char) c); |
| 276 |
s = tolower((unsigned char) s); |
s = tolower((unsigned char) s); |
| 277 |
} |
} |
| 323 |
} |
} |
| 324 |
|
|
| 325 |
|
|
| 326 |
int NStr::StringToNumeric(const string& str) |
int NStr::StringToNonNegativeInt(const string& str) |
| 327 |
{ |
{ |
| 328 |
if ( str.empty() || |
int& errno_ref = errno; |
| 329 |
(!isdigit((unsigned char)(*str.begin())) & (*str.begin() != '+')) ) { |
if ( str.empty() ) { |
| 330 |
errno = EINVAL; |
CNcbiError::SetErrno(errno_ref = EINVAL, str); |
| 331 |
|
return -1; |
| 332 |
|
} |
| 333 |
|
char ch = str[0]; |
| 334 |
|
if ( !isdigit((unsigned char)ch) && (ch != '+') ) { |
| 335 |
|
CNcbiError::SetErrno(errno_ref = EINVAL, str); |
| 336 |
return -1; |
return -1; |
| 337 |
} |
} |
| 338 |
char* endptr = 0; |
char* endptr = 0; |
| 339 |
const char* begptr = str.c_str(); |
const char* begptr = str.c_str(); |
| 340 |
errno = 0; |
errno_ref = 0; |
| 341 |
unsigned long value = strtoul(begptr, &endptr, 10); |
unsigned long value = strtoul(begptr, &endptr, 10); |
| 342 |
if ( errno || !endptr || endptr == begptr || |
if ( errno_ref ) { |
| 343 |
value > (unsigned long) kMax_Int || *endptr ) { |
CNcbiError::SetErrno(errno_ref, str); |
| 344 |
if ( !errno ) { |
return -1; |
| 345 |
errno = !endptr || endptr == begptr || *endptr ? EINVAL : ERANGE; |
} |
| 346 |
|
else if ( !endptr || endptr == begptr || *endptr ) { |
| 347 |
|
CNcbiError::SetErrno(errno_ref = EINVAL, str); |
| 348 |
|
return -1; |
| 349 |
} |
} |
| 350 |
|
else if ( value > (unsigned long) kMax_Int ) { |
| 351 |
|
CNcbiError::SetErrno(errno_ref = ERANGE, str); |
| 352 |
return -1; |
return -1; |
| 353 |
} |
} |
| 354 |
|
errno_ref = 0; |
| 355 |
return (int) value; |
return (int) value; |
| 356 |
} |
} |
| 357 |
|
|
| 358 |
|
|
| 359 |
#define S2N_CONVERT_ERROR(to_type, msg, errcode, force_errno, delta) \ |
/// @internal |
| 360 |
if (flags & NStr::fConvErr_NoThrow) { \ |
// Access to errno is slow on some platforms, because it uses TLS to store a value |
| 361 |
if ( force_errno ) \ |
// for each thread. This guard class can set an errno value in string to numeric |
| 362 |
errno = 0; \ |
// conversion functions only once before exit, and when necessary. |
| 363 |
if ( !errno ) \ |
class CS2N_Guard |
| 364 |
errno = errcode; \ |
{ |
| 365 |
/* ignore previosly converted value -- always return zero */ \ |
public: |
| 366 |
return 0; \ |
CS2N_Guard(NStr::TStringToNumFlags flags, bool skip_if_zero) : |
| 367 |
|
m_NoThrow((flags & NStr::fConvErr_NoThrow) > 0), |
| 368 |
|
m_SkipIfZero(skip_if_zero), |
| 369 |
|
m_Errno(0) |
| 370 |
|
{} |
| 371 |
|
~CS2N_Guard(void) { |
| 372 |
|
// Is the guard being used around code that has already set errno? |
| 373 |
|
// If the error code is not defined here, do not even try to check/set it. |
| 374 |
|
if (!m_SkipIfZero || m_Errno) { |
| 375 |
|
errno = m_Errno; |
| 376 |
|
} |
| 377 |
|
} |
| 378 |
|
void Set(int errcode) { m_Errno = errcode; } |
| 379 |
|
int Errno(void) const { return m_Errno;} |
| 380 |
|
// Says that we want to throw an exception, do not set errno in this case |
| 381 |
|
void Throw(void) { m_SkipIfZero = true; m_Errno = 0; } |
| 382 |
|
bool ToThrow(void) const { return !m_NoThrow; } |
| 383 |
|
// Auxiliary function to create a message about conversion error |
| 384 |
|
// to specified type. It doesn't have any relation to the guard itself, |
| 385 |
|
// but can help to save on the amount of code in calling macro. |
| 386 |
|
string Message(const CTempString& str, const char* to_type, const CTempString& msg); |
| 387 |
|
|
| 388 |
|
private: |
| 389 |
|
bool m_NoThrow; // do not throw an exception if TRUE |
| 390 |
|
bool m_SkipIfZero; // do not set errno if TRUE and m_Errno == 0 |
| 391 |
|
int m_Errno; // errno value to set |
| 392 |
|
}; |
| 393 |
|
|
| 394 |
|
string CS2N_Guard::Message(const CTempString& str, const char* to_type, const CTempString& msg) |
| 395 |
|
{ |
| 396 |
|
string s; |
| 397 |
|
s.reserve(str.length() + msg.length() + 50); |
| 398 |
|
s += "Cannot convert string '"; |
| 399 |
|
s += str; |
| 400 |
|
s += "' to "; |
| 401 |
|
s += to_type; |
| 402 |
|
if ( !msg.empty() ) { |
| 403 |
|
s += ", "; |
| 404 |
|
s += msg; |
| 405 |
|
} |
| 406 |
|
return s; |
| 407 |
|
} |
| 408 |
|
|
| 409 |
|
/// Regular guard |
| 410 |
|
#define S2N_CONVERT_GUARD(flags) \ |
| 411 |
|
CS2N_Guard err_guard(flags, false) |
| 412 |
|
|
| 413 |
|
// This guard can be used against the code that already set an errno. |
| 414 |
|
// If the error code is not defined, the guard does not even try to check/set it (even to zero). |
| 415 |
|
#define S2N_CONVERT_GUARD_EX(flags) \ |
| 416 |
|
CS2N_Guard err_guard(flags, true) |
| 417 |
|
|
| 418 |
|
#define S2N_CONVERT_ERROR(to_type, msg, errcode, pos) \ |
| 419 |
|
do { \ |
| 420 |
|
err_guard.Set(errcode); \ |
| 421 |
|
if ( err_guard.ToThrow() ) { \ |
| 422 |
|
err_guard.Throw(); \ |
| 423 |
|
NCBI_THROW2(CStringException, eConvert, \ |
| 424 |
|
err_guard.Message(str, #to_type, msg), pos); \ |
| 425 |
} else { \ |
} else { \ |
| 426 |
CTempString str_tmp(str); \ |
CNcbiError::SetErrno(err_guard.Errno(), \ |
| 427 |
CTempString msg_tmp(msg); \ |
err_guard.Message(str, #to_type, msg)); \ |
| 428 |
string smsg; \ |
return 0; \ |
|
smsg.reserve(str_tmp.length() + msg_tmp.length() + 50); \ |
|
|
smsg += "Cannot convert string '"; \ |
|
|
smsg += str; \ |
|
|
smsg += "' to " #to_type; \ |
|
|
if ( !msg_tmp.empty() ) { \ |
|
|
smsg += ", "; \ |
|
|
smsg += msg; \ |
|
|
} \ |
|
|
NCBI_THROW2(CStringException, eConvert, smsg, delta); \ |
|
| 429 |
} \ |
} \ |
| 430 |
|
} while (false) |
| 431 |
|
|
| 432 |
|
|
| 433 |
#define S2N_CONVERT_ERROR_INVAL(to_type) \ |
#define S2N_CONVERT_ERROR_INVAL(to_type) \ |
| 434 |
S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, true, pos) |
S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, pos) |
| 435 |
|
|
| 436 |
#define S2N_CONVERT_ERROR_RADIX(to_type, msg) \ |
#define S2N_CONVERT_ERROR_RADIX(to_type, msg) \ |
| 437 |
S2N_CONVERT_ERROR(to_type, msg, EINVAL, true, pos) |
S2N_CONVERT_ERROR(to_type, msg, EINVAL, pos) |
| 438 |
|
|
| 439 |
#define S2N_CONVERT_ERROR_OVERFLOW(to_type) \ |
#define S2N_CONVERT_ERROR_OVERFLOW(to_type) \ |
| 440 |
S2N_CONVERT_ERROR(to_type, "overflow",ERANGE, true, pos) |
S2N_CONVERT_ERROR(to_type, "overflow",ERANGE, pos) |
| 441 |
|
|
| 442 |
#define CHECK_ENDPTR(to_type) \ |
#define CHECK_ENDPTR(to_type) \ |
| 443 |
if ( str[pos] ) { \ |
if ( str[pos] ) { \ |
| 444 |
S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, true, pos); \ |
S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, pos); \ |
|
} |
|
|
|
|
|
#define CHECK_RANGE(nmin, nmax, to_type) \ |
|
|
if ( errno || value < nmin || value > nmax ) { \ |
|
|
S2N_CONVERT_ERROR(to_type, "overflow", ERANGE, false, 0); \ |
|
| 445 |
} |
} |
| 446 |
|
|
| 447 |
#define CHECK_RANGE_U(nmax, to_type) \ |
#define CHECK_ENDPTR_SIZE(to_type) \ |
| 448 |
if ( errno || value > nmax ) { \ |
if ( pos < size ) { \ |
| 449 |
S2N_CONVERT_ERROR(to_type, "overflow", ERANGE, false, 0); \ |
S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, pos); \ |
| 450 |
} |
} |
| 451 |
|
|
| 452 |
#define CHECK_COMMAS \ |
#define CHECK_COMMAS \ |
| 473 |
|
|
| 474 |
int NStr::StringToInt(const CTempString& str, TStringToNumFlags flags,int base) |
int NStr::StringToInt(const CTempString& str, TStringToNumFlags flags,int base) |
| 475 |
{ |
{ |
| 476 |
errno = 0; |
S2N_CONVERT_GUARD_EX(flags); |
| 477 |
Int8 value = StringToInt8(str, flags, base); |
Int8 value = StringToInt8(str, flags, base); |
| 478 |
CHECK_RANGE(kMin_Int, kMax_Int, int); |
if ( value < kMin_Int || value > kMax_Int) { |
| 479 |
|
S2N_CONVERT_ERROR(int, "overflow", ERANGE, 0); |
| 480 |
|
} |
| 481 |
return (int) value; |
return (int) value; |
| 482 |
} |
} |
| 483 |
|
|
| 485 |
unsigned int |
unsigned int |
| 486 |
NStr::StringToUInt(const CTempString& str, TStringToNumFlags flags, int base) |
NStr::StringToUInt(const CTempString& str, TStringToNumFlags flags, int base) |
| 487 |
{ |
{ |
| 488 |
errno = 0; |
S2N_CONVERT_GUARD_EX(flags); |
| 489 |
Uint8 value = StringToUInt8(str, flags, base); |
Uint8 value = StringToUInt8(str, flags, base); |
| 490 |
CHECK_RANGE_U(kMax_UInt, unsigned int); |
if ( value > kMax_UInt ) { |
| 491 |
|
S2N_CONVERT_ERROR(unsigned int, "overflow", ERANGE, 0); |
| 492 |
|
} |
| 493 |
return (unsigned int) value; |
return (unsigned int) value; |
| 494 |
} |
} |
| 495 |
|
|
| 497 |
long NStr::StringToLong(const CTempString& str, TStringToNumFlags flags, |
long NStr::StringToLong(const CTempString& str, TStringToNumFlags flags, |
| 498 |
int base) |
int base) |
| 499 |
{ |
{ |
| 500 |
errno = 0; |
S2N_CONVERT_GUARD_EX(flags); |
| 501 |
Int8 value = StringToInt8(str, flags, base); |
Int8 value = StringToInt8(str, flags, base); |
| 502 |
CHECK_RANGE(kMin_Long, kMax_Long, long); |
if ( value < kMin_Long || value > kMax_Long) { |
| 503 |
|
S2N_CONVERT_ERROR(long, "overflow", ERANGE, 0); |
| 504 |
|
} |
| 505 |
return (long) value; |
return (long) value; |
| 506 |
} |
} |
| 507 |
|
|
| 509 |
unsigned long |
unsigned long |
| 510 |
NStr::StringToULong(const CTempString& str, TStringToNumFlags flags, int base) |
NStr::StringToULong(const CTempString& str, TStringToNumFlags flags, int base) |
| 511 |
{ |
{ |
| 512 |
errno = 0; |
S2N_CONVERT_GUARD_EX(flags); |
| 513 |
Uint8 value = StringToUInt8(str, flags, base); |
Uint8 value = StringToUInt8(str, flags, base); |
| 514 |
CHECK_RANGE_U(kMax_ULong, long); |
if ( value > kMax_ULong ) { |
| 515 |
|
S2N_CONVERT_ERROR(unsigned long, "overflow", ERANGE, 0); |
| 516 |
|
} |
| 517 |
return (unsigned long) value; |
return (unsigned long) value; |
| 518 |
} |
} |
| 519 |
|
|
| 520 |
|
|
| 521 |
/// @internal |
/// @internal |
| 522 |
// Check that symbol 'ch' is good symbol for number with radix 'base'. |
// Check that symbol 'ch' is good symbol for number with radix 'base'. |
| 523 |
|
static inline |
| 524 |
bool s_IsGoodCharForRadix(char ch, int base, int* value = 0) |
bool s_IsGoodCharForRadix(char ch, int base, int* value = 0) |
| 525 |
{ |
{ |
| 526 |
|
if ( base <= 10 ) { |
| 527 |
|
// shortcut for most frequent case |
| 528 |
|
int delta = ch-'0'; |
| 529 |
|
if ( unsigned(delta) < unsigned(base) ) { |
| 530 |
|
if ( value ) { |
| 531 |
|
*value = delta; |
| 532 |
|
} |
| 533 |
|
return true; |
| 534 |
|
} |
| 535 |
|
return false; |
| 536 |
|
} |
| 537 |
if (!isalnum((unsigned char) ch)) { |
if (!isalnum((unsigned char) ch)) { |
| 538 |
return false; |
return false; |
| 539 |
} |
} |
| 560 |
eSkipSpacesOnly // spaces only |
eSkipSpacesOnly // spaces only |
| 561 |
}; |
}; |
| 562 |
|
|
| 563 |
|
static inline |
| 564 |
|
bool s_IsDecimalPoint(unsigned char ch, NStr::TStringToNumFlags flags) |
| 565 |
|
{ |
| 566 |
|
if ( ch != '.' && ch != ',') { |
| 567 |
|
return false; |
| 568 |
|
} |
| 569 |
|
if (flags & NStr::fDecimalPosix) { |
| 570 |
|
return ch == '.'; |
| 571 |
|
} |
| 572 |
|
else if (flags & NStr::fDecimalPosixOrLocal) { |
| 573 |
|
return ch == '.' || ch == ','; |
| 574 |
|
} |
| 575 |
|
struct lconv* conv = localeconv(); |
| 576 |
|
return ch == *(conv->decimal_point); |
| 577 |
|
} |
| 578 |
|
|
| 579 |
|
static inline |
| 580 |
void s_SkipAllowedSymbols(const CTempString& str, |
void s_SkipAllowedSymbols(const CTempString& str, |
| 581 |
SIZE_TYPE& pos, |
SIZE_TYPE& pos, |
| 582 |
ESkipMode skip_mode) |
ESkipMode skip_mode, |
| 583 |
|
NStr::TStringToNumFlags flags) |
| 584 |
{ |
{ |
| 585 |
if (skip_mode == eSkipAll) { |
if (skip_mode == eSkipAll) { |
| 586 |
pos = str.length(); |
pos = str.length(); |
| 587 |
return; |
return; |
| 588 |
} |
} |
| 589 |
|
|
| 590 |
|
for ( SIZE_TYPE len = str.length(); pos < len; ++pos ) { |
| 591 |
unsigned char ch = str[pos]; |
unsigned char ch = str[pos]; |
| 592 |
while ( ch ) { |
if ( isdigit(ch) || ch == '+' || ch == '-' || s_IsDecimalPoint(ch,flags) ) { |
|
if ( isdigit(ch) || ch == '+' || ch == '-' || ch == '.' ) { |
|
| 593 |
break; |
break; |
| 594 |
} |
} |
| 595 |
if ( (skip_mode == eSkipSpacesOnly) && !isspace(ch) ) { |
if ( (skip_mode == eSkipSpacesOnly) && !isspace(ch) ) { |
| 596 |
break; |
break; |
| 597 |
} |
} |
|
ch = str[++pos]; |
|
| 598 |
} |
} |
| 599 |
} |
} |
| 600 |
|
|
| 603 |
// of the string. Update 'base' value. |
// of the string. Update 'base' value. |
| 604 |
// Update 'ptr' to current position in the string. |
// Update 'ptr' to current position in the string. |
| 605 |
|
|
| 606 |
|
static inline |
| 607 |
bool s_CheckRadix(const CTempString& str, SIZE_TYPE& pos, int& base) |
bool s_CheckRadix(const CTempString& str, SIZE_TYPE& pos, int& base) |
| 608 |
{ |
{ |
| 609 |
|
if ( base == 10 || base == 8 ) { |
| 610 |
|
// shortcut for most frequent case |
| 611 |
|
return true; |
| 612 |
|
} |
| 613 |
// Check base |
// Check base |
| 614 |
if ( base < 0 || base == 1 || base > 36 ) { |
if ( base < 0 || base == 1 || base > 36 ) { |
| 615 |
return false; |
return false; |
| 640 |
int base) |
int base) |
| 641 |
{ |
{ |
| 642 |
_ASSERT(flags == 0 || flags > 32); |
_ASSERT(flags == 0 || flags > 32); |
| 643 |
|
S2N_CONVERT_GUARD(flags); |
| 644 |
|
|
| 645 |
// Current position in the string |
// Current position in the string |
| 646 |
SIZE_TYPE pos = 0; |
SIZE_TYPE pos = 0; |
| 649 |
if (flags & fAllowLeadingSymbols) { |
if (flags & fAllowLeadingSymbols) { |
| 650 |
bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces); |
bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces); |
| 651 |
s_SkipAllowedSymbols(str, pos, |
s_SkipAllowedSymbols(str, pos, |
| 652 |
spaces ? eSkipSpacesOnly : eSkipAllAllowed); |
spaces ? eSkipSpacesOnly : eSkipAllAllowed, flags); |
| 653 |
} |
} |
| 654 |
// Determine sign |
// Determine sign |
| 655 |
bool sign = false; |
bool sign = false; |
| 666 |
} |
} |
| 667 |
break; |
break; |
| 668 |
} |
} |
| 669 |
|
SIZE_TYPE pos0 = pos; |
| 670 |
// Check radix base |
// Check radix base |
| 671 |
if ( !s_CheckRadix(str, pos, base) ) { |
if ( !s_CheckRadix(str, pos, base) ) { |
| 672 |
S2N_CONVERT_ERROR_RADIX(Int8, "bad numeric base '" + |
S2N_CONVERT_ERROR_RADIX(Int8, "bad numeric base '" + |
| 675 |
|
|
| 676 |
// Begin conversion |
// Begin conversion |
| 677 |
Int8 n = 0; |
Int8 n = 0; |
| 678 |
Int8 limdiv = kMax_I8 / base; |
Int8 limdiv = base==10? kMax_I8 / 10: kMax_I8 / base; |
| 679 |
Int8 limoff = kMax_I8 % base + (sign ? 1 : 0); |
Int8 limoff = (base==10? kMax_I8 % 10: kMax_I8 % base) + (sign ? 1 : 0); |
| 680 |
|
|
| 681 |
// Number of symbols between two commas. '-1' means -- no comma yet. |
// Number of symbols between two commas. '-1' means -- no comma yet. |
| 682 |
int comma = -1; |
int comma = -1; |
| 683 |
SIZE_TYPE numpos = pos; |
SIZE_TYPE numpos = pos; |
| 684 |
|
|
| 685 |
errno = 0; |
while (char ch = str[pos]) { |
|
while (str[pos]) { |
|
|
char ch = str[pos]; |
|
| 686 |
int delta; // corresponding numeric value of 'ch' |
int delta; // corresponding numeric value of 'ch' |
| 687 |
|
|
| 688 |
// Check on possible commas |
// Check on possible commas |
| 692 |
break; |
break; |
| 693 |
} |
} |
| 694 |
// Overflow check |
// Overflow check |
| 695 |
if ( n > limdiv || (n == limdiv && delta > limoff) ) { |
if ( n >= limdiv && (n > limdiv || delta > limoff) ) { |
| 696 |
S2N_CONVERT_ERROR_OVERFLOW(Int8); |
S2N_CONVERT_ERROR_OVERFLOW(Int8); |
| 697 |
} |
} |
| 698 |
n *= base; |
n *= base; |
| 701 |
} |
} |
| 702 |
|
|
| 703 |
// Last checks |
// Last checks |
| 704 |
if ( !pos || ((comma >= 0) && (comma != 3)) ) { |
if ( pos == pos0 || ((comma >= 0) && (comma != 3)) ) { |
| 705 |
S2N_CONVERT_ERROR_INVAL(Int8); |
S2N_CONVERT_ERROR_INVAL(Int8); |
| 706 |
} |
} |
| 707 |
// Skip allowed trailing symbols |
// Skip allowed trailing symbols |
| 708 |
if (flags & fAllowTrailingSymbols) { |
if (flags & fAllowTrailingSymbols) { |
| 709 |
bool spaces = ((flags & fAllowTrailingSymbols) == |
bool spaces = ((flags & fAllowTrailingSymbols) == |
| 710 |
fAllowTrailingSpaces); |
fAllowTrailingSpaces); |
| 711 |
s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll); |
s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll, flags); |
| 712 |
} |
} |
| 713 |
// Assign sign before the end pointer check |
// Assign sign before the end pointer check |
| 714 |
n = sign ? -n : n; |
n = sign ? -n : n; |
| 721 |
TStringToNumFlags flags, int base) |
TStringToNumFlags flags, int base) |
| 722 |
{ |
{ |
| 723 |
_ASSERT(flags == 0 || flags > 32); |
_ASSERT(flags == 0 || flags > 32); |
| 724 |
|
S2N_CONVERT_GUARD(flags); |
| 725 |
|
|
| 726 |
|
const TStringToNumFlags slow_flags = |
| 727 |
|
fMandatorySign|fAllowCommas|fAllowLeadingSymbols|fAllowTrailingSymbols; |
| 728 |
|
|
| 729 |
|
if ( base == 10 && (flags & slow_flags) == 0 ) { |
| 730 |
|
// fast conversion |
| 731 |
|
|
| 732 |
// Current position in the string |
// Current position in the string |
| 733 |
SIZE_TYPE pos = 0; |
CTempString::const_iterator ptr = str.begin(), end = str.end(); |
| 734 |
|
|
| 735 |
|
// Determine sign |
| 736 |
|
if ( ptr != end && *ptr == '+' ) { |
| 737 |
|
++ptr; |
| 738 |
|
} |
| 739 |
|
if ( ptr == end ) { |
| 740 |
|
S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, ptr-str.begin()); |
| 741 |
|
} |
| 742 |
|
|
| 743 |
|
// Begin conversion |
| 744 |
|
Uint8 n = 0; |
| 745 |
|
|
| 746 |
|
const Uint8 limdiv = kMax_UI8/10; |
| 747 |
|
const int limoff = int(kMax_UI8 % 10); |
| 748 |
|
|
| 749 |
|
do { |
| 750 |
|
char ch = *ptr; |
| 751 |
|
int delta = ch - '0'; |
| 752 |
|
if ( unsigned(delta) >= 10 ) { |
| 753 |
|
S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, ptr-str.begin()); |
| 754 |
|
} |
| 755 |
|
// Overflow check |
| 756 |
|
if ( n >= limdiv && (n > limdiv || delta > limoff) ) { |
| 757 |
|
S2N_CONVERT_ERROR(Uint8, kEmptyStr, ERANGE, ptr-str.begin()); |
| 758 |
|
} |
| 759 |
|
n = n*10+delta; |
| 760 |
|
} while ( ++ptr != end ); |
| 761 |
|
|
| 762 |
|
return n; |
| 763 |
|
} |
| 764 |
|
|
| 765 |
|
// Current position in the string |
| 766 |
|
SIZE_TYPE pos = 0, size = str.size(); |
| 767 |
|
|
| 768 |
// Skip allowed leading symbols |
// Skip allowed leading symbols |
| 769 |
if (flags & fAllowLeadingSymbols) { |
if (flags & fAllowLeadingSymbols) { |
| 770 |
bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces); |
bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces); |
| 771 |
s_SkipAllowedSymbols(str, pos, |
s_SkipAllowedSymbols(str, pos, |
| 772 |
spaces ? eSkipSpacesOnly : eSkipAllAllowed); |
spaces ? eSkipSpacesOnly : eSkipAllAllowed, flags); |
| 773 |
} |
} |
| 774 |
// Determine sign |
// Determine sign |
| 775 |
if (str[pos] == '+') { |
if (str[pos] == '+') { |
| 779 |
S2N_CONVERT_ERROR_INVAL(Uint8); |
S2N_CONVERT_ERROR_INVAL(Uint8); |
| 780 |
} |
} |
| 781 |
} |
} |
| 782 |
|
SIZE_TYPE pos0 = pos; |
| 783 |
|
|
| 784 |
|
// Begin conversion |
| 785 |
|
Uint8 n = 0; |
| 786 |
// Check radix base |
// Check radix base |
| 787 |
if ( !s_CheckRadix(str, pos, base) ) { |
if ( !s_CheckRadix(str, pos, base) ) { |
| 788 |
S2N_CONVERT_ERROR_RADIX(Uint8, "bad numeric base '" + |
S2N_CONVERT_ERROR_RADIX(Uint8, "bad numeric base '" + |
| 789 |
NStr::IntToString(base) + "'"); |
NStr::IntToString(base) + "'"); |
| 790 |
} |
} |
| 791 |
|
|
|
// Begin conversion |
|
|
Uint8 n = 0; |
|
| 792 |
Uint8 limdiv = kMax_UI8 / base; |
Uint8 limdiv = kMax_UI8 / base; |
| 793 |
int limoff = int(kMax_UI8 % base); |
int limoff = int(kMax_UI8 % base); |
| 794 |
|
|
| 796 |
int comma = -1; |
int comma = -1; |
| 797 |
SIZE_TYPE numpos = pos; |
SIZE_TYPE numpos = pos; |
| 798 |
|
|
| 799 |
errno = 0; |
while (char ch = str[pos]) { |
|
while (str[pos]) { |
|
|
char ch = str[pos]; |
|
| 800 |
int delta; // corresponding numeric value of 'ch' |
int delta; // corresponding numeric value of 'ch' |
| 801 |
|
|
| 802 |
// Check on possible commas |
// Check on possible commas |
| 806 |
break; |
break; |
| 807 |
} |
} |
| 808 |
// Overflow check |
// Overflow check |
| 809 |
if (n > limdiv || (n == limdiv && delta > limoff)) { |
if ( n >= limdiv && (n > limdiv || delta > limoff) ) { |
| 810 |
S2N_CONVERT_ERROR_OVERFLOW(Uint8); |
S2N_CONVERT_ERROR_OVERFLOW(Uint8); |
| 811 |
} |
} |
| 812 |
n *= base; |
n *= base; |
| 815 |
} |
} |
| 816 |
|
|
| 817 |
// Last checks |
// Last checks |
| 818 |
if ( !pos || ((comma >= 0) && (comma != 3)) ) { |
if ( pos == pos0 || ((comma >= 0) && (comma != 3)) ) { |
| 819 |
S2N_CONVERT_ERROR_INVAL(Uint8); |
S2N_CONVERT_ERROR_INVAL(Uint8); |
| 820 |
} |
} |
| 821 |
// Skip allowed trailing symbols |
// Skip allowed trailing symbols |
| 822 |
if (flags & fAllowTrailingSymbols) { |
if (flags & fAllowTrailingSymbols) { |
| 823 |
bool spaces = ((flags & fAllowTrailingSymbols) == |
bool spaces = ((flags & fAllowTrailingSymbols) == |
| 824 |
fAllowTrailingSpaces); |
fAllowTrailingSpaces); |
| 825 |
s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll); |
s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll, flags); |
| 826 |
} |
} |
| 827 |
CHECK_ENDPTR(Uint8); |
CHECK_ENDPTR_SIZE(Uint8); |
| 828 |
return n; |
return n; |
| 829 |
} |
} |
| 830 |
|
|
| 831 |
|
|
| 832 |
double NStr::StringToDouble(const CTempString& str, TStringToNumFlags flags) |
double NStr::StringToDoublePosix(const char* ptr, char** endptr) |
| 833 |
|
{ |
| 834 |
|
S2N_CONVERT_GUARD(NStr::fConvErr_NoThrow); |
| 835 |
|
|
| 836 |
|
const char* start = ptr; |
| 837 |
|
char c = *ptr++; |
| 838 |
|
|
| 839 |
|
// skip leading blanks |
| 840 |
|
while ( isspace((unsigned char)c) ) { |
| 841 |
|
c = *ptr++; |
| 842 |
|
} |
| 843 |
|
|
| 844 |
|
// short-cut - single digit |
| 845 |
|
if ( !*ptr && c >= '0' && c <= '9' ) { |
| 846 |
|
if (endptr) { |
| 847 |
|
*endptr = (char*)ptr; |
| 848 |
|
} |
| 849 |
|
return c-'0'; |
| 850 |
|
} |
| 851 |
|
|
| 852 |
|
int sign = 0; |
| 853 |
|
if ( c == '-' ) { |
| 854 |
|
sign = -1; |
| 855 |
|
c = *ptr++; |
| 856 |
|
} |
| 857 |
|
else if ( c == '+' ) { |
| 858 |
|
sign = +1; |
| 859 |
|
c = *ptr++; |
| 860 |
|
} |
| 861 |
|
|
| 862 |
|
bool dot = false, expn = false, anydigits = false; |
| 863 |
|
int digits = 0, dot_position = 0; |
| 864 |
|
unsigned int first=0, second=0, first_mul=1; |
| 865 |
|
long double second_mul = NCBI_CONST_LONGDOUBLE(1.), |
| 866 |
|
third = NCBI_CONST_LONGDOUBLE(0.); |
| 867 |
|
|
| 868 |
|
// up to exponent |
| 869 |
|
for ( ; ; c = *ptr++ ) { |
| 870 |
|
if (c >= '0' && c <= '9') { |
| 871 |
|
// digits: accumulate |
| 872 |
|
c -= '0'; |
| 873 |
|
anydigits = true; |
| 874 |
|
++digits; |
| 875 |
|
if (first == 0) { |
| 876 |
|
first = c; |
| 877 |
|
if ( first == 0 ) { |
| 878 |
|
// omit leading zeros |
| 879 |
|
--digits; |
| 880 |
|
if (dot) { |
| 881 |
|
--dot_position; |
| 882 |
|
} |
| 883 |
|
} |
| 884 |
|
} else if (digits <= 9) { |
| 885 |
|
// first 9 digits come to 'first' |
| 886 |
|
first = first*10 + c; |
| 887 |
|
} else if (digits <= 18) { |
| 888 |
|
// next 9 digits come to 'second' |
| 889 |
|
first_mul *= 10; |
| 890 |
|
second = second*10 + c; |
| 891 |
|
} else { |
| 892 |
|
// other digits come to 'third' |
| 893 |
|
second_mul *= NCBI_CONST_LONGDOUBLE(10.); |
| 894 |
|
third = third * NCBI_CONST_LONGDOUBLE(10.) + c; |
| 895 |
|
} |
| 896 |
|
} |
| 897 |
|
else if (c == '.') { |
| 898 |
|
// dot |
| 899 |
|
// if second dot, stop |
| 900 |
|
if (dot) { |
| 901 |
|
--ptr; |
| 902 |
|
break; |
| 903 |
|
} |
| 904 |
|
dot_position = digits; |
| 905 |
|
dot = true; |
| 906 |
|
} |
| 907 |
|
else if (c == 'e' || c == 'E') { |
| 908 |
|
// if exponent, stop |
| 909 |
|
if (!anydigits) { |
| 910 |
|
--ptr; |
| 911 |
|
break; |
| 912 |
|
} |
| 913 |
|
expn = true; |
| 914 |
|
break; |
| 915 |
|
} |
| 916 |
|
else { |
| 917 |
|
--ptr; |
| 918 |
|
if (!anydigits) { |
| 919 |
|
if ( !dot && (c == 'n' || c == 'N') && |
| 920 |
|
NStr::strncasecmp(ptr,"nan",3)==0) { |
| 921 |
|
if (endptr) { |
| 922 |
|
*endptr = (char*)(ptr+3); |
| 923 |
|
} |
| 924 |
|
return HUGE_VAL/HUGE_VAL; /* NCBI_FAKE_WARNING */ |
| 925 |
|
} |
| 926 |
|
if ( (c == 'i' || c == 'I') ) { |
| 927 |
|
if ( NStr::strncasecmp(ptr,"inf",3)==0) { |
| 928 |
|
ptr += 3; |
| 929 |
|
if ( NStr::strncasecmp(ptr,"inity",5)==0) { |
| 930 |
|
ptr += 5; |
| 931 |
|
} |
| 932 |
|
if (endptr) { |
| 933 |
|
*endptr = (char*)ptr; |
| 934 |
|
} |
| 935 |
|
return sign < 0 ? -HUGE_VAL : HUGE_VAL; |
| 936 |
|
} |
| 937 |
|
} |
| 938 |
|
} |
| 939 |
|
break; |
| 940 |
|
} |
| 941 |
|
} |
| 942 |
|
// if no digits, stop now - error |
| 943 |
|
if (!anydigits) { |
| 944 |
|
if (endptr) { |
| 945 |
|
*endptr = (char*)start; |
| 946 |
|
} |
| 947 |
|
err_guard.Set(EINVAL); |
| 948 |
|
return 0.; |
| 949 |
|
} |
| 950 |
|
int exponent = dot ? dot_position - digits : 0; |
| 951 |
|
|
| 952 |
|
// read exponent |
| 953 |
|
if (expn && *ptr) { |
| 954 |
|
int expvalue = 0; |
| 955 |
|
bool expsign = false, expnegate= false; |
| 956 |
|
int expdigits= 0; |
| 957 |
|
for( ; ; ++ptr) { |
| 958 |
|
c = *ptr; |
| 959 |
|
// sign: should be no digits at this point |
| 960 |
|
if (c == '-' || c == '+') { |
| 961 |
|
// if there was sign or digits, stop |
| 962 |
|
if (expsign || expdigits) { |
| 963 |
|
break; |
| 964 |
|
} |
| 965 |
|
expsign = true; |
| 966 |
|
expnegate = c == '-'; |
| 967 |
|
} |
| 968 |
|
// digits: accumulate |
| 969 |
|
else if (c >= '0' && c <= '9') { |
| 970 |
|
++expdigits; |
| 971 |
|
int newexpvalue = expvalue*10 + (c-'0'); |
| 972 |
|
if (newexpvalue > expvalue) { |
| 973 |
|
expvalue = newexpvalue; |
| 974 |
|
} |
| 975 |
|
} |
| 976 |
|
else { |
| 977 |
|
break; |
| 978 |
|
} |
| 979 |
|
} |
| 980 |
|
// if no digits, rollback |
| 981 |
|
if (!expdigits) { |
| 982 |
|
// rollback sign |
| 983 |
|
if (expsign) { |
| 984 |
|
--ptr; |
| 985 |
|
} |
| 986 |
|
// rollback exponent |
| 987 |
|
if (expn) { |
| 988 |
|
--ptr; |
| 989 |
|
} |
| 990 |
|
} |
| 991 |
|
else { |
| 992 |
|
exponent = expnegate ? exponent - expvalue : exponent + expvalue; |
| 993 |
|
} |
| 994 |
|
} |
| 995 |
|
long double ret; |
| 996 |
|
if ( first_mul > 1 ) { |
| 997 |
|
_ASSERT(first); |
| 998 |
|
ret = ((long double)first * first_mul + second)* second_mul + third; |
| 999 |
|
} |
| 1000 |
|
else { |
| 1001 |
|
_ASSERT(first_mul == 1); |
| 1002 |
|
_ASSERT(second == 0); |
| 1003 |
|
_ASSERT(second_mul == 1); |
| 1004 |
|
_ASSERT(third == 0); |
| 1005 |
|
ret = first; |
| 1006 |
|
} |
| 1007 |
|
// calculate exponent |
| 1008 |
|
if ( first && exponent ) { |
| 1009 |
|
// multiply by power of 10 only non-zero mantissa |
| 1010 |
|
if (exponent > 2*DBL_MAX_10_EXP) { |
| 1011 |
|
ret = HUGE_VAL; |
| 1012 |
|
err_guard.Set(ERANGE); |
| 1013 |
|
} else if (exponent < 2*DBL_MIN_10_EXP) { |
| 1014 |
|
ret = 0.; |
| 1015 |
|
err_guard.Set(ERANGE); |
| 1016 |
|
} else { |
| 1017 |
|
if ( exponent > 0 ) { |
| 1018 |
|
static const double mul1[16] = { |
| 1019 |
|
1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, |
| 1020 |
|
1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 |
| 1021 |
|
}; |
| 1022 |
|
ret *= mul1[exponent&15]; |
| 1023 |
|
if ( exponent >>= 4 ) { |
| 1024 |
|
static const long double mul2[16] = { |
| 1025 |
|
NCBI_CONST_LONGDOUBLE(1e0), |
| 1026 |
|
NCBI_CONST_LONGDOUBLE(1e16), |
| 1027 |
|
NCBI_CONST_LONGDOUBLE(1e32), |
| 1028 |
|
NCBI_CONST_LONGDOUBLE(1e48), |
| 1029 |
|
NCBI_CONST_LONGDOUBLE(1e64), |
| 1030 |
|
NCBI_CONST_LONGDOUBLE(1e80), |
| 1031 |
|
NCBI_CONST_LONGDOUBLE(1e96), |
| 1032 |
|
NCBI_CONST_LONGDOUBLE(1e112), |
| 1033 |
|
NCBI_CONST_LONGDOUBLE(1e128), |
| 1034 |
|
NCBI_CONST_LONGDOUBLE(1e144), |
| 1035 |
|
NCBI_CONST_LONGDOUBLE(1e160), |
| 1036 |
|
NCBI_CONST_LONGDOUBLE(1e176), |
| 1037 |
|
NCBI_CONST_LONGDOUBLE(1e192), |
| 1038 |
|
NCBI_CONST_LONGDOUBLE(1e208), |
| 1039 |
|
NCBI_CONST_LONGDOUBLE(1e224), |
| 1040 |
|
NCBI_CONST_LONGDOUBLE(1e240) |
| 1041 |
|
}; |
| 1042 |
|
ret *= mul2[exponent&15]; |
| 1043 |
|
for ( exponent >>= 4; exponent; --exponent ) { |
| 1044 |
|
ret *= NCBI_CONST_LONGDOUBLE(1e256); |
| 1045 |
|
} |
| 1046 |
|
} |
| 1047 |
|
if (!finite(double(ret))) { |
| 1048 |
|
err_guard.Set(ERANGE); |
| 1049 |
|
} |
| 1050 |
|
} |
| 1051 |
|
else { |
| 1052 |
|
exponent = -exponent; |
| 1053 |
|
static const long double mul1[16] = { |
| 1054 |
|
NCBI_CONST_LONGDOUBLE(1e-0), |
| 1055 |
|
NCBI_CONST_LONGDOUBLE(1e-1), |
| 1056 |
|
NCBI_CONST_LONGDOUBLE(1e-2), |
| 1057 |
|
NCBI_CONST_LONGDOUBLE(1e-3), |
| 1058 |
|
NCBI_CONST_LONGDOUBLE(1e-4), |
| 1059 |
|
NCBI_CONST_LONGDOUBLE(1e-5), |
| 1060 |
|
NCBI_CONST_LONGDOUBLE(1e-6), |
| 1061 |
|
NCBI_CONST_LONGDOUBLE(1e-7), |
| 1062 |
|
NCBI_CONST_LONGDOUBLE(1e-8), |
| 1063 |
|
NCBI_CONST_LONGDOUBLE(1e-9), |
| 1064 |
|
NCBI_CONST_LONGDOUBLE(1e-10), |
| 1065 |
|
NCBI_CONST_LONGDOUBLE(1e-11), |
| 1066 |
|
NCBI_CONST_LONGDOUBLE(1e-12), |
| 1067 |
|
NCBI_CONST_LONGDOUBLE(1e-13), |
| 1068 |
|
NCBI_CONST_LONGDOUBLE(1e-14), |
| 1069 |
|
NCBI_CONST_LONGDOUBLE(1e-15) |
| 1070 |
|
}; |
| 1071 |
|
ret *= mul1[exponent&15]; |
| 1072 |
|
if ( exponent >>= 4 ) { |
| 1073 |
|
static const long double mul2[16] = { |
| 1074 |
|
NCBI_CONST_LONGDOUBLE(1e-0), |
| 1075 |
|
NCBI_CONST_LONGDOUBLE(1e-16), |
| 1076 |
|
NCBI_CONST_LONGDOUBLE(1e-32), |
| 1077 |
|
NCBI_CONST_LONGDOUBLE(1e-48), |
| 1078 |
|
NCBI_CONST_LONGDOUBLE(1e-64), |
| 1079 |
|
NCBI_CONST_LONGDOUBLE(1e-80), |
| 1080 |
|
NCBI_CONST_LONGDOUBLE(1e-96), |
| 1081 |
|
NCBI_CONST_LONGDOUBLE(1e-112), |
| 1082 |
|
NCBI_CONST_LONGDOUBLE(1e-128), |
| 1083 |
|
NCBI_CONST_LONGDOUBLE(1e-144), |
| 1084 |
|
NCBI_CONST_LONGDOUBLE(1e-160), |
| 1085 |
|
NCBI_CONST_LONGDOUBLE(1e-176), |
| 1086 |
|
NCBI_CONST_LONGDOUBLE(1e-192), |
| 1087 |
|
NCBI_CONST_LONGDOUBLE(1e-208), |
| 1088 |
|
NCBI_CONST_LONGDOUBLE(1e-224), |
| 1089 |
|
NCBI_CONST_LONGDOUBLE(1e-240) |
| 1090 |
|
}; |
| 1091 |
|
ret *= mul2[exponent&15]; |
| 1092 |
|
for ( exponent >>= 4; exponent; --exponent ) { |
| 1093 |
|
ret *= NCBI_CONST_LONGDOUBLE(1e-256); |
| 1094 |
|
} |
| 1095 |
|
} |
| 1096 |
|
if ( ret < DBL_MIN ) { |
| 1097 |
|
err_guard.Set(ERANGE); |
| 1098 |
|
} |
| 1099 |
|
} |
| 1100 |
|
} |
| 1101 |
|
} |
| 1102 |
|
if ( sign < 0 ) { |
| 1103 |
|
ret = -ret; |
| 1104 |
|
} |
| 1105 |
|
// done |
| 1106 |
|
if (endptr) { |
| 1107 |
|
*endptr = (char*)ptr; |
| 1108 |
|
} |
| 1109 |
|
return ret; |
| 1110 |
|
} |
| 1111 |
|
|
| 1112 |
|
|
| 1113 |
|
/// @internal |
| 1114 |
|
static double s_StringToDouble(const char* str, size_t size, |
| 1115 |
|
NStr::TStringToNumFlags flags) |
| 1116 |
{ |
{ |
| 1117 |
_ASSERT(flags == 0 || flags > 32); |
_ASSERT(flags == 0 || flags > 32); |
| 1118 |
|
_ASSERT(str[size] == '\0'); |
| 1119 |
|
if ((flags & NStr::fDecimalPosix) && (flags & NStr::fDecimalPosixOrLocal)) { |
| 1120 |
|
NCBI_THROW2(CStringException, eBadArgs, |
| 1121 |
|
"NStr::StringToDouble(): mutually exclusive flags specified",0); |
| 1122 |
|
} |
| 1123 |
|
S2N_CONVERT_GUARD_EX(flags); |
| 1124 |
|
|
| 1125 |
// Current position in the string |
// Current position in the string |
| 1126 |
SIZE_TYPE pos = 0; |
SIZE_TYPE pos = 0; |
| 1127 |
|
|
| 1128 |
// Skip allowed leading symbols |
// Skip allowed leading symbols |
| 1129 |
if (flags & fAllowLeadingSymbols) { |
if (flags & NStr::fAllowLeadingSymbols) { |
| 1130 |
bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces); |
bool spaces = ((flags & NStr::fAllowLeadingSymbols) == |
| 1131 |
s_SkipAllowedSymbols(str, pos, |
NStr::fAllowLeadingSpaces); |
| 1132 |
spaces ? eSkipSpacesOnly : eSkipAllAllowed); |
s_SkipAllowedSymbols(CTempString(str, size), pos, |
| 1133 |
|
spaces ? eSkipSpacesOnly : eSkipAllAllowed, flags); |
| 1134 |
} |
} |
| 1135 |
// Check mandatory sign |
// Check mandatory sign |
| 1136 |
if (flags & fMandatorySign) { |
if (flags & NStr::fMandatorySign) { |
| 1137 |
switch (str[pos]) { |
switch (str[pos]) { |
| 1138 |
case '-': |
case '-': |
| 1139 |
case '+': |
case '+': |
| 1144 |
} |
} |
| 1145 |
// For consistency make additional check on incorrect leading symbols. |
// For consistency make additional check on incorrect leading symbols. |
| 1146 |
// Because strtod() may just skip such symbols. |
// Because strtod() may just skip such symbols. |
| 1147 |
if (!(flags & fAllowLeadingSymbols)) { |
if (!(flags & NStr::fAllowLeadingSymbols)) { |
| 1148 |
char c = str[pos]; |
char c = str[pos]; |
| 1149 |
if ( !isdigit((unsigned int)c) && c != '.' && c != '-' && c != '+') { |
if ( !isdigit((unsigned int)c) && !s_IsDecimalPoint(c,flags) && c != '-' && c != '+') { |
| 1150 |
S2N_CONVERT_ERROR_INVAL(double); |
S2N_CONVERT_ERROR_INVAL(double); |
| 1151 |
} |
} |
| 1152 |
} |
} |
| 1153 |
|
|
| 1154 |
// Conversion |
// Conversion |
| 1155 |
string s; |
int& errno_ref = errno; |
| 1156 |
str.Copy(s, 0, str.size()); |
errno_ref = 0; |
| 1157 |
|
|
| 1158 |
char* endptr = 0; |
char* endptr = 0; |
| 1159 |
const char* begptr = s.c_str() + pos; |
const char* begptr = str + pos; |
| 1160 |
|
|
| 1161 |
errno = 0; |
double n; |
| 1162 |
double n = strtod(begptr, &endptr); |
if (flags & NStr::fDecimalPosix) { |
| 1163 |
if ( errno || !endptr || endptr == begptr ) { |
n = NStr::StringToDoublePosix(begptr, &endptr); |
| 1164 |
S2N_CONVERT_ERROR(double, kEmptyStr, EINVAL, false, |
} else { |
| 1165 |
s_DiffPtr(endptr, begptr) + pos); |
n = strtod(begptr, &endptr); |
| 1166 |
} |
} |
| 1167 |
if ( *(endptr - 1) != '.' && *endptr == '.' ) { |
if (flags & NStr::fDecimalPosixOrLocal) { |
| 1168 |
// Only a single dot at the end of line is allowed |
char* endptr2 = 0; |
| 1169 |
if (endptr == strchr(begptr, '.')) { |
double n2 = NStr::StringToDoublePosix(begptr, &endptr2); |
| 1170 |
endptr++; |
if (!endptr || (endptr2 && endptr2 > endptr)) { |
| 1171 |
|
n = n2; |
| 1172 |
|
endptr = endptr2; |
| 1173 |
} |
} |
| 1174 |
} |
} |
| 1175 |
|
if ( !endptr || endptr == begptr ) { |
| 1176 |
|
S2N_CONVERT_ERROR(double, kEmptyStr, EINVAL, s_DiffPtr(endptr, begptr) + pos); |
| 1177 |
|
} |
| 1178 |
|
if ( errno_ref ) { |
| 1179 |
|
S2N_CONVERT_ERROR(double, kEmptyStr, errno_ref, s_DiffPtr(endptr, begptr) + pos); |
| 1180 |
|
} |
| 1181 |
pos += s_DiffPtr(endptr, begptr); |
pos += s_DiffPtr(endptr, begptr); |
| 1182 |
|
|
| 1183 |
// Skip allowed trailing symbols |
// Skip allowed trailing symbols |
| 1184 |
if (flags & fAllowTrailingSymbols) { |
if (flags & NStr::fAllowTrailingSymbols) { |
| 1185 |
bool spaces = ((flags & fAllowTrailingSymbols) == |
bool spaces = ((flags & NStr::fAllowTrailingSymbols) == |
| 1186 |
fAllowTrailingSpaces); |
NStr::fAllowTrailingSpaces); |
| 1187 |
s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll); |
s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll, flags); |
| 1188 |
} |
} |
| 1189 |
CHECK_ENDPTR(double); |
CHECK_ENDPTR(double); |
| 1190 |
return n; |
return n; |
| 1191 |
} |
} |
| 1192 |
|
|
| 1193 |
|
|
| 1194 |
|
double NStr::StringToDoubleEx(const char* str, size_t size, |
| 1195 |
|
TStringToNumFlags flags) |
| 1196 |
|
{ |
| 1197 |
|
return s_StringToDouble(str, size, flags); |
| 1198 |
|
} |
| 1199 |
|
|
| 1200 |
|
|
| 1201 |
|
double NStr::StringToDouble(const CTempStringEx& str, TStringToNumFlags flags) |
| 1202 |
|
{ |
| 1203 |
|
size_t size = str.size(); |
| 1204 |
|
if ( str.HasZeroAtEnd() ) { |
| 1205 |
|
// string has zero at the end already |
| 1206 |
|
return s_StringToDouble(str.data(), size, flags); |
| 1207 |
|
} |
| 1208 |
|
char buf[256]; // small temporary buffer on stack for appending zero char |
| 1209 |
|
if ( size < sizeof(buf) ) { |
| 1210 |
|
memcpy(buf, str.data(), size); |
| 1211 |
|
buf[size] = '\0'; |
| 1212 |
|
return s_StringToDouble(buf, size, flags); |
| 1213 |
|
} |
| 1214 |
|
else { |
| 1215 |
|
// use std::string() to allocate memory for appending zero char |
| 1216 |
|
return s_StringToDouble(string(str).c_str(), size, flags); |
| 1217 |
|
} |
| 1218 |
|
} |
| 1219 |
|
|
| 1220 |
/// @internal |
/// @internal |
| 1221 |
static Uint8 s_DataSizeConvertQual(const CTempString& str, |
static Uint8 s_DataSizeConvertQual(const CTempString& str, |
| 1222 |
SIZE_TYPE& pos, |
SIZE_TYPE& pos, |
| 1223 |
Uint8 value, |
Uint8 value, |
| 1224 |
NStr::TStringToNumFlags flags) |
NStr::TStringToNumFlags flags) |
| 1225 |
{ |
{ |
| 1226 |
|
S2N_CONVERT_GUARD(flags); |
| 1227 |
|
|
| 1228 |
unsigned char ch = str[pos]; |
unsigned char ch = str[pos]; |
| 1229 |
if ( !ch ) { |
if ( !ch ) { |
| 1230 |
return value; |
return value; |
| 1278 |
{ |
{ |
| 1279 |
// We have a limited base range here |
// We have a limited base range here |
| 1280 |
_ASSERT(flags == 0 || flags > 20); |
_ASSERT(flags == 0 || flags > 20); |
| 1281 |
|
if ( base < 2 || base > 16 ) { |
| 1282 |
|
NCBI_THROW2(CStringException, eConvert, |
| 1283 |
|
"Bad numeric base '" + NStr::IntToString(base)+ "'", 0); |
| 1284 |
|
} |
| 1285 |
|
S2N_CONVERT_GUARD_EX(flags); |
| 1286 |
|
|
| 1287 |
// Current position in the string |
// Current position in the string |
| 1288 |
SIZE_TYPE pos = 0; |
SIZE_TYPE pos = 0; |
| 1294 |
bool spaces = ((flags & fAllowLeadingSymbols) == |
bool spaces = ((flags & fAllowLeadingSymbols) == |
| 1295 |
fAllowLeadingSpaces); |
fAllowLeadingSpaces); |
| 1296 |
s_SkipAllowedSymbols(str, pos, |
s_SkipAllowedSymbols(str, pos, |
| 1297 |
spaces ? eSkipSpacesOnly : eSkipAllAllowed); |
spaces ? eSkipSpacesOnly : eSkipAllAllowed, flags); |
| 1298 |
} |
} |
| 1299 |
// Determine sign |
// Determine sign |
| 1300 |
if (str[pos] == '+') { |
if (str[pos] == '+') { |
| 1331 |
// Convert to number |
// Convert to number |
| 1332 |
Uint8 n = StringToUInt8(CTempString(str.data()+numpos, pos-numpos), |
Uint8 n = StringToUInt8(CTempString(str.data()+numpos, pos-numpos), |
| 1333 |
flags, base); |
flags, base); |
| 1334 |
if ( errno ) { |
if ( !n && errno ) { |
| 1335 |
// If exceptions enabled by flags that it has been already thrown. |
// If exceptions are enabled, then one has already been thrown. |
| 1336 |
// errno is also set, so return a zero. |
// The errno is also set, so just return a zero. |
| 1337 |
return 0; |
return 0; |
| 1338 |
} |
} |
| 1339 |
// Check trailer (KB, MB, ...) |
// Check trailer (KB, MB, ...) |
| 1344 |
if (flags & fAllowTrailingSymbols) { |
if (flags & fAllowTrailingSymbols) { |
| 1345 |
bool spaces = ((flags & fAllowTrailingSymbols) == |
bool spaces = ((flags & fAllowTrailingSymbols) == |
| 1346 |
fAllowTrailingSpaces); |
fAllowTrailingSpaces); |
| 1347 |
s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll); |
s_SkipAllowedSymbols(str, pos, spaces ? eSkipSpacesOnly : eSkipAll, flags); |
| 1348 |
} |
} |
| 1349 |
CHECK_ENDPTR(Uint8); |
CHECK_ENDPTR(Uint8); |
| 1350 |
return n; |
return n; |
| 1351 |
} |
} |
| 1352 |
|
|
| 1353 |
|
|
| 1354 |
void NStr::IntToString(string& out_str, long svalue, |
/// Convert a data-size string (number with optional K/M/G/T/P/E suffix,
/// optionally followed by "B" or "iB") to Uint8.
///
/// The number may contain commas (fAllowCommas) and a decimal fraction
/// (unless fDS_ProhibitFractions). The suffix is a power of 1000, or a power
/// of 1024 when it is written as "<X>iB" or fDS_ForceBinary is set. The
/// multiplication is done on decimal digit arrays, so the only overflow
/// check needed is the final digits-to-Uint8 conversion; a discarded
/// fraction is rounded half up.
///
/// @param str    String to convert.
/// @param flags  Conversion flags; only the flags listed in 'allowed_flags'
///               are accepted.
/// @return       Converted value. Conversion errors are reported through
///               S2N_CONVERT_ERROR with EINVAL (bad format) or ERANGE
///               (value does not fit into Uint8).
/// @throw CStringException (eConvert) if unsupported flags are passed.
Uint8 NStr::StringToUInt8_DataSize(const CTempString& str,
                                   TStringToNumFlags  flags /* = 0 */)
{
    TStringToNumFlags allowed_flags = fConvErr_NoThrow +
                                      fMandatorySign +
                                      fAllowCommas +
                                      fAllowLeadingSymbols +
                                      fAllowTrailingSymbols +
                                      fDS_ForceBinary +
                                      fDS_ProhibitFractions +
                                      fDS_ProhibitSpaceBeforeSuffix;

    if ((flags & allowed_flags) != flags) {
        NCBI_THROW2(CStringException, eConvert, "Wrong set of flags", 0);
    }
    S2N_CONVERT_GUARD(flags);

    // NB: all <ctype> calls below get their argument as 'unsigned char';
    // passing a negative plain 'char' to them is undefined behavior.

    const char* str_ptr = str.data();
    const char* str_end = str_ptr + str.size();
    if (flags & fAllowLeadingSymbols) {
        bool allow_all = (flags & fAllowLeadingSymbols) != fAllowLeadingSpaces;
        for (; str_ptr < str_end; ++str_ptr) {
            char c = *str_ptr;
            if (isdigit((unsigned char)c))
                break;
            if (isspace((unsigned char)c))
                continue;
            // A sign counts as the start of the number only if it is
            // mandatory and immediately followed by a digit
            if ((c == '+' || c == '-')  &&  (flags & fMandatorySign)
                &&  str_ptr + 1 < str_end
                &&  isdigit((unsigned char)*(str_ptr + 1)))
            {
                break;
            }
            if (!allow_all)
                break;
        }
    }

    // Sign: '+' is skipped, '-' (or a missing mandatory sign) is an error
    if (str_ptr < str_end  &&  *str_ptr == '+') {
        ++str_ptr;
    }
    else if ((str_ptr < str_end  &&  *str_ptr == '-')
             ||  (flags & fMandatorySign))
    {
        S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, str_ptr - str.data());
    }

    // Find the extent of the number and count its digits
    const char* num_start = str_ptr;
    bool have_dot = false;
    bool allow_commas = (flags & fAllowCommas) != 0;
    bool allow_dot = (flags & fDS_ProhibitFractions) == 0;
    Uint4 digs_pre_dot = 0, digs_post_dot = 0;

    for (; str_ptr < str_end; ++str_ptr) {
        char c = *str_ptr;
        if (isdigit((unsigned char)c)) {
            if (have_dot)
                ++digs_post_dot;
            else
                ++digs_pre_dot;
        }
        else if (c == '.'  &&  allow_dot) {
            if (have_dot  ||  str_ptr == num_start)
                break;
            if (*(str_ptr - 1) == ',') {
                --str_ptr;
                break;
            }
            have_dot = true;
        }
        else if (c == ','  &&  allow_commas) {
            if (have_dot  ||  str_ptr == num_start)
                break;
            if (*(str_ptr - 1) == ',') {
                --str_ptr;
                break;
            }
        }
        else
            break;
    }
    // A trailing dot without digits, or a trailing comma, is not a part
    // of the number
    if (have_dot  &&  digs_post_dot == 0)
        --str_ptr;
    else if (str_ptr > num_start  &&  *(str_ptr - 1) == ',')
        --str_ptr;

    const char* num_end = str_ptr;
    if (num_start == num_end) {
        S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, str_ptr - str.data());
    }
    // Tentatively skip one space between the number and the suffix
    if (str_ptr < str_end  &&  *str_ptr == ' '
        &&  !(flags & fDS_ProhibitSpaceBeforeSuffix))
    {
        ++str_ptr;
    }
    char suff_c = 0;
    if (str_ptr < str_end)
        suff_c = (char)toupper((unsigned char)*str_ptr);

    static const char s_Suffixes[] = {'K', 'M', 'G', 'T', 'P', 'E'};
    static const char* const s_BinCoefs[] = {"1024", "1048576", "1073741824",
                                             "1099511627776",
                                             "1125899906842624",
                                             "1152921504606846976"};
    static const Uint4 s_NumSuffixes = sizeof(s_Suffixes) / sizeof(s_Suffixes[0]);

    bool binary_suff = (flags & fDS_ForceBinary) != 0;
    Uint4 suff_idx = 0;
    for (; suff_idx < s_NumSuffixes; ++suff_idx) {
        if (suff_c == s_Suffixes[suff_idx])
            break;
    }
    if (suff_idx < s_NumSuffixes) {
        ++str_ptr;
        if (str_ptr + 1 < str_end
            &&  toupper((unsigned char)*str_ptr) == 'I'
            &&  toupper((unsigned char)*(str_ptr + 1)) == 'B')
        {
            str_ptr += 2;
            binary_suff = true;
        }
        else if (str_ptr < str_end
                 &&  toupper((unsigned char)*str_ptr) == 'B')
            ++str_ptr;
    }
    else if (suff_c == 'B') {
        ++str_ptr;
    }
    else if (*(str_ptr - 1) == ' ')
        // No suffix found: give back the space skipped above
        --str_ptr;

    if (flags & fAllowTrailingSymbols) {
        bool allow_all = (flags & fAllowTrailingSymbols) != fAllowTrailingSpaces;
        for (; str_ptr < str_end; ++str_ptr) {
            char c = *str_ptr;
            if (isspace((unsigned char)c))
                continue;
            if (!allow_all)
                break;
        }
    }
    if (str_ptr != str_end) {
        S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, str_ptr - str.data());
    }

    // Collect all digits of the number (without commas and the dot)
    Uint4 orig_digs = digs_pre_dot + digs_post_dot;
    AutoArray<Uint1> orig_num(orig_digs);
    str_ptr = num_start;
    for (Uint4 i = 0; str_ptr < num_end; ++str_ptr) {
        if (*str_ptr == ','  ||  *str_ptr == '.')
            continue;
        orig_num[i++] = *str_ptr - '0';
    }

    Uint1* num_to_conv = orig_num.get();
    Uint4 digs_to_conv = digs_pre_dot;
    AutoArray<Uint1> mul_num;
    if (binary_suff  &&  suff_idx < s_NumSuffixes) {
        // Binary suffix: long multiplication of the digit array by the
        // decimal representation of the power of 1024
        const char* coef = s_BinCoefs[suff_idx];
        Uint4 coef_size = Uint4(strlen(coef));
        mul_num = new Uint1[orig_digs + coef_size];
        memset(mul_num.get(), 0, orig_digs + coef_size);
        for (Uint4 coef_i = 0; coef_i < coef_size; ++coef_i) {
            Uint1 coef_d = Uint1(coef[coef_i] - '0');
            Uint1 carry = 0;
            Uint4 res_idx = orig_digs + coef_i;
            for (int orig_i = orig_digs - 1; orig_i >= 0; --orig_i, --res_idx) {
                Uint1 orig_d = orig_num[orig_i];
                Uint1 res_d = coef_d * orig_d + carry + mul_num[res_idx];
                carry = 0;
                while (res_d >= 10) {
                    res_d -= 10;
                    ++carry;
                }
                mul_num[res_idx] = res_d;
            }
            _ASSERT(carry <= 9);
            for (; carry != 0; --res_idx) {
                Uint1 res_d = mul_num[res_idx] + carry;
                carry = 0;
                while (res_d >= 10) {
                    res_d -= 10;
                    ++carry;
                }
                mul_num[res_idx] = res_d;
            }
        }
        digs_to_conv = orig_digs + coef_size - digs_post_dot;
        num_to_conv = mul_num.get();
        // Drop leading zeros of the product
        while (digs_to_conv > 1  &&  *num_to_conv == 0) {
            --digs_to_conv;
            ++num_to_conv;
        }
    }
    else if (suff_idx < s_NumSuffixes) {
        // Decimal suffix: just move the decimal point to the right,
        // appending zeros if there are not enough fraction digits
        Uint4 coef_size = (suff_idx + 1) * 3;
        if (coef_size <= digs_post_dot) {
            digs_to_conv += coef_size;
            digs_post_dot -= coef_size;
        }
        else {
            digs_to_conv += digs_post_dot;
            coef_size -= digs_post_dot;
            digs_post_dot = 0;
            mul_num = new Uint1[digs_to_conv + coef_size];
            memmove(mul_num.get(), num_to_conv, digs_to_conv);
            memset(mul_num.get() + digs_to_conv, 0, coef_size);
            num_to_conv = mul_num.get();
            digs_to_conv += coef_size;
        }
    }

    // Convert the integer part of the digit array, checking for overflow
    const Uint8 limdiv = kMax_UI8/10;
    const int   limoff = int(kMax_UI8 % 10);
    Uint8 n = 0;
    for (Uint4 i = 0; i < digs_to_conv; ++i) {
        Uint1 d = num_to_conv[i];
        if (n >= limdiv  &&  (n > limdiv  ||  d > limoff)) {
            S2N_CONVERT_ERROR(Uint8, kEmptyStr, ERANGE, i);
        }
        n *= 10;
        n += d;
    }
    // Round half up using the first discarded fraction digit
    if (digs_post_dot != 0  &&  num_to_conv[digs_to_conv] >= 5) {
        if (n == kMax_UI8) {
            S2N_CONVERT_ERROR(Uint8, kEmptyStr, ERANGE, digs_to_conv);
        }
        ++n;
    }
    return n;
}
| 1582 |
|
|
| 1583 |
|
|
| 1584 |
|
size_t NStr::StringToSizet(const CTempString& str, |
| 1585 |
|
TStringToNumFlags flags, int base) |
| 1586 |
|
{ |
| 1587 |
|
#if (SIZEOF_SIZE_T > 4) |
| 1588 |
|
return StringToUInt8(str, flags, base); |
| 1589 |
|
#else |
| 1590 |
|
return StringToUInt(str, flags, base); |
| 1591 |
|
#endif |
| 1592 |
|
} |
| 1593 |
|
|
| 1594 |
|
|
| 1595 |
|
|
| 1596 |
|
/// @internal |
| 1597 |
|
static void s_SignedToString(string& out_str, |
| 1598 |
|
unsigned long value, |
| 1599 |
|
long svalue, |
| 1600 |
|
NStr::TNumToStringFlags flags, |
| 1601 |
|
int base) |
| 1602 |
|
{ |
| 1603 |
const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value); |
const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value); |
| 1604 |
char buffer[kBufSize]; |
char buffer[kBufSize]; |
| 1605 |
char* pos = buffer + kBufSize; |
char* pos = buffer + kBufSize; |
| 1609 |
value = static_cast<unsigned long>(-svalue); |
value = static_cast<unsigned long>(-svalue); |
| 1610 |
} |
} |
| 1611 |
|
|
| 1612 |
if ( (flags & fWithCommas) ) { |
if ( (flags & NStr::fWithCommas) ) { |
| 1613 |
int cnt = -1; |
int cnt = -1; |
| 1614 |
do { |
do { |
| 1615 |
if (++cnt == 3) { |
if (++cnt == 3) { |
| 1631 |
|
|
| 1632 |
if (svalue < 0) |
if (svalue < 0) |
| 1633 |
*--pos = '-'; |
*--pos = '-'; |
| 1634 |
else if (flags & fWithSign) |
else if (flags & NStr::fWithSign) |
| 1635 |
*--pos = '+'; |
*--pos = '+'; |
| 1636 |
} |
} |
| 1637 |
else if ( base == 16 ) { |
else if ( base == 16 ) { |
| 1638 |
do { |
do { |
| 1639 |
*--pos = s_Hex[value % 16]; |
*--pos = kDigit[value % 16]; |
| 1640 |
value /= 16; |
value /= 16; |
| 1641 |
} while ( value ); |
} while ( value ); |
| 1642 |
} |
} |
| 1643 |
else { |
else { |
| 1644 |
do { |
do { |
| 1645 |
*--pos = s_Hex[value % base]; |
*--pos = kDigit[value % base]; |
| 1646 |
value /= base; |
value /= base; |
| 1647 |
} while ( value ); |
} while ( value ); |
| 1648 |
} |
} |
| 1651 |
} |
} |
| 1652 |
|
|
| 1653 |
|
|
| 1654 |
void NStr::UIntToString(string& out_str, |
/// Convert an int to its string representation in the given base.
///
/// @param out_str  Receives the result; left untouched if 'base' is invalid.
/// @param svalue   Value to convert.
/// @param flags    Number-to-string conversion flags.
/// @param base     Numeric base, must be in the range [2, 36]; otherwise
///                 errno (and CNcbiError) is set to EINVAL and the function
///                 returns without converting.
///
/// The formatting itself is delegated to s_SignedToString(); on success
/// errno is reset to 0.
void NStr::IntToString(string& out_str, int svalue,
                       TNumToStringFlags flags, int base)
{
    _ASSERT(flags == 0 || flags > 32);
    if ( base < 2  ||  base > 36 ) {
        CNcbiError::SetErrno(errno = EINVAL);
        return;
    }
    unsigned int value = static_cast<unsigned int>(svalue);

    if ( base == 10  &&  svalue < 0 ) {
        // Take the magnitude in unsigned arithmetic: '-svalue' overflows
        // (undefined behavior) when svalue is the minimal int value.
        value = 0U - value;
    }
    s_SignedToString(out_str, value, svalue, flags, base);
    errno = 0;
}
| 1670 |
|
|
| 1671 |
|
|
| 1672 |
|
/// Convert a long to its string representation in the given base.
///
/// @param out_str  Receives the result; left untouched if 'base' is invalid.
/// @param svalue   Value to convert.
/// @param flags    Number-to-string conversion flags.
/// @param base     Numeric base, must be in the range [2, 36]; otherwise
///                 errno (and CNcbiError) is set to EINVAL and the function
///                 returns without converting.
///
/// The formatting itself is delegated to s_SignedToString(); on success
/// errno is reset to 0.
void NStr::LongToString(string& out_str, long svalue,
                        TNumToStringFlags flags, int base)
{
    _ASSERT(flags == 0 || flags > 32);
    if ( base < 2  ||  base > 36 ) {
        CNcbiError::SetErrno(errno = EINVAL);
        return;
    }
    unsigned long value = static_cast<unsigned long>(svalue);

    if ( base == 10  &&  svalue < 0 ) {
        // Take the magnitude in unsigned arithmetic: '-svalue' overflows
        // (undefined behavior) when svalue is the minimal long value.
        value = 0UL - value;
    }
    s_SignedToString(out_str, value, svalue, flags, base);
    errno = 0;
}
| 1688 |
|
|
| 1689 |
|
|
| 1690 |
|
void NStr::ULongToString(string& out_str, |
| 1691 |
unsigned long value, |
unsigned long value, |
| 1692 |
TNumToStringFlags flags, |
TNumToStringFlags flags, |
| 1693 |
int base) |
int base) |
| 1694 |
{ |
{ |
| 1695 |
_ASSERT(flags == 0 || flags > 32); |
_ASSERT(flags == 0 || flags > 32); |
| 1696 |
if ( base < 2 || base > 36 ) { |
if ( base < 2 || base > 36 ) { |
| 1697 |
|
CNcbiError::SetErrno(errno = EINVAL); |
| 1698 |
return; |
return; |
| 1699 |
} |
} |
| 1700 |
|
|
| 1729 |
} |
} |
| 1730 |
else if ( base == 16 ) { |
else if ( base == 16 ) { |
| 1731 |
do { |
do { |
| 1732 |
*--pos = s_Hex[value % 16]; |
*--pos = kDigit[value % 16]; |
| 1733 |
value /= 16; |
value /= 16; |
| 1734 |
} while ( value ); |
} while ( value ); |
| 1735 |
} |
} |
| 1736 |
else { |
else { |
| 1737 |
do { |
do { |
| 1738 |
*--pos = s_Hex[value % base]; |
*--pos = kDigit[value % base]; |
| 1739 |
value /= base; |
value /= base; |
| 1740 |
} while ( value ); |
} while ( value ); |
| 1741 |
} |
} |
| 1742 |
|
|
| 1743 |
out_str.assign(pos, buffer + kBufSize - pos); |
out_str.assign(pos, buffer + kBufSize - pos); |
| 1744 |
|
errno = 0; |
| 1745 |
} |
} |
| 1746 |
|
|
| 1747 |
|
|
|
string NStr::Int8ToString(Int8 value, TNumToStringFlags flags, int base) |
|
|
{ |
|
|
string ret; |
|
|
NStr::Int8ToString(ret, value, flags, base); |
|
|
return ret; |
|
|
} |
|
|
|
|
| 1748 |
|
|
| 1749 |
// On some platforms division of Int8 is very slow, |
// On some platforms division of Int8 is very slow, |
| 1750 |
// so will try to optimize it working with chunks. |
// so will try to optimize it working with chunks. |
| 1769 |
Uint4 chunk = Uint4(value); |
Uint4 chunk = Uint4(value); |
| 1770 |
value /= PRINT_INT8_CHUNK; |
value /= PRINT_INT8_CHUNK; |
| 1771 |
chunk -= PRINT_INT8_CHUNK*Uint4(value); |
chunk -= PRINT_INT8_CHUNK*Uint4(value); |
| 1772 |
char* end = pos - PRINT_INT8_CHUNK_SIZE; |
char* end = pos - PRINT_INT8_CHUNK_SIZE - 2; // 9-digit chunk should have 2 commas |
| 1773 |
do { |
do { |
| 1774 |
if (++cnt == 3) { |
if (++cnt == 3) { |
| 1775 |
*--pos = ','; |
*--pos = ','; |
| 1836 |
} |
} |
| 1837 |
else if ( base == 16 ) { |
else if ( base == 16 ) { |
| 1838 |
do { |
do { |
| 1839 |
*--pos = s_Hex[value % 16]; |
*--pos = kDigit[value % 16]; |
| 1840 |
value /= 16; |
value /= 16; |
| 1841 |
} while ( value ); |
} while ( value ); |
| 1842 |
} |
} |
| 1843 |
else { |
else { |
| 1844 |
do { |
do { |
| 1845 |
*--pos = s_Hex[value % base]; |
*--pos = kDigit[value % base]; |
| 1846 |
value /= base; |
value /= base; |
| 1847 |
} while ( value ); |
} while ( value ); |
| 1848 |
} |
} |
| 1855 |
{ |
{ |
| 1856 |
_ASSERT(flags == 0 || flags > 32); |
_ASSERT(flags == 0 || flags > 32); |
| 1857 |
if ( base < 2 || base > 36 ) { |
if ( base < 2 || base > 36 ) { |
| 1858 |
|
CNcbiError::SetErrno(errno = EINVAL); |
| 1859 |
return; |
return; |
| 1860 |
} |
} |
|
|
|
| 1861 |
Uint8 value; |
Uint8 value; |
| 1862 |
if (base == 10) { |
if (base == 10) { |
| 1863 |
value = static_cast<Uint8>(svalue<0?-svalue:svalue); |
value = static_cast<Uint8>(svalue<0?-svalue:svalue); |
| 1864 |
} else { |
} else { |
| 1865 |
value = static_cast<Uint8>(svalue); |
value = static_cast<Uint8>(svalue); |
| 1866 |
} |
} |
|
|
|
| 1867 |
const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value); |
const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value); |
| 1868 |
char buffer[kBufSize]; |
char buffer[kBufSize]; |
| 1869 |
|
|
| 1876 |
*--pos = '+'; |
*--pos = '+'; |
| 1877 |
} |
} |
| 1878 |
out_str.assign(pos, buffer + kBufSize - pos); |
out_str.assign(pos, buffer + kBufSize - pos); |
| 1879 |
} |
errno = 0; |
|
|
|
|
|
|
|
string NStr::UInt8ToString(Uint8 value, TNumToStringFlags flags, int base) |
|
|
{ |
|
|
string ret; |
|
|
NStr::UInt8ToString(ret, value, flags, base); |
|
|
return ret; |
|
| 1880 |
} |
} |
| 1881 |
|
|
| 1882 |
|
|
| 1885 |
{ |
{ |
| 1886 |
_ASSERT(flags == 0 || flags > 32); |
_ASSERT(flags == 0 || flags > 32); |
| 1887 |
if ( base < 2 || base > 36 ) { |
if ( base < 2 || base > 36 ) { |
| 1888 |
|
CNcbiError::SetErrno(errno = EINVAL); |
| 1889 |
return; |
return; |
| 1890 |
} |
} |
|
|
|
| 1891 |
const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value); |
const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value); |
| 1892 |
char buffer[kBufSize]; |
char buffer[kBufSize]; |
| 1893 |
|
|
| 1897 |
*--pos = '+'; |
*--pos = '+'; |
| 1898 |
} |
} |
| 1899 |
out_str.assign(pos, buffer + kBufSize - pos); |
out_str.assign(pos, buffer + kBufSize - pos); |
| 1900 |
|
errno = 0; |
| 1901 |
|
} |
| 1902 |
|
|
| 1903 |
|
|
| 1904 |
|
/// Format an unsigned 64-bit quantity as a short "data size" string, i.e. a
/// rounded number followed by a K/M/G/T/P/E suffix.
///
/// @param out_str
///   Receives the result; its previous content is discarded.
/// @param value
///   Number to format.
/// @param flags
///   Any combination of fWithSign, fWithCommas, fDS_Binary,
///   fDS_NoDecimalPoint, fDS_PutSpaceBeforeSuffix, fDS_ShortSuffix and
///   fDS_PutBSuffixToo.  fDS_Binary scales by powers of 1024 instead of 1000.
/// @param max_digits
///   Upper bound on the number of digits printed; values below 3 are
///   treated as 3.
/// @throw CStringException (eConvert)
///   If 'flags' contains any bit outside the set listed above.
/// @note
///   Sets errno to 0 on successful completion.
void NStr::UInt8ToString_DataSize(string& out_str,
                                  Uint8 value,
                                  TNumToStringFlags flags /* = 0 */,
                                  unsigned int max_digits /* = 3 */)
{
    TNumToStringFlags allowed_flags = fWithSign +
                                      fWithCommas +
                                      fDS_Binary +
                                      fDS_NoDecimalPoint +
                                      fDS_PutSpaceBeforeSuffix +
                                      fDS_ShortSuffix +
                                      fDS_PutBSuffixToo;

    if ((flags & allowed_flags) != flags) {
        NCBI_THROW2(CStringException, eConvert, "Wrong set of flags", 0);
    }

    if (max_digits < 3) {
        max_digits = 3;
    }

    static const char  s_Suffixes[] = {'K', 'M', 'G', 'T', 'P', 'E'};
    static const Uint4 s_NumSuffixes = sizeof(s_Suffixes) / sizeof(s_Suffixes[0]);

    static const SIZE_TYPE kBufSize = 50;
    char  buffer[kBufSize];
    char* num_start;      // first digit of the integer part
    char* dot_ptr;        // position where the decimal point belongs
    char* num_end;        // one past the last digit to be printed
    Uint4 digs_pre_dot;   // number of digits in front of the decimal point
    Uint4 suff_idx;       // 0 == no suffix, 1 == 'K', 2 == 'M', ...

    if (!(flags & fDS_Binary)) {
        // --- Decimal scaling: the printed digits of 'value' are reused
        //     as-is, only the position of the decimal point moves.
        static const Uint8 s_Coefs[] = {1000, 1000000, 1000000000,
                                        NCBI_CONST_UINT8(1000000000000),
                                        NCBI_CONST_UINT8(1000000000000000),
                                        NCBI_CONST_UINT8(1000000000000000000)};
        for (suff_idx = 0;  suff_idx < s_NumSuffixes;  ++suff_idx) {
            if (value < s_Coefs[suff_idx]) {
                break;
            }
        }
        num_start = s_PrintUint8(buffer + kBufSize, value, 0, 10);
        // Spare leading zero: absorbs a carry produced by rounding 9..9
        num_start[-1] = '0';
        dot_ptr = buffer + kBufSize - 3 * suff_idx;
        digs_pre_dot = Uint4(dot_ptr - num_start);
        if (!(flags & fDS_NoDecimalPoint)) {
            // Clamp on digit counts rather than on pointers, so that a very
            // large 'max_digits' cannot push pointer arithmetic out of range.
            const SIZE_TYPE avail  = SIZE_TYPE(buffer + kBufSize - dot_ptr);
            const SIZE_TYPE wanted = max_digits - digs_pre_dot;
            num_end = dot_ptr + min(avail, wanted);
        }
        else {
            // No fraction allowed: fall back to smaller suffixes for as long
            // as the longer integer part still fits into 'max_digits'.
            while (suff_idx > 0  &&  max_digits - digs_pre_dot >= 3) {
                --suff_idx;
                digs_pre_dot += 3;
                dot_ptr += 3;
            }
            num_end = dot_ptr;
        }
        // Round half-up on the first dropped digit and propagate the carry
        char* round_dig = num_end - 1;
        if (num_end < buffer + kBufSize  &&  *num_end >= '5') {
            ++(*round_dig);
        }
        while (*round_dig == '0' + 10) {
            *round_dig = '0';
            --round_dig;
            ++(*round_dig);
        }
        if (round_dig < num_start) {
            // The carry produced one more leading digit
            _ASSERT(num_start - round_dig == 1);
            num_start = round_dig;
            ++digs_pre_dot;
            if (!(flags & fDS_NoDecimalPoint)) {
                if (digs_pre_dot > 3) {
                    ++suff_idx;
                    digs_pre_dot -= 3;
                    dot_ptr -= 3;
                }
                --num_end;
            }
            else {
                if (digs_pre_dot > max_digits) {
                    ++suff_idx;
                    digs_pre_dot -= 3;
                    dot_ptr -= 3;
                    num_end = dot_ptr;
                }
            }
        }
    }
    else {
        // --- Binary scaling: integer part and fraction are computed by
        //     explicit division by the chosen power of 1024.
        static const Uint8 s_Coefs[] = {1, 1024, 1048576, 1073741824,
                                        NCBI_CONST_UINT8(1099511627776),
                                        NCBI_CONST_UINT8(1125899906842624),
                                        NCBI_CONST_UINT8(1152921504606846976)};

        for (suff_idx = 1;  suff_idx < s_NumSuffixes;  ++suff_idx) {
            if (value < s_Coefs[suff_idx]) {
                break;
            }
        }
        bool can_try_another = true;
try_another_suffix:
        Uint8 mul_coef  = s_Coefs[suff_idx - 1];
        Uint8 whole_num = value / mul_coef;
        if (max_digits == 3  &&  whole_num >= 1000) {
            // 1000..1023 would need four digits -- go one suffix up
            ++suff_idx;
            goto try_another_suffix;
        }
        num_start = s_PrintUint8(buffer + kBufSize, whole_num, 0, 10);
        // Spare leading zero: absorbs a carry produced by rounding 9..9
        num_start[-1] = '0';
        digs_pre_dot = Uint4(buffer + kBufSize - num_start);
        if (max_digits - digs_pre_dot >= 3  &&  (flags & fDS_NoDecimalPoint)
            &&  suff_idx != 1  &&  can_try_another)
        {
            // Without a fraction a smaller suffix may still fit into
            // 'max_digits'; probe using the lower half of the buffer.
            Uint4 new_suff = suff_idx - 1;
try_even_more_suffix:
            Uint8 new_num   = value / s_Coefs[new_suff - 1];
            char* new_start = s_PrintUint8(buffer + kBufSize / 2, new_num, 0, 10);
            Uint4 new_digs  = Uint4(buffer + kBufSize / 2 - new_start);
            if (new_digs <= max_digits) {
                if (max_digits - digs_pre_dot >= 3  &&  new_suff != 1) {
                    --new_suff;
                    goto try_even_more_suffix;
                }
                suff_idx = new_suff;
                can_try_another = false;
                goto try_another_suffix;
            }
            if (new_suff != suff_idx - 1) {
                suff_idx = new_suff + 1;
                can_try_another = false;
                goto try_another_suffix;
            }
        }
        // Move the integer part (with its spare zero) to the buffer start,
        // leaving room behind it for the fractional digits.
        memcpy(buffer, num_start - 1, digs_pre_dot + 1);
        num_start = buffer + 1;
        dot_ptr = num_start + digs_pre_dot;
        // Always produce one extra digit: it is used for rounding only
        Uint4 cnt_more_digs = 1;
        if (!(flags & fDS_NoDecimalPoint)) {
            cnt_more_digs += min(max_digits - digs_pre_dot, 3 * (suff_idx - 1));
        }
        num_end = dot_ptr;
        Uint8 left_val = value - whole_num * mul_coef;
        do {
            left_val *= 10;
            Uint1 d = Uint1(left_val / mul_coef);
            *num_end = d + '0';
            ++num_end;
            left_val -= d * mul_coef;
            --cnt_more_digs;
        }
        while (cnt_more_digs != 0);
        --num_end;

        // Round half-up on the extra digit and propagate the carry
        char* round_dig = num_end - 1;
        if (*num_end >= '5') {
            ++(*round_dig);
        }
        while (*round_dig == '0' + 10) {
            *round_dig = '0';
            --round_dig;
            ++(*round_dig);
        }
        if (round_dig < num_start) {
            // The carry produced one more leading digit
            _ASSERT(round_dig == buffer);
            num_start = round_dig;
            ++digs_pre_dot;
            if (digs_pre_dot > max_digits) {
                ++suff_idx;
                goto try_another_suffix;
            }
            if (num_end != dot_ptr) {
                --num_end;
            }
        }
        if (!(flags & fDS_NoDecimalPoint)  &&  digs_pre_dot == 4
            &&  num_start[0] == '1'  &&  num_start[1] == '0'
            &&  num_start[2] == '2'  &&  num_start[3] == '4')
        {
            // Rounded up to exactly 1024 -- that is 1 of the next suffix
            ++suff_idx;
            goto try_another_suffix;
        }

        // From here on 0 means "no suffix", as in the decimal branch
        --suff_idx;
    }

    out_str.erase();
    if (flags & fWithSign) {
        out_str.append(1, '+');
    }
    if (!(flags & fWithCommas)  ||  digs_pre_dot <= 3) {
        out_str.append(num_start, digs_pre_dot);
    }
    else {
        // The leading group holds 1..3 digits.  When the digit count is a
        // multiple of 3 the leading group is a full one; treating it as
        // empty would make the output start with a comma.
        Uint4 digs_first = digs_pre_dot % 3;
        if (digs_first == 0) {
            digs_first = 3;
        }
        out_str.append(num_start, digs_first);
        char* left_ptr  = num_start + digs_first;
        Uint4 digs_left = digs_pre_dot - digs_first;
        while (digs_left != 0) {
            out_str.append(1, ',');
            out_str.append(left_ptr, 3);
            left_ptr  += 3;
            digs_left -= 3;
        }
    }
    if (num_end != dot_ptr) {
        out_str.append(1, '.');
        out_str.append(dot_ptr, num_end - dot_ptr);
    }

    if (suff_idx == 0) {
        if (flags & fDS_PutBSuffixToo) {
            if (flags & fDS_PutSpaceBeforeSuffix) {
                out_str.append(1, ' ');
            }
            out_str.append(1, 'B');
        }
    }
    else {
        --suff_idx;
        if (flags & fDS_PutSpaceBeforeSuffix) {
            out_str.append(1, ' ');
        }
        out_str.append(1, s_Suffixes[suff_idx]);
        if (!(flags & fDS_ShortSuffix)) {
            if (flags & fDS_Binary) {
                out_str.append(1, 'i');
            }
            out_str.append(1, 'B');
        }
    }
    errno = 0;
}
| 2126 |
|
|
| 2127 |
|
|
| 2136 |
const int kMaxDoubleStringSize = 308 + 3 + kMaxDoublePrecision; |
const int kMaxDoubleStringSize = 308 + 3 + kMaxDoublePrecision; |
| 2137 |
|
|
| 2138 |
|
|
|
string NStr::DoubleToString(double value, int precision, |
|
|
TNumToStringFlags flags) |
|
|
{ |
|
|
string str; |
|
|
DoubleToString(str, value, precision, flags); |
|
|
return str; |
|
|
} |
|
|
|
|
|
|
|
| 2139 |
void NStr::DoubleToString(string& out_str, double value, |
void NStr::DoubleToString(string& out_str, double value, |
| 2140 |
int precision, TNumToStringFlags flags) |
int precision, TNumToStringFlags flags) |
| 2141 |
{ |
{ |
| 2142 |
char buffer[kMaxDoubleStringSize]; |
char buffer[kMaxDoubleStringSize]; |
| 2143 |
if (precision >= 0) { |
if (precision >= 0 || |
| 2144 |
|
((flags & fDoublePosix) && (isnan(value) || !finite(value)))) { |
| 2145 |
SIZE_TYPE n = DoubleToString(value, precision, buffer, |
SIZE_TYPE n = DoubleToString(value, precision, buffer, |
| 2146 |
kMaxDoubleStringSize, flags); |
kMaxDoubleStringSize, flags); |
| 2147 |
buffer[n] = '\0'; |
buffer[n] = '\0'; |
| 2160 |
break; |
break; |
| 2161 |
} |
} |
| 2162 |
::sprintf(buffer, format, value); |
::sprintf(buffer, format, value); |
| 2163 |
|
if (flags & fDoublePosix) { |
| 2164 |
|
struct lconv* conv = localeconv(); |
| 2165 |
|
if ('.' != *(conv->decimal_point)) { |
| 2166 |
|
char* pos = strchr(buffer, *(conv->decimal_point)); |
| 2167 |
|
if (pos) { |
| 2168 |
|
*pos = '.'; |
| 2169 |
|
} |
| 2170 |
|
} |
| 2171 |
|
} |
| 2172 |
} |
} |
| 2173 |
out_str = buffer; |
out_str = buffer; |
| 2174 |
|
errno = 0; |
| 2175 |
} |
} |
| 2176 |
|
|
| 2177 |
|
|
| 2181 |
TNumToStringFlags flags) |
TNumToStringFlags flags) |
| 2182 |
{ |
{ |
| 2183 |
char buffer[kMaxDoubleStringSize]; |
char buffer[kMaxDoubleStringSize]; |
| 2184 |
|
int n = 0; |
| 2185 |
|
if ((flags & fDoublePosix) && (isnan(value) || !finite(value))) { |
| 2186 |
|
if (isnan(value)) { |
| 2187 |
|
strcpy(buffer, "NaN"); |
| 2188 |
|
n = 4; |
| 2189 |
|
} else if (value > 0.) { |
| 2190 |
|
strcpy(buffer, "INF"); |
| 2191 |
|
n = 4; |
| 2192 |
|
} else { |
| 2193 |
|
strcpy(buffer, "-INF"); |
| 2194 |
|
n = 5; |
| 2195 |
|
} |
| 2196 |
|
} else { |
| 2197 |
if (precision > (unsigned int)kMaxDoublePrecision) { |
if (precision > (unsigned int)kMaxDoublePrecision) { |
| 2198 |
precision = (unsigned int)kMaxDoublePrecision; |
precision = (unsigned int)kMaxDoublePrecision; |
| 2199 |
} |
} |
| 2210 |
format = "%.*f"; |
format = "%.*f"; |
| 2211 |
break; |
break; |
| 2212 |
} |
} |
| 2213 |
int n = ::sprintf(buffer, format, (int)precision, value); |
n = ::sprintf(buffer, format, (int)precision, value); |
| 2214 |
|
if (n < 0) { |
| 2215 |
|
n = 0; |
| 2216 |
|
} |
| 2217 |
|
if (flags & fDoublePosix) { |
| 2218 |
|
struct lconv* conv = localeconv(); |
| 2219 |
|
if ('.' != *(conv->decimal_point)) { |
| 2220 |
|
char* pos = strchr(buffer, *(conv->decimal_point)); |
| 2221 |
|
if (pos) { |
| 2222 |
|
*pos = '.'; |
| 2223 |
|
} |
| 2224 |
|
} |
| 2225 |
|
} |
| 2226 |
|
} |
| 2227 |
SIZE_TYPE n_copy = min((SIZE_TYPE) n, buf_size); |
SIZE_TYPE n_copy = min((SIZE_TYPE) n, buf_size); |
| 2228 |
memcpy(buf, buffer, n_copy); |
memcpy(buf, buffer, n_copy); |
| 2229 |
|
errno = 0; |
| 2230 |
return n_copy; |
return n_copy; |
| 2231 |
} |
} |
| 2232 |
|
|
| 2233 |
|
|
| 2234 |
|
/// Write the decimal representation of 'value' into a field of at most
/// 'digits' characters starting at 'buffer'.  No terminating NUL is written.
///
/// @param buffer  Start of the output field.
/// @param value   Number to print.
/// @param digits  Width of the field, in characters.
/// @param zeros   If true, always fill the whole field, padding with
///                leading zeros; if false, print left-aligned without
///                padding.
/// @return        Pointer just past the last character written.
///
/// Only the 'digits' least significant digits are printed if the number
/// is longer than the field; nothing is written when 'digits' is 0.
char* s_ncbi_append_int2str(char* buffer, unsigned int value, size_t digits, bool zeros)
{
    if (digits == 0) {
        // Nothing fits; also keeps the countdown below from wrapping around
        return buffer;
    }
    if (zeros) {
        // Fill the whole field from its last position backwards
        for (size_t idx = digits;  idx-- > 0; ) {
            buffer[idx] = (char)('0' + (value % 10));
            value /= 10;
        }
        return buffer + digits;
    }

    // Produce digits right-aligned in the field, never going in front of it
    size_t idx = digits;
    do {
        buffer[--idx] = (char)('0' + (value % 10));
        value /= 10;
    } while (value != 0  &&  idx != 0);

    const size_t written = digits - idx;
    if (idx != 0) {
        // Shorter than the field: shift to the left edge
        memmove(buffer, buffer + idx, written);
    }
    return buffer + written;
}
| 2255 |
|
|
| 2256 |
|
|
| 2257 |
|
#define __NLG NCBI_CONST_LONGDOUBLE |
| 2258 |
|
|
| 2259 |
|
/// Convert a double into a bare string of significant decimal digits plus
/// a separate decimal exponent and sign.
///
/// @param val        Value to convert.
/// @param precision  Number of significant digits wanted; capped at DBL_DIG.
///                   With 0 nothing is produced and 0 is returned.
/// @param buffer     Receives the digits (no sign, no point, no NUL).
/// @param bufsize    Size of 'buffer'.  Buffers of more than 20 bytes are
///                   used directly as the work area.
/// @param dec        Receives the decimal exponent of the first digit.
/// @param sign       Receives -1 or 1; 0 when 'val' is zero or 'precision'
///                   is 0.
/// @return           Number of digits stored in 'buffer' (trailing zeros
///                   are dropped).
/// @throw CStringException (eConvert)
///   If a small (<= 20 bytes) buffer cannot hold the resulting digits.
///
/// NOTE(review): non-finite input does not look handled here -- an infinite
/// 'val' appears never to leave the first scaling loop.  Presumably callers
/// are expected to filter NaN/INF beforehand; confirm.
SIZE_TYPE NStr::DoubleToString_Ecvt(double val, unsigned int precision,
                                    char* buffer, SIZE_TYPE bufsize,
                                    int* dec, int* sign)
{
    *dec = *sign = 0;
    if (precision == 0) {
        return 0;
    }
    if (precision > DBL_DIG) {
        precision = DBL_DIG;
    }
    if (val == 0.) {
        *buffer = '0';
        return 1;
    }
    if (val < 0.) {
        *sign = -1;
        val   = -val;
    } else {
        *sign = 1;
    }
    // Up to 9 digits fit into one integer chunk; more requires a second one
    const bool high_precision = precision > 9;

    // ---- Step 1: find the decimal exponent and the multiplier that scales
    //      the value so that its integer part has 9 digits.
    unsigned int exp = 0;
    bool         exp_positive = val >= 1.;
    unsigned int first, second = 0;
    long double  mult  = __NLG(1.);
    long double  value = val;

    if (exp_positive) {
        while (value >= __NLG(1.e256)) {
            value /= __NLG(1.e256);
            exp   += 256;
        }
        if (value >= __NLG(1.e16)) {
            if      (value >= __NLG(1.e240)) { value *= __NLG(1.e-240); exp += 240; }
            else if (value >= __NLG(1.e224)) { value *= __NLG(1.e-224); exp += 224; }
            else if (value >= __NLG(1.e208)) { value *= __NLG(1.e-208); exp += 208; }
            else if (value >= __NLG(1.e192)) { value *= __NLG(1.e-192); exp += 192; }
            else if (value >= __NLG(1.e176)) { value *= __NLG(1.e-176); exp += 176; }
            else if (value >= __NLG(1.e160)) { value *= __NLG(1.e-160); exp += 160; }
            else if (value >= __NLG(1.e144)) { value *= __NLG(1.e-144); exp += 144; }
            else if (value >= __NLG(1.e128)) { value *= __NLG(1.e-128); exp += 128; }
            else if (value >= __NLG(1.e112)) { value *= __NLG(1.e-112); exp += 112; }
            else if (value >= __NLG(1.e96))  { value *= __NLG(1.e-96);  exp +=  96; }
            else if (value >= __NLG(1.e80))  { value *= __NLG(1.e-80);  exp +=  80; }
            else if (value >= __NLG(1.e64))  { value *= __NLG(1.e-64);  exp +=  64; }
            else if (value >= __NLG(1.e48))  { value *= __NLG(1.e-48);  exp +=  48; }
            else if (value >= __NLG(1.e32))  { value *= __NLG(1.e-32);  exp +=  32; }
            else if (value >= __NLG(1.e16))  { value *= __NLG(1.e-16);  exp +=  16; }
        }
        if      (value <  __NLG(1.))    { mult = __NLG(1.e+9); exp -=  1; }
        else if (value <  __NLG(10.))   { mult = __NLG(1.e+8);            }
        else if (value <  __NLG(1.e2))  { mult = __NLG(1.e+7); exp +=  1; }
        else if (value <  __NLG(1.e3))  { mult = __NLG(1.e+6); exp +=  2; }
        else if (value <  __NLG(1.e4))  { mult = __NLG(1.e+5); exp +=  3; }
        else if (value <  __NLG(1.e5))  { mult = __NLG(1.e+4); exp +=  4; }
        else if (value <  __NLG(1.e6))  { mult = __NLG(1.e+3); exp +=  5; }
        else if (value <  __NLG(1.e7))  { mult = __NLG(1.e+2); exp +=  6; }
        else if (value <  __NLG(1.e8))  { mult = __NLG(10.);   exp +=  7; }
        else if (value <  __NLG(1.e9))  { mult = __NLG(1.);    exp +=  8; }
        else if (value <  __NLG(1.e10)) { mult = __NLG(0.1);   exp +=  9; }
        else if (value <  __NLG(1.e11)) { mult = __NLG(1.e-2); exp += 10; }
        else if (value <  __NLG(1.e12)) { mult = __NLG(1.e-3); exp += 11; }
        else if (value <  __NLG(1.e13)) { mult = __NLG(1.e-4); exp += 12; }
        else if (value <  __NLG(1.e14)) { mult = __NLG(1.e-5); exp += 13; }
        else if (value <  __NLG(1.e15)) { mult = __NLG(1.e-6); exp += 14; }
        else if (value <  __NLG(1.e16)) { mult = __NLG(1.e-7); exp += 15; }
        else                            { mult = __NLG(1.e-8); exp += 16; }
    } else {
        while (value <= __NLG(1.e-256)) {
            value *= __NLG(1.e256);
            exp   += 256;
        }
        if (value <= __NLG(1.e-16)) {
            if      (value <= __NLG(1.e-240)) { value *= __NLG(1.e240); exp += 240; }
            else if (value <= __NLG(1.e-224)) { value *= __NLG(1.e224); exp += 224; }
            else if (value <= __NLG(1.e-208)) { value *= __NLG(1.e208); exp += 208; }
            else if (value <= __NLG(1.e-192)) { value *= __NLG(1.e192); exp += 192; }
            else if (value <= __NLG(1.e-176)) { value *= __NLG(1.e176); exp += 176; }
            else if (value <= __NLG(1.e-160)) { value *= __NLG(1.e160); exp += 160; }
            else if (value <= __NLG(1.e-144)) { value *= __NLG(1.e144); exp += 144; }
            else if (value <= __NLG(1.e-128)) { value *= __NLG(1.e128); exp += 128; }
            else if (value <= __NLG(1.e-112)) { value *= __NLG(1.e112); exp += 112; }
            else if (value <= __NLG(1.e-96))  { value *= __NLG(1.e96);  exp +=  96; }
            else if (value <= __NLG(1.e-80))  { value *= __NLG(1.e80);  exp +=  80; }
            else if (value <= __NLG(1.e-64))  { value *= __NLG(1.e64);  exp +=  64; }
            else if (value <= __NLG(1.e-48))  { value *= __NLG(1.e48);  exp +=  48; }
            else if (value <= __NLG(1.e-32))  { value *= __NLG(1.e32);  exp +=  32; }
            else if (value <= __NLG(1.e-16))  { value *= __NLG(1.e16);  exp +=  16; }
        }
        if      (value < __NLG(1.e-15)) { mult = __NLG(1.e24); exp += 16; }
        else if (value < __NLG(1.e-14)) { mult = __NLG(1.e23); exp += 15; }
        else if (value < __NLG(1.e-13)) { mult = __NLG(1.e22); exp += 14; }
        else if (value < __NLG(1.e-12)) { mult = __NLG(1.e21); exp += 13; }
        else if (value < __NLG(1.e-11)) { mult = __NLG(1.e20); exp += 12; }
        else if (value < __NLG(1.e-10)) { mult = __NLG(1.e19); exp += 11; }
        else if (value < __NLG(1.e-9))  { mult = __NLG(1.e18); exp += 10; }
        else if (value < __NLG(1.e-8))  { mult = __NLG(1.e17); exp +=  9; }
        else if (value < __NLG(1.e-7))  { mult = __NLG(1.e16); exp +=  8; }
        else if (value < __NLG(1.e-6))  { mult = __NLG(1.e15); exp +=  7; }
        else if (value < __NLG(1.e-5))  { mult = __NLG(1.e14); exp +=  6; }
        else if (value < __NLG(1.e-4))  { mult = __NLG(1.e13); exp +=  5; }
        else if (value < __NLG(1.e-3))  { mult = __NLG(1.e12); exp +=  4; }
        else if (value < __NLG(1.e-2))  { mult = __NLG(1.e11); exp +=  3; }
        else if (value < __NLG(1.e-1))  { mult = __NLG(1.e10); exp +=  2; }
        else if (value < __NLG(1.))     { mult = __NLG(1.e9);  exp +=  1; }
        else                            { mult = __NLG(1.e8);             }
    }

    // ---- Step 2: split the scaled value into a 9-digit chunk and,
    //      for high precision, an additional 8-digit chunk.
    long double t1 = value * mult;
    if (t1 >= __NLG(1.e9)) {
        first = 999999999;
    } else if (t1 < __NLG(1.e8)) {
        first = 100000000;
        t1 = first;
    } else {
        first = (unsigned int)t1;
    }
    if (high_precision) {
        long double t2 = (t1 - first) * __NLG(1.e8);
        if (t2 >= __NLG(1.e8)) {
            second = 99999999;
        } else {
            second = (unsigned int)t2;
        }
    }

    // ---- Step 3: print the chunks, either straight into the caller's
    //      buffer (when it is big enough) or into a local one.
    const bool use_ext_buffer = bufsize > 20;
    char  tmp[32];
    char* digits     = use_ext_buffer ? buffer : tmp;
    char* digits_end = s_ncbi_append_int2str(digits, first, 9, false);
    if (high_precision) {
        digits_end = s_ncbi_append_int2str(digits_end, second, 8, true);
    }
    size_t       digits_len      = digits_end - digits;
    const size_t digits_got      = digits_len;
    const size_t digits_expected = high_precision ? 17 : 9;

    // ---- Step 4: cut down to the requested precision, with rounding.
    size_t pos = precision;
    if (digits_len > precision) {
        digits_len = precision;

        // Heuristic kept from the original implementation ("questionable,
        // but in fact improves the result on average"): when the first
        // dropped digit is 4 after a long run of 9s, or 5 after a long run
        // of 0s, nudge it across the rounding boundary.
        if (high_precision) {
            if (digits[pos] == '4') {
                size_t pt = pos - 1;
                while (pt != 0  &&  digits[--pt] == '9') {
                    continue;
                }
                if (pt != 0  &&  (pos - pt) > precision / 2) {
                    digits[pos] = '5';
                }
            } else if (digits[pos] == '5') {
                size_t pt = pos;
                while (pt != 0  &&  digits[--pt] == '0') {
                    continue;
                }
                if (pt != 0  &&  (pos - pt) > precision / 2) {
                    digits[pos] = '4';
                }
            }
        }

        if (digits[pos] >= '5') {
            // Round up, propagating the carry towards the first digit
            do {
                if (digits[--pos] < '9') {
                    ++digits[pos++];
                    break;
                }
                digits[pos] = '0';
            } while (pos > 0);
            if (pos == 0) {
                // All kept digits were 9: the result is 1 followed by zeros
                if (digits_expected <= digits_got) {
                    if (exp_positive) {
                        ++exp;
                    } else {
                        // exp cannot be 0, by design
                        --exp;
                        exp_positive = (exp == 0);
                    }
                }
                *digits = '1';
                digits_len = 1;
            }
        }
    }

    // ---- Step 5: drop trailing zeros
    while (digits_len > 0  &&  digits[digits_len - 1] == '0') {
        --digits_len;
    }

    *dec = (int)exp;
    if (!exp_positive) {
        *dec = -*dec;
    }
    if (!use_ext_buffer) {
        if (digits_len > bufsize) {
            NCBI_THROW2(CStringException, eConvert,
                        "Destination buffer too small", 0);
        }
        strncpy(buffer, digits, digits_len);
    }
    return digits_len;
}
| 2462 |
|
#undef __NLG |
| 2463 |
|
|
| 2464 |
|
|
| 2465 |
|
/// Print a double into a character buffer using '.' as the decimal point,
/// choosing between plain and exponential notation.
///
/// @param val        Value to print.
/// @param precision  Number of significant digits requested.
/// @param buffer     Destination; no terminating NUL is written.
/// @param bufsize    Size of 'buffer'; must be at least 'precision' + 8.
/// @return           Number of characters stored in 'buffer'.
/// @throw CStringException (eConvert)
///   If 'bufsize' is smaller than 'precision' + 8.
/// @note
///   Sets errno to 0 on successful completion.
SIZE_TYPE NStr::DoubleToStringPosix(double val, unsigned int precision,
                                    char* buffer, SIZE_TYPE bufsize)
{
    if (bufsize < precision + 8) {
        NCBI_THROW2(CStringException, eConvert,
                    "Destination buffer too small", 0);
    }

    // Obtain bare digits, decimal exponent and sign
    int    dec = 0, sign = 0;
    char   digits[32];
    size_t digits_len = DoubleToString_Ecvt(
        val, precision, digits, sizeof(digits), &dec, &sign);

    if (digits_len == 0) {
        errno = 0;
        return 0;
    }
    if (digits_len == 1  &&  dec == 0  &&  sign >= 0) {
        // A lone non-negative digit needs no further decoration
        *buffer = digits[0];
        errno = 0;
        return 1;
    }

    const bool   exp_positive = dec >= 0;
    unsigned int exp = (unsigned int)(exp_positive ? dec : (-dec));

    char* out = buffer;   // write cursor
    char* src = digits;   // read cursor

    if (sign < 0) {
        *out++ = '-';
    }

    // The 'e' format is used when the exponent of the value is less than -4
    // or greater than or equal to the precision argument
    const bool use_exp_format =
        exp_positive ? (exp >= precision) : (exp > 4);

    if (use_exp_format) {
        // d[.ddd]e(+|-)XX
        *out++ = *src++;
        --digits_len;
        if (digits_len != 0) {
            *out++ = '.';
            strncpy(out, src, digits_len);
            out += digits_len;
        }
        *out++ = 'e';
        *out++ = exp_positive ? '+' : '-';

#if defined(NCBI_OS_MSWIN)
        bool   need_zeros  = true;
        size_t need_digits = 3;
#else
        bool   need_zeros  = exp < 10;
        size_t need_digits = exp < 100 ? 2 : 3;
#endif
        // assuming exp < 1000
        out = s_ncbi_append_int2str(out, exp, need_digits, need_zeros);
    }
    else if (exp_positive) {
        // ddd[.ddd], padded with zeros up to the decimal point if needed
        *out++ = *src++;
        --digits_len;
        if (digits_len > exp) {
            strncpy(out, src, exp);
            out += exp;
            *out++ = '.';
            strncpy(out, src + exp, digits_len - exp);
            out += digits_len - exp;
        } else {
            strncpy(out, src, digits_len);
            out += digits_len;
            exp -= (unsigned int)digits_len;
            for (unsigned int i = 0;  i < exp;  ++i) {
                *out++ = '0';
            }
        }
    }
    else {
        // 0.[000]ddd
        *out++ = '0';
        *out++ = '.';
        for (unsigned int i = 1;  i < exp;  ++i) {
            *out++ = '0';
        }
        strncpy(out, src, digits_len);
        out += digits_len;
    }
    errno = 0;
    return out - buffer;
}
| 2547 |
|
|
| 2548 |
|
|
| 2549 |
|
/// Convert a size_t value to a string by delegating to the conversion
/// routine whose argument width matches size_t on this platform.
///
/// @param value  Number to convert.
/// @param flags  Formatting flags, passed through unchanged.
/// @param base   Radix, passed through unchanged.
/// @return       The converted string.
string NStr::SizetToString(size_t value, TNumToStringFlags flags, int base)
{
#if !(SIZEOF_SIZE_T > 4)
    // size_t fits into unsigned int
    const unsigned int narrow = static_cast<unsigned int>(value);
    return UIntToString(narrow, flags, base);
#else
    // size_t is wider than 4 bytes
    return UInt8ToString(value, flags, base);
#endif
}
| 2557 |
|
|
| 2558 |
|
|
| 2559 |
string NStr::PtrToString(const void* value) |
string NStr::PtrToString(const void* value) |
| 2560 |
{ |
{ |
| 2561 |
|
errno = 0; |
| 2562 |
char buffer[64]; |
char buffer[64]; |
| 2563 |
::sprintf(buffer, "%p", value); |
::sprintf(buffer, "%p", value); |
| 2564 |
return buffer; |
return buffer; |
| 2567 |
|
|
| 2568 |
/// Store the textual ("%p") representation of a pointer into 'out_str'.
///
/// @param out_str  Receives the text; previous content is replaced.
/// @param value    Pointer to print.
/// @note           errno is reset to 0 before formatting.
void NStr::PtrToString(string& out_str, const void* value)
{
    errno = 0;
    char text[64];
    ::sprintf(text, "%p", value);
    out_str.assign(text);
}
| 2575 |
|
|
| 2576 |
|
|
| 2577 |
const void* NStr::StringToPtr(const string& str) |
const void* NStr::StringToPtr(const CTempStringEx& str) |
| 2578 |
{ |
{ |
| 2579 |
|
int& errno_ref = errno; |
| 2580 |
|
errno_ref = 0; |
| 2581 |
void *ptr = NULL; |
void *ptr = NULL; |
| 2582 |
::sscanf(str.c_str(), "%p", &ptr); |
int res; |
| 2583 |
|
if ( str.HasZeroAtEnd() ) { |
| 2584 |
|
res = ::sscanf(str.data(), "%p", &ptr); |
| 2585 |
|
} else { |
| 2586 |
|
res = ::sscanf(string(str).c_str(), "%p", &ptr); |
| 2587 |
|
} |
| 2588 |
|
if (res != 1) { |
| 2589 |
|
CNcbiError::SetErrno(errno_ref = EINVAL, str); |
| 2590 |
|
return NULL; |
| 2591 |
|
} |
| 2592 |
return ptr; |
return ptr; |
| 2593 |
} |
} |
| 2594 |
|
|
| 2609 |
} |
} |
| 2610 |
|
|
| 2611 |
|
|
| 2612 |
bool NStr::StringToBool(const string& str) |
bool NStr::StringToBool(const CTempString& str) |
| 2613 |
{ |
{ |
| 2614 |
if ( AStrEquiv(str, s_kTrueString, PNocase()) || |
if ( AStrEquiv(str, s_kTrueString, PNocase()) || |
| 2615 |
AStrEquiv(str, s_kTString, PNocase()) || |
AStrEquiv(str, s_kTString, PNocase()) || |
| 2616 |
AStrEquiv(str, s_kYesString, PNocase()) || |
AStrEquiv(str, s_kYesString, PNocase()) || |
| 2617 |
AStrEquiv(str, s_kYString, PNocase()) ) |
AStrEquiv(str, s_kYString, PNocase()) ) { |
| 2618 |
|
errno = 0; |
| 2619 |
return true; |
return true; |
| 2620 |
|
} |
| 2621 |
if ( AStrEquiv(str, s_kFalseString, PNocase()) || |
if ( AStrEquiv(str, s_kFalseString, PNocase()) || |
| 2622 |
AStrEquiv(str, s_kFString, PNocase()) || |
AStrEquiv(str, s_kFString, PNocase()) || |
| 2623 |
AStrEquiv(str, s_kNoString, PNocase()) || |
AStrEquiv(str, s_kNoString, PNocase()) || |
| 2624 |
AStrEquiv(str, s_kNString, PNocase()) ) |
AStrEquiv(str, s_kNString, PNocase()) ) { |
| 2625 |
|
errno = 0; |
| 2626 |
return false; |
return false; |
| 2627 |
|
} |
| 2628 |
NCBI_THROW2(CStringException, eConvert, |
NCBI_THROW2(CStringException, eConvert, |
| 2629 |
"String cannot be converted to bool", 0); |
"String cannot be converted to bool", 0); |
| 2630 |
} |
} |
| 2651 |
#elif defined(HAVE_VSNPRINTF) |
#elif defined(HAVE_VSNPRINTF) |
| 2652 |
// deal with implementation quirks |
// deal with implementation quirks |
| 2653 |
SIZE_TYPE size = 1024; |
SIZE_TYPE size = 1024; |
| 2654 |
AutoPtr<char, ArrayDeleter<char> > buf(new char[size]); |
AutoArray<char> buf(size); |
| 2655 |
buf.get()[size-1] = buf.get()[size-2] = 0; |
buf.get()[size-1] = buf.get()[size-2] = 0; |
| 2656 |
SIZE_TYPE n = vsnprintf(buf.get(), size, format, args); |
SIZE_TYPE n = vsnprintf(buf.get(), size, format, args); |
| 2657 |
while (n >= size || buf.get()[size-2]) { |
while (n >= size || buf.get()[size-2]) { |
| 2680 |
} |
} |
| 2681 |
|
|
| 2682 |
|
|
| 2683 |
SIZE_TYPE NStr::FindNoCase(const string& str, const string& pattern, |
SIZE_TYPE NStr::FindNoCase(const CTempString& str, const CTempString& pattern, |
| 2684 |
SIZE_TYPE start, SIZE_TYPE end, EOccurrence where) |
SIZE_TYPE start, SIZE_TYPE end, EOccurrence where) |
| 2685 |
{ |
{ |
| 2686 |
string pat(pattern, 0, 1); |
string pat(pattern, 0, 1); |
| 2711 |
} |
} |
| 2712 |
|
|
| 2713 |
|
|
| 2714 |
|
/// Look up a string in a list.
///
/// @param lst       List to search.
/// @param val       Value to look for.
/// @param use_case  Case sensitivity, passed on to Equal().
/// @return          Pointer to the first matching element of 'lst',
///                  or NULL if there is none.
const string* NStr::Find(const list <string>& lst, const CTempString& val,
                         ECase use_case)
{
    // An empty list simply yields no iterations
    for (list<string>::const_iterator it = lst.begin();
         it != lst.end();  ++it) {
        const string& candidate = *it;
        if (Equal(candidate, val, use_case)) {
            return &candidate;
        }
    }
    return NULL;
}
| 2725 |
|
|
| 2726 |
|
/// Look up a string in a vector.
///
/// @param vec       Vector to search.
/// @param val       Value to look for.
/// @param use_case  Case sensitivity, passed on to Equal().
/// @return          Pointer to the first matching element of 'vec',
///                  or NULL if there is none.
const string* NStr::Find(const vector <string>& vec, const CTempString& val,
                         ECase use_case)
{
    // An empty vector simply yields no iterations
    for (vector<string>::const_iterator it = vec.begin();
         it != vec.end();  ++it) {
        const string& candidate = *it;
        if (Equal(candidate, val, use_case)) {
            return &candidate;
        }
    }
    return NULL;
}
| 2737 |
|
|
| 2738 |
|
|
| 2739 |
|
/// Determine how many characters at the end of 's1' are identical to the
/// characters at the beginning of 's2'.
///
/// @param s1  String whose tail is examined.
/// @param s2  String whose head is examined.
/// @return    Length of the longest suffix of 's1' that is also a prefix
///            of 's2'; 0 if there is none or either string is empty.
SIZE_TYPE NStr::CommonOverlapSize(const CTempString& s1, const CTempString& s2)
{
    const SIZE_TYPE len1 = s1.length();
    const SIZE_TYPE len2 = s2.length();

    // Eliminate the null case
    if (len1 == 0  ||  len2 == 0) {
        return 0;
    }
    const SIZE_TYPE len = min(len1, len2);

    // Truncate the longer string: only the last 'len' symbols of 's1' and
    // the first 'len' symbols of 's2' can take part in an overlap
    CTempString t1, t2;
    if (len1 > len2) {
        t1 = s1.substr(len1 - len, len);
        t2 = s2;
    } else {
        t1 = s1;
        t2 = s2.substr(0, len);
    }
    // Quick check for the worst case
    if (memcmp(t1.data(), t2.data(), len) == 0) {
        return len;
    }

    // Start by looking for a single character match
    // and increase length until no match is found.
    // Performance analysis: http://neil.fraser.name/news/2010/11/04/
    SIZE_TYPE best = 0;
    SIZE_TYPE n = 1;
    for (;;) {
        // Right 'n' symbols of 't1'
        CTempString pattern(t1.data() + len - n, n);
        SIZE_TYPE pos = t2.find(pattern);
        if (pos == NPOS) {
            return best;
        }
        // The suffix occurs in 't2' at offset 'pos', so an overlap that
        // includes this occurrence must be exactly 'pos' symbols longer.
        n += pos;
        // Compare the suffix of the *new* length against the head of 't2'.
        // (Starting the comparison at pattern.data(), i.e. at the old,
        // shorter suffix, would run past the end of 't1'.)
        if (pos == 0  ||  memcmp(t1.data() + len - n, t2.data(), n) == 0) {
            best = n;
            n++;
        }
    }
    // Unreachable
    return best;
}
| 2785 |
|
|
| 2786 |
|
|
| 2787 |
template <class TStr> |
template <class TStr> |
| 2788 |
TStr s_TruncateSpaces(const TStr& str, NStr::ETrunc where, |
TStr s_TruncateSpaces(const TStr& str, NStr::ETrunc where, |
| 2789 |
const TStr& empty_str) |
const TStr& empty_str) |
| 2803 |
} |
} |
| 2804 |
SIZE_TYPE end = length; |
SIZE_TYPE end = length; |
| 2805 |
if ( where == NStr::eTrunc_End || where == NStr::eTrunc_Both ) { |
if ( where == NStr::eTrunc_End || where == NStr::eTrunc_Both ) { |
| 2806 |
_ASSERT(end > beg); |
_ASSERT(beg < end); |
| 2807 |
for (--end; isspace((unsigned char)str[end]); --end) { |
while (isspace((unsigned char) str[--end])) { |
| 2808 |
if (end == beg) { |
if (beg == end) { |
| 2809 |
return empty_str; |
return empty_str; |
| 2810 |
} |
} |
| 2811 |
} |
} |
| 2812 |
_ASSERT(end >= beg && !isspace((unsigned char) str[end])); |
_ASSERT(beg <= end && !isspace((unsigned char) str[end])); |
| 2813 |
++end; |
++end; |
| 2814 |
} |
} |
| 2815 |
_ASSERT(beg <= end); |
_ASSERT(beg < end && end <= length); |
| 2816 |
if (beg == end) { |
if ( beg | (end - length) ) { // if either beg != 0 or end != length |
|
return empty_str; |
|
|
} |
|
|
else if ( beg || (end - length) ) { |
|
|
// if either beg != 0 or end != length |
|
| 2817 |
return str.substr(beg, end - beg); |
return str.substr(beg, end - beg); |
| 2818 |
} |
} |
| 2819 |
else { |
else { |
| 2821 |
} |
} |
| 2822 |
} |
} |
| 2823 |
|
|
|
|
|
| 2824 |
string NStr::TruncateSpaces(const string& str, ETrunc where) |
string NStr::TruncateSpaces(const string& str, ETrunc where) |
| 2825 |
{ |
{ |
| 2826 |
return s_TruncateSpaces(str, where, kEmptyStr); |
return s_TruncateSpaces(str, where, kEmptyStr); |
| 2827 |
} |
} |
| 2828 |
|
|
| 2829 |
CTempString NStr::TruncateSpaces(const CTempString& str, ETrunc where) |
CTempString NStr::TruncateSpaces_Unsafe(const CTempString& str, ETrunc where) |
| 2830 |
{ |
{ |
| 2831 |
return s_TruncateSpaces(str, where, CTempString()); |
return s_TruncateSpaces(str, where, CTempString()); |
| 2832 |
} |
} |
| 2833 |
|
|
| 2834 |
CTempString NStr::TruncateSpaces(const char* str, ETrunc where) |
//CTempString NStr::TruncateSpaces_Unsafe(const char* str, ETrunc where) |
| 2835 |
|
//{ |
| 2836 |
|
// return s_TruncateSpaces(CTempString(str), where, CTempString()); |
| 2837 |
|
//} |
| 2838 |
|
|
| 2839 |
|
void NStr::TruncateSpacesInPlace(CTempString& str, ETrunc where) |
| 2840 |
{ |
{ |
| 2841 |
return s_TruncateSpaces(CTempString(str), where, CTempString()); |
str = s_TruncateSpaces(str, where, CTempString()); |
| 2842 |
} |
} |
| 2843 |
|
|
|
|
|
| 2844 |
void NStr::TruncateSpacesInPlace(string& str, ETrunc where) |
void NStr::TruncateSpacesInPlace(string& str, ETrunc where) |
| 2845 |
{ |
{ |
| 2846 |
SIZE_TYPE length = str.length(); |
SIZE_TYPE length = str.length(); |
| 2864 |
if ( where == eTrunc_End || where == eTrunc_Both ) { |
if ( where == eTrunc_End || where == eTrunc_Both ) { |
| 2865 |
// It's better to use str.data()[] to check string characters |
// It's better to use str.data()[] to check string characters |
| 2866 |
// to avoid implicit modification of the string by non-const operator[] |
// to avoid implicit modification of the string by non-const operator[] |
| 2867 |
_ASSERT(end > beg); |
_ASSERT(beg < end); |
| 2868 |
while (isspace((unsigned char) str.data()[--end])) { |
while (isspace((unsigned char) str.data()[--end])) { |
| 2869 |
if (end == beg) { |
if (beg == end) { |
| 2870 |
str.erase(); |
str.erase(); |
| 2871 |
return; |
return; |
| 2872 |
} |
} |
| 2873 |
} |
} |
| 2874 |
_ASSERT(end >= beg && !isspace((unsigned char) str.data()[end])); |
_ASSERT(beg <= end && !isspace((unsigned char) str.data()[end])); |
| 2875 |
++end; |
++end; |
| 2876 |
} |
} |
| 2877 |
_ASSERT(beg < end); |
_ASSERT(beg < end && end <= length); |
| 2878 |
|
|
| 2879 |
#if defined(NCBI_COMPILER_GCC) && (NCBI_COMPILER_VERSION == 304) |
#if defined(NCBI_COMPILER_GCC) && (NCBI_COMPILER_VERSION == 304) |
| 2880 |
// work around a library bug |
// work around a library bug |
| 2881 |
str.replace(end, length, kEmptyStr); |
str.replace(end, length, kEmptyStr); |
| 2882 |
str.replace(0, beg, kEmptyStr); |
str.replace(0, beg, kEmptyStr); |
| 2883 |
#else |
#else |
| 2884 |
if ( (beg - 0) | (end - length) ) { // if either beg != 0 or end != length |
if ( beg | (end - length) ) { // if either beg != 0 or end != length |
| 2885 |
str.replace(0, length, str, beg, end - beg); |
str.replace(0, length, str, beg, end - beg); |
| 2886 |
} |
} |
| 2887 |
#endif |
#endif |
| 2897 |
NCBI_THROW2(CStringException, eBadArgs, |
NCBI_THROW2(CStringException, eBadArgs, |
| 2898 |
"NStr::Replace(): source and destination are the same",0); |
"NStr::Replace(): source and destination are the same",0); |
| 2899 |
} |
} |
| 2900 |
|
if ( start_pos + search.size() > src.size() || search == replace ) { |
| 2901 |
dst = src; |
dst = src; |
|
|
|
|
if ( start_pos + search.size() > src.size() || |
|
|
search == replace ) |
|
| 2902 |
return dst; |
return dst; |
| 2903 |
|
} |
| 2904 |
|
|
| 2905 |
|
// Use different algorithms depending on size or 'search' and 'replace' |
| 2906 |
|
// for better performance (and for big strings only! > 16KB). |
| 2907 |
|
|
| 2908 |
|
if (replace.size() > search.size() && src.size() > 16*1024) { |
| 2909 |
|
// Replacing string is longer -- worst case. |
| 2910 |
|
// Try to avoid memory reallocations inside std::string. |
| 2911 |
|
// Count replacing strings first |
| 2912 |
|
SIZE_TYPE n = 0; |
| 2913 |
|
SIZE_TYPE start_orig = start_pos; |
| 2914 |
|
for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){ |
| 2915 |
|
start_pos = src.find(search, start_pos); |
| 2916 |
|
if (start_pos == NPOS) |
| 2917 |
|
break; |
| 2918 |
|
n++; |
| 2919 |
|
start_pos += search.size(); |
| 2920 |
|
} |
| 2921 |
|
// Reallocate memory for destination string |
| 2922 |
|
dst.resize(src.size() - n*search.size() + n*replace.size()); |
| 2923 |
|
|
| 2924 |
|
// Use copy() to create destination string |
| 2925 |
|
start_pos = start_orig; |
| 2926 |
|
string::const_iterator src_start = src.begin(); |
| 2927 |
|
string::const_iterator src_end = src.begin(); |
| 2928 |
|
string::iterator dst_pos = dst.begin(); |
| 2929 |
|
|
| 2930 |
|
for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){ |
| 2931 |
|
start_pos = src.find(search, start_pos); |
| 2932 |
|
if (start_pos == NPOS) |
| 2933 |
|
break; |
| 2934 |
|
// Copy from source string up to 'search' |
| 2935 |
|
src_end = src.begin() + start_pos; |
| 2936 |
|
copy(src_start, src_end, dst_pos); |
| 2937 |
|
dst_pos += (src_end - src_start); |
| 2938 |
|
// Append 'replace' |
| 2939 |
|
copy(replace.begin(), replace.end(), dst_pos); |
| 2940 |
|
dst_pos += replace.size(); |
| 2941 |
|
start_pos += search.size(); |
| 2942 |
|
src_start = src.begin() + start_pos; |
| 2943 |
|
} |
| 2944 |
|
// Copy source's string tail to the place |
| 2945 |
|
copy(src_start, src.end(), dst_pos); |
| 2946 |
|
|
| 2947 |
|
} else { |
| 2948 |
|
// Replacing string is shorter or have the same length. |
| 2949 |
|
// ReplaceInPlace() can be faster on some platform, but not much, |
| 2950 |
|
// so we use regular algorithm even for equal lengths here. |
| 2951 |
|
dst = src; |
| 2952 |
for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){ |
for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){ |
| 2953 |
start_pos = dst.find(search, start_pos); |
start_pos = dst.find(search, start_pos); |
| 2954 |
if (start_pos == NPOS) |
if (start_pos == NPOS) |
| 2956 |
dst.replace(start_pos, search.size(), replace); |
dst.replace(start_pos, search.size(), replace); |
| 2957 |
start_pos += replace.size(); |
start_pos += replace.size(); |
| 2958 |
} |
} |
| 2959 |
|
} |
| 2960 |
return dst; |
return dst; |
| 2961 |
} |
} |
| 2962 |
|
|
| 2997 |
} |
} |
| 2998 |
|
|
| 2999 |
|
|
| 3000 |
list<string>& NStr::Split(const string& str, const string& delim, |
template<typename TString, typename TContainer> |
| 3001 |
list<string>& arr, EMergeDelims merge, |
TContainer& s_Split(const TString& str, const TString& delim, |
| 3002 |
vector<SIZE_TYPE>* token_pos) |
TContainer& arr, NStr::TSplitFlags flags, |
| 3003 |
|
vector<SIZE_TYPE>* token_pos, |
| 3004 |
|
CTempString_Storage* storage = NULL) |
| 3005 |
{ |
{ |
|
|
|
|
typedef list<string> TContainer; |
|
| 3006 |
typedef CStrTokenPosAdapter<vector<SIZE_TYPE> > TPosArray; |
typedef CStrTokenPosAdapter<vector<SIZE_TYPE> > TPosArray; |
| 3007 |
typedef CStrDummyTargetReserve<string, TContainer, |
typedef CStrDummyTargetReserve<TContainer, TPosArray> TReserve; |
| 3008 |
TPosArray, CStrDummyTokenCount<string > > TReserve; |
typedef CStrTokenize<TString, TContainer, TPosArray, |
| 3009 |
typedef CStrTokenize<string, TContainer, |
CStrDummyTokenCount, TReserve> TSplitter; |
| 3010 |
TPosArray, |
|
|
CStrDummyTokenCount<string>, |
|
|
TReserve> TSplitter; |
|
| 3011 |
TPosArray token_pos_proxy(token_pos); |
TPosArray token_pos_proxy(token_pos); |
| 3012 |
TSplitter::Do(str, delim, arr, |
TSplitter splitter(str, delim, flags, storage); |
| 3013 |
(CStrTokenizeBase::EMergeDelims)merge, |
splitter.Do(arr, token_pos_proxy, kEmptyStr); |
|
token_pos_proxy, |
|
|
kEmptyStr); |
|
|
return arr; |
|
|
/* |
|
|
// Special cases |
|
|
if (str.empty()) { |
|
|
return arr; |
|
|
} else if (delim.empty()) { |
|
|
arr.push_back(str); |
|
|
if (token_pos) |
|
|
token_pos->push_back(0); |
|
| 3014 |
return arr; |
return arr; |
| 3015 |
} |
} |
| 3016 |
|
|
| 3017 |
for (SIZE_TYPE pos = 0; ; ) { |
|
| 3018 |
SIZE_TYPE prev_pos = (merge == eMergeDelims |
list<string>& NStr::Split(const CTempString& str, const CTempString& delim, |
| 3019 |
? str.find_first_not_of(delim, pos) |
list<string>& arr, TSplitFlags flags, |
| 3020 |
: pos); |
vector<SIZE_TYPE>* token_pos) |
| 3021 |
if (prev_pos == NPOS) { |
{ |
| 3022 |
break; |
return s_Split(str, delim, arr, flags, token_pos); |
|
} |
|
|
pos = str.find_first_of(delim, prev_pos); |
|
|
if (pos == NPOS) { |
|
|
// Avoid using temporary objects |
|
|
// ~ arr.push_back(str.substr(prev_pos)); |
|
|
arr.push_back(kEmptyStr); |
|
|
arr.back().assign(str, prev_pos, str.length() - prev_pos); |
|
|
if (token_pos) |
|
|
token_pos->push_back(prev_pos); |
|
|
break; |
|
|
} else { |
|
|
// Avoid using temporary objects |
|
|
// ~ arr.push_back(str.substr(prev_pos, pos - prev_pos)); |
|
|
arr.push_back(kEmptyStr); |
|
|
arr.back().assign(str, prev_pos, pos - prev_pos); |
|
|
if (token_pos) |
|
|
token_pos->push_back(prev_pos); |
|
|
++pos; |
|
|
} |
|
|
} |
|
|
return arr; |
|
|
*/ |
|
| 3023 |
} |
} |
| 3024 |
|
|
| 3025 |
|
list<CTempStringEx>& NStr::Split(const CTempString& str, |
| 3026 |
|
const CTempString& delim, |
| 3027 |
|
list<CTempStringEx>& arr, TSplitFlags flags, |
| 3028 |
|
vector<SIZE_TYPE>* token_pos, |
| 3029 |
|
CTempString_Storage* storage) |
| 3030 |
|
{ |
| 3031 |
|
return s_Split(str, delim, arr, flags, token_pos, storage); |
| 3032 |
|
} |
| 3033 |
|
|
| 3034 |
vector<string>& NStr::Tokenize(const string& str, const string& delim, |
list<CTempString>& NStr::Split(const CTempString& str, |
| 3035 |
vector<string>& arr, EMergeDelims merge, |
const CTempString& delim, |
| 3036 |
|
list<CTempString>& arr, EMergeDelims merge, |
| 3037 |
vector<SIZE_TYPE>* token_pos) |
vector<SIZE_TYPE>* token_pos) |
| 3038 |
{ |
{ |
| 3039 |
typedef vector<string> TContainer; |
vector<CTempStringEx> arr2; |
| 3040 |
typedef CStrTokenPosAdapter<vector<SIZE_TYPE> > TPosArray; |
Tokenize(str, delim, arr2, |
| 3041 |
typedef CStrTargetReserve<string, TContainer, |
(merge == eMergeDelims) ? fSplit_MergeDelims : 0, token_pos); |
| 3042 |
TPosArray, CStringTokenCount> TReserve; |
ITERATE (vector<CTempStringEx>, it, arr2) { |
| 3043 |
typedef CStrTokenize<string, TContainer, |
arr.push_back(*it); |
| 3044 |
TPosArray, |
} |
|
CStringTokenCount, |
|
|
TReserve> TSplitter; |
|
|
TPosArray token_pos_proxy(token_pos); |
|
|
TSplitter::Do(str, delim, arr, |
|
|
(CStrTokenizeBase::EMergeDelims)merge, |
|
|
token_pos_proxy, |
|
|
kEmptyStr); |
|
|
return arr; |
|
|
/* |
|
|
// Special cases |
|
|
if (str.empty()) { |
|
|
return arr; |
|
|
} else if (delim.empty()) { |
|
|
arr.push_back(str); |
|
|
if (token_pos) |
|
|
token_pos->push_back(0); |
|
| 3045 |
return arr; |
return arr; |
| 3046 |
} |
} |
| 3047 |
|
|
|
SIZE_TYPE pos, prev_pos; |
|
| 3048 |
|
|
| 3049 |
// Reserve vector size only for empty vectors. |
vector<string>& NStr::Tokenize(const CTempString& str, const CTempString& delim, |
| 3050 |
// For vectors which already have items this usualy works slower. |
vector<string>& arr, TSplitFlags flags, |
| 3051 |
if ( !arr.size() ) { |
vector<SIZE_TYPE>* token_pos) |
| 3052 |
// Count number of tokens to determine the array size |
{ |
| 3053 |
size_t tokens = 0; |
return s_Split(str, delim, arr, flags, token_pos); |
|
|
|
|
for (pos = 0;;) { |
|
|
prev_pos = (merge == NStr::eMergeDelims ? |
|
|
str.find_first_not_of(delim, pos) : pos); |
|
|
if (prev_pos == NPOS) { |
|
|
break; |
|
|
} |
|
|
pos = str.find_first_of(delim, prev_pos); |
|
|
++tokens; |
|
|
if (pos == NPOS) { |
|
|
break; |
|
|
} |
|
|
++pos; |
|
| 3054 |
} |
} |
|
arr.reserve(tokens); |
|
|
if (token_pos) |
|
|
token_pos->reserve(tokens); |
|
| 3055 |
|
|
| 3056 |
|
vector<CTempStringEx>& NStr::Tokenize(const CTempString& str, |
| 3057 |
|
const CTempString& delim, |
| 3058 |
|
vector<CTempStringEx>& arr, |
| 3059 |
|
TSplitFlags flags, |
| 3060 |
|
vector<SIZE_TYPE>* token_pos, |
| 3061 |
|
CTempString_Storage* storage) |
| 3062 |
|
{ |
| 3063 |
|
return s_Split(str, delim, arr, flags, token_pos, storage); |
| 3064 |
} |
} |
| 3065 |
|
|
| 3066 |
// Tokenization |
vector<CTempString>& NStr::Tokenize(const CTempString& str, |
| 3067 |
for (pos = 0;;) { |
const CTempString& delim, |
| 3068 |
prev_pos = (merge == eMergeDelims ? |
vector<CTempString>& arr, |
| 3069 |
str.find_first_not_of(delim, pos) : pos); |
EMergeDelims merge, |
| 3070 |
if (prev_pos == NPOS) { |
vector<SIZE_TYPE>* token_pos) |
| 3071 |
break; |
{ |
| 3072 |
} |
vector<CTempStringEx> arr2; |
| 3073 |
pos = str.find_first_of(delim, prev_pos); |
Tokenize(str, delim, arr2, |
| 3074 |
if (pos == NPOS) { |
(merge == eMergeDelims) ? fSplit_MergeDelims : 0, token_pos); |
| 3075 |
// Avoid using temporary objects |
arr.reserve(arr.size() + arr2.size()); |
| 3076 |
// ~ arr.push_back(str.substr(prev_pos)); |
ITERATE (vector<CTempStringEx>, it, arr2) { |
| 3077 |
arr.push_back(kEmptyStr); |
arr.push_back(*it); |
|
arr.back().assign(str, prev_pos, str.length() - prev_pos); |
|
|
if (token_pos) |
|
|
token_pos->push_back(prev_pos); |
|
|
break; |
|
|
} else { |
|
|
// Avoid using temporary objects |
|
|
// ~ arr.push_back(str.substr(prev_pos, pos - prev_pos)); |
|
|
arr.push_back(kEmptyStr); |
|
|
arr.back().assign(str, prev_pos, pos - prev_pos); |
|
|
if (token_pos) |
|
|
token_pos->push_back(prev_pos); |
|
|
++pos; |
|
|
} |
|
| 3078 |
} |
} |
| 3079 |
return arr; |
return arr; |
|
*/ |
|
| 3080 |
} |
} |
| 3081 |
|
|
| 3082 |
|
|
| 3083 |
vector<string>& NStr::TokenizePattern(const string& str, |
vector<CTempString>& NStr::TokenizePattern(const CTempString& str, |
| 3084 |
const string& pattern, |
const CTempString& delim, |
| 3085 |
vector<string>& arr, EMergeDelims merge, |
vector<CTempString>& arr, |
| 3086 |
|
EMergeDelims merge, |
| 3087 |
vector<SIZE_TYPE>* token_pos) |
vector<SIZE_TYPE>* token_pos) |
| 3088 |
{ |
{ |
| 3089 |
// Special cases |
vector<CTempStringEx> arr2; |
| 3090 |
if (str.empty()) { |
Tokenize(str, delim, arr2, |
| 3091 |
return arr; |
fSplit_ByPattern |
| 3092 |
} else if (pattern.empty()) { |
| ((merge == eMergeDelims) ? fSplit_MergeDelims : 0), |
| 3093 |
arr.push_back(str); |
token_pos); |
| 3094 |
if (token_pos) |
arr.reserve(arr.size() + arr2.size()); |
| 3095 |
token_pos->push_back(0); |
ITERATE (vector<CTempStringEx>, it, arr2) { |
| 3096 |
|
arr.push_back(*it); |
| 3097 |
|
} |
| 3098 |
return arr; |
return arr; |
| 3099 |
} |
} |
| 3100 |
|
|
|
SIZE_TYPE pos, prev_pos; |
|
| 3101 |
|
|
| 3102 |
// Reserve vector size only for empty vectors. |
bool NStr::SplitInTwo(const CTempString& str, |
| 3103 |
// For vectors which already have items this usualy works slower. |
const CTempString& delim, |
| 3104 |
if ( !arr.size() ) { |
string& str1, string& str2, TSplitFlags flags) |
| 3105 |
// Count number of tokens to determine the array size |
{ |
| 3106 |
size_t tokens = 0; |
CTempStringEx ts1, ts2; |
| 3107 |
for (pos = 0, prev_pos = 0; ; ) { |
bool result = SplitInTwo(str, delim, ts1, ts2, flags); |
| 3108 |
pos = str.find(pattern, prev_pos); |
str1 = ts1; |
| 3109 |
if ( merge != eMergeDelims || pos > prev_pos ) { |
str2 = ts2; |
| 3110 |
if (pos == NPOS) { |
return result; |
|
if (merge != eMergeDelims || |
|
|
prev_pos < str.length() ) { |
|
|
++tokens; |
|
|
} |
|
|
break; |
|
|
} |
|
|
++tokens; |
|
| 3111 |
} |
} |
| 3112 |
prev_pos = pos + pattern.length(); |
|
| 3113 |
|
bool NStr::SplitInTwo(const CTempString& str, const CTempString& delim, |
| 3114 |
|
CTempString& str1, CTempString& str2, EMergeDelims merge) |
| 3115 |
|
{ |
| 3116 |
|
CTempStringEx tsx1, tsx2; |
| 3117 |
|
bool result = SplitInTwo(str, delim, tsx1, tsx2, |
| 3118 |
|
(merge == eMergeDelims) ? fSplit_MergeDelims : 0); |
| 3119 |
|
str1 = tsx1; |
| 3120 |
|
str2 = tsx2; |
| 3121 |
|
return result; |
| 3122 |
} |
} |
| 3123 |
arr.reserve(tokens); |
|
| 3124 |
if (token_pos) |
bool NStr::SplitInTwo(const CTempString& str, const CTempString& delim, |
| 3125 |
token_pos->reserve(tokens); |
CTempStringEx& str1, CTempStringEx& str2, |
| 3126 |
|
TSplitFlags flags, |
| 3127 |
|
CTempString_Storage* storage) |
| 3128 |
|
{ |
| 3129 |
|
if ((flags & (fSplit_CanEscape | fSplit_CanQuote)) && !storage) { |
| 3130 |
|
NCBI_THROW2(CStringException, eBadArgs, |
| 3131 |
|
"NStr::SplitInTwo(): the selected flags require non-NULL storage", |
| 3132 |
|
0); |
| 3133 |
} |
} |
| 3134 |
|
|
| 3135 |
// Tokenization |
typedef CStrTokenize<CTempString, int, CStrDummyTokenPos, |
| 3136 |
for (pos = 0, prev_pos = 0; ; ) { |
CStrDummyTokenCount, |
| 3137 |
pos = str.find(pattern, prev_pos); |
CStrDummyTargetReserve<int, int> > TSplitter; |
| 3138 |
if ( merge != eMergeDelims || pos > prev_pos ) { |
|
| 3139 |
if (pos == NPOS) { |
CTempStringList part_collector(storage); |
| 3140 |
if (merge != eMergeDelims || |
TSplitter splitter(str, delim, flags, storage); |
| 3141 |
prev_pos < str.length() ) { |
bool found_delim; |
| 3142 |
// Avoid using temporary objects |
|
| 3143 |
// ~ arr.push_back(str.substr(prev_pos)); |
splitter.SkipDelims(); |
| 3144 |
arr.push_back(kEmptyStr); |
if (splitter.GetPos() == 0) { |
| 3145 |
arr.back().assign(str, prev_pos, |
splitter.Advance(&part_collector); |
| 3146 |
str.length() - prev_pos); |
part_collector.Join(&str1); |
| 3147 |
if (token_pos) |
part_collector.Clear(); |
| 3148 |
token_pos->push_back(prev_pos); |
if (splitter.AtEnd()) { |
| 3149 |
|
// check for trailing delimiter |
| 3150 |
|
if ((flags & fSplit_ByPattern) != 0) { |
| 3151 |
|
found_delim = NStr::EndsWith(str, delim); |
| 3152 |
|
} else { |
| 3153 |
|
found_delim = ( !str.empty() |
| 3154 |
|
&& delim.find(str[str.size()-1]) != NPOS); |
| 3155 |
} |
} |
| 3156 |
break; |
if (found_delim && (flags & fSplit_CanEscape) != 0) { |
| 3157 |
|
SIZE_TYPE dsz = ((flags & fSplit_ByPattern) == 0 ? 1 |
| 3158 |
|
: delim.size()); |
| 3159 |
|
if (str.size() > dsz && str[str.size() - dsz - 1] == '\\') { |
| 3160 |
|
found_delim = false; // actually escaped |
| 3161 |
} |
} |
|
// Avoid using temporary objects |
|
|
// ~ arr.push_back(str.substr(prev_pos, pos - prev_pos)); |
|
|
arr.push_back(kEmptyStr); |
|
|
arr.back().assign(str, prev_pos, pos - prev_pos); |
|
|
if (token_pos) |
|
|
token_pos->push_back(prev_pos); |
|
| 3162 |
} |
} |
| 3163 |
prev_pos = pos + pattern.length(); |
} else { |
| 3164 |
|
found_delim = true; |
| 3165 |
} |
} |
| 3166 |
return arr; |
} else { |
| 3167 |
|
found_delim = true; |
| 3168 |
|
str1.clear(); |
| 3169 |
} |
} |
| 3170 |
|
|
| 3171 |
|
// don't need further splitting, just quote and escape parsing |
| 3172 |
|
splitter.SetDelim(kEmptyStr); |
| 3173 |
|
splitter.Advance(&part_collector); |
| 3174 |
|
part_collector.Join(&str2); |
| 3175 |
|
|
| 3176 |
bool NStr::SplitInTwo(const string& str, const string& delim, |
return found_delim; |
|
string& str1, string& str2) |
|
|
{ |
|
|
SIZE_TYPE delim_pos = str.find_first_of(delim); |
|
|
if (NPOS == delim_pos) { // only one piece. |
|
|
str1 = str; |
|
|
str2 = kEmptyStr; |
|
|
return false; |
|
|
} |
|
|
str1.assign(str, 0, delim_pos); |
|
|
// skip only one delimiter character. |
|
|
str2.assign(str, delim_pos + 1, str.length() - delim_pos - 1); |
|
|
|
|
|
return true; |
|
| 3177 |
} |
} |
| 3178 |
|
|
| 3179 |
|
|
| 3180 |
template <typename T> |
template <typename T> |
| 3181 |
string s_NStr_Join(const T& arr, const string& delim) |
string s_NStr_Join(const T& arr, const CTempString& delim) |
| 3182 |
{ |
{ |
| 3183 |
if (arr.empty()) { |
if (arr.empty()) { |
| 3184 |
return kEmptyStr; |
return kEmptyStr; |
| 3201 |
} |
} |
| 3202 |
|
|
| 3203 |
|
|
| 3204 |
string NStr::Join(const list<string>& arr, const string& delim) |
string NStr::Join(const list<string>& arr, const CTempString& delim) |
| 3205 |
{ |
{ |
| 3206 |
return s_NStr_Join(arr, delim); |
return s_NStr_Join(arr, delim); |
| 3207 |
} |
} |
| 3208 |
|
|
| 3209 |
|
|
| 3210 |
string NStr::Join(const vector<string>& arr, const string& delim) |
string NStr::Join(const list<CTempString>& arr, const CTempString& delim) |
| 3211 |
{ |
{ |
| 3212 |
return s_NStr_Join(arr, delim); |
return s_NStr_Join(arr, delim); |
| 3213 |
} |
} |
| 3214 |
|
|
| 3215 |
|
|
| 3216 |
enum ELanguage { |
string NStr::Join(const vector<string>& arr, const CTempString& delim) |
| 3217 |
eLanguage_C, |
{ |
| 3218 |
eLanguage_Javascript |
return s_NStr_Join(arr, delim); |
| 3219 |
}; |
} |
| 3220 |
|
|
| 3221 |
|
|
| 3222 |
static inline bool s_IsQuoted(char c, ELanguage lang) |
string NStr::Join(const vector<CTempString>& arr, const CTempString& delim) |
| 3223 |
{ |
{ |
| 3224 |
return (c == '\t' || c == '\v' || c == '\b' || |
return s_NStr_Join(arr, delim); |
|
c == '\r' || c == '\f' || c == '\a' || |
|
|
c == '\n' || c == '\\' || c == '\'' || |
|
|
c == '"' || (c == '&' && lang == eLanguage_Javascript) || |
|
|
!isprint((unsigned char) c) ? true : false); |
|
| 3225 |
} |
} |
| 3226 |
|
|
| 3227 |
|
|
| 3228 |
static string s_PrintableString(const string& str, |
enum ELanguage { |
| 3229 |
|
eLanguage_C, |
| 3230 |
|
eLanguage_Javascript |
| 3231 |
|
}; |
| 3232 |
|
|
| 3233 |
|
|
| 3234 |
|
static string s_PrintableString(const CTempString& str, |
| 3235 |
NStr::TPrintableMode mode, |
NStr::TPrintableMode mode, |
| 3236 |
ELanguage lang) |
ELanguage lang) |
| 3237 |
{ |
{ |
| 3239 |
SIZE_TYPE i, j = 0; |
SIZE_TYPE i, j = 0; |
| 3240 |
|
|
| 3241 |
for (i = 0; i < str.size(); i++) { |
for (i = 0; i < str.size(); i++) { |
| 3242 |
|
bool octal = false; |
| 3243 |
char c = str[i]; |
char c = str[i]; |
| 3244 |
switch (c) { |
switch (c) { |
| 3245 |
case '\t': |
case '\t': |
| 3273 |
continue; |
continue; |
| 3274 |
break; |
break; |
| 3275 |
default: |
default: |
| 3276 |
if (isprint((unsigned char) c)) |
if (!isascii((unsigned char) c)) { |
| 3277 |
continue; |
if (mode & NStr::fNonAscii_Quote) { |
| 3278 |
|
octal = true; |
| 3279 |
|
break; |
| 3280 |
|
} |
| 3281 |
|
} |
| 3282 |
|
if (!isprint((unsigned char) c)) { |
| 3283 |
|
octal = true; |
| 3284 |
break; |
break; |
| 3285 |
} |
} |
| 3286 |
|
continue; |
| 3287 |
|
} |
| 3288 |
if (!out.get()) { |
if (!out.get()) { |
| 3289 |
out.reset(new CNcbiOstrstream); |
out.reset(new CNcbiOstrstream); |
| 3290 |
} |
} |
| 3292 |
out->write(str.data() + j, i - j); |
out->write(str.data() + j, i - j); |
| 3293 |
} |
} |
| 3294 |
out->put('\\'); |
out->put('\\'); |
| 3295 |
if (!isprint((unsigned char) c) && c != '\n') { |
if (c == '\n') { |
| 3296 |
|
out->write("n\\\n", 3); |
| 3297 |
|
} else if (octal) { |
| 3298 |
bool reduce; |
bool reduce; |
| 3299 |
if (!(mode & NStr::fPrintable_Full)) { |
if (!(mode & NStr::fPrintable_Full)) { |
| 3300 |
reduce = (i == str.size() - 1 || s_IsQuoted(str[i + 1], lang) |
reduce = (i == str.size() - 1 || |
| 3301 |
|| str[i + 1] < '0' || str[i + 1] > '7'); |
str[i + 1] < '0' || str[i + 1] > '7' ? true : false); |
| 3302 |
} else { |
} else { |
| 3303 |
reduce = false; |
reduce = false; |
| 3304 |
} |
} |
| 3305 |
unsigned char v; |
unsigned char v; |
| 3306 |
char octal[3]; |
char val[3]; |
| 3307 |
int k = 0; |
int k = 0; |
| 3308 |
v = (unsigned char) c >> 6; |
v = (unsigned char) c >> 6; |
| 3309 |
if (v || !reduce) { |
if (v || !reduce) { |
| 3310 |
octal[k++] = '0' + v; |
val[k++] = '0' + v; |
| 3311 |
reduce = false; |
reduce = false; |
| 3312 |
} |
} |
| 3313 |
v = ((unsigned char) c >> 3) & 7; |
v = ((unsigned char) c >> 3) & 7; |
| 3314 |
if (v || !reduce) { |
if (v || !reduce) { |
| 3315 |
octal[k++] = '0' + v; |
val[k++] = '0' + v; |
| 3316 |
} |
} |
| 3317 |
v = (unsigned char) c & 7; |
v = (unsigned char) c & 7; |
| 3318 |
octal [k++] = '0' + v; |
val [k++] = '0' + v; |
| 3319 |
out->write(octal, k); |
out->write(val, k); |
| 3320 |
} else { |
} else { |
| 3321 |
out->put(c); |
out->put(c); |
| 3322 |
} |
} |
| 3331 |
return CNcbiOstrstreamToString(*out); |
return CNcbiOstrstreamToString(*out); |
| 3332 |
} |
} |
| 3333 |
|
|
| 3334 |
// All characters are good - return original string |
// All characters are good - return (a copy of) the original string |
| 3335 |
return str; |
return str; |
| 3336 |
} |
} |
| 3337 |
|
|
| 3338 |
|
|
| 3339 |
string NStr::PrintableString(const string& str, |
string NStr::PrintableString(const CTempString& str, |
| 3340 |
NStr::TPrintableMode mode) |
NStr::TPrintableMode mode) |
| 3341 |
{ |
{ |
| 3342 |
return s_PrintableString(str, mode, eLanguage_C); |
return s_PrintableString(str, mode, eLanguage_C); |
| 3343 |
} |
} |
| 3344 |
|
|
| 3345 |
|
|
| 3346 |
string NStr::JavaScriptEncode(const string& str) |
string NStr::JavaScriptEncode(const CTempString& str) |
| 3347 |
{ |
{ |
| 3348 |
return s_PrintableString(str, eNewLine_Quote, eLanguage_Javascript); |
return s_PrintableString(str, |
| 3349 |
|
fNewLine_Quote | fNonAscii_Passthru, |
| 3350 |
|
eLanguage_Javascript); |
| 3351 |
|
} |
| 3352 |
|
|
| 3353 |
|
|
| 3354 |
|
string NStr::CEncode(const CTempString& str, EQuoted quoted) |
| 3355 |
|
{ |
| 3356 |
|
switch (quoted) { |
| 3357 |
|
case eNotQuoted: |
| 3358 |
|
return PrintableString(str); |
| 3359 |
|
case eQuoted: |
| 3360 |
|
return '"' + PrintableString(str) + '"'; |
| 3361 |
|
} |
| 3362 |
|
_TROUBLE; |
| 3363 |
|
// Unreachable |
| 3364 |
|
return str; |
| 3365 |
|
} |
| 3366 |
|
|
| 3367 |
|
|
| 3368 |
|
string NStr::CParse(const CTempString& str, EQuoted quoted) |
| 3369 |
|
{ |
| 3370 |
|
if (quoted == eNotQuoted) { |
| 3371 |
|
return ParseEscapes(str); |
| 3372 |
|
} |
| 3373 |
|
_ASSERT(quoted == eQuoted); |
| 3374 |
|
|
| 3375 |
|
SIZE_TYPE pos; |
| 3376 |
|
SIZE_TYPE len = str.length(); |
| 3377 |
|
const char quote_char = '"'; |
| 3378 |
|
|
| 3379 |
|
if (len < 2 || str[0] != quote_char || str[len-1] != quote_char) { |
| 3380 |
|
NCBI_THROW2(CStringException, eFormat, |
| 3381 |
|
"The source string must start and finish with a double quote", 0); |
| 3382 |
} |
} |
| 3383 |
|
|
| 3384 |
string NStr::XmlEncode(const string& str) |
// Flag that next char is escaped, ignore it |
| 3385 |
|
bool escaped = false; |
| 3386 |
|
// We have a quote mark, start collect string chars |
| 3387 |
|
bool collect = true; |
| 3388 |
|
// Position of last quote |
| 3389 |
|
SIZE_TYPE last_quote = 0; |
| 3390 |
|
|
| 3391 |
|
string out; |
| 3392 |
|
out.reserve(str.size()); |
| 3393 |
|
|
| 3394 |
|
for (pos = 1; pos < len; ++pos) { |
| 3395 |
|
unsigned char ch = str[pos]; |
| 3396 |
|
if (ch == quote_char && !escaped) { |
| 3397 |
|
// Have a substring |
| 3398 |
|
CTempString sub(str.data() + last_quote + 1, pos - last_quote - 1); |
| 3399 |
|
if (collect) { |
| 3400 |
|
// Parse escape sequences and add it to result |
| 3401 |
|
out += ParseEscapes(sub); |
| 3402 |
|
} else { |
| 3403 |
|
// Possible we have adjacent strings ("A""B"). |
| 3404 |
|
if (pos != last_quote + 1) { |
| 3405 |
|
NCBI_THROW2(CStringException, eFormat, |
| 3406 |
|
"Quoted string format error", pos); |
| 3407 |
|
} |
| 3408 |
|
} |
| 3409 |
|
last_quote = pos; |
| 3410 |
|
collect = !collect; |
| 3411 |
|
} else { |
| 3412 |
|
escaped = ch == '\\' ? !escaped : false; |
| 3413 |
|
} |
| 3414 |
|
} |
| 3415 |
|
if (escaped || last_quote != len-1) { |
| 3416 |
|
NCBI_THROW2(CStringException, eFormat, |
| 3417 |
|
"Unterminated quoted string", str.length()); |
| 3418 |
|
} |
| 3419 |
|
return out; |
| 3420 |
|
} |
| 3421 |
|
|
| 3422 |
|
|
| 3423 |
|
string NStr::XmlEncode(const CTempString& str, EXmlEncode flags) |
| 3424 |
// http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent |
// http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent |
| 3425 |
{ |
{ |
| 3426 |
string result; |
string result; |
| 3427 |
SIZE_TYPE i; |
SIZE_TYPE i; |
| 3428 |
|
|
| 3429 |
|
// wild guess... |
| 3430 |
|
result.reserve(str.size()); |
| 3431 |
|
|
| 3432 |
for (i = 0; i < str.size(); i++) { |
for (i = 0; i < str.size(); i++) { |
| 3433 |
char c = str[i]; |
char c = str[i]; |
| 3434 |
switch ( c ) { |
switch ( c ) { |
| 3447 |
case '"': |
case '"': |
| 3448 |
result.append("""); |
result.append("""); |
| 3449 |
break; |
break; |
| 3450 |
|
case '-': |
| 3451 |
|
if (flags == eXmlEnc_CommentSafe) { |
| 3452 |
|
// translate double hyphen and ending hyphen |
| 3453 |
|
// http://www.w3.org/TR/xml11/#sec-comments |
| 3454 |
|
if (i+1 == str.size()) { |
| 3455 |
|
result.append("-"); |
| 3456 |
|
break; |
| 3457 |
|
} else if (str[i+1] == '-') { |
| 3458 |
|
++i; |
| 3459 |
|
result.append(1, c).append("-"); |
| 3460 |
|
break; |
| 3461 |
|
} |
| 3462 |
|
} |
| 3463 |
|
result.append(1, c); |
| 3464 |
|
break; |
| 3465 |
|
default: |
| 3466 |
|
if ((unsigned int)(c) < 0x20) { |
| 3467 |
|
const char* charmap = "0123456789abcdef"; |
| 3468 |
|
result.append("&#x"); |
| 3469 |
|
Uint1 ch = c; |
| 3470 |
|
unsigned hi = ch >> 4; |
| 3471 |
|
unsigned lo = ch & 0xF; |
| 3472 |
|
if ( hi ) { |
| 3473 |
|
result.append(1, charmap[hi]); |
| 3474 |
|
} |
| 3475 |
|
result.append(1, charmap[lo]).append(1, ';'); |
| 3476 |
|
} else { |
| 3477 |
|
result.append(1, c); |
| 3478 |
|
} |
| 3479 |
|
break; |
| 3480 |
|
} |
| 3481 |
|
} |
| 3482 |
|
return result; |
| 3483 |
|
} |
| 3484 |
|
|
| 3485 |
|
|
| 3486 |
|
string NStr::HtmlEncode(const CTempString& str, THtmlEncode flags) |
| 3487 |
|
{ |
| 3488 |
|
string result; |
| 3489 |
|
SIZE_TYPE i; |
| 3490 |
|
SIZE_TYPE semicolon = 0; |
| 3491 |
|
|
| 3492 |
|
// wild guess... |
| 3493 |
|
result.reserve(str.size()); |
| 3494 |
|
|
| 3495 |
|
for (i = 0; i < str.size(); i++) { |
| 3496 |
|
char c = str[i]; |
| 3497 |
|
switch ( c ) { |
| 3498 |
|
case '&': |
| 3499 |
|
{{ |
| 3500 |
|
result.append("&"); |
| 3501 |
|
// Check on HTML entity |
| 3502 |
|
bool is_entity = false; |
| 3503 |
|
if ((flags & fHtmlEnc_SkipEntities) && |
| 3504 |
|
(i+2 < str.size()) && (semicolon != NPOS)) { |
| 3505 |
|
|
| 3506 |
|
if ( i >= semicolon ) { |
| 3507 |
|
semicolon = str.find(";", i+1); |
| 3508 |
|
} |
| 3509 |
|
if ( semicolon != NPOS ) { |
| 3510 |
|
SIZE_TYPE len = semicolon - i; |
| 3511 |
|
SIZE_TYPE p = i + 1; |
| 3512 |
|
if (str[i+1] == '#') { |
| 3513 |
|
// Check on numeric character reference encoding |
| 3514 |
|
if (flags & fHtmlEnc_SkipNumericEntities) { |
| 3515 |
|
p++; |
| 3516 |
|
if (len || len <= 4) { |
| 3517 |
|
for (; p < semicolon; ++p) { |
| 3518 |
|
if (!isdigit((unsigned char)(str[p]))) |
| 3519 |
|
break; |
| 3520 |
|
} |
| 3521 |
|
} |
| 3522 |
|
} |
| 3523 |
|
} else { |
| 3524 |
|
// Check on literal entity |
| 3525 |
|
if (flags & fHtmlEnc_SkipLiteralEntities) { |
| 3526 |
|
if (len && len <= 10) { |
| 3527 |
|
for (; p < semicolon; ++p) { |
| 3528 |
|
if (!isalpha((unsigned char)(str[p]))) |
| 3529 |
|
break; |
| 3530 |
|
} |
| 3531 |
|
} |
| 3532 |
|
} |
| 3533 |
|
} |
| 3534 |
|
is_entity = (p == semicolon); |
| 3535 |
|
} |
| 3536 |
|
} |
| 3537 |
|
if ( is_entity ) { |
| 3538 |
|
if (flags & fHtmlEnc_CheckPreencoded) { |
| 3539 |
|
ERR_POST_X_ONCE(5, Info << "string \"" << str << |
| 3540 |
|
"\" contains HTML encoded entities"); |
| 3541 |
|
} |
| 3542 |
|
} else { |
| 3543 |
|
result.append("amp;"); |
| 3544 |
|
} |
| 3545 |
|
}} |
| 3546 |
|
break; |
| 3547 |
|
case '<': |
| 3548 |
|
result.append("<"); |
| 3549 |
|
break; |
| 3550 |
|
case '>': |
| 3551 |
|
result.append(">"); |
| 3552 |
|
break; |
| 3553 |
|
case '\'': |
| 3554 |
|
result.append("'"); |
| 3555 |
|
break; |
| 3556 |
|
case '"': |
| 3557 |
|
result.append("""); |
| 3558 |
|
break; |
| 3559 |
default: |
default: |
| 3560 |
if ((unsigned int)(c) < 0x20) { |
if ((unsigned int)(c) < 0x20) { |
| 3561 |
const char* charmap = "0123456789abcdef"; |
const char* charmap = "0123456789abcdef"; |
| 3576 |
return result; |
return result; |
| 3577 |
} |
} |
| 3578 |
|
|
| 3579 |
string NStr::JsonEncode(const string& str) |
|
| 3580 |
|
// Character entity references |
| 3581 |
|
// http://www.w3.org/TR/html4/sgml/entities.html |
| 3582 |
|
// http://www.w3.org/TR/1998/REC-html40-19980424/charset.html#h-5.3 |
| 3583 |
|
|
| 3584 |
|
static struct tag_HtmlEntities |
| 3585 |
|
{ |
| 3586 |
|
TUnicodeSymbol u; |
| 3587 |
|
const char* s; |
| 3588 |
|
|
| 3589 |
|
} const s_HtmlEntities[] = { |
| 3590 |
|
{ 34, "quot" }, |
| 3591 |
|
{ 38, "amp" }, |
| 3592 |
|
{ 60, "lt" }, |
| 3593 |
|
{ 62, "gt" }, |
| 3594 |
|
{ 160, "nbsp" }, |
| 3595 |
|
{ 161, "iexcl" }, |
| 3596 |
|
{ 162, "cent" }, |
| 3597 |
|
{ 163, "pound" }, |
| 3598 |
|
{ 164, "curren" }, |
| 3599 |
|
{ 165, "yen" }, |
| 3600 |
|
{ 166, "brvbar" }, |
| 3601 |
|
{ 167, "sect" }, |
| 3602 |
|
{ 168, "uml" }, |
| 3603 |
|
{ 169, "copy" }, |
| 3604 |
|
{ 170, "ordf" }, |
| 3605 |
|
{ 171, "laquo" }, |
| 3606 |
|
{ 172, "not" }, |
| 3607 |
|
{ 173, "shy" }, |
| 3608 |
|
{ 174, "reg" }, |
| 3609 |
|
{ 175, "macr" }, |
| 3610 |
|
{ 176, "deg" }, |
| 3611 |
|
{ 177, "plusmn" }, |
| 3612 |
|
{ 178, "sup2" }, |
| 3613 |
|
{ 179, "sup3" }, |
| 3614 |
|
{ 180, "acute" }, |
| 3615 |
|
{ 181, "micro" }, |
| 3616 |
|
{ 182, "para" }, |
| 3617 |
|
{ 183, "middot" }, |
| 3618 |
|
{ 184, "cedil" }, |
| 3619 |
|
{ 185, "sup1" }, |
| 3620 |
|
{ 186, "ordm" }, |
| 3621 |
|
{ 187, "raquo" }, |
| 3622 |
|
{ 188, "frac14" }, |
| 3623 |
|
{ 189, "frac12" }, |
| 3624 |
|
{ 190, "frac34" }, |
| 3625 |
|
{ 191, "iquest" }, |
| 3626 |
|
{ 192, "Agrave" }, |
| 3627 |
|
{ 193, "Aacute" }, |
| 3628 |
|
{ 194, "Acirc" }, |
| 3629 |
|
{ 195, "Atilde" }, |
| 3630 |
|
{ 196, "Auml" }, |
| 3631 |
|
{ 197, "Aring" }, |
| 3632 |
|
{ 198, "AElig" }, |
| 3633 |
|
{ 199, "Ccedil" }, |
| 3634 |
|
{ 200, "Egrave" }, |
| 3635 |
|
{ 201, "Eacute" }, |
| 3636 |
|
{ 202, "Ecirc" }, |
| 3637 |
|
{ 203, "Euml" }, |
| 3638 |
|
{ 204, "Igrave" }, |
| 3639 |
|
{ 205, "Iacute" }, |
| 3640 |
|
{ 206, "Icirc" }, |
| 3641 |
|
{ 207, "Iuml" }, |
| 3642 |
|
{ 208, "ETH" }, |
| 3643 |
|
{ 209, "Ntilde" }, |
| 3644 |
|
{ 210, "Ograve" }, |
| 3645 |
|
{ 211, "Oacute" }, |
| 3646 |
|
{ 212, "Ocirc" }, |
| 3647 |
|
{ 213, "Otilde" }, |
| 3648 |
|
{ 214, "Ouml" }, |
| 3649 |
|
{ 215, "times" }, |
| 3650 |
|
{ 216, "Oslash" }, |
| 3651 |
|
{ 217, "Ugrave" }, |
| 3652 |
|
{ 218, "Uacute" }, |
| 3653 |
|
{ 219, "Ucirc" }, |
| 3654 |
|
{ 220, "Uuml" }, |
| 3655 |
|
{ 221, "Yacute" }, |
| 3656 |
|
{ 222, "THORN" }, |
| 3657 |
|
{ 223, "szlig" }, |
| 3658 |
|
{ 224, "agrave" }, |
| 3659 |
|
{ 225, "aacute" }, |
| 3660 |
|
{ 226, "acirc" }, |
| 3661 |
|
{ 227, "atilde" }, |
| 3662 |
|
{ 228, "auml" }, |
| 3663 |
|
{ 229, "aring" }, |
| 3664 |
|
{ 230, "aelig" }, |
| 3665 |
|
{ 231, "ccedil" }, |
| 3666 |
|
{ 232, "egrave" }, |
| 3667 |
|
{ 233, "eacute" }, |
| 3668 |
|
{ 234, "ecirc" }, |
| 3669 |
|
{ 235, "euml" }, |
| 3670 |
|
{ 236, "igrave" }, |
| 3671 |
|
{ 237, "iacute" }, |
| 3672 |
|
{ 238, "icirc" }, |
| 3673 |
|
{ 239, "iuml" }, |
| 3674 |
|
{ 240, "eth" }, |
| 3675 |
|
{ 241, "ntilde" }, |
| 3676 |
|
{ 242, "ograve" }, |
| 3677 |
|
{ 243, "oacute" }, |
| 3678 |
|
{ 244, "ocirc" }, |
| 3679 |
|
{ 245, "otilde" }, |
| 3680 |
|
{ 246, "ouml" }, |
| 3681 |
|
{ 247, "divide" }, |
| 3682 |
|
{ 248, "oslash" }, |
| 3683 |
|
{ 249, "ugrave" }, |
| 3684 |
|
{ 250, "uacute" }, |
| 3685 |
|
{ 251, "ucirc" }, |
| 3686 |
|
{ 252, "uuml" }, |
| 3687 |
|
{ 253, "yacute" }, |
| 3688 |
|
{ 254, "thorn" }, |
| 3689 |
|
{ 255, "yuml" }, |
| 3690 |
|
{ 338, "OElig" }, |
| 3691 |
|
{ 339, "oelig" }, |
| 3692 |
|
{ 352, "Scaron" }, |
| 3693 |
|
{ 353, "scaron" }, |
| 3694 |
|
{ 376, "Yuml" }, |
| 3695 |
|
{ 402, "fnof" }, |
| 3696 |
|
{ 710, "circ" }, |
| 3697 |
|
{ 732, "tilde" }, |
| 3698 |
|
{ 913, "Alpha" }, |
| 3699 |
|
{ 914, "Beta" }, |
| 3700 |
|
{ 915, "Gamma" }, |
| 3701 |
|
{ 916, "Delta" }, |
| 3702 |
|
{ 917, "Epsilon" }, |
| 3703 |
|
{ 918, "Zeta" }, |
| 3704 |
|
{ 919, "Eta" }, |
| 3705 |
|
{ 920, "Theta" }, |
| 3706 |
|
{ 921, "Iota" }, |
| 3707 |
|
{ 922, "Kappa" }, |
| 3708 |
|
{ 923, "Lambda" }, |
| 3709 |
|
{ 924, "Mu" }, |
| 3710 |
|
{ 925, "Nu" }, |
| 3711 |
|
{ 926, "Xi" }, |
| 3712 |
|
{ 927, "Omicron" }, |
| 3713 |
|
{ 928, "Pi" }, |
| 3714 |
|
{ 929, "Rho" }, |
| 3715 |
|
{ 931, "Sigma" }, |
| 3716 |
|
{ 932, "Tau" }, |
| 3717 |
|
{ 933, "Upsilon" }, |
| 3718 |
|
{ 934, "Phi" }, |
| 3719 |
|
{ 935, "Chi" }, |
| 3720 |
|
{ 936, "Psi" }, |
| 3721 |
|
{ 937, "Omega" }, |
| 3722 |
|
{ 945, "alpha" }, |
| 3723 |
|
{ 946, "beta" }, |
| 3724 |
|
{ 947, "gamma" }, |
| 3725 |
|
{ 948, "delta" }, |
| 3726 |
|
{ 949, "epsilon" }, |
| 3727 |
|
{ 950, "zeta" }, |
| 3728 |
|
{ 951, "eta" }, |
| 3729 |
|
{ 952, "theta" }, |
| 3730 |
|
{ 953, "iota" }, |
| 3731 |
|
{ 954, "kappa" }, |
| 3732 |
|
{ 955, "lambda" }, |
| 3733 |
|
{ 956, "mu" }, |
| 3734 |
|
{ 957, "nu" }, |
| 3735 |
|
{ 958, "xi" }, |
| 3736 |
|
{ 959, "omicron" }, |
| 3737 |
|
{ 960, "pi" }, |
| 3738 |
|
{ 961, "rho" }, |
| 3739 |
|
{ 962, "sigmaf" }, |
| 3740 |
|
{ 963, "sigma" }, |
| 3741 |
|
{ 964, "tau" }, |
| 3742 |
|
{ 965, "upsilon" }, |
| 3743 |
|
{ 966, "phi" }, |
| 3744 |
|
{ 967, "chi" }, |
| 3745 |
|
{ 968, "psi" }, |
| 3746 |
|
{ 969, "omega" }, |
| 3747 |
|
{ 977, "thetasym" }, |
| 3748 |
|
{ 978, "upsih" }, |
| 3749 |
|
{ 982, "piv" }, |
| 3750 |
|
{ 8194, "ensp" }, |
| 3751 |
|
{ 8195, "emsp" }, |
| 3752 |
|
{ 8201, "thinsp" }, |
| 3753 |
|
{ 8204, "zwnj" }, |
| 3754 |
|
{ 8205, "zwj" }, |
| 3755 |
|
{ 8206, "lrm" }, |
| 3756 |
|
{ 8207, "rlm" }, |
| 3757 |
|
{ 8211, "ndash" }, |
| 3758 |
|
{ 8212, "mdash" }, |
| 3759 |
|
{ 8216, "lsquo" }, |
| 3760 |
|
{ 8217, "rsquo" }, |
| 3761 |
|
{ 8218, "sbquo" }, |
| 3762 |
|
{ 8220, "ldquo" }, |
| 3763 |
|
{ 8221, "rdquo" }, |
| 3764 |
|
{ 8222, "bdquo" }, |
| 3765 |
|
{ 8224, "dagger" }, |
| 3766 |
|
{ 8225, "Dagger" }, |
| 3767 |
|
{ 8226, "bull" }, |
| 3768 |
|
{ 8230, "hellip" }, |
| 3769 |
|
{ 8240, "permil" }, |
| 3770 |
|
{ 8242, "prime" }, |
| 3771 |
|
{ 8243, "Prime" }, |
| 3772 |
|
{ 8249, "lsaquo" }, |
| 3773 |
|
{ 8250, "rsaquo" }, |
| 3774 |
|
{ 8254, "oline" }, |
| 3775 |
|
{ 8260, "frasl" }, |
| 3776 |
|
{ 8364, "euro" }, |
| 3777 |
|
{ 8472, "weierp" }, |
| 3778 |
|
{ 8465, "image" }, |
| 3779 |
|
{ 8476, "real" }, |
| 3780 |
|
{ 8482, "trade" }, |
| 3781 |
|
{ 8501, "alefsym" }, |
| 3782 |
|
{ 8592, "larr" }, |
| 3783 |
|
{ 8593, "uarr" }, |
| 3784 |
|
{ 8594, "rarr" }, |
| 3785 |
|
{ 8595, "darr" }, |
| 3786 |
|
{ 8596, "harr" }, |
| 3787 |
|
{ 8629, "crarr" }, |
| 3788 |
|
{ 8656, "lArr" }, |
| 3789 |
|
{ 8657, "uArr" }, |
| 3790 |
|
{ 8658, "rArr" }, |
| 3791 |
|
{ 8659, "dArr" }, |
| 3792 |
|
{ 8660, "hArr" }, |
| 3793 |
|
{ 8704, "forall" }, |
| 3794 |
|
{ 8706, "part" }, |
| 3795 |
|
{ 8707, "exist" }, |
| 3796 |
|
{ 8709, "empty" }, |
| 3797 |
|
{ 8711, "nabla" }, |
| 3798 |
|
{ 8712, "isin" }, |
| 3799 |
|
{ 8713, "notin" }, |
| 3800 |
|
{ 8715, "ni" }, |
| 3801 |
|
{ 8719, "prod" }, |
| 3802 |
|
{ 8721, "sum" }, |
| 3803 |
|
{ 8722, "minus" }, |
| 3804 |
|
{ 8727, "lowast" }, |
| 3805 |
|
{ 8730, "radic" }, |
| 3806 |
|
{ 8733, "prop" }, |
| 3807 |
|
{ 8734, "infin" }, |
| 3808 |
|
{ 8736, "ang" }, |
| 3809 |
|
{ 8743, "and" }, |
| 3810 |
|
{ 8744, "or" }, |
| 3811 |
|
{ 8745, "cap" }, |
| 3812 |
|
{ 8746, "cup" }, |
| 3813 |
|
{ 8747, "int" }, |
| 3814 |
|
{ 8756, "there4" }, |
| 3815 |
|
{ 8764, "sim" }, |
| 3816 |
|
{ 8773, "cong" }, |
| 3817 |
|
{ 8776, "asymp" }, |
| 3818 |
|
{ 8800, "ne" }, |
| 3819 |
|
{ 8801, "equiv" }, |
| 3820 |
|
{ 8804, "le" }, |
| 3821 |
|
{ 8805, "ge" }, |
| 3822 |
|
{ 8834, "sub" }, |
| 3823 |
|
{ 8835, "sup" }, |
| 3824 |
|
{ 8836, "nsub" }, |
| 3825 |
|
{ 8838, "sube" }, |
| 3826 |
|
{ 8839, "supe" }, |
| 3827 |
|
{ 8853, "oplus" }, |
| 3828 |
|
{ 8855, "otimes" }, |
| 3829 |
|
{ 8869, "perp" }, |
| 3830 |
|
{ 8901, "sdot" }, |
| 3831 |
|
{ 8968, "lceil" }, |
| 3832 |
|
{ 8969, "rceil" }, |
| 3833 |
|
{ 8970, "lfloor" }, |
| 3834 |
|
{ 8971, "rfloor" }, |
| 3835 |
|
{ 9001, "lang" }, |
| 3836 |
|
{ 9002, "rang" }, |
| 3837 |
|
{ 9674, "loz" }, |
| 3838 |
|
{ 9824, "spades" }, |
| 3839 |
|
{ 9827, "clubs" }, |
| 3840 |
|
{ 9829, "hearts" }, |
| 3841 |
|
{ 9830, "diams" }, |
| 3842 |
|
{ 0, 0 } |
| 3843 |
|
}; |
| 3844 |
|
|
| 3845 |
|
|
| 3846 |
|
/// Decode HTML named entities and numeric character references.
///
/// Recognized forms, each terminated by ';':
///   - named entity listed in s_HtmlEntities      ("&amp;")
///   - decimal character reference                ("&#38;")
///   - hexadecimal character reference            ("&#x26;" / "&#X26;")
/// A recognized reference is replaced by the result of CUtf8::AsUTF8() for
/// its code point.  Everything else is copied through: as is when the source
/// encoding is UTF-8 or ASCII, otherwise via CUtf8::AsUTF8() with the source
/// encoding.
///
/// @param str
///   Source string.
/// @param encoding
///   Encoding of the source string.  eEncoding_Unknown asks the function to
///   guess it with CUtf8::GuessEncoding().
/// @param result_flags
///   If not NULL, receives a bit mask of fHtmlDec_* flags describing which
///   kinds of conversion were actually performed.
/// @return
///   Decoded string.
/// @throw CStringException (eBadArgs)
///   If the encoding is unknown and cannot be guessed.
string NStr::HtmlDecode(const CTempString& str, EEncoding encoding, THtmlDecode* result_flags)
{
    string ustr;
    THtmlDecode result = 0;

    if (encoding == eEncoding_Unknown) {
        encoding = CUtf8::GuessEncoding(str);
        if (encoding == eEncoding_Unknown) {
            NCBI_THROW2(CStringException, eBadArgs,
                "Unable to guess the source string encoding", 0);
        }
    }
    // wild guess...
    ustr.reserve(str.size());

    CTempString::const_iterator i, e = str.end();
    char ch;
    TUnicodeSymbol uch;

    for (i = str.begin(); i != e;) {
        ch = *(i++);
        // check for HTML entities and character references
        if (i != e && ch == '&') {
            CTempString::const_iterator itmp, end_of_entity, start_of_entity;
            itmp = end_of_entity = start_of_entity = i;
            bool ent, dec, hex, parsed=false;
            // Classify what follows the '&'.  The 'dec' and 'hex' tests
            // advance itmp past the '#' and 'x' prefixes as a side effect,
            // so that itmp ends up at the first payload character.
            ent = isalpha((unsigned char)(*itmp)) != 0;
            dec = !ent && *itmp == '#' && ++itmp != e &&
                  isdigit((unsigned char)(*itmp)) != 0;
            // The 'hex' test must not run for named entities: otherwise its
            // ++itmp would strip a leading 'x'/'X' from the name, so that
            // "&xi;" / "&Xi;" could never be found in the table (and e.g.
            // "&xeta;" would wrongly be decoded as "&eta;").
            hex = !ent && !dec && itmp != e &&
                  (*itmp == 'x' || *itmp == 'X') && ++itmp != e &&
                  isxdigit((unsigned char)(*itmp)) != 0;
            start_of_entity = itmp;

            if (itmp != e && (ent || dec || hex)) {
                // do not look too far
                for (int len=0; len<16 && itmp != e; ++len, ++itmp) {
                    if (*itmp == '&' || *itmp == '#') {
                        break;
                    }
                    if (*itmp == ';') {
                        end_of_entity = itmp;
                        break;
                    }
                    // every payload character must fit the detected kind
                    ent = ent && isalnum( (unsigned char)(*itmp)) != 0;
                    dec = dec && isdigit( (unsigned char)(*itmp)) != 0;
                    hex = hex && isxdigit((unsigned char)(*itmp)) != 0;
                }
                // end_of_entity still equal to i means no ';' was found
                if (end_of_entity != i && (ent || dec || hex)) {
                    uch = 0;
                    if (ent) {
                        // named entity: linear lookup in the table
                        string entity(start_of_entity,end_of_entity);
                        const struct tag_HtmlEntities* p = s_HtmlEntities;
                        for ( ; p->u != 0; ++p) {
                            if (entity.compare(p->s) == 0) {
                                uch = p->u;
                                parsed = true;
                                result |= fHtmlDec_CharRef_Entity;
                                break;
                            }
                        }
                    } else {
                        // numeric reference: accumulate the code point
                        parsed = true;
                        result |= fHtmlDec_CharRef_Numeric;
                        for (itmp = start_of_entity; itmp != end_of_entity; ++itmp) {
                            TUnicodeSymbol ud = *itmp;
                            if (dec) {
                                uch = 10 * uch + (ud - '0');
                            } else if (hex) {
                                if (ud >='0' && ud <= '9') {
                                    ud -= '0';
                                } else if (ud >='a' && ud <= 'f') {
                                    ud -= 'a';
                                    ud += 10;
                                } else if (ud >='A' && ud <= 'F') {
                                    ud -= 'A';
                                    ud += 10;
                                }
                                uch = 16 * uch + ud;
                            }
                        }
                    }
                    if (parsed) {
                        ustr += CUtf8::AsUTF8(&uch,1);
                        // resume right after the terminating ';'
                        i = ++end_of_entity;
                        continue;
                    }
                }
            }
        }
        // no entity - append as is
        if (encoding == eEncoding_UTF8 || encoding == eEncoding_Ascii) {
            ustr.append( 1, ch );
        } else {
            result |= fHtmlDec_Encoding_Changed;
            ustr += CUtf8::AsUTF8(CTempString(&ch,1), encoding);
        }
    }
    if (result_flags) {
        *result_flags = result;
    }
    return ustr;
}
| 3949 |
|
|
| 3950 |
|
|
| 3951 |
|
string NStr::JsonEncode(const CTempString& str) |
| 3952 |
// http://www.json.org/ |
// http://www.json.org/ |
| 3953 |
{ |
{ |
| 3954 |
string result; |
string result; |
| 3955 |
SIZE_TYPE i; |
SIZE_TYPE i; |
| 3956 |
|
// wild guess... |
| 3957 |
|
result.reserve(str.size()); |
| 3958 |
|
|
| 3959 |
for (i = 0; i < str.size(); i++) { |
for (i = 0; i < str.size(); i++) { |
| 3960 |
char c = str[i]; |
char c = str[i]; |
| 3961 |
switch ( c ) { |
switch ( c ) { |
| 3984 |
} |
} |
| 3985 |
|
|
| 3986 |
|
|
| 3987 |
string NStr::ParseEscapes(const string& str) |
/// Quote a string so that it can be passed as a single word on a shell
/// command line.
///
/// The quoting style is chosen in this order:
///   1. string has a non-printable character -> $'...' around the result of
///      NStr::PrintableString() (BASH extension);
///   2. non-empty string without shell metacharacters -> returned unchanged;
///   3. string has a single quote but none of  " \ $ `  -> double-quoted;
///   4. otherwise -> single-quoted, with every embedded single quote pulled
///      out of the quoted region.
///
/// @param str
///   String to encode.
/// @return
///   Encoded string.
string NStr::ShellEncode(const string& str)
{
    // 1. Special-case of non-printable characters. We have no choice and
    //    must use BASH extensions if we want printable output.
    //
    // Aesthetic issue: Most people are not familiar with the BASH-only
    //     quoting style. Avoid it as much as possible.
    //
    // NB: isprint() must be given a value representable as unsigned char;
    //     passing a plain (possibly negative) char is undefined behavior.
    for (string::const_iterator it = str.begin();  it != str.end();  ++it) {
        if ( !isprint((unsigned char)(*it)) ) {
            return "$'" + NStr::PrintableString(str) + "'";
        }
    }

    /////////////////////////////////////////////////////////////////////////
    // Bourne Shell quoting as IEEE-standard without special extensions.
    //
    // There are 3 basic ways to quote/escape in Bourne Shell:
    //
    // - Single-quotes. All characters (including non-printable
    //   characters newlines, backslashes), are literal. There is no escape.
    // - Double-quotes. Need to escape some metacharacters, such as literal
    //   escape (\), variable expansion ($) and command substitution (`).
    // - Escape without quotes. Use backslash.
    /////////////////////////////////////////////////////////////////////////

    // 2. Non-empty printable string without metacharacters.
    //
    // Shell special characters, according to IEEE Std 1003.1,
    // plus ! (Bourne shell exit status negation and Bash history expansion),
    // braces (Bourne enhanced expansion), space, tab, and newline.
    //
    // See http://www.opengroup.org/onlinepubs/009695399/toc.htm
    // See Bourne and Bash man pages.

    if (!str.empty()  &&
        str.find_first_of("!{} \t\r\n[|&;<>()$`\"'*?#~=%\\") == NPOS) {
        return str;
    }

    // 3. Printable string, but either empty or some shell metacharacters.
    //
    // Aesthetics preference:
    // i)   If the string includes literal single-quotes, then prefer
    //      double-quoting provided there is no need to escape embedded
    //      literal double-quotes, escapes (\), variable substitution ($),
    //      or command substitution (`).

    if (str.find('\'') != NPOS  &&
        str.find_first_of("\"\\$`") == NPOS) {
        return "\"" + str + "\"";
    }

    // Use single-quoting. The only special case for Bourne shell
    // single-quoting is a literal single-quote, which needs to
    // be pulled out of the quoted region.
    //
    // Single-quoting does not have any escape character, so close
    // the quoted string ('), then emit an escaped or quoted literal
    // single-quote (\' or "'"), and resume the quoted string (').
    //
    // Aesthetics preferences:
    // ii)  Prefer single-quoting over escape characters, especially
    //      escaped whitespace. However, this is in compromise to optimal
    //      quoting: if there are many literal single-quotes and the
    //      use of double-quotes would involve the need to escape embedded
    //      characters, then it may be more pleasing to escape the
    //      shell metacharacters, and avoid the need for single-quoting
    //      in the presence of literal single-quotes.
    // iii) If there are no literal double-quotes, then all else being equal,
    //      avoid double-quotes and prefer escaping. Double-quotes are
    //      more commonly used by enclosing formats such as ASN.1 Text
    //      and CVS, and would thus need to be escaped. If there are
    //      literal double-quotes, then having them in the output is
    //      unavoidable, and this aesthetics rule becomes secondary to
    //      the preference for avoiding escape characters. If there are
    //      literal escape characters, then having them is unavoidable
    //      and avoidance of double-quotes is once again recommended.

    // TODO: Should simplify runs of multiple quotes, for example:
    //       '\'''\'''\'' -> '"'''"'

    bool avoid_double_quotes = (str.find('"') == NPOS  ||
                                str.find('\\') != NPOS);
    string s = "'" + NStr::Replace(str, "'",
            avoid_double_quotes ? "'\\''" : "'\"'\"'") + "'";

    // Aesthetic improvement: Remove paired single-quotes ('')
    // that aren't escaped, as these evaluate to an empty string.
    // Don't apply this simplification for the degenerate case when
    // the string is the empty string ''. (Nondegenerate strings
    // must be length greater than 2). Implement the equivalent
    // of the Perl regexp:
    //
    //     s/(?<!\\)''//g
    //
    if (s.size() > 2) {
        size_t pos = 0;
        while ( true ) {
            pos = s.find("''", pos);
            if (pos == NPOS) break;
            if (pos == 0 || s[pos-1] != '\\') {
                // unescaped empty pair: drop it and rescan from same spot
                s.erase(pos, 2);
            } else {
                // first quote is escaped (\'): step over it
                ++pos;
            }
        }
    }

    return s;
}
| 4096 |
|
|
| 4097 |
|
|
| 4098 |
|
string NStr::ParseEscapes(const CTempString& str, EEscSeqRange mode, char user_char) |
| 4099 |
{ |
{ |
| 4100 |
string out; |
string out; |
| 4101 |
out.reserve(str.size()); // can only be smaller |
out.reserve(str.size()); // result string can only be smaller |
| 4102 |
SIZE_TYPE pos = 0; |
SIZE_TYPE pos = 0; |
| 4103 |
|
bool is_error = false; |
| 4104 |
|
|
| 4105 |
while (pos < str.size()) { |
while (pos < str.size() || !is_error) { |
| 4106 |
SIZE_TYPE pos2 = str.find('\\', pos); |
SIZE_TYPE pos2 = str.find('\\', pos); |
| 4107 |
if (pos2 == NPOS) { |
if (pos2 == NPOS) { |
| 4108 |
out += str.substr(pos); |
//~ out += str.substr(pos); |
| 4109 |
|
CTempString sub(str, pos); |
| 4110 |
|
out += sub; |
| 4111 |
break; |
break; |
| 4112 |
} |
} |
| 4113 |
out += str.substr(pos, pos2 - pos); |
//~ out += str.substr(pos, pos2 - pos); |
| 4114 |
|
CTempString sub(str, pos, pos2-pos); |
| 4115 |
|
out += sub; |
| 4116 |
if (++pos2 == str.size()) { |
if (++pos2 == str.size()) { |
| 4117 |
NCBI_THROW2(CStringException, eFormat, |
NCBI_THROW2(CStringException, eFormat, |
| 4118 |
"Unterminated escape sequence", pos2); |
"Unterminated escape sequence", pos2); |
| 4133 |
pos++; |
pos++; |
| 4134 |
} |
} |
| 4135 |
if (pos > pos2) { |
if (pos > pos2) { |
| 4136 |
out += static_cast<char> |
SIZE_TYPE len = pos-pos2; |
| 4137 |
(StringToUInt(str.substr(pos2, pos - pos2), 0, 16)); |
if ((mode == eEscSeqRange_FirstByte) && (len > 2)) { |
| 4138 |
|
// Take only 2 first hex-digits |
| 4139 |
|
len = 2; |
| 4140 |
|
pos = pos2 + 2; |
| 4141 |
|
} |
| 4142 |
|
unsigned int value = |
| 4143 |
|
StringToUInt(CTempString(str, pos2, len), 0, 16); |
| 4144 |
|
if ((mode != eEscSeqRange_Standard) && (value > 255)) { |
| 4145 |
|
// eEscSeqRange_Standard -- by default |
| 4146 |
|
switch (mode) { |
| 4147 |
|
case eEscSeqRange_FirstByte: |
| 4148 |
|
// Already have right value |
| 4149 |
|
break; |
| 4150 |
|
case eEscSeqRange_Throw: |
| 4151 |
|
NCBI_THROW2(CStringException, eFormat, |
| 4152 |
|
"Escape sequence '" + string(CTempString(str, pos2, len)) + |
| 4153 |
|
"' is out of range [0-255]", pos2); |
| 4154 |
|
break; |
| 4155 |
|
case eEscSeqRange_Errno: |
| 4156 |
|
CNcbiError::SetErrno(errno = ERANGE,str); |
| 4157 |
|
is_error = true; |
| 4158 |
|
continue; |
| 4159 |
|
case eEscSeqRange_User: |
| 4160 |
|
value = (unsigned)user_char; |
| 4161 |
|
break; |
| 4162 |
|
default: |
| 4163 |
|
NCBI_THROW2(CStringException, eFormat, "Wrong set of flags", pos2); |
| 4164 |
|
} |
| 4165 |
|
} |
| 4166 |
|
out += static_cast<char>(value); |
| 4167 |
} else { |
} else { |
| 4168 |
NCBI_THROW2(CStringException, eFormat, |
NCBI_THROW2(CStringException, eFormat, |
| 4169 |
"\\x followed by no hexadecimal digits", pos); |
"\\x followed by no hexadecimal digits", pos); |
| 4182 |
out += c; |
out += c; |
| 4183 |
}} |
}} |
| 4184 |
continue; |
continue; |
| 4185 |
|
case '\n': |
| 4186 |
|
// quoted EOL means no EOL |
| 4187 |
|
break; |
| 4188 |
default: |
default: |
| 4189 |
out += str[pos2]; |
out += str[pos2]; |
| 4190 |
break; |
break; |
| 4191 |
} |
} |
| 4192 |
pos = pos2 + 1; |
pos = pos2 + 1; |
| 4193 |
} |
} |
| 4194 |
|
if (mode == eEscSeqRange_Errno) { |
| 4195 |
|
if (is_error) { |
| 4196 |
|
return kEmptyStr; |
| 4197 |
|
} |
| 4198 |
|
errno = 0; |
| 4199 |
|
} |
| 4200 |
return out; |
return out; |
| 4201 |
} |
} |
| 4202 |
|
|
| 4203 |
|
|
| 4204 |
|
/// Extract and unescape a quoted string that starts at the very beginning
/// of 'str'.
///
/// The first character selects the quote style (either '"' or '\'').  The
/// text is scanned up to the first matching quote character that is not
/// preceded by an unpaired backslash; the text between the two quotes is then
/// passed through ParseEscapes().
///
/// @param str
///   Source string; must begin with a single or double quote.
/// @param n_read
///   If not NULL, receives the number of characters consumed, including
///   both the opening and the closing quote.
/// @return
///   The result of ParseEscapes() on the text between the quotes.
/// @throw CStringException (eFormat)
///   If 'str' does not start with a quote, or the closing quote is missing.
string NStr::ParseQuoted(const CTempString& str, size_t* n_read /*= NULL*/)
{
    const char* const data = str.data();
    const size_t      size = str.length();

    // An empty source has no opening quote at all
    const char quote = str.empty() ? '\0' : data[0];
    if (quote != '"'  &&  quote != '\'') {
        NCBI_THROW2(CStringException, eFormat,
            "The source string must start with a quote", 0);
    }

    // True while the previous character is an unpaired backslash
    bool after_backslash = false;

    for (size_t idx = 1;  idx < size;  ++idx) {
        const char c = data[idx];
        if (c == '\\') {
            // "\\" is an escaped backslash, so the state simply toggles
            after_backslash = !after_backslash;
            continue;
        }
        if (c == quote  &&  !after_backslash) {
            // Closing quote found
            if (n_read != NULL) {
                *n_read = idx + 1;
            }
            return ParseEscapes(CTempString(data + 1, idx - 1));
        }
        after_backslash = false;
    }

    NCBI_THROW2(CStringException, eFormat,
        "Unterminated quoted string", str.length());
}
| 4230 |
|
|
| 4231 |
|
|
| 4232 |
// Determines the end of an HTML <...> tag, accounting for attributes |
// Determines the end of an HTML <...> tag, accounting for attributes |
| 4233 |
// and comments (the latter allowed only within <!...>). |
// and comments (the latter allowed only within <!...>). |
| 4234 |
static SIZE_TYPE s_EndOfTag(const string& str, SIZE_TYPE start) |
static SIZE_TYPE s_EndOfTag(const string& str, SIZE_TYPE start) |
| 4269 |
|
|
| 4270 |
|
|
| 4271 |
// Determines the end of an HTML &foo; character/entity reference |
// Determines the end of an HTML &foo; character/entity reference |
| 4272 |
// (which might not actually end with a semicolon :-/) |
// (which might not actually end with a semicolon :-/ , but we ignore that case) |
| 4273 |
static SIZE_TYPE s_EndOfReference(const string& str, SIZE_TYPE start) |
static SIZE_TYPE s_EndOfReference(const string& str, SIZE_TYPE start) |
| 4274 |
{ |
{ |
| 4275 |
_ASSERT(start < str.size() && str[start] == '&'); |
_ASSERT(start < str.size() && str[start] == '&'); |
| 4276 |
#ifdef NCBI_STRICT_HTML_REFS |
|
|
return str.find(';', start + 1); |
|
|
#else |
|
| 4277 |
SIZE_TYPE pos = str.find_first_not_of |
SIZE_TYPE pos = str.find_first_not_of |
| 4278 |
("#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", |
("#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", |
| 4279 |
start + 1); |
start + 1); |
| 4280 |
if (pos == NPOS || str[pos] == ';') { |
if (pos != NPOS && str[pos] == ';') { |
| 4281 |
|
// found terminating semicolon, so it's valid, and we return that |
| 4282 |
return pos; |
return pos; |
| 4283 |
} else { |
} else { |
| 4284 |
return pos - 1; |
// We consider it just a '&' by itself since it's invalid |
| 4285 |
|
return start; |
| 4286 |
} |
} |
|
#endif |
|
| 4287 |
} |
} |
| 4288 |
|
|
| 4289 |
|
|
| 4290 |
static SIZE_TYPE s_VisibleWidth(const string& str, bool is_html) |
static SIZE_TYPE s_VisibleHtmlWidth(const string& str) |
| 4291 |
{ |
{ |
|
if (is_html) { |
|
| 4292 |
SIZE_TYPE width = 0, pos = 0; |
SIZE_TYPE width = 0, pos = 0; |
| 4293 |
for (;;) { |
for (;;) { |
| 4294 |
SIZE_TYPE pos2 = str.find_first_of("<&", pos); |
SIZE_TYPE pos2 = str.find_first_of("<&", pos); |
| 4311 |
} |
} |
| 4312 |
} |
} |
| 4313 |
return width; |
return width; |
|
} else { |
|
|
return str.size(); |
|
|
} |
|
| 4314 |
} |
} |
| 4315 |
|
|
|
|
|
| 4316 |
list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, |
list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, |
| 4317 |
list<string>& arr, NStr::TWrapFlags flags, |
list<string>& arr, NStr::TWrapFlags flags, |
| 4318 |
const string* prefix, const string* prefix1) |
const string* prefix, const string* prefix1) |
| 4324 |
const string* pfx = prefix1 ? prefix1 : prefix; |
const string* pfx = prefix1 ? prefix1 : prefix; |
| 4325 |
SIZE_TYPE pos = 0, len = str.size(), nl_pos = 0; |
SIZE_TYPE pos = 0, len = str.size(), nl_pos = 0; |
| 4326 |
|
|
| 4327 |
bool is_html = flags & fWrap_HTMLPre ? true : false; |
const bool is_html = flags & fWrap_HTMLPre ? true : false; |
| 4328 |
bool do_flat = (flags & fWrap_FlatFile) != 0; |
const bool do_flat = (flags & fWrap_FlatFile) != 0; |
| 4329 |
|
|
| 4330 |
enum EScore { // worst to best |
enum EScore { // worst to best |
| 4331 |
eForced, |
eForced, |
| 4335 |
eNewline |
eNewline |
| 4336 |
}; |
}; |
| 4337 |
|
|
| 4338 |
|
// To avoid copying parts of str when we need to store a |
| 4339 |
|
// substr of str, we store the substr as a pair |
| 4340 |
|
// representing start (inclusive) and end (exclusive). |
| 4341 |
|
typedef pair<SIZE_TYPE, SIZE_TYPE> TWrapSubstr; |
| 4342 |
|
|
| 4343 |
|
// This variable is used for HTML links that cross line boundaries. |
| 4344 |
|
// Since it's aesthetically displeasing for a link to cross a boundary, we |
| 4345 |
|
// close it at the end of each line and re-open it after the next line's |
| 4346 |
|
// prefix |
| 4347 |
|
// (This is needed in, e.g. AE017351) |
| 4348 |
|
TWrapSubstr best_link(0, 0); // last link found before current best_pos |
| 4349 |
|
TWrapSubstr latest_link(0, 0); // last link found at all |
| 4350 |
|
|
| 4351 |
while (pos < len) { |
while (pos < len) { |
| 4352 |
bool hyphen = false; // "-" or empty |
bool hyphen = false; // "-" or empty |
| 4353 |
SIZE_TYPE column = s_VisibleWidth(*pfx, is_html); |
SIZE_TYPE column = is_html? s_VisibleHtmlWidth(*pfx) : pfx->size(); |
| 4354 |
SIZE_TYPE column0 = column; |
SIZE_TYPE column0 = column; |
| 4355 |
// the next line will start at best_pos |
// the next line will start at best_pos |
| 4356 |
SIZE_TYPE best_pos = NPOS; |
SIZE_TYPE best_pos = NPOS; |
| 4357 |
EScore best_score = eForced; |
EScore best_score = eForced; |
| 4358 |
|
|
| 4359 |
|
// certain logic can be skipped if this part has no backspace, |
| 4360 |
|
// which is, by far, the most common case |
| 4361 |
|
bool thisPartHasBackspace = false; |
| 4362 |
|
|
| 4363 |
|
arr.push_back(""); |
| 4364 |
|
arr.back().reserve( width ); |
| 4365 |
|
arr.back() = *pfx; |
| 4366 |
|
|
| 4367 |
|
// append any still-open links from previous lines |
| 4368 |
|
if( is_html && best_link.second != 0 ) { |
| 4369 |
|
arr.back().append( |
| 4370 |
|
str.begin() + best_link.first, |
| 4371 |
|
str.begin() + best_link.second ); |
| 4372 |
|
} |
| 4373 |
|
|
| 4374 |
SIZE_TYPE pos0 = pos; |
SIZE_TYPE pos0 = pos; |
| 4375 |
|
|
| 4376 |
|
// we can't do this in HTML mode because we might have to deal with |
| 4377 |
|
// link tags that go across lines. |
| 4378 |
|
if( ! is_html ) { |
| 4379 |
if (nl_pos <= pos) { |
if (nl_pos <= pos) { |
| 4380 |
nl_pos = str.find('\n', pos); |
nl_pos = str.find('\n', pos); |
| 4381 |
if (nl_pos == NPOS) { |
if (nl_pos == NPOS) { |
| 4385 |
if (column + (nl_pos-pos) <= width) { |
if (column + (nl_pos-pos) <= width) { |
| 4386 |
pos0 = nl_pos; |
pos0 = nl_pos; |
| 4387 |
} |
} |
| 4388 |
|
} |
| 4389 |
|
|
| 4390 |
for (SIZE_TYPE pos2 = pos0; pos2 < len && column <= width; |
for (SIZE_TYPE pos2 = pos0; pos2 < len && column <= width; |
| 4391 |
++pos2, ++column) { |
++pos2, ++column) { |
| 4392 |
EScore score = eForced; |
EScore score = eForced; |
| 4393 |
SIZE_TYPE score_pos = pos2; |
SIZE_TYPE score_pos = pos2; |
| 4394 |
char c = str[pos2]; |
const char c = str[pos2]; |
| 4395 |
|
|
| 4396 |
if (c == '\n') { |
if (c == '\n') { |
| 4397 |
best_pos = pos2; |
best_pos = pos2; |
| 4398 |
best_score = eNewline; |
best_score = eNewline; |
| 4399 |
|
best_link = latest_link; |
| 4400 |
break; |
break; |
| 4401 |
} else if (isspace((unsigned char) c)) { |
} else if (isspace((unsigned char) c)) { |
| 4402 |
if ( !do_flat && pos2 > 0 && |
if ( !do_flat && pos2 > 0 && |
| 4403 |
isspace((unsigned char) str[pos2 - 1])) { |
isspace((unsigned char) str[pos2 - 1])) { |
| 4404 |
|
if(pos2 < len - 1 && str[pos2 + 1] == '\b') { |
| 4405 |
|
thisPartHasBackspace = true; |
| 4406 |
|
} |
| 4407 |
continue; // take the first space of a group |
continue; // take the first space of a group |
| 4408 |
} |
} |
| 4409 |
score = eSpace; |
score = eSpace; |
| 4410 |
} else if (is_html && c == '<') { |
} else if (is_html && c == '<') { |
| 4411 |
// treat tags as zero-width... |
// treat tags as zero-width... |
| 4412 |
|
SIZE_TYPE start_of_tag = pos2; |
| 4413 |
pos2 = s_EndOfTag(str, pos2); |
pos2 = s_EndOfTag(str, pos2); |
| 4414 |
--column; |
--column; |
| 4415 |
|
if (pos2 == NPOS) { |
| 4416 |
|
break; |
| 4417 |
|
} |
| 4418 |
|
|
| 4419 |
|
if( (pos2 - start_of_tag) >= 6 && |
| 4420 |
|
str[start_of_tag+1] == 'a' && |
| 4421 |
|
str[start_of_tag+2] == ' ' && |
| 4422 |
|
str[start_of_tag+3] == 'h' && |
| 4423 |
|
str[start_of_tag+4] == 'r' && |
| 4424 |
|
str[start_of_tag+5] == 'e' && |
| 4425 |
|
str[start_of_tag+6] == 'f' ) |
| 4426 |
|
{ |
| 4427 |
|
// remember current link in case of line wrap |
| 4428 |
|
latest_link.first = start_of_tag; |
| 4429 |
|
latest_link.second = pos2 + 1; |
| 4430 |
|
} |
| 4431 |
|
if( (pos2 - start_of_tag) >= 3 && |
| 4432 |
|
str[start_of_tag+1] == '/' && |
| 4433 |
|
str[start_of_tag+2] == 'a' && |
| 4434 |
|
str[start_of_tag+3] == '>') |
| 4435 |
|
{ |
| 4436 |
|
// link is closed |
| 4437 |
|
latest_link.first = 0; |
| 4438 |
|
latest_link.second = 0; |
| 4439 |
|
} |
| 4440 |
} else if (is_html && c == '&') { |
} else if (is_html && c == '&') { |
| 4441 |
// ...and references as single characters |
// ...and references as single characters |
| 4442 |
pos2 = s_EndOfReference(str, pos2); |
pos2 = s_EndOfReference(str, pos2); |
| 4443 |
} else if (c == ',' && score_pos < len - 1 && column < width) { |
if (pos2 == NPOS) { |
| 4444 |
|
break; |
| 4445 |
|
} |
| 4446 |
|
} else if ( c == ',' && column < width && score_pos < len - 1 ) { |
| 4447 |
score = eComma; |
score = eComma; |
| 4448 |
++score_pos; |
++score_pos; |
| 4449 |
} else if (do_flat ? c == '-' : ispunct((unsigned char) c)) { |
} else if (do_flat ? c == '-' : ispunct((unsigned char) c)) { |
| 4450 |
// For flat files, only whitespace, hyphens and commas |
// For flat files, only whitespace, hyphens and commas |
| 4451 |
// are special. |
// are special. |
| 4452 |
if (c == '(' || c == '[' || c == '{' || c == '<' |
switch(c) { |
| 4453 |
|| c == '`') { // opening element |
case '(': case '[': case '{': case '<': case '`': |
| 4454 |
score = ePunct; |
score = ePunct; |
| 4455 |
} else if (score_pos < len - 1 && column < width) { |
break; |
| 4456 |
// Prefer breaking *after* most types of punctuation. |
default: |
| 4457 |
|
if( score_pos < len - 1 && column < width ) { |
| 4458 |
score = ePunct; |
score = ePunct; |
| 4459 |
++score_pos; |
++score_pos; |
| 4460 |
} |
} |
|
} |
|
|
|
|
|
if (pos2 == NPOS) { |
|
| 4461 |
break; |
break; |
| 4462 |
} |
} |
| 4463 |
|
} |
| 4464 |
|
|
| 4465 |
if (score >= best_score && score_pos > pos0) { |
if (score >= best_score && score_pos > pos0) { |
| 4466 |
best_pos = score_pos; |
best_pos = score_pos; |
| 4467 |
best_score = score; |
best_score = score; |
| 4468 |
|
best_link = latest_link; |
| 4469 |
} |
} |
| 4470 |
|
|
| 4471 |
while (pos2 < len - 1 && str[pos2 + 1] == '\b') { |
while (pos2 < len - 1 && str[pos2 + 1] == '\b') { |
| 4474 |
if (column > column0) { |
if (column > column0) { |
| 4475 |
--column; |
--column; |
| 4476 |
} |
} |
| 4477 |
|
thisPartHasBackspace = true; |
| 4478 |
} |
} |
| 4479 |
} |
} |
| 4480 |
|
|
| 4481 |
if ( best_score != eNewline && column <= width ) { |
if ( best_score != eNewline && column <= width ) { |
| 4482 |
|
if( best_pos != len ) { |
| 4483 |
// If the whole remaining text can fit, don't split it... |
// If the whole remaining text can fit, don't split it... |
| 4484 |
best_pos = len; |
best_pos = len; |
| 4485 |
|
best_link = latest_link; |
| 4486 |
|
// Force backspace checking, to play it safe |
| 4487 |
|
thisPartHasBackspace = true; |
| 4488 |
|
} |
| 4489 |
} else if ( best_score == eForced && (flags & fWrap_Hyphenate) ) { |
} else if ( best_score == eForced && (flags & fWrap_Hyphenate) ) { |
| 4490 |
hyphen = true; |
hyphen = true; |
| 4491 |
--best_pos; |
--best_pos; |
| 4492 |
} |
} |
| 4493 |
arr.push_back(*pfx); |
|
| 4494 |
{{ // eat backspaces and the characters (if any) that precede them |
{{ |
| 4495 |
string::const_iterator begin = str.begin() + pos; |
string::const_iterator begin = str.begin() + pos; |
| 4496 |
string::const_iterator end = str.begin() + best_pos; |
string::const_iterator end = str.begin() + best_pos; |
| 4497 |
|
if( thisPartHasBackspace ) { |
| 4498 |
|
// eat backspaces and the characters (if any) that precede them |
| 4499 |
|
|
| 4500 |
string::const_iterator bs; // position of next backspace |
string::const_iterator bs; // position of next backspace |
| 4501 |
while ((bs = find(begin, end, '\b')) != end) { |
while ((bs = find(begin, end, '\b')) != end) { |
| 4502 |
if (bs != begin) { |
if (bs != begin) { |
| 4514 |
// skip over backspace |
// skip over backspace |
| 4515 |
begin = bs + 1; |
begin = bs + 1; |
| 4516 |
} |
} |
| 4517 |
|
} |
| 4518 |
if (begin != end) { |
if (begin != end) { |
| 4519 |
// add remaining characters |
// add remaining characters |
| 4520 |
arr.back().append(begin, end); |
arr.back().append(begin, end); |
| 4521 |
} |
} |
| 4522 |
}} |
}} |
| 4523 |
|
|
| 4524 |
|
// if we didn't close the link on this line, we |
| 4525 |
|
// close it here |
| 4526 |
|
if( is_html && best_link.second != 0 ) { |
| 4527 |
|
arr.back() += "</a>"; |
| 4528 |
|
} |
| 4529 |
|
|
| 4530 |
if ( hyphen ) { |
if ( hyphen ) { |
| 4531 |
arr.back() += '-'; |
arr.back() += '-'; |
| 4532 |
} |
} |
| 4533 |
pos = best_pos; |
pos = best_pos; |
| 4534 |
pfx = prefix; |
pfx = prefix; |
| 4535 |
|
|
| 4536 |
|
if (do_flat) { |
| 4537 |
|
if (best_score == eSpace) { |
| 4538 |
|
while (str[pos] == ' ') { |
| 4539 |
|
++pos; |
| 4540 |
|
} |
| 4541 |
|
if (str[pos] == '\n') { |
| 4542 |
|
++pos; |
| 4543 |
|
} |
| 4544 |
|
} |
| 4545 |
|
if (best_score == eNewline) { |
| 4546 |
|
++pos; |
| 4547 |
|
} |
| 4548 |
|
} |
| 4549 |
|
else { |
| 4550 |
if ( best_score == eSpace || best_score == eNewline ) { |
if ( best_score == eSpace || best_score == eNewline ) { |
| 4551 |
++pos; |
++pos; |
| 4552 |
} |
} |
| 4553 |
|
} |
| 4554 |
while (pos < len && str[pos] == '\b') { |
while (pos < len && str[pos] == '\b') { |
| 4555 |
++pos; |
++pos; |
| 4556 |
} |
} |
| 4562 |
|
|
| 4563 |
list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width, |
list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width, |
| 4564 |
const string& delim, list<string>& arr, |
const string& delim, list<string>& arr, |
| 4565 |
NStr::TWrapFlags flags, const string* prefix, |
NStr::TWrapFlags flags, |
| 4566 |
|
const string* prefix, |
| 4567 |
const string* prefix1) |
const string* prefix1) |
| 4568 |
{ |
{ |
| 4569 |
if (l.empty()) { |
if (l.empty()) { |
| 4573 |
const string* pfx = prefix1 ? prefix1 : prefix; |
const string* pfx = prefix1 ? prefix1 : prefix; |
| 4574 |
string s = *pfx; |
string s = *pfx; |
| 4575 |
bool is_html = flags & fWrap_HTMLPre ? true : false; |
bool is_html = flags & fWrap_HTMLPre ? true : false; |
| 4576 |
SIZE_TYPE column = s_VisibleWidth(s, is_html); |
SIZE_TYPE column = is_html? s_VisibleHtmlWidth(s) : s.size(); |
| 4577 |
SIZE_TYPE delwidth = s_VisibleWidth(delim, is_html); |
SIZE_TYPE delwidth = is_html? s_VisibleHtmlWidth(delim) : delim.size(); |
| 4578 |
bool at_start = true; |
bool at_start = true; |
| 4579 |
|
|
| 4580 |
ITERATE (list<string>, it, l) { |
ITERATE (list<string>, it, l) { |
| 4581 |
SIZE_TYPE term_width = s_VisibleWidth(*it, is_html); |
SIZE_TYPE term_width = is_html ? s_VisibleHtmlWidth(*it) : it->size(); |
| 4582 |
if ( at_start ) { |
if ( at_start ) { |
| 4583 |
if (column + term_width <= width) { |
if (column + term_width <= width) { |
| 4584 |
s += *it; |
s += *it; |
| 4589 |
Wrap(*it, width, arr, flags, prefix, pfx); |
Wrap(*it, width, arr, flags, prefix, pfx); |
| 4590 |
pfx = prefix; |
pfx = prefix; |
| 4591 |
s = *prefix; |
s = *prefix; |
| 4592 |
column = s_VisibleWidth(s, is_html); |
column = is_html ? s_VisibleHtmlWidth(s) : s.size(); |
| 4593 |
at_start = true; |
at_start = true; |
| 4594 |
} |
} |
| 4595 |
} else if (column + delwidth + term_width <= width) { |
} else if (column + delwidth + term_width <= width) { |
| 4602 |
arr.push_back(s); |
arr.push_back(s); |
| 4603 |
pfx = prefix; |
pfx = prefix; |
| 4604 |
s = *prefix; |
s = *prefix; |
| 4605 |
column = s_VisibleWidth(s, is_html); |
column = is_html ? s_VisibleHtmlWidth(s) : s.size(); |
| 4606 |
at_start = true; |
at_start = true; |
| 4607 |
--it; |
--it; |
| 4608 |
} |
} |
| 4609 |
} |
} |
|
|
|
| 4610 |
arr.push_back(s); |
arr.push_back(s); |
| 4611 |
return arr; |
return arr; |
| 4612 |
} |
} |
| 4613 |
|
|
| 4614 |
|
|
| 4615 |
|
list<string>& NStr::Justify(const CTempString& str, |
| 4616 |
|
SIZE_TYPE width, |
| 4617 |
|
list<string>& par, |
| 4618 |
|
const CTempString* pfx, |
| 4619 |
|
const CTempString* pfx1) |
| 4620 |
|
{ |
| 4621 |
|
static const CTempString kNothing; |
| 4622 |
|
if (!pfx) |
| 4623 |
|
pfx = &kNothing; |
| 4624 |
|
const CTempString* p = pfx1 ? pfx1 : pfx; |
| 4625 |
|
|
| 4626 |
|
SIZE_TYPE pos = 0; |
| 4627 |
|
for (SIZE_TYPE len = p->size(); pos < str.size(); len = p->size()) { |
| 4628 |
|
list<CTempString> words; |
| 4629 |
|
unsigned int nw = 0; // How many words are there in the line |
| 4630 |
|
bool big = false; |
| 4631 |
|
do { |
| 4632 |
|
while (pos < str.size()) { |
| 4633 |
|
if (!isspace((unsigned char) str[pos])) |
| 4634 |
|
break; |
| 4635 |
|
++pos; |
| 4636 |
|
} |
| 4637 |
|
SIZE_TYPE start = pos; |
| 4638 |
|
while (pos < str.size()) { |
| 4639 |
|
if ( isspace((unsigned char) str[pos])) |
| 4640 |
|
break; |
| 4641 |
|
++pos; |
| 4642 |
|
} |
| 4643 |
|
SIZE_TYPE wlen = pos - start; |
| 4644 |
|
if (!wlen) |
| 4645 |
|
break; |
| 4646 |
|
if (len + wlen + nw > width) { |
| 4647 |
|
if (nw) { |
| 4648 |
|
pos = start; // Will have to rescan this word again |
| 4649 |
|
break; |
| 4650 |
|
} |
| 4651 |
|
big = true; // Long line with a long lonely word :-/ |
| 4652 |
|
} |
| 4653 |
|
words.push_back(CTempString(str, start, wlen)); |
| 4654 |
|
len += wlen; |
| 4655 |
|
++nw; |
| 4656 |
|
if (str[pos - 1] == '.' || |
| 4657 |
|
str[pos - 1] == '!' || |
| 4658 |
|
str[pos - 1] == '?') { |
| 4659 |
|
if (len + 1 >= width) |
| 4660 |
|
break; |
| 4661 |
|
words.push_back(CTempString("", 0)); |
| 4662 |
|
_ASSERT(!big); |
| 4663 |
|
nw++; |
| 4664 |
|
} |
| 4665 |
|
} while (!big); |
| 4666 |
|
if (!nw) |
| 4667 |
|
break; |
| 4668 |
|
if (words.back().empty()) { |
| 4669 |
|
words.pop_back(); |
| 4670 |
|
_ASSERT(nw > 1); |
| 4671 |
|
nw--; |
| 4672 |
|
} |
| 4673 |
|
SIZE_TYPE space; |
| 4674 |
|
if (nw > 1) { |
| 4675 |
|
if (pos < str.size() && len < width && !big) { |
| 4676 |
|
space = (width - len) / (nw - 1); |
| 4677 |
|
nw = (width - len) % (nw - 1); |
| 4678 |
|
} else { |
| 4679 |
|
space = 1; |
| 4680 |
|
nw = 0; |
| 4681 |
|
} |
| 4682 |
|
} else |
| 4683 |
|
space = 0; |
| 4684 |
|
par.push_back(*p); |
| 4685 |
|
unsigned int n = 0; |
| 4686 |
|
ITERATE(list<CTempString>, w, words) { |
| 4687 |
|
if (n) |
| 4688 |
|
par.back().append(space + (n <= nw ? 1 : 0) , ' '); |
| 4689 |
|
par.back().append(w->data(), w->size()); |
| 4690 |
|
++n; |
| 4691 |
|
} |
| 4692 |
|
p = pfx; |
| 4693 |
|
} |
| 4694 |
|
return par; |
| 4695 |
|
} |
| 4696 |
|
|
| 4697 |
|
|
| 4698 |
#if !defined(HAVE_STRDUP) |
#if !defined(HAVE_STRDUP) |
| 4699 |
extern char* strdup(const char* str) |
extern char* strdup(const char* str) |
| 4700 |
{ |
{ |
| 4825 |
"%40", "A", "B", "C", "D", "E", "F", "G", |
"%40", "A", "B", "C", "D", "E", "F", "G", |
| 4826 |
"H", "I", "J", "K", "L", "M", "N", "O", |
"H", "I", "J", "K", "L", "M", "N", "O", |
| 4827 |
"P", "Q", "R", "S", "T", "U", "V", "W", |
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 4828 |
|
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_", |
| 4829 |
|
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 4830 |
|
"h", "i", "j", "k", "l", "m", "n", "o", |
| 4831 |
|
"p", "q", "r", "s", "t", "u", "v", "w", |
| 4832 |
|
"x", "y", "z", "%7B", "%7C", "%7D", "%7E", "%7F", |
| 4833 |
|
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", |
| 4834 |
|
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", |
| 4835 |
|
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", |
| 4836 |
|
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", |
| 4837 |
|
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", |
| 4838 |
|
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", |
| 4839 |
|
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", |
| 4840 |
|
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", |
| 4841 |
|
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", |
| 4842 |
|
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", |
| 4843 |
|
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", |
| 4844 |
|
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", |
| 4845 |
|
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", |
| 4846 |
|
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", |
| 4847 |
|
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", |
| 4848 |
|
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 4849 |
|
}; |
| 4850 |
|
|
| 4851 |
|
// RFC-2396: |
| 4852 |
|
// scheme = alpha *( alpha | digit | "+" | "-" | "." ) |
| 4853 |
|
static const char s_EncodeURIScheme[256][4] = { |
| 4854 |
|
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", |
| 4855 |
|
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", |
| 4856 |
|
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", |
| 4857 |
|
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", |
| 4858 |
|
"%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", |
| 4859 |
|
"%28", "%29", "%2A", "+", "%2C", "-", ".", "%2F", |
| 4860 |
|
"0", "1", "2", "3", "4", "5", "6", "7", |
| 4861 |
|
"8", "9", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", |
| 4862 |
|
"%40", "A", "B", "C", "D", "E", "F", "G", |
| 4863 |
|
"H", "I", "J", "K", "L", "M", "N", "O", |
| 4864 |
|
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 4865 |
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "%5F", |
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "%5F", |
| 4866 |
"%60", "a", "b", "c", "d", "e", "f", "g", |
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 4867 |
"h", "i", "j", "k", "l", "m", "n", "o", |
"h", "i", "j", "k", "l", "m", "n", "o", |
| 4885 |
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 4886 |
}; |
}; |
| 4887 |
|
|
| 4888 |
string NStr::URLEncode(const string& str, EUrlEncode flag) |
// RFC-2396: |
| 4889 |
|
// userinfo = *( unreserved | escaped | |
| 4890 |
|
// ";" | ":" | "&" | "=" | "+" | "$" | "," ) |
| 4891 |
|
// unreserved = alphanum | mark |
| 4892 |
|
// mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" |
| 4893 |
|
// Note: ":" is name/password separator, so it must be encoded in each of them. |
| 4894 |
|
static const char s_EncodeURIUserinfo[256][4] = { |
| 4895 |
|
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", |
| 4896 |
|
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", |
| 4897 |
|
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", |
| 4898 |
|
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", |
| 4899 |
|
"%20", "!", "%22", "%23", "$", "%25", "&", "'", |
| 4900 |
|
"(", ")", "*", "+", ",", "-", ".", "%2F", |
| 4901 |
|
"0", "1", "2", "3", "4", "5", "6", "7", |
| 4902 |
|
"8", "9", "%3A", ";", "%3C", "=", "%3E", "%3F", |
| 4903 |
|
"%40", "A", "B", "C", "D", "E", "F", "G", |
| 4904 |
|
"H", "I", "J", "K", "L", "M", "N", "O", |
| 4905 |
|
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 4906 |
|
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_", |
| 4907 |
|
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 4908 |
|
"h", "i", "j", "k", "l", "m", "n", "o", |
| 4909 |
|
"p", "q", "r", "s", "t", "u", "v", "w", |
| 4910 |
|
"x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F", |
| 4911 |
|
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", |
| 4912 |
|
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", |
| 4913 |
|
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", |
| 4914 |
|
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", |
| 4915 |
|
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", |
| 4916 |
|
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", |
| 4917 |
|
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", |
| 4918 |
|
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", |
| 4919 |
|
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", |
| 4920 |
|
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", |
| 4921 |
|
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", |
| 4922 |
|
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", |
| 4923 |
|
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", |
| 4924 |
|
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", |
| 4925 |
|
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", |
| 4926 |
|
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 4927 |
|
}; |
| 4928 |
|
|
| 4929 |
|
// RFC-2396: |
| 4930 |
|
// host = hostname | IPv4address |
| 4931 |
|
// hostname = *( domainlabel "." ) toplabel [ "." ] |
| 4932 |
|
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum |
| 4933 |
|
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum |
| 4934 |
|
// IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit |
| 4935 |
|
static const char s_EncodeURIHost[256][4] = { |
| 4936 |
|
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", |
| 4937 |
|
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", |
| 4938 |
|
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", |
| 4939 |
|
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", |
| 4940 |
|
"%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", |
| 4941 |
|
"%28", "%29", "%2A", "%2B", "%2C", "-", ".", "%2F", |
| 4942 |
|
"0", "1", "2", "3", "4", "5", "6", "7", |
| 4943 |
|
"8", "9", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", |
| 4944 |
|
"%40", "A", "B", "C", "D", "E", "F", "G", |
| 4945 |
|
"H", "I", "J", "K", "L", "M", "N", "O", |
| 4946 |
|
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 4947 |
|
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "%5F", |
| 4948 |
|
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 4949 |
|
"h", "i", "j", "k", "l", "m", "n", "o", |
| 4950 |
|
"p", "q", "r", "s", "t", "u", "v", "w", |
| 4951 |
|
"x", "y", "z", "%7B", "%7C", "%7D", "%7E", "%7F", |
| 4952 |
|
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", |
| 4953 |
|
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", |
| 4954 |
|
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", |
| 4955 |
|
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", |
| 4956 |
|
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", |
| 4957 |
|
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", |
| 4958 |
|
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", |
| 4959 |
|
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", |
| 4960 |
|
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", |
| 4961 |
|
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", |
| 4962 |
|
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", |
| 4963 |
|
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", |
| 4964 |
|
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", |
| 4965 |
|
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", |
| 4966 |
|
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", |
| 4967 |
|
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 4968 |
|
}; |
| 4969 |
|
|
| 4970 |
|
// RFC-2396: |
| 4971 |
|
// path_segments = segment *( "/" segment ) |
| 4972 |
|
// segment = *pchar *( ";" param ) |
| 4973 |
|
// param = *pchar |
| 4974 |
|
// pchar = unreserved | escaped | |
| 4975 |
|
// ":" | "@" | "&" | "=" | "+" | "$" | "," |
| 4976 |
|
// unreserved = alphanum | mark |
| 4977 |
|
// mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" |
| 4978 |
|
static const char s_EncodeURIPath[256][4] = { |
| 4979 |
|
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", |
| 4980 |
|
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", |
| 4981 |
|
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", |
| 4982 |
|
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", |
| 4983 |
|
"%20", "!", "%22", "%23", "$", "%25", "&", "'", |
| 4984 |
|
"(", ")", "*", "+", ",", "-", ".", "/", |
| 4985 |
|
"0", "1", "2", "3", "4", "5", "6", "7", |
| 4986 |
|
"8", "9", ":", ";", "%3C", "=", "%3E", "%3F", |
| 4987 |
|
"@", "A", "B", "C", "D", "E", "F", "G", |
| 4988 |
|
"H", "I", "J", "K", "L", "M", "N", "O", |
| 4989 |
|
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 4990 |
|
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_", |
| 4991 |
|
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 4992 |
|
"h", "i", "j", "k", "l", "m", "n", "o", |
| 4993 |
|
"p", "q", "r", "s", "t", "u", "v", "w", |
| 4994 |
|
"x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F", |
| 4995 |
|
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", |
| 4996 |
|
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", |
| 4997 |
|
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", |
| 4998 |
|
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", |
| 4999 |
|
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", |
| 5000 |
|
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", |
| 5001 |
|
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", |
| 5002 |
|
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", |
| 5003 |
|
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", |
| 5004 |
|
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", |
| 5005 |
|
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", |
| 5006 |
|
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", |
| 5007 |
|
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", |
| 5008 |
|
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", |
| 5009 |
|
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", |
| 5010 |
|
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 5011 |
|
}; |
| 5012 |
|
|
| 5013 |
|
static const char s_EncodeURIQueryName[256][4] = { |
| 5014 |
|
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", |
| 5015 |
|
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", |
| 5016 |
|
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", |
| 5017 |
|
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", |
| 5018 |
|
"%20", "!", "%22", "%23", "$", "%25", "%26", "'", |
| 5019 |
|
"(", ")", "%2A", "%2B", "%2C", "-", ".", "/", |
| 5020 |
|
"0", "1", "2", "3", "4", "5", "6", "7", |
| 5021 |
|
"8", "9", ":", "%3B", "%3C", "%3D", "%3E", "?", |
| 5022 |
|
"@", "A", "B", "C", "D", "E", "F", "G", |
| 5023 |
|
"H", "I", "J", "K", "L", "M", "N", "O", |
| 5024 |
|
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 5025 |
|
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_", |
| 5026 |
|
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 5027 |
|
"h", "i", "j", "k", "l", "m", "n", "o", |
| 5028 |
|
"p", "q", "r", "s", "t", "u", "v", "w", |
| 5029 |
|
"x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F", |
| 5030 |
|
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", |
| 5031 |
|
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", |
| 5032 |
|
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", |
| 5033 |
|
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", |
| 5034 |
|
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", |
| 5035 |
|
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", |
| 5036 |
|
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", |
| 5037 |
|
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", |
| 5038 |
|
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", |
| 5039 |
|
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", |
| 5040 |
|
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", |
| 5041 |
|
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", |
| 5042 |
|
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", |
| 5043 |
|
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", |
| 5044 |
|
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", |
| 5045 |
|
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 5046 |
|
}; |
| 5047 |
|
|
| 5048 |
|
static const char s_EncodeURIQueryValue[256][4] = { |
| 5049 |
|
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", |
| 5050 |
|
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", |
| 5051 |
|
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", |
| 5052 |
|
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", |
| 5053 |
|
"%20", "!", "%22", "%23", "$", "%25", "%26", "'", |
| 5054 |
|
"(", ")", "%2A", "%2B", "%2C", "-", ".", "/", |
| 5055 |
|
"0", "1", "2", "3", "4", "5", "6", "7", |
| 5056 |
|
"8", "9", ":", "%3B", "%3C", "%3D", "%3E", "?", |
| 5057 |
|
"@", "A", "B", "C", "D", "E", "F", "G", |
| 5058 |
|
"H", "I", "J", "K", "L", "M", "N", "O", |
| 5059 |
|
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 5060 |
|
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_", |
| 5061 |
|
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 5062 |
|
"h", "i", "j", "k", "l", "m", "n", "o", |
| 5063 |
|
"p", "q", "r", "s", "t", "u", "v", "w", |
| 5064 |
|
"x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F", |
| 5065 |
|
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", |
| 5066 |
|
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", |
| 5067 |
|
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", |
| 5068 |
|
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", |
| 5069 |
|
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", |
| 5070 |
|
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", |
| 5071 |
|
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", |
| 5072 |
|
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", |
| 5073 |
|
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", |
| 5074 |
|
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", |
| 5075 |
|
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", |
| 5076 |
|
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", |
| 5077 |
|
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", |
| 5078 |
|
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", |
| 5079 |
|
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", |
| 5080 |
|
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 5081 |
|
}; |
| 5082 |
|
|
| 5083 |
|
// RFC-2396: |
| 5084 |
|
// fragment = *uric |
| 5085 |
|
// uric = reserved | unreserved | escaped |
| 5086 |
|
// reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
| 5087 |
|
// unreserved = alphanum | mark |
| 5088 |
|
// mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" |
| 5089 |
|
static const char s_EncodeURIFragment[256][4] = { |
| 5090 |
|
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", |
| 5091 |
|
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", |
| 5092 |
|
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", |
| 5093 |
|
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", |
| 5094 |
|
"%20", "!", "%22", "%23", "$", "%25", "&", "'", |
| 5095 |
|
"(", ")", "*", "+", ",", "-", ".", "/", |
| 5096 |
|
"0", "1", "2", "3", "4", "5", "6", "7", |
| 5097 |
|
"8", "9", ":", ";", "%3C", "=", "%3E", "?", |
| 5098 |
|
"@", "A", "B", "C", "D", "E", "F", "G", |
| 5099 |
|
"H", "I", "J", "K", "L", "M", "N", "O", |
| 5100 |
|
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 5101 |
|
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_", |
| 5102 |
|
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 5103 |
|
"h", "i", "j", "k", "l", "m", "n", "o", |
| 5104 |
|
"p", "q", "r", "s", "t", "u", "v", "w", |
| 5105 |
|
"x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F", |
| 5106 |
|
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", |
| 5107 |
|
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", |
| 5108 |
|
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", |
| 5109 |
|
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", |
| 5110 |
|
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", |
| 5111 |
|
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", |
| 5112 |
|
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", |
| 5113 |
|
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", |
| 5114 |
|
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", |
| 5115 |
|
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", |
| 5116 |
|
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", |
| 5117 |
|
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", |
| 5118 |
|
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", |
| 5119 |
|
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", |
| 5120 |
|
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", |
| 5121 |
|
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 5122 |
|
}; |
| 5123 |
|
|
| 5124 |
|
static const char s_EncodeCookie[256][4] = { |
| 5125 |
|
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", |
| 5126 |
|
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", |
| 5127 |
|
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", |
| 5128 |
|
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", |
| 5129 |
|
"+", "!", "%22", "%23", "$", "%25", "%26", "'", |
| 5130 |
|
"(", ")", "*", "%2B", "%2C", "-", ".", "%2F", |
| 5131 |
|
"0", "1", "2", "3", "4", "5", "6", "7", |
| 5132 |
|
"8", "9", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", |
| 5133 |
|
"%40", "A", "B", "C", "D", "E", "F", "G", |
| 5134 |
|
"H", "I", "J", "K", "L", "M", "N", "O", |
| 5135 |
|
"P", "Q", "R", "S", "T", "U", "V", "W", |
| 5136 |
|
"X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_", |
| 5137 |
|
"%60", "a", "b", "c", "d", "e", "f", "g", |
| 5138 |
|
"h", "i", "j", "k", "l", "m", "n", "o", |
| 5139 |
|
"p", "q", "r", "s", "t", "u", "v", "w", |
| 5140 |
|
"x", "y", "z", "%7B", "%7C", "%7D", "%7E", "%7F", |
| 5141 |
|
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", |
| 5142 |
|
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", |
| 5143 |
|
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", |
| 5144 |
|
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", |
| 5145 |
|
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", |
| 5146 |
|
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", |
| 5147 |
|
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", |
| 5148 |
|
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", |
| 5149 |
|
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", |
| 5150 |
|
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", |
| 5151 |
|
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", |
| 5152 |
|
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", |
| 5153 |
|
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", |
| 5154 |
|
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", |
| 5155 |
|
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", |
| 5156 |
|
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" |
| 5157 |
|
}; |
| 5158 |
|
|
| 5159 |
|
string NStr::URLEncode(const CTempString& str, EUrlEncode flag) |
| 5160 |
{ |
{ |
| 5161 |
SIZE_TYPE len = str.length(); |
SIZE_TYPE len = str.length(); |
| 5162 |
if ( !len ) { |
if ( !len ) { |
| 5163 |
return kEmptyStr; |
return kEmptyStr; |
| 5164 |
} |
} |
|
|
|
| 5165 |
const char (*encode_table)[4]; |
const char (*encode_table)[4]; |
| 5166 |
switch (flag) { |
switch (flag) { |
| 5167 |
case eUrlEnc_SkipMarkChars: |
case eUrlEnc_SkipMarkChars: |
| 5176 |
case eUrlEnc_Path: |
case eUrlEnc_Path: |
| 5177 |
encode_table = s_EncodePath; |
encode_table = s_EncodePath; |
| 5178 |
break; |
break; |
| 5179 |
|
case eUrlEnc_URIScheme: |
| 5180 |
|
encode_table = s_EncodeURIScheme; |
| 5181 |
|
break; |
| 5182 |
|
case eUrlEnc_URIUserinfo: |
| 5183 |
|
encode_table = s_EncodeURIUserinfo; |
| 5184 |
|
break; |
| 5185 |
|
case eUrlEnc_URIHost: |
| 5186 |
|
encode_table = s_EncodeURIHost; |
| 5187 |
|
break; |
| 5188 |
|
case eUrlEnc_URIPath: |
| 5189 |
|
encode_table = s_EncodeURIPath; |
| 5190 |
|
break; |
| 5191 |
|
case eUrlEnc_URIQueryName: |
| 5192 |
|
encode_table = s_EncodeURIQueryName; |
| 5193 |
|
break; |
| 5194 |
|
case eUrlEnc_URIQueryValue: |
| 5195 |
|
encode_table = s_EncodeURIQueryValue; |
| 5196 |
|
break; |
| 5197 |
|
case eUrlEnc_URIFragment: |
| 5198 |
|
encode_table = s_EncodeURIFragment; |
| 5199 |
|
break; |
| 5200 |
|
case eUrlEnc_Cookie: |
| 5201 |
|
encode_table = s_EncodeCookie; |
| 5202 |
|
break; |
| 5203 |
case eUrlEnc_None: |
case eUrlEnc_None: |
| 5204 |
return str; |
return str; |
| 5205 |
default: |
default: |
| 5211 |
string dst; |
string dst; |
| 5212 |
SIZE_TYPE pos; |
SIZE_TYPE pos; |
| 5213 |
SIZE_TYPE dst_len = len; |
SIZE_TYPE dst_len = len; |
| 5214 |
const unsigned char* cstr = (const unsigned char*)str.c_str(); |
const unsigned char* cstr = (const unsigned char*)str.data(); |
| 5215 |
for (pos = 0; pos < len; pos++) { |
for (pos = 0; pos < len; pos++) { |
| 5216 |
if (encode_table[cstr[pos]][0] == '%') |
if (encode_table[cstr[pos]][0] == '%') |
| 5217 |
dst_len += 2; |
dst_len += 2; |
| 5218 |
} |
} |
|
dst.reserve(dst_len + 1); |
|
| 5219 |
dst.resize(dst_len); |
dst.resize(dst_len); |
| 5220 |
|
|
| 5221 |
SIZE_TYPE p = 0; |
SIZE_TYPE p = 0; |
| 5229 |
dst[++p] = *(++subst); |
dst[++p] = *(++subst); |
| 5230 |
} |
} |
| 5231 |
} |
} |
|
|
|
| 5232 |
_ASSERT( p == dst_len ); |
_ASSERT( p == dst_len ); |
|
dst[dst_len] = '\0'; |
|
| 5233 |
return dst; |
return dst; |
| 5234 |
} |
} |
| 5235 |
|
|
| 5236 |
|
|
| 5237 |
void s_URLDecode(const string& src, string& dst, NStr::EUrlDecode flag) |
/// Render a string as a single-quoted SQL string literal.
///
/// The result starts and ends with a single quote; every single quote found
/// inside @a str is emitted twice. All other characters are copied as is.
///
/// @param str
///   Text to quote.
/// @return
///   The quoted text.
CStringUTF8 NStr::SQLEncode(const CStringUTF8& str) {
    const SIZE_TYPE src_len = str.size();

    CStringUTF8 quoted;
    // Capacity hint only; the string still grows on demand.
    quoted.reserve(src_len + 6);

    quoted.append(1, '\'');
    for (SIZE_TYPE pos = 0;  pos < src_len;  ++pos) {
        const char ch = str[pos];
        if (ch == '\'') {
            // An embedded quote is doubled.
            quoted.append(2, '\'');
        } else {
            quoted.append(1, ch);
        }
    }
    quoted.append(1, '\'');

    return quoted;
}
| 5253 |
|
|
| 5254 |
|
|
| 5255 |
|
static |
| 5256 |
|
void s_URLDecode(const CTempString& src, string& dst, NStr::EUrlDecode flag) |
| 5257 |
{ |
{ |
| 5258 |
SIZE_TYPE len = src.length(); |
SIZE_TYPE len = src.length(); |
| 5259 |
if ( !len ) { |
if ( !len ) { |
| 5260 |
dst.clear(); |
dst.erase(); |
| 5261 |
return; |
return; |
| 5262 |
} |
} |
| 5263 |
if (dst.length() < src.length()) { |
if (dst.length() < src.length()) { |
| 5295 |
} |
} |
| 5296 |
} |
} |
| 5297 |
if (pdst < len) { |
if (pdst < len) { |
|
dst[pdst] = '\0'; |
|
| 5298 |
dst.resize(pdst); |
dst.resize(pdst); |
| 5299 |
} |
} |
| 5300 |
} |
} |
| 5301 |
|
|
| 5302 |
|
|
| 5303 |
string NStr::URLDecode(const string& str, EUrlDecode flag) |
string NStr::URLDecode(const CTempString& str, EUrlDecode flag) |
| 5304 |
{ |
{ |
| 5305 |
string dst; |
string dst; |
| 5306 |
s_URLDecode(str, dst, flag); |
s_URLDecode(str, dst, flag); |
| 5314 |
} |
} |
| 5315 |
|
|
| 5316 |
|
|
| 5317 |
bool NStr::NeedsURLEncoding(const string& str, EUrlEncode flag) |
bool NStr::NeedsURLEncoding(const CTempString& str, EUrlEncode flag) |
| 5318 |
{ |
{ |
| 5319 |
SIZE_TYPE len = str.length(); |
SIZE_TYPE len = str.length(); |
| 5320 |
if ( !len ) { |
if ( !len ) { |
| 5321 |
return false; |
return false; |
| 5322 |
} |
} |
|
|
|
| 5323 |
const char (*encode_table)[4]; |
const char (*encode_table)[4]; |
| 5324 |
switch (flag) { |
switch (flag) { |
| 5325 |
case eUrlEnc_SkipMarkChars: |
case eUrlEnc_SkipMarkChars: |
| 5334 |
case eUrlEnc_Path: |
case eUrlEnc_Path: |
| 5335 |
encode_table = s_EncodePath; |
encode_table = s_EncodePath; |
| 5336 |
break; |
break; |
| 5337 |
|
case eUrlEnc_Cookie: |
| 5338 |
|
encode_table = s_EncodeCookie; |
| 5339 |
|
break; |
| 5340 |
case eUrlEnc_None: |
case eUrlEnc_None: |
| 5341 |
return false; |
return false; |
| 5342 |
default: |
default: |
| 5344 |
// To keep off compiler warning |
// To keep off compiler warning |
| 5345 |
encode_table = 0; |
encode_table = 0; |
| 5346 |
} |
} |
| 5347 |
|
const unsigned char* cstr = (const unsigned char*)str.data(); |
| 5348 |
|
|
|
const unsigned char* cstr = (const unsigned char*)str.c_str(); |
|
| 5349 |
for (SIZE_TYPE pos = 0; pos < len; pos++) { |
for (SIZE_TYPE pos = 0; pos < len; pos++) { |
| 5350 |
const char* subst = encode_table[cstr[pos]]; |
const char* subst = encode_table[cstr[pos]]; |
| 5351 |
if (*subst != cstr[pos]) { |
if (*subst != cstr[pos]) { |
| 5352 |
return true; |
return true; |
| 5353 |
} |
} |
| 5354 |
} |
} |
|
|
|
| 5355 |
return false; |
return false; |
| 5356 |
} |
} |
| 5357 |
|
|
| 5358 |
|
|
| 5359 |
bool NStr::IsIPAddress(const string& ip) |
/// @internal |
| 5360 |
|
static |
| 5361 |
|
bool s_IsIPAddress(const char* str, size_t size) |
| 5362 |
{ |
{ |
| 5363 |
const char* c = ip.c_str(); |
_ASSERT(str[size] == '\0'); |
| 5364 |
|
|
| 5365 |
|
const char* c = str; |
| 5366 |
|
|
| 5367 |
|
// IPv6? |
| 5368 |
|
if ( strchr(str, ':') ) { |
| 5369 |
|
if (NStr::CompareNocase(str, 0, 7, "::ffff:") == 0) { |
| 5370 |
|
// Mapped IPv4 address |
| 5371 |
|
return size > 7 && s_IsIPAddress(str + 7, size - 7); |
| 5372 |
|
} |
| 5373 |
|
|
| 5374 |
|
int colons = 0; |
| 5375 |
|
bool have_group = false; |
| 5376 |
|
const char* prev_colon = NULL; |
| 5377 |
|
int digits = 0; |
| 5378 |
|
// Continue until |
| 5379 |
|
for (; c && c - str < (int)size && *c != '%'; c++) { |
| 5380 |
|
if (*c == ':') { |
| 5381 |
|
colons++; |
| 5382 |
|
if (colons > 7) { |
| 5383 |
|
// Too many separators |
| 5384 |
|
return false; |
| 5385 |
|
} |
| 5386 |
|
if (prev_colon && c - prev_colon == 1) { |
| 5387 |
|
// A group of zeroes found |
| 5388 |
|
if (have_group) { |
| 5389 |
|
// Only one group is allowed |
| 5390 |
|
return false; |
| 5391 |
|
} |
| 5392 |
|
have_group = true; |
| 5393 |
|
} |
| 5394 |
|
prev_colon = c; |
| 5395 |
|
digits = 0; |
| 5396 |
|
continue; |
| 5397 |
|
} |
| 5398 |
|
digits++; |
| 5399 |
|
if (digits > 4) { |
| 5400 |
|
// Too many digits between colons |
| 5401 |
|
return false; |
| 5402 |
|
} |
| 5403 |
|
char d = toupper(*c); |
| 5404 |
|
if (d < '0' || d > 'F') { |
| 5405 |
|
// Invalid digit |
| 5406 |
|
return false; |
| 5407 |
|
} |
| 5408 |
|
} |
| 5409 |
|
// Check if zone index is present |
| 5410 |
|
if (*c == '%') { |
| 5411 |
|
// It's not clear yet what zone index may look like. |
| 5412 |
|
// Ignore it. |
| 5413 |
|
} |
| 5414 |
|
// Make sure there was at least one colon. |
| 5415 |
|
return colons > 1; |
| 5416 |
|
} |
| 5417 |
|
|
| 5418 |
unsigned long val; |
unsigned long val; |
| 5419 |
int dots = 0; |
int dots = 0; |
| 5420 |
|
|
| 5421 |
|
int& errno_ref = errno; |
| 5422 |
for (;;) { |
for (;;) { |
| 5423 |
char* e; |
char* e; |
| 5424 |
if ( !isdigit((unsigned char)(*c)) ) |
if ( !isdigit((unsigned char)(*c)) ) |
| 5425 |
return false; |
return false; |
| 5426 |
errno = 0; |
errno_ref = 0; |
| 5427 |
val = strtoul(c, &e, 10); |
val = strtoul(c, &e, 10); |
| 5428 |
if (c == e || errno) |
if (c == e || errno_ref) |
| 5429 |
return false; |
return false; |
| 5430 |
c = e; |
c = e; |
| 5431 |
if (*c != '.') |
if (*c != '.') |
| 5437 |
c++; |
c++; |
| 5438 |
} |
} |
| 5439 |
|
|
| 5440 |
|
// Make sure the whole string was checked (it is possible to have \0 chars |
| 5441 |
|
// in the middle of the string). |
| 5442 |
|
if ((size_t)(c - str) != size) { |
| 5443 |
|
return false; |
| 5444 |
|
} |
| 5445 |
return !*c && dots == 3 && val < 256; |
return !*c && dots == 3 && val < 256; |
| 5446 |
} |
} |
| 5447 |
|
|
| 5448 |
|
|
| 5449 |
|
bool NStr::IsIPAddress(const CTempStringEx& str) |
| 5450 |
|
{ |
| 5451 |
|
size_t size = str.size(); |
| 5452 |
|
if ( str.HasZeroAtEnd() ) { |
| 5453 |
|
// string has zero at the end already |
| 5454 |
|
return s_IsIPAddress(str.data(), size); |
| 5455 |
|
} |
| 5456 |
|
char buf[256]; // small temporary buffer on stack for appending zero char |
| 5457 |
|
if ( size < sizeof(buf) ) { |
| 5458 |
|
memcpy(buf, str.data(), size); |
| 5459 |
|
buf[size] = '\0'; |
| 5460 |
|
return s_IsIPAddress(buf, size); |
| 5461 |
|
} |
| 5462 |
|
else { |
| 5463 |
|
// use std::string() to allocate memory for appending zero char |
| 5464 |
|
return s_IsIPAddress(string(str).c_str(), size); |
| 5465 |
|
} |
| 5466 |
|
} |
| 5467 |
|
|
| 5468 |
|
|
| 5469 |
|
namespace { |
| 5470 |
|
// Comparator to decide if a symbol is a delimiter |
| 5471 |
|
template <typename TDelimiter> |
| 5472 |
|
class PDelimiter |
| 5473 |
|
{ |
| 5474 |
|
private: |
| 5475 |
|
const TDelimiter& delimiter; |
| 5476 |
|
|
| 5477 |
|
public: |
| 5478 |
|
PDelimiter(const TDelimiter& delim) |
| 5479 |
|
: delimiter(delim) |
| 5480 |
|
{} |
| 5481 |
|
|
| 5482 |
|
bool operator()(char tested_symbol) const; |
| 5483 |
|
}; |
| 5484 |
|
|
| 5485 |
|
|
| 5486 |
|
// Template search for a field |
| 5487 |
|
// @param str |
| 5488 |
|
// C or C++ string to search in. |
| 5489 |
|
// @param field_no |
| 5490 |
|
// Zero-based field number. |
| 5491 |
|
// @param delimiter |
| 5492 |
|
// Functor to decide if a symbol is a delimiter |
| 5493 |
|
// @param merge |
| 5494 |
|
// Whether to merge or not adjacent delimiters. |
| 5495 |
|
// @return |
| 5496 |
|
// Found field; or empty string if the required field is not found. |
| 5497 |
|
template <typename TComparator, typename TResult> |
| 5498 |
|
TResult s_GetField(const CTempString& str, |
| 5499 |
|
size_t field_no, |
| 5500 |
|
const TComparator& delimiter, |
| 5501 |
|
NStr::EMergeDelims merge) |
| 5502 |
|
{ |
| 5503 |
|
const char* current_ptr = str.data(); |
| 5504 |
|
const char* end_ptr = current_ptr + str.length(); |
| 5505 |
|
size_t current_field = 0; |
| 5506 |
|
|
| 5507 |
|
// Search for the beginning of the required field |
| 5508 |
|
for ( ; current_field != field_no; current_field++) { |
| 5509 |
|
while (current_ptr < end_ptr && !delimiter(*current_ptr)) |
| 5510 |
|
current_ptr++; |
| 5511 |
|
|
| 5512 |
|
if (merge == NStr::eMergeDelims) { |
| 5513 |
|
while (current_ptr < end_ptr && delimiter(*current_ptr)) |
| 5514 |
|
current_ptr++; |
| 5515 |
|
} |
| 5516 |
|
else |
| 5517 |
|
current_ptr++; |
| 5518 |
|
|
| 5519 |
|
if (current_ptr >= end_ptr) |
| 5520 |
|
return TResult(); |
| 5521 |
|
} |
| 5522 |
|
|
| 5523 |
|
if (current_field != field_no) |
| 5524 |
|
return TResult(); |
| 5525 |
|
|
| 5526 |
|
// Here: current_ptr points to the first character after the delimiter. |
| 5527 |
|
const char* field_start = current_ptr; |
| 5528 |
|
while (current_ptr < end_ptr && !delimiter(*current_ptr)) |
| 5529 |
|
current_ptr++; |
| 5530 |
|
|
| 5531 |
|
return TResult(field_start, current_ptr - field_start); |
| 5532 |
|
} |
| 5533 |
|
|
| 5534 |
|
|
| 5535 |
|
|
| 5536 |
|
template <> |
| 5537 |
|
bool PDelimiter<char>::operator() (char c) const |
| 5538 |
|
{ |
| 5539 |
|
return delimiter == c; |
| 5540 |
|
} |
| 5541 |
|
|
| 5542 |
|
template <> |
| 5543 |
|
bool PDelimiter<CTempString>::operator() (char c) const |
| 5544 |
|
{ |
| 5545 |
|
return delimiter.find(c) != NPOS; |
| 5546 |
|
} |
| 5547 |
|
} |
| 5548 |
|
|
| 5549 |
|
|
| 5550 |
|
/// Extract one field from a delimited string, returning it as a new string.
///
/// @param str
///   Text to split.
/// @param field_no
///   Zero-based number of the field to return.
/// @param delimiters
///   Set of characters, each of which separates fields.
/// @param merge
///   Whether adjacent delimiters are merged into one separator.
/// @return
///   Copy of the field; empty string if the field is not found.
string NStr::GetField(const CTempString& str,
                      size_t             field_no,
                      const CTempString& delimiters,
                      EMergeDelims       merge)
{
    typedef PDelimiter<CTempString> TIsDelimiter;

    const TIsDelimiter is_delimiter(delimiters);
    return s_GetField<TIsDelimiter, string>(str, field_no,
                                            is_delimiter, merge);
}
| 5561 |
|
|
| 5562 |
|
|
| 5563 |
|
/// Extract one field from a delimited string, returning it as a new string.
///
/// @param str
///   Text to split.
/// @param field_no
///   Zero-based number of the field to return.
/// @param delimiter
///   The single character that separates fields.
/// @param merge
///   Whether adjacent delimiters are merged into one separator.
/// @return
///   Copy of the field; empty string if the field is not found.
string NStr::GetField(const CTempString& str,
                      size_t             field_no,
                      char               delimiter,
                      EMergeDelims       merge)
{
    typedef PDelimiter<char> TIsDelimiter;

    const TIsDelimiter is_delimiter(delimiter);
    return s_GetField<TIsDelimiter, string>(str, field_no,
                                            is_delimiter, merge);
}
| 5574 |
|
|
| 5575 |
|
|
| 5576 |
|
/// Extract one field from a delimited string without copying it.
///
/// The returned CTempString is built from pointers into the data of @a str.
///
/// @param str
///   Text to split.
/// @param field_no
///   Zero-based number of the field to return.
/// @param delimiters
///   Set of characters, each of which separates fields.
/// @param merge
///   Whether adjacent delimiters are merged into one separator.
/// @return
///   The field; empty CTempString if the field is not found.
CTempString NStr::GetField_Unsafe(const CTempString& str,
                                  size_t             field_no,
                                  const CTempString& delimiters,
                                  EMergeDelims       merge)
{
    typedef PDelimiter<CTempString> TIsDelimiter;

    const TIsDelimiter is_delimiter(delimiters);
    return s_GetField<TIsDelimiter, CTempString>(str, field_no,
                                                 is_delimiter, merge);
}
| 5587 |
|
|
| 5588 |
|
|
| 5589 |
|
/// Extract one field from a delimited string without copying it.
///
/// The returned CTempString is built from pointers into the data of @a str.
///
/// @param str
///   Text to split.
/// @param field_no
///   Zero-based number of the field to return.
/// @param delimiter
///   The single character that separates fields.
/// @param merge
///   Whether adjacent delimiters are merged into one separator.
/// @return
///   The field; empty CTempString if the field is not found.
CTempString NStr::GetField_Unsafe(const CTempString& str,
                                  size_t             field_no,
                                  char               delimiter,
                                  EMergeDelims       merge)
{
    typedef PDelimiter<char> TIsDelimiter;

    const TIsDelimiter is_delimiter(delimiter);
    return s_GetField<TIsDelimiter, CTempString>(str, field_no,
                                                 is_delimiter, merge);
}
| 5600 |
|
|
| 5601 |
|
|
| 5602 |
|
|
| 5603 |
///////////////////////////////////////////////////////////////////////////// |
///////////////////////////////////////////////////////////////////////////// |
| 5604 |
// CStringUTF8 |
// CStringUTF8 / CUtf8 |
| 5605 |
|
|
| 5606 |
|
#if defined(__EXPORT_CTOR_STRINGUTF8__) |
| 5607 |
|
|
| 5608 |
|
// Constructors taking only a byte string: the source is handed to
// CUtf8::AsUTF8() as ISO 8859-1 text with validation switched off,
// and the result becomes the content of this object.

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const CTempString& src)
{
    this->assign(CUtf8::AsUTF8(src, eEncoding_ISO8859_1, CUtf8::eNoValidate));
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const char* src)
{
    this->assign(CUtf8::AsUTF8(src, eEncoding_ISO8859_1, CUtf8::eNoValidate));
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const string& src)
{
    this->assign(CUtf8::AsUTF8(src, eEncoding_ISO8859_1, CUtf8::eNoValidate));
}
| 5617 |
|
|
| 5618 |
|
|
| 5619 |
SIZE_TYPE CStringUTF8::GetSymbolCount(void) const |
// Constructors taking a byte string in a caller-specified encoding.
// The class-level 'validate' flag is mapped onto the matching CUtf8 flag
// (eValidate -> CUtf8::eValidate, anything else -> CUtf8::eNoValidate)
// before the conversion result is stored in this object.

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(
    const CTempString& src, EEncoding encoding, EValidate validate)
{
    if (validate == CStringUTF8_DEPRECATED::eValidate) {
        assign(CUtf8::AsUTF8(src, encoding, CUtf8::eValidate));
    } else {
        assign(CUtf8::AsUTF8(src, encoding, CUtf8::eNoValidate));
    }
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(
    const char* src, EEncoding encoding, EValidate validate)
{
    if (validate == CStringUTF8_DEPRECATED::eValidate) {
        assign(CUtf8::AsUTF8(src, encoding, CUtf8::eValidate));
    } else {
        assign(CUtf8::AsUTF8(src, encoding, CUtf8::eNoValidate));
    }
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(
    const string& src, EEncoding encoding, EValidate validate)
{
    if (validate == CStringUTF8_DEPRECATED::eValidate) {
        assign(CUtf8::AsUTF8(src, encoding, CUtf8::eValidate));
    } else {
        assign(CUtf8::AsUTF8(src, encoding, CUtf8::eNoValidate));
    }
}
| 5631 |
|
// Constructors from Unicode string objects: the content of this object
// is whatever CUtf8::AsUTF8() returns for the source.

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const TStringUnicode& src)
{
    this->assign(CUtf8::AsUTF8(src));
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const TStringUCS4& src)
{
    this->assign(CUtf8::AsUTF8(src));
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const TStringUCS2& src)
{
    this->assign(CUtf8::AsUTF8(src));
}

#if defined(HAVE_WSTRING)
CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const wstring& src)
{
    this->assign(CUtf8::AsUTF8(src));
}
#endif

// Constructors from pointers to Unicode characters; no length is passed
// to CUtf8::AsUTF8().

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const TUnicodeSymbol* src)
{
    this->assign(CUtf8::AsUTF8(src));
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const TCharUCS4* src)
{
    this->assign(CUtf8::AsUTF8(src));
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const TCharUCS2* src)
{
    this->assign(CUtf8::AsUTF8(src));
}

#if defined(HAVE_WSTRING)
CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(const wchar_t* src)
{
    this->assign(CUtf8::AsUTF8(src));
}
#endif

// Constructors from a character buffer: 'char_count' is forwarded only when
// 'type' is eCharBuffer; for any other type NPOS is passed as the count.

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(
    ECharBufferType type, const TUnicodeSymbol* src, SIZE_TYPE char_count)
{
    if (type == eCharBuffer) {
        assign(CUtf8::AsUTF8(src, char_count));
    } else {
        assign(CUtf8::AsUTF8(src, NPOS));
    }
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(
    ECharBufferType type, const TCharUCS4* src, SIZE_TYPE char_count)
{
    if (type == eCharBuffer) {
        assign(CUtf8::AsUTF8(src, char_count));
    } else {
        assign(CUtf8::AsUTF8(src, NPOS));
    }
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(
    ECharBufferType type, const TCharUCS2* src, SIZE_TYPE char_count)
{
    if (type == eCharBuffer) {
        assign(CUtf8::AsUTF8(src, char_count));
    } else {
        assign(CUtf8::AsUTF8(src, NPOS));
    }
}

CStringUTF8_DEPRECATED::CStringUTF8_DEPRECATED(
    ECharBufferType type, const wchar_t* src, SIZE_TYPE char_count)
{
    if (type == eCharBuffer) {
        assign(CUtf8::AsUTF8(src, char_count));
    } else {
        assign(CUtf8::AsUTF8(src, NPOS));
    }
}
| 5676 |
|
#endif // __EXPORT_CTOR_STRINGUTF8__ |
| 5677 |
|
|
| 5678 |
|
//#ifndef __NO_EXPORT_STRINGUTF8__ |
| 5679 |
|
#if 0 |
| 5680 |
|
|
| 5681 |
|
CStringUTF8::CStringUTF8(const CTempString& src) { |
| 5682 |
|
assign( CUtf8::AsUTF8(src, eEncoding_ISO8859_1, CUtf8::eNoValidate)); |
| 5683 |
|
} // NCBI_FAKE_WARNING |
| 5684 |
|
CStringUTF8::CStringUTF8(const char* src ) { |
| 5685 |
|
assign( CUtf8::AsUTF8(src, eEncoding_ISO8859_1, CUtf8::eNoValidate)); |
| 5686 |
|
} // NCBI_FAKE_WARNING |
| 5687 |
|
CStringUTF8::CStringUTF8(const string& src) { |
| 5688 |
|
assign( CUtf8::AsUTF8(src, eEncoding_ISO8859_1, CUtf8::eNoValidate)); |
| 5689 |
|
} // NCBI_FAKE_WARNING |
| 5690 |
|
|
| 5691 |
|
string CStringUTF8::AsLatin1(const char* substitute_on_error) const |
| 5692 |
{ |
{ |
| 5693 |
SIZE_TYPE count = 0; |
return CUtf8::AsSingleByteString(*this,eEncoding_ISO8859_1,substitute_on_error); |
|
for (const char* src = c_str(); *src; ++src, ++count) { |
|
|
SIZE_TYPE more = 0; |
|
|
bool good = x_EvalFirst(*src, more); |
|
|
while (more-- && good) { |
|
|
good = x_EvalNext(*(++src)); |
|
| 5694 |
} |
} |
| 5695 |
if ( !good ) { |
wstring CStringUTF8::AsUnicode(const wchar_t* substitute_on_error) const |
| 5696 |
NCBI_THROW2(CStringException, eFormat, |
{ |
| 5697 |
"String is not in UTF8 format", |
return CUtf8::AsBasicString<wchar_t>(*this,substitute_on_error,CUtf8::eNoValidate); |
|
s_DiffPtr(src,c_str())); |
|
| 5698 |
} |
} |
| 5699 |
|
TStringUCS2 CStringUTF8::AsUCS2(const TCharUCS2* substitute_on_error) const |
| 5700 |
|
{ |
| 5701 |
|
return CUtf8::AsBasicString<TCharUCS2>(*this,substitute_on_error,CUtf8::eNoValidate); |
| 5702 |
} |
} |
| 5703 |
return count; |
|
| 5704 |
|
CStringUTF8 CStringUTF8::TruncateSpaces(const CTempString& str, |
| 5705 |
|
NStr::ETrunc side) |
| 5706 |
|
{ |
| 5707 |
|
return CUtf8::TruncateSpaces(str,side); |
| 5708 |
|
} |
| 5709 |
|
|
| 5710 |
|
|
| 5711 |
|
CStringUTF8::CStringUTF8( |
| 5712 |
|
const CTempString& src, EEncoding encoding,EValidate validate) { |
| 5713 |
|
assign( CUtf8::AsUTF8(src, encoding, validate == CStringUTF8::eValidate ? CUtf8::eValidate : CUtf8::eNoValidate)); |
| 5714 |
|
} |
| 5715 |
|
|
| 5716 |
|
CStringUTF8::CStringUTF8( |
| 5717 |
|
const char* src, EEncoding encoding, EValidate validate) { |
| 5718 |
|
assign( CUtf8::AsUTF8(src, encoding, validate == CStringUTF8::eValidate ? CUtf8::eValidate : CUtf8::eNoValidate)); |
| 5719 |
|
} |
| 5720 |
|
|
| 5721 |
|
CStringUTF8::CStringUTF8( |
| 5722 |
|
const string& src, EEncoding encoding, EValidate validate) { |
| 5723 |
|
assign( CUtf8::AsUTF8(src, encoding, validate == CStringUTF8::eValidate ? CUtf8::eValidate : CUtf8::eNoValidate)); |
| 5724 |
|
} |
| 5725 |
|
CStringUTF8::CStringUTF8(const TStringUnicode& src) { |
| 5726 |
|
assign( CUtf8::AsUTF8(src)); |
| 5727 |
|
} |
| 5728 |
|
CStringUTF8::CStringUTF8(const TStringUCS4& src) { |
| 5729 |
|
assign( CUtf8::AsUTF8(src)); |
| 5730 |
|
} |
| 5731 |
|
CStringUTF8::CStringUTF8(const TStringUCS2& src) { |
| 5732 |
|
assign( CUtf8::AsUTF8(src)); |
| 5733 |
|
} |
| 5734 |
|
#if defined(HAVE_WSTRING) |
| 5735 |
|
CStringUTF8::CStringUTF8(const wstring& src) { |
| 5736 |
|
assign( CUtf8::AsUTF8(src)); |
| 5737 |
|
} |
| 5738 |
|
#endif |
| 5739 |
|
CStringUTF8::CStringUTF8(const TUnicodeSymbol* src) { |
| 5740 |
|
assign( CUtf8::AsUTF8(src)); |
| 5741 |
|
} |
| 5742 |
|
CStringUTF8::CStringUTF8(const TCharUCS4* src) { |
| 5743 |
|
assign( CUtf8::AsUTF8(src)); |
| 5744 |
|
} |
| 5745 |
|
CStringUTF8::CStringUTF8(const TCharUCS2* src) { |
| 5746 |
|
assign( CUtf8::AsUTF8(src)); |
| 5747 |
|
} |
| 5748 |
|
#if defined(HAVE_WSTRING) |
| 5749 |
|
CStringUTF8::CStringUTF8(const wchar_t* src) { |
| 5750 |
|
assign( CUtf8::AsUTF8(src)); |
| 5751 |
|
} |
| 5752 |
|
#endif |
| 5753 |
|
|
| 5754 |
|
CStringUTF8::CStringUTF8( |
| 5755 |
|
ECharBufferType type, const TUnicodeSymbol* src, SIZE_TYPE char_count) { |
| 5756 |
|
assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5757 |
} |
} |
| 5758 |
|
|
| 5759 |
|
CStringUTF8::CStringUTF8( |
| 5760 |
|
ECharBufferType type, const TCharUCS4* src, SIZE_TYPE char_count) { |
| 5761 |
|
assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5762 |
|
} |
| 5763 |
|
|
| 5764 |
|
CStringUTF8::CStringUTF8( |
| 5765 |
|
ECharBufferType type, const TCharUCS2* src, SIZE_TYPE char_count) { |
| 5766 |
|
assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5767 |
|
} |
| 5768 |
|
|
| 5769 |
|
CStringUTF8::CStringUTF8( |
| 5770 |
|
ECharBufferType type, const wchar_t* src, SIZE_TYPE char_count) { |
| 5771 |
|
assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5772 |
|
} |
| 5773 |
|
CStringUTF8& CStringUTF8::operator= (const TStringUnicode& src) { |
| 5774 |
|
assign( CUtf8::AsUTF8(src)); |
| 5775 |
|
return *this; |
| 5776 |
|
} |
| 5777 |
|
CStringUTF8& CStringUTF8::operator= (const TStringUCS4& src) { |
| 5778 |
|
assign( CUtf8::AsUTF8(src)); |
| 5779 |
|
return *this; |
| 5780 |
|
} |
| 5781 |
|
CStringUTF8& CStringUTF8::operator= (const TStringUCS2& src) { |
| 5782 |
|
assign( CUtf8::AsUTF8(src)); |
| 5783 |
|
return *this; |
| 5784 |
|
} |
| 5785 |
|
#if defined(HAVE_WSTRING) |
| 5786 |
|
CStringUTF8& CStringUTF8::operator= (const wstring& src) { |
| 5787 |
|
assign( CUtf8::AsUTF8(src)); |
| 5788 |
|
return *this; |
| 5789 |
|
} |
| 5790 |
|
#endif |
| 5791 |
|
CStringUTF8& CStringUTF8::operator= (const TUnicodeSymbol* src) { |
| 5792 |
|
assign( CUtf8::AsUTF8(src)); |
| 5793 |
|
return *this; |
| 5794 |
|
} |
| 5795 |
|
CStringUTF8& CStringUTF8::operator= (const TCharUCS4* src) { |
| 5796 |
|
assign( CUtf8::AsUTF8(src)); |
| 5797 |
|
return *this; |
| 5798 |
|
} |
| 5799 |
|
CStringUTF8& CStringUTF8::operator= (const TCharUCS2* src) { |
| 5800 |
|
assign( CUtf8::AsUTF8(src)); |
| 5801 |
|
return *this; |
| 5802 |
|
} |
| 5803 |
|
#if defined(HAVE_WSTRING) |
| 5804 |
|
CStringUTF8& CStringUTF8::operator= (const wchar_t* src) { |
| 5805 |
|
assign( CUtf8::AsUTF8(src)); |
| 5806 |
|
return *this; |
| 5807 |
|
} |
| 5808 |
|
#endif |
| 5809 |
|
CStringUTF8& CStringUTF8::operator+= (const TStringUnicode& src) { |
| 5810 |
|
append( CUtf8::AsUTF8(src)); |
| 5811 |
|
return *this; |
| 5812 |
|
} |
| 5813 |
|
CStringUTF8& CStringUTF8::operator+= (const TStringUCS4& src) { |
| 5814 |
|
append( CUtf8::AsUTF8(src)); |
| 5815 |
|
return *this; |
| 5816 |
|
} |
| 5817 |
|
CStringUTF8& CStringUTF8::operator+= (const TStringUCS2& src) { |
| 5818 |
|
append( CUtf8::AsUTF8(src)); |
| 5819 |
|
return *this; |
| 5820 |
|
} |
| 5821 |
|
#if defined(HAVE_WSTRING) |
| 5822 |
|
CStringUTF8& CStringUTF8::operator+= (const wstring& src) { |
| 5823 |
|
append( CUtf8::AsUTF8(src)); |
| 5824 |
|
return *this; |
| 5825 |
|
} |
| 5826 |
|
#endif |
| 5827 |
|
CStringUTF8& CStringUTF8::operator+= (const TUnicodeSymbol* src) { |
| 5828 |
|
append( CUtf8::AsUTF8(src)); |
| 5829 |
|
return *this; |
| 5830 |
|
} |
| 5831 |
|
CStringUTF8& CStringUTF8::operator+= (const TCharUCS4* src) { |
| 5832 |
|
append( CUtf8::AsUTF8(src)); |
| 5833 |
|
return *this; |
| 5834 |
|
} |
| 5835 |
|
CStringUTF8& CStringUTF8::operator+= (const TCharUCS2* src) { |
| 5836 |
|
append( CUtf8::AsUTF8(src)); |
| 5837 |
|
return *this; |
| 5838 |
|
} |
| 5839 |
|
#if defined(HAVE_WSTRING) |
| 5840 |
|
CStringUTF8& CStringUTF8::operator+= (const wchar_t* src) { |
| 5841 |
|
append( CUtf8::AsUTF8(src)); |
| 5842 |
|
return *this; |
| 5843 |
|
} |
| 5844 |
|
#endif |
| 5845 |
|
CStringUTF8& CStringUTF8::Assign( |
| 5846 |
|
const CTempString& src, EEncoding encoding, EValidate validate) { |
| 5847 |
|
assign( CUtf8::AsUTF8(src, encoding, validate == CStringUTF8::eValidate ? CUtf8::eValidate : CUtf8::eNoValidate)); |
| 5848 |
|
return *this; |
| 5849 |
|
} |
| 5850 |
|
|
| 5851 |
|
CStringUTF8& CStringUTF8::Assign(const TStringUnicode& src) { |
| 5852 |
|
assign( CUtf8::AsUTF8(src)); |
| 5853 |
|
return *this; |
| 5854 |
|
} |
| 5855 |
|
CStringUTF8& CStringUTF8::Assign(const TStringUCS4& src) { |
| 5856 |
|
assign( CUtf8::AsUTF8(src)); |
| 5857 |
|
return *this; |
| 5858 |
|
} |
| 5859 |
|
CStringUTF8& CStringUTF8::Assign(const TStringUCS2& src) { |
| 5860 |
|
assign( CUtf8::AsUTF8(src)); |
| 5861 |
|
return *this; |
| 5862 |
|
} |
| 5863 |
|
#if defined(HAVE_WSTRING) |
| 5864 |
|
CStringUTF8& CStringUTF8::Assign(const wstring& src) { |
| 5865 |
|
assign( CUtf8::AsUTF8(src)); |
| 5866 |
|
return *this; |
| 5867 |
|
} |
| 5868 |
|
#endif |
| 5869 |
|
CStringUTF8& CStringUTF8::Assign(const TUnicodeSymbol* src) { |
| 5870 |
|
assign( CUtf8::AsUTF8(src)); |
| 5871 |
|
return *this; |
| 5872 |
|
} |
| 5873 |
|
CStringUTF8& CStringUTF8::Assign(const TCharUCS4* src) { |
| 5874 |
|
assign( CUtf8::AsUTF8(src)); |
| 5875 |
|
return *this; |
| 5876 |
|
} |
| 5877 |
|
CStringUTF8& CStringUTF8::Assign(const TCharUCS2* src) { |
| 5878 |
|
assign( CUtf8::AsUTF8(src)); |
| 5879 |
|
return *this; |
| 5880 |
|
} |
| 5881 |
|
#if defined(HAVE_WSTRING) |
| 5882 |
|
CStringUTF8& CStringUTF8::Assign(const wchar_t* src) { |
| 5883 |
|
assign( CUtf8::AsUTF8(src)); |
| 5884 |
|
return *this; |
| 5885 |
|
} |
| 5886 |
|
#endif |
| 5887 |
|
CStringUTF8& CStringUTF8::Assign( |
| 5888 |
|
ECharBufferType type, const TUnicodeSymbol* src, SIZE_TYPE char_count) { |
| 5889 |
|
assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5890 |
|
return *this; |
| 5891 |
|
} |
| 5892 |
|
CStringUTF8& CStringUTF8::Assign( |
| 5893 |
|
ECharBufferType type, const TCharUCS4* src, SIZE_TYPE char_count) { |
| 5894 |
|
assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5895 |
|
return *this; |
| 5896 |
|
} |
| 5897 |
|
CStringUTF8& CStringUTF8::Assign( |
| 5898 |
|
ECharBufferType type, const TCharUCS2* src, SIZE_TYPE char_count) { |
| 5899 |
|
assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5900 |
|
return *this; |
| 5901 |
|
} |
| 5902 |
|
#if defined(HAVE_WSTRING) |
| 5903 |
|
CStringUTF8& CStringUTF8::Assign( |
| 5904 |
|
ECharBufferType type, const wchar_t* src, SIZE_TYPE char_count) { |
| 5905 |
|
assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5906 |
|
return *this; |
| 5907 |
|
} |
| 5908 |
|
#endif |
| 5909 |
|
CStringUTF8& CStringUTF8::Assign(char ch, EEncoding encoding) { |
| 5910 |
|
assign( CUtf8::AsUTF8( CTempString(&ch,1), encoding, CUtf8::eValidate)); |
| 5911 |
|
return *this; |
| 5912 |
|
} |
| 5913 |
|
CStringUTF8& CStringUTF8::Append( |
| 5914 |
|
const CTempString& src, EEncoding encoding, EValidate validate) { |
| 5915 |
|
append( CUtf8::AsUTF8( src, encoding, validate == CStringUTF8::eValidate ? CUtf8::eValidate : CUtf8::eNoValidate)); |
| 5916 |
|
return *this; |
| 5917 |
|
} |
| 5918 |
|
CStringUTF8& CStringUTF8::Append(const TStringUnicode& src) { |
| 5919 |
|
append( CUtf8::AsUTF8( src)); |
| 5920 |
|
return *this; |
| 5921 |
|
} |
| 5922 |
|
CStringUTF8& CStringUTF8::Append(const TStringUCS4& src) { |
| 5923 |
|
append( CUtf8::AsUTF8( src)); |
| 5924 |
|
return *this; |
| 5925 |
|
} |
| 5926 |
|
CStringUTF8& CStringUTF8::Append(const TStringUCS2& src) { |
| 5927 |
|
append( CUtf8::AsUTF8( src)); |
| 5928 |
|
return *this; |
| 5929 |
|
} |
| 5930 |
|
#if defined(HAVE_WSTRING) |
| 5931 |
|
CStringUTF8& CStringUTF8::Append(const wstring& src) { |
| 5932 |
|
append( CUtf8::AsUTF8( src)); |
| 5933 |
|
return *this; |
| 5934 |
|
} |
| 5935 |
|
#endif |
| 5936 |
|
CStringUTF8& CStringUTF8::Append(const TUnicodeSymbol* src) { |
| 5937 |
|
append( CUtf8::AsUTF8( src)); |
| 5938 |
|
return *this; |
| 5939 |
|
} |
| 5940 |
|
CStringUTF8& CStringUTF8::Append(const TCharUCS4* src) { |
| 5941 |
|
append( CUtf8::AsUTF8( src)); |
| 5942 |
|
return *this; |
| 5943 |
|
} |
| 5944 |
|
CStringUTF8& CStringUTF8::Append(const TCharUCS2* src) { |
| 5945 |
|
append( CUtf8::AsUTF8( src)); |
| 5946 |
|
return *this; |
| 5947 |
|
} |
| 5948 |
|
#if defined(HAVE_WSTRING) |
| 5949 |
|
CStringUTF8& CStringUTF8::Append(const wchar_t* src) { |
| 5950 |
|
append( CUtf8::AsUTF8( src)); |
| 5951 |
|
return *this; |
| 5952 |
|
} |
| 5953 |
|
#endif |
| 5954 |
|
CStringUTF8& CStringUTF8::Append( |
| 5955 |
|
ECharBufferType type, const TUnicodeSymbol* src, SIZE_TYPE char_count) { |
| 5956 |
|
append( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5957 |
|
return *this; |
| 5958 |
|
} |
| 5959 |
|
CStringUTF8& CStringUTF8::Append( |
| 5960 |
|
ECharBufferType type, const TCharUCS4* src, SIZE_TYPE char_count) { |
| 5961 |
|
append( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5962 |
|
return *this; |
| 5963 |
|
} |
| 5964 |
|
CStringUTF8& CStringUTF8::Append( |
| 5965 |
|
ECharBufferType type, const TCharUCS2* src, SIZE_TYPE char_count) { |
| 5966 |
|
append( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5967 |
|
return *this; |
| 5968 |
|
} |
| 5969 |
|
#if defined(HAVE_WSTRING) |
| 5970 |
|
CStringUTF8& CStringUTF8::Append( |
| 5971 |
|
ECharBufferType type, const wchar_t* src, SIZE_TYPE char_count) { |
| 5972 |
|
append( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS)); |
| 5973 |
|
return *this; |
| 5974 |
|
} |
| 5975 |
|
#endif |
| 5976 |
|
CStringUTF8& CStringUTF8::Append(char ch, EEncoding encoding) { |
| 5977 |
|
append( CUtf8::AsUTF8( CTempString(&ch,1), encoding, CUtf8::eValidate)); |
| 5978 |
|
return *this; |
| 5979 |
|
} |
| 5980 |
|
CStringUTF8& CStringUTF8::Append(TUnicodeSymbol ch) { |
| 5981 |
|
append( CUtf8::AsUTF8(&ch, 1)); |
| 5982 |
|
return *this; |
| 5983 |
|
} |
| 5984 |
|
SIZE_TYPE CStringUTF8::GetSymbolCount(void) const { |
| 5985 |
|
return CUtf8::GetSymbolCount(*this); |
| 5986 |
|
} |
| 5987 |
|
bool CStringUTF8::IsValid(void) const { |
| 5988 |
|
return CUtf8::MatchEncoding(*this, eEncoding_UTF8); |
| 5989 |
|
} |
| 5990 |
|
TUnicodeSymbol CStringUTF8::Decode(const char*& src) { |
| 5991 |
|
return CUtf8::Decode(src); |
| 5992 |
|
} |
| 5993 |
|
#ifndef NCBI_COMPILER_WORKSHOP |
| 5994 |
|
TUnicodeSymbol CStringUTF8::Decode(string::const_iterator& src) { |
| 5995 |
|
return CUtf8::Decode(src); |
| 5996 |
|
} |
| 5997 |
|
#endif |
| 5998 |
|
|
| 5999 |
|
|
| 6000 |
|
SIZE_TYPE CStringUTF8::GetSymbolCount(const CTempString& src) |
| 6001 |
|
{ |
| 6002 |
|
return CUtf8::GetSymbolCount(src); |
| 6003 |
|
} |
| 6004 |
|
|
| 6005 |
SIZE_TYPE CStringUTF8::GetValidSymbolCount(const char* src, SIZE_TYPE buf_size) |
SIZE_TYPE CStringUTF8::GetValidSymbolCount(const char* src, SIZE_TYPE buf_size) |
| 6006 |
{ |
{ |
| 6007 |
SIZE_TYPE count = 0, cur_size=0; |
return CUtf8::GetValidSymbolCount(CTempString(src, buf_size)); |
|
for (; cur_size < buf_size && src && *src; ++src, ++count, ++cur_size) { |
|
|
SIZE_TYPE more = 0; |
|
|
bool good = x_EvalFirst(*src, more); |
|
|
while (more-- && good && ++cur_size < buf_size) { |
|
|
good = x_EvalNext(*(++src)); |
|
| 6008 |
} |
} |
| 6009 |
if ( !good ) { |
|
| 6010 |
return count; |
SIZE_TYPE CStringUTF8::GetValidSymbolCount(const CTempString& src) |
| 6011 |
|
{ |
| 6012 |
|
return CUtf8::GetValidSymbolCount(src); |
| 6013 |
} |
} |
| 6014 |
|
|
| 6015 |
|
SIZE_TYPE CStringUTF8::GetValidBytesCount(const char* src, SIZE_TYPE buf_size) |
| 6016 |
|
{ |
| 6017 |
|
return CUtf8::GetValidBytesCount(CTempString(src,buf_size)); |
| 6018 |
} |
} |
| 6019 |
return count; |
|
| 6020 |
|
SIZE_TYPE CStringUTF8::GetValidBytesCount(const CTempString& src) |
| 6021 |
|
{ |
| 6022 |
|
return CUtf8::GetValidBytesCount(src); |
| 6023 |
} |
} |
| 6024 |
|
|
| 6025 |
|
string CStringUTF8::AsSingleByteString(EEncoding encoding, |
| 6026 |
|
const char* substitute_on_error) const |
| 6027 |
|
{ |
| 6028 |
|
return CUtf8::AsSingleByteString(*this,encoding,substitute_on_error); |
| 6029 |
|
} |
| 6030 |
|
|
| 6031 |
SIZE_TYPE CStringUTF8::GetValidBytesCount(const char* src, SIZE_TYPE buf_size) |
EEncoding CStringUTF8::GuessEncoding( const CTempString& src) |
| 6032 |
{ |
{ |
| 6033 |
SIZE_TYPE count = 0; |
return CUtf8::GuessEncoding(src); |
| 6034 |
SIZE_TYPE cur_size = 0; |
} |
| 6035 |
|
|
| 6036 |
|
bool CStringUTF8::MatchEncoding( const CTempString& src, EEncoding encoding) |
| 6037 |
|
{ |
| 6038 |
|
return CUtf8::MatchEncoding(src,encoding); |
| 6039 |
|
} |
| 6040 |
|
|
| 6041 |
|
// Forwarder to CUtf8::EncodingToString().
string CStringUTF8::EncodingToString(EEncoding encoding)
{
    string encoding_name = CUtf8::EncodingToString(encoding);
    return encoding_name;
}
| 6045 |
|
|
| 6046 |
|
// Forwarder to CUtf8::StringToEncoding().
EEncoding CStringUTF8::StringToEncoding(const CTempString& str)
{
    const EEncoding parsed = CUtf8::StringToEncoding(str);
    return parsed;
}
| 6050 |
|
|
| 6051 |
|
|
| 6052 |
|
// Forwarder to CUtf8::CharToSymbol().
TUnicodeSymbol CStringUTF8::CharToSymbol(char ch, EEncoding encoding)
{
    const TUnicodeSymbol symbol = CUtf8::CharToSymbol(ch, encoding);
    return symbol;
}
| 6056 |
|
|
| 6057 |
|
// Forwarder to CUtf8::SymbolToChar().
char CStringUTF8::SymbolToChar(TUnicodeSymbol sym, EEncoding encoding)
{
    const char encoded = CUtf8::SymbolToChar(sym, encoding);
    return encoded;
}
| 6061 |
|
|
| 6062 |
|
// Forwarder to CUtf8::DecodeFirst(); 'more' is handed to the callee by
// reference and is filled in there.
TUnicodeSymbol CStringUTF8::DecodeFirst(char ch, SIZE_TYPE& more)
{
    const TUnicodeSymbol partial = CUtf8::DecodeFirst(ch, more);
    return partial;
}
| 6066 |
|
|
| 6067 |
|
// Forwarder to CUtf8::DecodeNext().
TUnicodeSymbol CStringUTF8::DecodeNext(TUnicodeSymbol chU, char ch)
{
    const TUnicodeSymbol extended = CUtf8::DecodeNext(chU, ch);
    return extended;
}
| 6071 |
|
|
| 6072 |
for (; cur_size < buf_size && src && *src; ++src, ++count, ++cur_size) { |
// Forwarder to CUtf8::IsWhiteSpace().
bool CStringUTF8::IsWhiteSpace(TUnicodeSymbol chU)
{
    const bool is_space = CUtf8::IsWhiteSpace(chU);
    return is_space;
}
| 6076 |
|
|
| 6077 |
|
// Forwarder: trims this string through CUtf8::TruncateSpacesInPlace() and
// returns the reference that call yields.
CStringUTF8& CStringUTF8::TruncateSpacesInPlace(NStr::ETrunc side)
{
    CStringUTF8& trimmed = CUtf8::TruncateSpacesInPlace(*this, side);
    return trimmed;
}
| 6081 |
|
|
| 6082 |
|
// Forwarder to CUtf8::TruncateSpaces_Unsafe().
CTempString CStringUTF8::TruncateSpaces_Unsafe(const CTempString& str,
                                               NStr::ETrunc side)
{
    const CTempString trimmed_view = CUtf8::TruncateSpaces_Unsafe(str, side);
    return trimmed_view;
}
| 6086 |
|
|
| 6087 |
|
void CStringUTF8::x_Validate(void) const |
| 6088 |
|
{ |
| 6089 |
|
CUtf8::x_Validate(*this); |
| 6090 |
|
} |
| 6091 |
|
|
| 6092 |
|
// Forwarder: appends one Unicode symbol to this string through
// CUtf8::x_AppendChar().
void CStringUTF8::x_AppendChar(TUnicodeSymbol ch)
{
    CStringUTF8& self = *this;
    CUtf8::x_AppendChar(self, ch);
}
| 6096 |
|
|
| 6097 |
|
void CStringUTF8::x_Append(const CTempString& src, |
| 6098 |
|
EEncoding encoding, EValidate validate) |
| 6099 |
|
{ |
| 6100 |
|
CUtf8::x_Append(*this, src, encoding, (CUtf8::EValidate)validate); |
| 6101 |
|
} |
| 6102 |
|
|
| 6103 |
|
// Forwarder to CUtf8::x_BytesNeeded().
SIZE_TYPE CStringUTF8::x_BytesNeeded(TUnicodeSymbol ch)
{
    const SIZE_TYPE n_bytes = CUtf8::x_BytesNeeded(ch);
    return n_bytes;
}
| 6107 |
|
|
| 6108 |
|
bool CStringUTF8::x_EvalFirst(char ch, SIZE_TYPE& more) |
| 6109 |
|
{ |
| 6110 |
|
return CUtf8::x_EvalFirst(ch, more); |
| 6111 |
|
} |
| 6112 |
|
|
| 6113 |
|
bool CStringUTF8::x_EvalNext(char ch) |
| 6114 |
|
{ |
| 6115 |
|
return CUtf8::x_EvalNext(ch); |
| 6116 |
|
} |
| 6117 |
|
#endif // __NO_EXPORT_STRINGUTF8__ |
| 6118 |
|
|
| 6119 |
|
// Count the well-formed UTF-8 symbols at the start of 'str'.
//
// @param str
//   Byte sequence to scan.
// @param src
//   [out] Set to str.begin() and advanced while scanning.  On return it
//   equals str.end() if scanning reached the end of the data; otherwise it
//   points at the byte that was rejected (a bad lead byte or a bad
//   continuation byte).
// @return
//   Number of complete, well-formed symbols seen before scanning stopped.
SIZE_TYPE CUtf8::x_GetValidSymbolCount(const CTempString& str,
                                       CTempString::const_iterator& src)
{
    SIZE_TYPE count = 0;
    src = str.begin();
    CTempString::const_iterator to = str.end();
    for (; src != to; ++src, ++count) {
        SIZE_TYPE more = 0;
        bool good = x_EvalFirst(*src, more);
        while (more-- && good) {
            // Never dereference at or beyond the end of the data: a
            // multi-byte sequence that is cut short by the end of the
            // buffer is treated as invalid instead of being read past
            // 'to' (CTempString data need not be NUL-terminated).
            good = (++src != to) && x_EvalNext(*src);
        }
        if ( !good ) {
            return count;
        }
    }
    return count;
}
| 6137 |
|
|
| 6138 |
|
// Count the symbols of a UTF-8 string.
// Throws CStringException (eFormat) if x_GetValidSymbolCount() stopped
// before the end of 'str'; the exception carries the byte offset at which
// scanning stopped.
SIZE_TYPE CUtf8::GetSymbolCount(const CTempString& str)
{
    CTempString::const_iterator stopped_at;
    const SIZE_TYPE n_symbols = x_GetValidSymbolCount(str, stopped_at);
    const bool scanned_everything = (stopped_at == str.end());
    if ( !scanned_everything ) {
        NCBI_THROW2(CStringException, eFormat,
                    "String is not in UTF8 format",
                    (stopped_at - str.begin()));
    }
    return n_symbols;
}
| 6149 |
|
|
| 6150 |
// Convert UTF-8 text into a single-byte encoded string.
//
// @param str
//   UTF-8 source text.
// @param encoding
//   Target encoding.  For eEncoding_UTF8 the source is returned as is.
// @param substitute_on_error
//   If non-NULL, this text is appended in place of any symbol for which
//   SymbolToChar() throws CStringException.  If NULL, that exception
//   propagates to the caller.
// @param validate
//   With eValidate, 'str' is first checked with MatchEncoding(); a mismatch
//   throws CStringException (eBadArgs).
string CUtf8::AsSingleByteString(const CTempString& str,
    EEncoding encoding, const char* substitute_on_error, EValidate validate)
{
    if (validate == eValidate  &&  !MatchEncoding(str, eEncoding_UTF8)) {
        NCBI_THROW2(CStringException, eBadArgs,
            "Source string is not in UTF8 format", 0);
    }
    if (encoding == eEncoding_UTF8) {
        return str;
    }

    string result;
    result.reserve(GetSymbolCount(str) + 1);

    const CTempString::const_iterator last = str.end();
    for (CTempString::const_iterator cur = str.begin(); cur != last; ++cur) {
        const TUnicodeSymbol sym = CUtf8::Decode(cur);
        if ( !substitute_on_error ) {
            // No fallback text: let conversion errors propagate.
            result.append(1, SymbolToChar(sym, encoding));
            continue;
        }
        try {
            result.append(1, SymbolToChar(sym, encoding));
        }
        catch (CStringException&) {
            result.append(substitute_on_error);
        }
    }
    return result;
}
| 6181 |
|
|
| 6182 |
|
EEncoding CUtf8::GuessEncoding( const CTempString& src) |
|
EEncoding CStringUTF8::GuessEncoding( const char* src) |
|
| 6183 |
{ |
{ |
| 6184 |
SIZE_TYPE more = 0; |
SIZE_TYPE more = 0; |
| 6185 |
|
CTempString::const_iterator i = src.begin(); |
| 6186 |
|
CTempString::const_iterator end = src.end(); |
| 6187 |
bool cp1252, iso1, ascii, utf8; |
bool cp1252, iso1, ascii, utf8; |
| 6188 |
for (cp1252 = iso1 = ascii = utf8 = true; *src; ++src) { |
for (cp1252 = iso1 = ascii = utf8 = true; i != end; ++i) { |
| 6189 |
Uint1 ch = *src; |
Uint1 ch = *i; |
| 6190 |
bool skip = false; |
bool skip = false; |
| 6191 |
if (more != 0) { |
if (more != 0) { |
| 6192 |
if (x_EvalNext(ch)) { |
if (x_EvalNext(ch)) { |
| 6193 |
--more; |
--more; |
| 6194 |
if (more == 0) { |
if (more == 0) { |
| 6195 |
ascii = cp1252 = iso1 = false; |
ascii = false; |
| 6196 |
} |
} |
| 6197 |
skip = true; |
skip = true; |
| 6198 |
} else { |
} else { |
| 6202 |
} |
} |
| 6203 |
if (ch > 0x7F) { |
if (ch > 0x7F) { |
| 6204 |
ascii = false; |
ascii = false; |
| 6205 |
|
// http://en.wikipedia.org/wiki/ISO/IEC_8859-1 |
| 6206 |
|
// Note: From the point of view of the C++ Toolkit, the ISO 8859-1 |
| 6207 |
|
// character set includes symbols 0x00 through 0xFF except 0x80 through 0x9F. |
| 6208 |
if (ch < 0xA0) { |
if (ch < 0xA0) { |
| 6209 |
iso1 = false; |
iso1 = false; |
| 6210 |
|
// http://en.wikipedia.org/wiki/Windows-1252 |
| 6211 |
if (ch == 0x81 || ch == 0x8D || ch == 0x8F || |
if (ch == 0x81 || ch == 0x8D || ch == 0x8F || |
| 6212 |
ch == 0x90 || ch == 0x9D) { |
ch == 0x90 || ch == 0x9D) { |
| 6213 |
cp1252 = false; |
cp1252 = false; |
| 6223 |
} |
} |
| 6224 |
if (ascii) { |
if (ascii) { |
| 6225 |
return eEncoding_Ascii; |
return eEncoding_Ascii; |
|
} else if (cp1252) { |
|
|
return iso1 ? eEncoding_ISO8859_1 : eEncoding_Windows_1252; |
|
| 6226 |
} else if (utf8) { |
} else if (utf8) { |
| 6227 |
return eEncoding_UTF8; |
return eEncoding_UTF8; |
| 6228 |
|
} else if (cp1252) { |
| 6229 |
|
return iso1 ? eEncoding_ISO8859_1 : eEncoding_Windows_1252; |
| 6230 |
} |
} |
| 6231 |
return eEncoding_Unknown; |
return eEncoding_Unknown; |
| 6232 |
} |
} |
| 6233 |
|
|
| 6234 |
|
|
| 6235 |
bool CStringUTF8::MatchEncoding( const char* src, EEncoding encoding) |
bool CUtf8::MatchEncoding( const CTempString& src, EEncoding encoding) |
| 6236 |
{ |
{ |
| 6237 |
bool matches = false; |
bool matches = false; |
| 6238 |
EEncoding enc_src = GuessEncoding(src); |
EEncoding enc_src = GuessEncoding(src); |
| 6255 |
return matches; |
return matches; |
| 6256 |
} |
} |
| 6257 |
|
|
| 6258 |
|
// Map an encoding constant to its textual name.
// Throws CStringException (eBadArgs) for any encoding other than UTF-8,
// ASCII, ISO 8859-1 and Windows-1252.
string CUtf8::EncodingToString(EEncoding encoding)
{
    const char* name = "UTF-8";
    switch (encoding) {
    case eEncoding_UTF8:
        break;
    case eEncoding_Ascii:
        name = "US-ASCII";
        break;
    case eEncoding_ISO8859_1:
        name = "ISO-8859-1";
        break;
    case eEncoding_Windows_1252:
        name = "windows-1252";
        break;
    default:
        NCBI_THROW2(CStringException, eBadArgs,
            "Cannot convert encoding to string", 0);
        break;
    }
    return name;
}
| 6272 |
|
|
| 6273 |
|
// see http://www.iana.org/assignments/character-sets |
| 6274 |
|
// Map a character-set name (compared case-insensitively) to an encoding
// constant.  Returns eEncoding_Unknown if the name is not in any of the
// lists below.
EEncoding CUtf8::StringToEncoding(const CTempString& str)
{
    if (NStr::CompareNocase(str, "UTF-8") == 0) {
        return eEncoding_UTF8;
    }
    if (NStr::CompareNocase(str, "windows-1252") == 0) {
        return eEncoding_Windows_1252;
    }

    // NULL-terminated lists of accepted aliases
    const char* ascii_names[] = {
        "ANSI_X3.4-1968","iso-ir-6","ANSI_X3.4-1986","ISO_646.irv:1991",
        "ASCII","ISO646-US","US-ASCII","us","IBM367","cp367","csASCII", NULL};
    for (const char** alias = ascii_names; *alias != NULL; ++alias) {
        if (NStr::CompareNocase(str, *alias) == 0) {
            return eEncoding_Ascii;
        }
    }

    const char* latin1_names[] = {
        "ISO_8859-1:1987","iso-ir-100","ISO_8859-1","ISO-8859-1",
        "latin1","l1","IBM819","CP819","csISOLatin1", NULL};
    for (const char** alias = latin1_names; *alias != NULL; ++alias) {
        if (NStr::CompareNocase(str, *alias) == 0) {
            return eEncoding_ISO8859_1;
        }
    }

    return eEncoding_Unknown;
}
| 6301 |
|
|
| 6302 |
|
|
| 6303 |
// cp1252, codepoints for chars 0x80 to 0x9F |
// cp1252, codepoints for chars 0x80 to 0x9F |
| 6304 |
static const TUnicodeSymbol s_cp1252_table[] = { |
static const TUnicodeSymbol s_cp1252_table[] = { |
| 6308 |
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x003F, 0x017E, 0x0178 |
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x003F, 0x017E, 0x0178 |
| 6309 |
}; |
}; |
| 6310 |
|
|
| 6311 |
|
TUnicodeSymbol CUtf8::CharToSymbol(char c, EEncoding encoding) |
|
TUnicodeSymbol CStringUTF8::CharToSymbol(char c, EEncoding encoding) |
|
| 6312 |
{ |
{ |
| 6313 |
Uint1 ch = c; |
Uint1 ch = c; |
| 6314 |
switch (encoding) |
switch (encoding) |
| 6333 |
} |
} |
| 6334 |
|
|
| 6335 |
|
|
| 6336 |
char CStringUTF8::SymbolToChar(TUnicodeSymbol cp, EEncoding encoding) |
char CUtf8::SymbolToChar(TUnicodeSymbol cp, EEncoding encoding) |
| 6337 |
{ |
{ |
| 6338 |
if( encoding == eEncoding_UTF8 || encoding == eEncoding_Unknown) { |
if( encoding == eEncoding_UTF8 || encoding == eEncoding_Unknown) { |
| 6339 |
NCBI_THROW2(CStringException, eBadArgs, |
NCBI_THROW2(CStringException, eBadArgs, |
| 6356 |
return (char)cp; |
return (char)cp; |
| 6357 |
} |
} |
| 6358 |
|
|
| 6359 |
|
void CUtf8::x_Validate(const CTempString& str) |
|
void CStringUTF8::x_Validate(void) const |
|
| 6360 |
{ |
{ |
| 6361 |
if (!IsValid()) { |
if ( !MatchEncoding( str,eEncoding_UTF8 ) ) { |
| 6362 |
NCBI_THROW2(CStringException, eBadArgs, |
NCBI_THROW2(CStringException, eBadArgs, |
| 6363 |
"Source string is not in UTF8 format", 0); |
"Source string is not in UTF8 format", 0); |
| 6364 |
} |
} |
| 6365 |
} |
} |
| 6366 |
|
|
| 6367 |
|
// Append the UTF-8 byte sequence for symbol 'c' to 'self' and return 'self'.
// The sequence is 1 byte for c < 0x80, 2 bytes for c < 0x800, 3 bytes for
// c < 0x10000, and 4 bytes otherwise.
CStringUTF8& CUtf8::x_AppendChar(CStringUTF8& self, TUnicodeSymbol c)
{
    const Uint4 code = c;
    char        bytes[4];
    SIZE_TYPE   n_bytes;

    if (code < 0x80) {
        bytes[0] = (char) Uint1(code);
        n_bytes  = 1;
    } else if (code < 0x800) {
        bytes[0] = (char) Uint1( (code >>  6)         | 0xC0);
        bytes[1] = (char) Uint1( (code        & 0x3F) | 0x80);
        n_bytes  = 2;
    } else if (code < 0x10000) {
        bytes[0] = (char) Uint1( (code >> 12)         | 0xE0);
        bytes[1] = (char) Uint1(((code >>  6) & 0x3F) | 0x80);
        bytes[2] = (char) Uint1( (code        & 0x3F) | 0x80);
        n_bytes  = 3;
    } else {
        bytes[0] = (char) Uint1( (code >> 18)         | 0xF0);
        bytes[1] = (char) Uint1(((code >> 12) & 0x3F) | 0x80);
        bytes[2] = (char) Uint1(((code >>  6) & 0x3F) | 0x80);
        bytes[3] = (char) Uint1( (code        & 0x3F) | 0x80);
        n_bytes  = 4;
    }
    self.append(bytes, n_bytes);
    return self;
}
| 6388 |
|
|
| 6389 |
|
CStringUTF8& CUtf8::x_Append( CStringUTF8& self, const CTempString& src, |
|
void CStringUTF8::x_Append(const char* src, |
|
| 6390 |
EEncoding encoding, EValidate validate) |
EEncoding encoding, EValidate validate) |
| 6391 |
{ |
{ |
| 6392 |
if (encoding == eEncoding_Unknown) { |
if (encoding == eEncoding_Unknown) { |
| 6402 |
} |
} |
| 6403 |
} |
} |
| 6404 |
if (encoding == eEncoding_UTF8 || encoding == eEncoding_Ascii) { |
if (encoding == eEncoding_UTF8 || encoding == eEncoding_Ascii) { |
| 6405 |
append(src); |
self.append(src); |
| 6406 |
return; |
return self; |
| 6407 |
} |
} |
| 6408 |
|
|
|
const char* srcBuf; |
|
| 6409 |
SIZE_TYPE needed = 0; |
SIZE_TYPE needed = 0; |
| 6410 |
for (srcBuf = src; *srcBuf; ++srcBuf) { |
CTempString::const_iterator i; |
| 6411 |
needed += x_BytesNeeded( CharToSymbol( *srcBuf,encoding ) ); |
CTempString::const_iterator end = src.end(); |
| 6412 |
|
for (i = src.begin(); i != end; ++i) { |
| 6413 |
|
needed += x_BytesNeeded( CharToSymbol( *i,encoding ) ); |
| 6414 |
} |
} |
| 6415 |
if ( !needed ) { |
if ( !needed ) { |
| 6416 |
return; |
return self; |
| 6417 |
} |
} |
| 6418 |
reserve(max(capacity(),length()+needed+1)); |
self.reserve(max(self.capacity(),self.length()+needed+1)); |
| 6419 |
for (srcBuf = src; *srcBuf; ++srcBuf) { |
for (i = src.begin(); i != end; ++i) { |
| 6420 |
x_AppendChar( CharToSymbol( *srcBuf, encoding ) ); |
x_AppendChar( self, CharToSymbol( *i, encoding ) ); |
| 6421 |
} |
} |
| 6422 |
|
return self; |
| 6423 |
} |
} |
| 6424 |
|
|
| 6425 |
|
SIZE_TYPE CUtf8::x_BytesNeeded(TUnicodeSymbol c) |
|
SIZE_TYPE CStringUTF8::x_BytesNeeded(TUnicodeSymbol c) |
|
| 6426 |
{ |
{ |
| 6427 |
Uint4 ch = c; |
Uint4 ch = c; |
| 6428 |
if (ch < 0x80) { |
if (ch < 0x80) { |
| 6436 |
} |
} |
| 6437 |
|
|
| 6438 |
|
|
| 6439 |
bool CStringUTF8::x_EvalFirst(char ch, SIZE_TYPE& more) |
bool CUtf8::x_EvalFirst(char ch, SIZE_TYPE& more) |
| 6440 |
{ |
{ |
| 6441 |
more = 0; |
more = 0; |
| 6442 |
if ((ch & 0x80) != 0) { |
if ((ch & 0x80) != 0) { |
| 6443 |
if ((ch & 0xE0) == 0xC0) { |
if ((ch & 0xE0) == 0xC0) { |
| 6444 |
|
if ((ch & 0xFE) == 0xC0) { |
| 6445 |
|
// C0 and C1 are not valid UTF-8 chars |
| 6446 |
|
return false; |
| 6447 |
|
} |
| 6448 |
more = 1; |
more = 1; |
| 6449 |
} else if ((ch & 0xF0) == 0xE0) { |
} else if ((ch & 0xF0) == 0xE0) { |
| 6450 |
more = 2; |
more = 2; |
| 6451 |
} else if ((ch & 0xF8) == 0xF0) { |
} else if ((ch & 0xF8) == 0xF0) { |
| 6452 |
|
if ((unsigned char)ch > (unsigned char)0xF4) { |
| 6453 |
|
// F5-FF are not valid UTF-8 chars |
| 6454 |
|
return false; |
| 6455 |
|
} |
| 6456 |
more = 3; |
more = 3; |
| 6457 |
} else { |
} else { |
| 6458 |
return false; |
return false; |
| 6462 |
} |
} |
| 6463 |
|
|
| 6464 |
|
|
| 6465 |
bool CStringUTF8::x_EvalNext(char ch) |
bool CUtf8::x_EvalNext(char ch) |
| 6466 |
{ |
{ |
| 6467 |
return (ch & 0xC0) == 0x80; |
return (ch & 0xC0) == 0x80; |
| 6468 |
} |
} |
| 6469 |
|
|
| 6470 |
|
TUnicodeSymbol CUtf8::DecodeFirst(char ch, SIZE_TYPE& more) |
|
TUnicodeSymbol CStringUTF8::Decode(const char*& src) |
|
| 6471 |
{ |
{ |
| 6472 |
TUnicodeSymbol chRes; |
TUnicodeSymbol chRes = 0; |
| 6473 |
SIZE_TYPE more; |
more = 0; |
|
Uint1 ch = *src; |
|
| 6474 |
if ((ch & 0x80) == 0) { |
if ((ch & 0x80) == 0) { |
| 6475 |
chRes = ch; |
chRes = ch; |
|
more = 0; |
|
| 6476 |
} else if ((ch & 0xE0) == 0xC0) { |
} else if ((ch & 0xE0) == 0xC0) { |
| 6477 |
chRes = (ch & 0x1F); |
chRes = (ch & 0x1F); |
| 6478 |
more = 1; |
more = 1; |
| 6486 |
NCBI_THROW2(CStringException, eBadArgs, |
NCBI_THROW2(CStringException, eBadArgs, |
| 6487 |
"Source string is not in UTF8 format", 0); |
"Source string is not in UTF8 format", 0); |
| 6488 |
} |
} |
|
while (more--) { |
|
|
ch = *(++src); |
|
|
if ((ch & 0xC0) != 0x80) { |
|
|
NCBI_THROW2(CStringException, eBadArgs, |
|
|
"Source string is not in UTF8 format", 0); |
|
|
} |
|
|
chRes = (chRes << 6) | (ch & 0x3F); |
|
|
} |
|
| 6489 |
return chRes; |
return chRes; |
| 6490 |
} |
} |
| 6491 |
|
|
| 6492 |
|
|
| 6493 |
TUnicodeSymbol CStringUTF8::DecodeFirst(char ch, SIZE_TYPE& more) |
// Fold one more byte into a partially decoded symbol.
// 'ch' must have the bit pattern 10xxxxxx; its low six bits are shifted
// into 'chU'.  Any other byte throws CStringException (eBadArgs).
TUnicodeSymbol CUtf8::DecodeNext(TUnicodeSymbol chU, char ch)
{
    if ((ch & 0xC0) != 0x80) {
        NCBI_THROW2(CStringException, eBadArgs,
            "Source string is not in UTF8 format", 0);
    }
    return (chU << 6) | (ch & 0x3F);
}
| 6503 |
|
|
| 6504 |
|
// Tell whether a symbol is white space.
//
// Symbols below 0x85 are classified by iswspace().  From 0x85 upwards the
// accepted code points are:
//   0x85, 0xA0, 0x1680, 0x180E,
//   0x2000 .. 0x200A, 0x2028, 0x2029, 0x202F, 0x205F,
//   0x3000
bool CUtf8::IsWhiteSpace(TUnicodeSymbol chU)
{
    if (chU < 0x85) {
        return iswspace(chU)!=0;
    }
    if (chU < 0x2000) {
        return chU == 0x85 || chU == 0xA0 || chU == 0x1680 || chU == 0x180E;
    }
    if (chU < 0x3000) {
        return chU <= 0x200A || chU == 0x2028 || chU == 0x2029 ||
               chU == 0x202F || chU == 0x205F;
    }
    return chU == 0x3000;
}
| 6521 |
|
|
| 6522 |
TUnicodeSymbol CStringUTF8::DecodeNext(TUnicodeSymbol chU, char ch) |
// Strip white space from the requested side(s) of 'str', modifying it in
// place, and return 'str'.  The part to keep is located with
// TruncateSpaces_Unsafe().
CStringUTF8& CUtf8::TruncateSpacesInPlace(CStringUTF8& str, NStr::ETrunc side)
{
    if (str.empty()) {
        return str;
    }
    const CTempString kept(TruncateSpaces_Unsafe(str, side));
    if (kept.empty()) {
        str.erase();
    } else {
        str.replace(0, str.length(), kept.data(), kept.length());
    }
    return str;
}
| 6534 |
|
|
| 6535 |
|
// Return the part of 'str' that remains after removing white space (as
// defined by IsWhiteSpace()) from the side(s) selected by 'side'.
// The result is built from pointers into the data of 'str'; no bytes are
// copied.
CTempString CUtf8::TruncateSpaces_Unsafe(
    const CTempString& str, NStr::ETrunc side)
{
    if (str.empty()) {
        return str;
    }
    const bool trim_front =
        (side == NStr::eTrunc_Begin || side == NStr::eTrunc_Both);
    const bool trim_back =
        (side == NStr::eTrunc_End   || side == NStr::eTrunc_Both);

    CTempString::const_iterator first = str.begin();
    CTempString::const_iterator last  = str.end();

    if (trim_front) {
        CTempString::const_iterator cur = first;
        while (first != last) {
            if (!IsWhiteSpace( CUtf8::Decode( cur ) )) {
                break;
            }
            // 'cur' is one step short of the next symbol after Decode()
            first = ++cur;
        }
    }

    if (trim_back) {
        while (last != first) {
            // Step back over bytes of the form 10xxxxxx until a byte that
            // is either below 0x80 or of the form 11xxxxxx is reached
            // (or the front boundary is hit).
            do {
                --last;
            } while (last != first
                     &&  (*last & 0x80) != 0  &&  (*last & 0xC0) != 0xC0);

            CTempString::const_iterator cur = last;
            if (!IsWhiteSpace( CUtf8::Decode( cur ) )) {
                // Keep this symbol: the range ends right after it
                last = ++cur;
                break;
            }
        }
    }

    CTempString res;
    if (first != last) {
        res.assign(first, last - first);
    }
    return res;
}
| 6571 |
|
|
| 6572 |
const char* CStringException::GetErrCodeString(void) const |
const char* CStringException::GetErrCodeString(void) const |
| 6573 |
{ |
{ |
| 6590 |
} |
} |
| 6591 |
|
|
| 6592 |
|
|
| 6593 |
string CStringDecoder_Url::Decode(const string& src, |
string CStringDecoder_Url::Decode(const CTempString& src, |
| 6594 |
EStringType ) const |
EStringType ) const |
| 6595 |
{ |
{ |
| 6596 |
return NStr::URLDecode(src, m_Flag); |
return NStr::URLDecode(src, m_Flag); |
| 6603 |
} |
} |
| 6604 |
|
|
| 6605 |
|
|
| 6606 |
string CStringEncoder_Url::Encode(const string& src, |
string CStringEncoder_Url::Encode(const CTempString& src, |
| 6607 |
EStringType ) const |
EStringType ) const |
| 6608 |
{ |
{ |
| 6609 |
return NStr::URLEncode(src, m_Flag); |
return NStr::URLEncode(src, m_Flag); |
| 6613 |
///////////////////////////////////////////////////////////////////////////// |
///////////////////////////////////////////////////////////////////////////// |
| 6614 |
// CEncodedString -- |
// CEncodedString -- |
| 6615 |
|
|
| 6616 |
// Construct from 's'; all of the work is done by SetString().
CEncodedString::CEncodedString(const CTempString& s,
                               NStr::EUrlEncode flag)
{
    this->SetString(s, flag);
}
| 6621 |
|
|
| 6622 |
|
|
| 6623 |
void CEncodedString::SetString(const string& s, |
void CEncodedString::SetString(const CTempString& s, |
| 6624 |
NStr::EUrlEncode flag) |
NStr::EUrlEncode flag) |
| 6625 |
{ |
{ |
| 6626 |
m_Original = s; |
m_Original = s; |
| 6657 |
} // NCBI_FAKE_WARNING |
} // NCBI_FAKE_WARNING |
| 6658 |
|
|
| 6659 |
|
|
| 6660 |
END_NCBI_SCOPE |
|
| 6661 |
|
// Concatenate all fragments of the list into '*s', replacing its previous
// contents.
//
// @param s
//   Destination string; must not be NULL.
void CTempStringList::Join(string* s) const
{
    // Reserve room for the whole result up front ...
    s->reserve(GetSize());
    // ... and copy the first fragment with assign(ptr, len) rather than
    // operator=, so that no temporary string is created and the buffer
    // reserved above is the one that receives the data.
    s->assign(m_FirstNode.str.data(), m_FirstNode.str.size());
    for (const SNode* node = m_FirstNode.next.get();  node != NULL;
         node = node->next.get()) {
        s->append(node->str.data(), node->str.size());
    }
}
| 6670 |
|
|
| 6671 |
|
|
| 6672 |
|
// Concatenate all fragments of the list into '*s'.
//
// A single-fragment list is assigned directly.  Otherwise the fragments are
// copied into a buffer of GetSize() + 1 bytes obtained from m_Storage, a
// terminating '\0' is written after them, and '*s' is pointed at that
// buffer.  Throws CStringException (eBadArgs) if there is more than one
// fragment and m_Storage is not set.
void CTempStringList::Join(CTempStringEx* s) const
{
    if (m_FirstNode.next.get() == NULL) {
        *s = m_FirstNode.str;
        return;
    }
    if ( !m_Storage ) {
        NCBI_THROW2(CStringException, eBadArgs,
            "CTempStringList::Join(): non-NULL storage required", 0);
    }

    const SIZE_TYPE total = GetSize();
    char* const     buf   = m_Storage->Allocate(total + 1);
    char*           out   = buf;

    const SNode* node = &m_FirstNode;
    while (node != NULL) {
        const SIZE_TYPE len = node->str.size();
        memcpy(out, node->str.data(), len);
        out += len;
        node = node->next.get();
    }
    *out = '\0';
    s->assign(buf, total);
}
| 6693 |
|
|
| 6694 |
|
|
| 6695 |
|
// Total number of bytes held by all fragments of the list.
SIZE_TYPE CTempStringList::GetSize(void) const
{
    SIZE_TYPE sum = 0;
    const SNode* node = &m_FirstNode;
    while (node != NULL) {
        sum += node->str.size();
        node = node->next.get();
    }
    return sum;
}
| 6704 |
|
|
| 6705 |
|
|
| 6706 |
|
// Scan forward from m_Pos to the end of the next token.
//
// A token may be reported in several pieces, because characters consumed by
// escape/quote handling are left out; each non-empty piece is added to
// 'part_collector' when that pointer is non-NULL.  On return m_Pos is
// either NPOS (input exhausted) or the position from which the next call
// continues, after SkipDelims() has been applied.
//
// Throws CStringException (eFormat) for a trailing backslash when escapes
// are enabled, and for a quote that is still open when the input ends.
//
// @return
//   true if any text was found, or if fSplit_MergeDelims is not set.
bool CStrTokenizeBase::Advance(CTempStringList* part_collector)
{
    const bool can_escape  = (m_Flags & NStr::fSplit_CanEscape)      != 0;
    const bool can_quote   = (m_Flags & NStr::fSplit_CanQuote)       != 0;
    const bool can_squote  = (m_Flags & NStr::fSplit_CanSingleQuote) != 0;
    const bool can_dquote  = (m_Flags & NStr::fSplit_CanDoubleQuote) != 0;
    const bool by_pattern  = (m_Flags & NStr::fSplit_ByPattern)      != 0;

    SIZE_TYPE cursor        = m_Pos;
    SIZE_TYPE chunk_begin   = m_Pos;
    SIZE_TYPE hit           = 0;
    SIZE_TYPE open_quote_at = 0;
    bool      got_text      = false;
    bool      finished      = (cursor == NPOS);
    char      open_quote    = '\0';

    if (cursor >= m_Str.size()) {
        cursor   = NPOS;
        finished = true;
    }

    // Each chunk covers the half-open interval [chunk_begin, hit).

    while ( !finished ) {
        hit = m_Str.find_first_of(m_InternalDelim, cursor);
        if (hit == NPOS) {
            break;
        }
        cursor = hit + 1;
        SIZE_TYPE  resume_at = cursor;
        bool       consumed  = false;
        const char c         = m_Str[hit];

        if (can_escape  &&  c == '\\') {
            // treat the following character literally
            if (++cursor > m_Str.size()) {
                NCBI_THROW2(CStringException, eFormat, "Unescaped trailing \\",
                            hit);
            }
            consumed = true;
        } else if (can_quote) {
            if (open_quote != '\0') {
                if (c != open_quote) {
                    continue; // not actually a boundary
                }
                if (cursor < m_Str.size()  &&  m_Str[cursor] == open_quote) {
                    // count a doubled quote as one literal occurrence
                    ++cursor;
                } else {
                    open_quote = '\0';
                }
                consumed = true;
            } else if ((can_squote  &&  c == '\'')
                       ||  (can_dquote  &&  c == '"')) {
                open_quote    = c;
                open_quote_at = hit;
                consumed      = true;
            }
        }

        if ( !consumed ) {
            if ( !by_pattern ) {
                finished = true;
            } else if (hit + m_Delim.size() <= m_Str.size()
                       &&  (memcmp(m_Delim.data() + 1, m_Str.data() + cursor,
                                   m_Delim.size() - 1) == 0)) {
                // Whole multi-character delimiter matched
                finished  = true;
                resume_at = cursor = hit + m_Delim.size();
            } else {
                continue;
            }
        }

        if (hit > chunk_begin) {
            got_text = true;
            if (part_collector != NULL) {
                part_collector->Add
                    (m_Str.substr(chunk_begin, hit - chunk_begin));
            }
        }
        chunk_begin = resume_at;
    }

    if (open_quote != '\0') {
        NCBI_THROW2(CStringException, eFormat,
                    string("Unbalanced ") + open_quote,
                    open_quote_at);
    }

    if (hit == NPOS) {
        // No further delimiter: the rest of the input is the last piece
        got_text = true;
        if (part_collector != NULL) {
            part_collector->Add(m_Str.substr(chunk_begin));
        }
        m_Pos = NPOS;
    } else {
        m_Pos = cursor;
        SkipDelims();
    }

    return got_text  ||  (m_Flags & NStr::fSplit_MergeDelims) == 0;
}
| 6800 |
|
|
| 6801 |
|
void CStrTokenizeBase::SkipDelims(void) |
| 6802 |
|
{ |
| 6803 |
|
if ((m_Flags & NStr::fSplit_MergeDelims) != 0) { |
| 6804 |
|
if ((m_Flags & NStr::fSplit_ByPattern) == 0) { |
| 6805 |
|
m_Pos = m_Str.find_first_not_of(m_Delim, m_Pos); |
| 6806 |
|
} else { |
| 6807 |
|
while (m_Pos + m_Delim.size() <= m_Str.size() && m_Pos != NPOS |
| 6808 |
|
&& (memcmp(m_Delim.data(), m_Str.data() + m_Pos, |
| 6809 |
|
m_Delim.size()) == 0)) { |
| 6810 |
|
m_Pos += m_Delim.size(); |
| 6811 |
|
} |
| 6812 |
|
} |
| 6813 |
|
} |
| 6814 |
|
} |
| 6815 |
|
|
| 6816 |
|
void CStrTokenizeBase::x_ExtendInternalDelim() |
| 6817 |
|
{ |
| 6818 |
|
if ( !(m_Flags & (NStr::fSplit_CanEscape | NStr::fSplit_CanQuote)) ) { |
| 6819 |
|
return; // Nothing to do |
| 6820 |
|
} |
| 6821 |
|
|
| 6822 |
|
SIZE_TYPE n = m_InternalDelim.size(); |
| 6823 |
|
char* buf = m_DelimStorage.Allocate(n + 3); |
| 6824 |
|
char *s = buf; |
| 6825 |
|
memcpy(s, m_InternalDelim.data(), n); |
| 6826 |
|
if ((m_Flags & NStr::fSplit_CanEscape) != 0) { |
| 6827 |
|
s[n++] = '\\'; |
| 6828 |
|
} |
| 6829 |
|
if ((m_Flags & NStr::fSplit_CanSingleQuote) != 0) { |
| 6830 |
|
s[n++] = '\''; |
| 6831 |
|
} |
| 6832 |
|
if ((m_Flags & NStr::fSplit_CanDoubleQuote) != 0) { |
| 6833 |
|
s[n++] = '"'; |
| 6834 |
|
} |
| 6835 |
|
m_InternalDelim.assign(buf, n); |
| 6836 |
|
} |
| 6837 |
|
|
| 6838 |
|
|
| 6839 |
|
END_NCBI_NAMESPACE; |