00001 #ifndef CORELIB___NCBISTR__HPP 00002 #define CORELIB___NCBISTR__HPP 00003 00004 /* $Id: ncbistr.hpp 172519 2009-10-06 14:23:09Z vasilche $ 00005 * =========================================================================== 00006 * 00007 * PUBLIC DOMAIN NOTICE 00008 * National Center for Biotechnology Information 00009 * 00010 * This software/database is a "United States Government Work" under the 00011 * terms of the United States Copyright Act. It was written as part of 00012 * the author's official duties as a United States Government employee and 00013 * thus cannot be copyrighted. This software/database is freely available 00014 * to the public for use. The National Library of Medicine and the U.S. 00015 * Government have not placed any restriction on its use or reproduction. 00016 * 00017 * Although all reasonable efforts have been taken to ensure the accuracy 00018 * and reliability of the software and data, the NLM and the U.S. 00019 * Government do not and cannot warrant the performance or results that 00020 * may be obtained by using this software or data. The NLM and the U.S. 00021 * Government disclaim all warranties, express or implied, including 00022 * warranties of performance, merchantability or fitness for any particular 00023 * purpose. 00024 * 00025 * Please cite the author in any work or product based on this material. 
00026 * 00027 * =========================================================================== 00028 * 00029 * Authors: Eugene Vasilchenko, Denis Vakatov 00030 * 00031 * 00032 */ 00033 00034 /// @file ncbistr.hpp 00035 /// The NCBI C++ standard methods for dealing with std::string 00036 00037 00038 #include <corelib/tempstr.hpp> 00039 #include <corelib/ncbi_limits.hpp> 00040 #ifdef NCBI_OS_OSF1 00041 # include <strings.h> 00042 #endif 00043 #include <stdarg.h> 00044 #include <time.h> 00045 #include <vector> 00046 00047 00048 00049 BEGIN_NCBI_SCOPE 00050 00051 /** @addtogroup String 00052 * 00053 * @{ 00054 */ 00055 00056 /// Empty "C" string (points to a '\0'). 00057 extern const char *const kEmptyCStr; 00058 #define NcbiEmptyCStr NCBI_NS_NCBI::kEmptyCStr 00059 00060 00061 /// Empty "C++" string. 00062 #if defined(NCBI_OS_MSWIN) || ( defined(NCBI_OS_LINUX) && defined(NCBI_COMPILER_GCC) ) 00063 class CNcbiEmptyString 00064 { 00065 public: 00066 /// Get a reference to the shared empty string (a function-local static). 00067 static const string& Get(void) 00068 { 00069 static string empty_str; 00070 return empty_str; 00071 } 00072 }; 00073 #else 00074 class CNcbiEmptyString 00075 { 00076 public: 00077 /// Get a reference to the shared empty string. 00078 static const string& Get(void); 00079 private: 00080 /// Helper method to initialize private data member and return 00081 /// null string. 00082 static const string& FirstGet(void); 00083 static const string* m_Str; ///< Null string pointer. 00084 }; 00085 #endif // NCBI_OS_MSWIN.... 00086 00087 00088 #define NcbiEmptyString NCBI_NS_NCBI::CNcbiEmptyString::Get() 00089 #define kEmptyStr NcbiEmptyString 00090 00091 00092 // SIZE_TYPE and NPOS 00093 00094 /// Define size type (alias of the string class's "size_type"). 00095 typedef NCBI_NS_STD::string::size_type SIZE_TYPE; 00096 00097 /// Define NPOS constant as the special value "std::string::npos" which is 00098 /// returned when a substring search fails, or to indicate an unspecified 00099 /// string position. 
00100 static const SIZE_TYPE NPOS = NCBI_NS_STD::string::npos; 00101 00102 00103 00104 ///////////////////////////////////////////////////////////////////////////// 00105 /// 00106 /// NStr -- 00107 /// 00108 /// Encapsulates class-wide string processing functions. 00109 00110 class NStr 00111 { 00112 public: 00113 /// Convert string to numeric value. 00114 /// 00115 /// @param str 00116 /// String containing digits. 00117 /// @return 00118 /// - Convert "str" to a (non-negative) "int" value and return 00119 /// this value. 00120 /// - -1 if "str" contains any symbols other than [0-9], or 00121 /// if it represents a number that does not fit into "int". 00122 static int StringToNumeric(const string& str); 00123 00124 00125 /// Number to string conversion flags. 00126 /// 00127 /// NOTE: 00128 /// If the base specified in the *ToString() methods is not the default 10, 00129 /// then some flags like fWithSign and fWithCommas will be ignored. 00130 enum ENumToStringFlags { 00131 fWithSign = (1 << 9), ///< Prefix the output value with a sign 00132 fWithCommas = (1 << 10), ///< Use commas as thousands separator 00133 fDoubleFixed = (1 << 11), ///< Use n.nnnn format for double 00134 fDoubleScientific= (1 << 12), ///< Use scientific format for double 00135 fDoubleGeneral = fDoubleFixed | fDoubleScientific ///< Both double-format bits set 00136 }; 00137 typedef int TNumToStringFlags; ///< Bitwise OR of "ENumToStringFlags" 00138 00139 /// String to number conversion flags. 
00140 enum EStringToNumFlags { 00141 fConvErr_NoThrow = (1 << 9), ///< Return "natural null" 00142 ///< value on error, instead of throwing (by default) an exception 00143 00144 fMandatorySign = (1 << 10), ///< See 'fWithSign' 00145 fAllowCommas = (1 << 11), ///< See 'fWithCommas' 00146 fAllowLeadingSpaces = (1 << 12), ///< Can have leading spaces 00147 fAllowLeadingSymbols = (1 << 13) | fAllowLeadingSpaces, 00148 ///< Can have leading non-nums 00149 fAllowTrailingSpaces = (1 << 14), ///< Can have trailing spaces 00150 fAllowTrailingSymbols = (1 << 15) | fAllowTrailingSpaces, 00151 ///< Can have trailing non-nums 00152 fAllStringToNumFlags = 0x7F00 ///< NOTE(review): 0x7F00 covers bits 8..14, while the flags above use bits 9..15 -- confirm this mask is intended 00153 }; 00154 typedef int TStringToNumFlags; ///< Bitwise OR of "EStringToNumFlags" 00155 00156 /// Convert string to int. 00157 /// 00158 /// @param str 00159 /// String to be converted. 00160 /// @param flags 00161 /// How to convert string to value. 00162 /// @param base 00163 /// Radix base. Default is 10. Allowed values are 0, 2..32. 00164 /// @return 00165 /// - Convert "str" to "int" value and return it. 00166 /// - 0 if "str" contains illegal symbols, or if it represents a number 00167 /// that does not fit into range, and flag fConvErr_NoThrow is set. 00168 /// - Throw an exception otherwise. 00169 static int StringToInt(const CTempString& str, 00170 TStringToNumFlags flags = 0, 00171 int base = 10); 00172 00173 /// Convert string to unsigned int. 00174 /// 00175 /// @param str 00176 /// String to be converted. 00177 /// @param flags 00178 /// How to convert string to value. 00179 /// @param base 00180 /// Radix base. Default is 10. Allowed values are 0, 2..32. 00181 /// @return 00182 /// - Convert "str" to "unsigned int" value and return it. 00183 /// - 0 if "str" contains illegal symbols, or if it represents a number 00184 /// that does not fit into range, and flag fConvErr_NoThrow is set. 00185 /// - Throw an exception otherwise. 
00186 static unsigned int StringToUInt(const CTempString& str, 00187 TStringToNumFlags flags = 0, 00188 int base = 10); 00189 00190 /// Convert string to long. 00191 /// 00192 /// @param str 00193 /// String to be converted. 00194 /// @param flags 00195 /// How to convert string to value. 00196 /// @param base 00197 /// Radix base. Default is 10. Allowed values are 0, 2..32. 00198 /// @return 00199 /// - Convert "str" to "long" value and return it. 00200 /// - 0 if "str" contains illegal symbols, or if it represents a number 00201 /// that does not fit into range, and flag fConvErr_NoThrow is set. 00202 /// - Throw an exception otherwise. 00203 static long StringToLong(const CTempString& str, 00204 TStringToNumFlags flags = 0, 00205 int base = 10); 00206 00207 /// Convert string to unsigned long. 00208 /// 00209 /// @param str 00210 /// String to be converted. 00211 /// @param flags 00212 /// How to convert string to value. 00213 /// @param base 00214 /// Numeric base of the number symbols (default = 10). 00215 /// @return 00216 /// - Convert "str" to "unsigned long" value and return it. 00217 /// - 0 if "str" contains illegal symbols, or if it represents a number 00218 /// that does not fit into range, and flag fConvErr_NoThrow is set. 00219 /// - Throw an exception otherwise. 00220 static unsigned long StringToULong(const CTempString& str, 00221 TStringToNumFlags flags = 0, 00222 int base = 10); 00223 00224 /// Convert string to double. 00225 /// 00226 /// @param str 00227 /// String to be converted. 00228 /// @param flags 00229 /// How to convert string to value. 00230 /// The fAllowCommas flag is not supported. 00231 /// @return 00232 /// - Convert "str" to "double" value and return it. 00233 /// - 0 if "str" contains illegal symbols, or if it represents a number 00234 /// that does not fit into range, and flag fConvErr_NoThrow is set. 00235 /// - Throw an exception otherwise. 
00236 static double StringToDouble(const CTempStringEx& str, 00237 TStringToNumFlags flags = 0); 00238 00239 /// This version accepts a zero-terminated string 00240 static double StringToDoubleEx(const char* str, size_t size, 00241 TStringToNumFlags flags = 0); 00242 00243 /// Convert string to Int8. 00244 /// 00245 /// @param str 00246 /// String to be converted. 00247 /// @param flags 00248 /// How to convert string to value. 00249 /// @param base 00250 /// Radix base. Default is 10. Allowed values are 0, 2..32. 00251 /// @return 00252 /// - Convert "str" to "Int8" value and return it. 00253 /// - 0 if "str" contains illegal symbols, or if it represents a number 00254 /// that does not fit into range, and flag fConvErr_NoThrow is set. 00255 /// - Throw an exception otherwise. 00256 static Int8 StringToInt8(const CTempString& str, 00257 TStringToNumFlags flags = 0, 00258 int base = 10); 00259 00260 /// Convert string to Uint8. 00261 /// 00262 /// @param str 00263 /// String to be converted. 00264 /// @param flags 00265 /// How to convert string to value. 00266 /// @param base 00267 /// Radix base. Default is 10. Allowed values are 0, 2..32. 00268 /// @return 00269 /// - Convert "str" to "Uint8" value and return it. 00270 /// - 0 if "str" contains illegal symbols, or if it represents a number 00271 /// that does not fit into range, and flag fConvErr_NoThrow is set. 00272 /// - Throw an exception otherwise. 00273 static Uint8 StringToUInt8(const CTempString& str, 00274 TStringToNumFlags flags = 0, 00275 int base = 10); 00276 00277 /// Convert string to number of bytes. 00278 /// 00279 /// String can contain "software" qualifiers: MB (megabyte), KB (kilobyte), etc. 00280 /// Example: 100MB, 1024KB 00281 /// Note the qualifiers are power-of-2 based, aka kibi-, mebi- etc, so that 00282 /// 1KB = 1024B (not 1000B), 1MB = 1024KB = 1048576B, etc. 00283 /// 00284 /// @param str 00285 /// String to be converted. 00286 /// @param flags 00287 /// How to convert string to value. 
00288 /// @param base 00289 /// Numeric base of the number (before the qualifier). 00290 /// Default is 10. Allowed values are 0, 2..20. 00291 /// @return 00292 /// - Convert "str" to "Uint8" value and return it. 00293 /// - 0 if "str" contains illegal symbols, or if it represents a number 00294 /// that does not fit into range, and flag fConvErr_NoThrow is set. 00295 /// - Throw an exception otherwise. 00296 static Uint8 StringToUInt8_DataSize(const CTempString& str, 00297 TStringToNumFlags flags = 0, 00298 int base = 10); 00299 00300 /// Convert string to pointer. 00301 /// 00302 /// @param str 00303 /// String to be converted. 00304 /// @return 00305 /// Pointer value corresponding to its string representation. 00306 static const void* StringToPtr(const string& str); 00307 00308 /// Convert character to integer. 00309 /// 00310 /// @param ch 00311 /// Character to be converted. 00312 /// @return 00313 /// Integer (0..15) corresponding to the "ch" as a hex digit. 00314 /// Return -1 on error. 00315 static int HexChar(char ch); 00316 00317 /// Convert Int to String. 00318 /// 00319 /// @param value 00320 /// Integer value (long) to be converted. 00321 /// @param flags 00322 /// How to convert value to string. 00323 /// @param base 00324 /// Radix base. Default is 10. Allowed values are 2..32. 00325 /// Bases 8 and 16 do not add leading '0' and '0x' respectively. 00326 /// If necessary you should add them yourself. 00327 /// @return 00328 /// Converted string value. 00329 static string IntToString(long value, TNumToStringFlags flags = 0, 00330 int base = 10); 00331 00332 /// Convert Int to String, storing the result in "out_str". 00333 /// 00334 /// @param out_str 00335 /// Output string variable. 00336 /// @param value 00337 /// Integer value (long) to be converted. 00338 /// @param flags 00339 /// How to convert value to string. 00340 /// @param base 00341 /// Radix base. Default is 10. Allowed values are 2..32. 00342 /// Bases 8 and 16 do not add leading '0' and '0x' respectively. 
00343 /// If necessary you should add them yourself. 00344 static void IntToString(string& out_str, long value, 00345 TNumToStringFlags flags = 0, 00346 int base = 10); 00347 00348 /// Convert UInt to string. 00349 /// 00350 /// @param value 00351 /// Integer value (unsigned long) to be converted. 00352 /// @param flags 00353 /// How to convert value to string. 00354 /// @param base 00355 /// Radix base. Default is 10. Allowed values are 2..32. 00356 /// Bases 8 and 16 do not add leading '0' and '0x' respectively. 00357 /// If necessary you should add them yourself. 00358 /// @return 00359 /// Converted string value. 00360 static string UIntToString(unsigned long value, 00361 TNumToStringFlags flags = 0, 00362 int base = 10); 00363 00364 /// Convert UInt to string, storing the result in "out_str". 00365 /// 00366 /// @param out_str 00367 /// Output string variable. 00368 /// @param value 00369 /// Integer value (unsigned long) to be converted. 00370 /// @param flags 00371 /// How to convert value to string. 00372 /// @param base 00373 /// Radix base. Default is 10. Allowed values are 2..32. 00374 /// Bases 8 and 16 do not add leading '0' and '0x' respectively. 00375 /// If necessary you should add them yourself. 00376 static void UIntToString(string& out_str, unsigned long value, 00377 TNumToStringFlags flags = 0, 00378 int base = 10); 00379 00380 /// Convert Int8 to string. 00381 /// 00382 /// @param value 00383 /// Integer value (Int8) to be converted. 00384 /// @param flags 00385 /// How to convert value to string. 00386 /// @param base 00387 /// Radix base. Default is 10. Allowed values are 2..32. 00388 /// Bases 8 and 16 do not add leading '0' and '0x' respectively. 00389 /// If necessary you should add them yourself. 00390 /// @return 00391 /// Converted string value. 00392 static string Int8ToString(Int8 value, 00393 TNumToStringFlags flags = 0, 00394 int base = 10); 00395 00396 /// Convert Int8 to string. 
00397 /// 00398 /// @param out_str 00399 /// Output string variable. 00400 /// @param value 00401 /// Integer value (Int8) to be converted. 00402 /// @param flags 00403 /// How to convert value to string. 00404 /// @param base 00405 /// Radix base. Default is 10. Allowed values are 2..32. 00406 /// Bases 8 and 16 do not add leading '0' and '0x' respectively. 00407 /// If necessary you should add them yourself. 00408 static void Int8ToString(string& out_str, Int8 value, 00409 TNumToStringFlags flags = 0, 00410 int base = 10); 00411 00412 /// Convert UInt8 to string. 00413 /// 00414 /// @param value 00415 /// Integer value (Uint8) to be converted. 00416 /// @param flags 00417 /// How to convert value to string. 00418 /// @param base 00419 /// Radix base. Default is 10. Allowed values are 2..32. 00420 /// Bases 8 and 16 do not add leading '0' and '0x' respectively. 00421 /// If necessary you should add them yourself. 00422 /// @return 00423 /// Converted string value. 00424 static string UInt8ToString(Uint8 value, 00425 TNumToStringFlags flags = 0, 00426 int base = 10); 00427 00428 /// Convert UInt8 to string, storing the result in "out_str". 00429 /// 00430 /// @param out_str 00431 /// Output string variable. 00432 /// @param value 00433 /// Integer value (Uint8) to be converted. 00434 /// @param flags 00435 /// How to convert value to string. 00436 /// @param base 00437 /// Radix base. Default is 10. Allowed values are 2..32. 00438 /// Bases 8 and 16 do not add leading '0' and '0x' respectively. 00439 /// If necessary you should add them yourself. 00440 static void UInt8ToString(string& out_str, Uint8 value, 00441 TNumToStringFlags flags = 0, 00442 int base = 10); 00443 00444 /// Convert double to string. 00445 /// 00446 /// @param value 00447 /// Double value to be converted. 00448 /// @param precision 00449 /// Precision value for conversion. If precision is more than the maximum 00450 /// for current platform, then it will be truncated to this maximum. 
00451 /// If it is negative, then the double will be converted to a number in 00452 /// scientific notation. 00453 /// @param flags 00454 /// How to convert value to string. 00455 /// If double format flags are not specified, then the following output format 00456 /// will be used by default: 00457 /// - fDoubleFixed, if 'precision' >= 0. 00458 /// - fDoubleGeneral, if 'precision' < 0. 00459 /// @return 00460 /// Converted string value. 00461 static string DoubleToString(double value, int precision = -1, 00462 TNumToStringFlags flags = 0); 00463 00464 /// Convert double to string, storing the result in "out_str". 00465 /// 00466 /// @param out_str 00467 /// Output string variable. 00468 /// @param value 00469 /// Double value to be converted. 00470 /// @param precision 00471 /// Precision value for conversion. If precision is more than the maximum 00472 /// for current platform, then it will be truncated to this maximum. 00473 /// If it is negative, then the double will be converted to a number in 00474 /// scientific notation. 00475 /// @param flags 00476 /// How to convert value to string. 00477 /// If double format flags are not specified, then the following output format 00478 /// will be used by default: 00479 /// - fDoubleFixed, if 'precision' >= 0. 00480 /// - fDoubleGeneral, if 'precision' < 0. 00481 static void DoubleToString(string& out_str, double value, 00482 int precision = -1, 00483 TNumToStringFlags flags = 0); 00484 00485 /// Convert double to string with specified precision and place the result 00486 /// in the specified buffer. 00487 /// 00488 /// @param value 00489 /// Double value to be converted. 00490 /// @param precision 00491 /// Precision value for conversion. If precision is more than the maximum 00492 /// for current platform, then it will be truncated to this maximum. 00493 /// @param buf 00494 /// Put result of the conversion into this buffer. 00495 /// @param buf_size 00496 /// Size of buffer, "buf". 00497 /// @param flags 00498 /// How to convert value to string. 
00499 /// Default output format is fDoubleFixed. 00500 /// @return 00501 /// The number of bytes stored in "buf", not counting the 00502 /// terminating '\0'. 00503 static SIZE_TYPE DoubleToString(double value, unsigned int precision, 00504 char* buf, SIZE_TYPE buf_size, 00505 TNumToStringFlags flags = 0); 00506 00507 /// Convert pointer to string, storing the result in "out_str". 00508 /// 00509 /// @param out_str 00510 /// Output string variable. 00511 /// @param ptr 00512 /// Pointer to be converted. 00513 static void PtrToString(string& out_str, const void* ptr); 00514 00515 /// Convert pointer to string. 00516 /// 00517 /// @param ptr 00518 /// Pointer to be converted. 00519 /// @return 00520 /// String value representing the pointer. 00521 static string PtrToString(const void* ptr); 00522 00523 /// Convert bool to string. 00524 /// 00525 /// @param value 00526 /// Boolean value to be converted. 00527 /// @return 00528 /// One of: 'true', 'false' 00529 static const string BoolToString(bool value); 00530 00531 /// Convert string to bool. 00532 /// 00533 /// @param str 00534 /// Boolean string value to be converted. Can recognize 00535 /// case-insensitive version as one of: 'true', 't', 'yes', 'y' 00536 /// for TRUE; and 'false', 'f', 'no', 'n' for FALSE. 00537 /// @return 00538 /// TRUE or FALSE. 00539 static bool StringToBool(const string& str); 00540 00541 00542 /// Handle an arbitrary printf-style format string. 00543 /// 00544 /// This method exists only to support third-party code that insists on 00545 /// representing messages in this format; please stick to type-checked 00546 /// means of formatting such as the above ToString methods and I/O 00547 /// streams whenever possible. 00548 static string FormatVarargs(const char* format, va_list args); 00549 00550 00551 /// Which type of string comparison. 00552 enum ECase { 00553 eCase, ///< Case sensitive compare 00554 eNocase ///< Case insensitive compare 00555 }; 00556 00557 // ATTENTION. 
Be aware that: 00558 // 00559 // 1) "Compare***(..., SIZE_TYPE pos, SIZE_TYPE n, ...)" functions 00560 // follow the ANSI C++ comparison rules a la "basic_string::compare()": 00561 // str[pos:pos+n) == pattern --> return 0 00562 // str[pos:pos+n) < pattern --> return negative value 00563 // str[pos:pos+n) > pattern --> return positive value 00564 // 00565 // 2) "strn[case]cmp()" functions follow the ANSI C comparison rules: 00566 // str[0:n) == pattern[0:n) --> return 0 00567 // str[0:n) < pattern[0:n) --> return negative value 00568 // str[0:n) > pattern[0:n) --> return positive value 00569 00570 00571 /// Case-sensitive compare of a substring with a pattern. 00572 /// 00573 /// @param str 00574 /// String containing the substring to be compared. 00575 /// @param pos 00576 /// Start position of substring to be compared. 00577 /// @param n 00578 /// Number of characters in substring to be compared. 00579 /// @param pattern 00580 /// String pattern (char*) to be compared with substring. 00581 /// @return 00582 /// - 0, if str[pos:pos+n) == pattern. 00583 /// - Negative integer, if str[pos:pos+n) < pattern. 00584 /// - Positive integer, if str[pos:pos+n) > pattern. 00585 /// @sa 00586 /// Other forms of overloaded CompareCase() with differences in argument 00587 /// types: char* vs. string& 00588 static int CompareCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00589 const char* pattern); 00590 00591 /// Case-sensitive compare of a substring with a pattern. 00592 /// 00593 /// @param str 00594 /// String containing the substring to be compared. 00595 /// @param pos 00596 /// Start position of substring to be compared. 00597 /// @param n 00598 /// Number of characters in substring to be compared. 00599 /// @param pattern 00600 /// String pattern (string&) to be compared with substring. 00601 /// @return 00602 /// - 0, if str[pos:pos+n) == pattern. 00603 /// - Negative integer, if str[pos:pos+n) < pattern. 00604 /// - Positive integer, if str[pos:pos+n) > pattern. 
00605 /// @sa 00606 /// Other forms of overloaded CompareCase() with differences in argument 00607 /// types: char* vs. string& 00608 static int CompareCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00609 const string& pattern); 00610 00611 /// Case-sensitive compare of two strings -- char* version. 00612 /// 00613 /// @param s1 00614 /// String to be compared -- operand 1. 00615 /// @param s2 00616 /// String to be compared -- operand 2. 00617 /// @return 00618 /// - 0, if s1 == s2. 00619 /// - Negative integer, if s1 < s2. 00620 /// - Positive integer, if s1 > s2. 00621 /// @sa 00622 /// CompareNocase(), Compare() versions with same argument types. 00623 static int CompareCase(const char* s1, const char* s2); 00624 00625 /// Case-sensitive compare of two strings -- string& version. 00626 /// 00627 /// @param s1 00628 /// String to be compared -- operand 1. 00629 /// @param s2 00630 /// String to be compared -- operand 2. 00631 /// @return 00632 /// - 0, if s1 == s2. 00633 /// - Negative integer, if s1 < s2. 00634 /// - Positive integer, if s1 > s2. 00635 /// @sa 00636 /// CompareNocase(), Compare() versions with same argument types. 00637 static int CompareCase(const string& s1, const string& s2); 00638 00639 /// Case-insensitive compare of a substring with a pattern. 00640 /// 00641 /// @param str 00642 /// String containing the substring to be compared. 00643 /// @param pos 00644 /// Start position of substring to be compared. 00645 /// @param n 00646 /// Number of characters in substring to be compared. 00647 /// @param pattern 00648 /// String pattern (char*) to be compared with substring. 00649 /// @return 00650 /// - 0, if str[pos:pos+n) == pattern (case-insensitive compare). 00651 /// - Negative integer, if str[pos:pos+n) < pattern (case-insensitive 00652 /// compare). 00653 /// - Positive integer, if str[pos:pos+n) > pattern (case-insensitive 00654 /// compare). 
00655 /// @sa 00656 /// Other forms of overloaded CompareNocase() with differences in 00657 /// argument types: char* vs. string& 00658 static int CompareNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00659 const char* pattern); 00660 00661 /// Case-insensitive compare of a substring with a pattern. 00662 /// 00663 /// @param str 00664 /// String containing the substring to be compared. 00665 /// @param pos 00666 /// Start position of substring to be compared. 00667 /// @param n 00668 /// Number of characters in substring to be compared. 00669 /// @param pattern 00670 /// String pattern (string&) to be compared with substring. 00671 /// @return 00672 /// - 0, if str[pos:pos+n) == pattern (case-insensitive compare). 00673 /// - Negative integer, if str[pos:pos+n) < pattern (case-insensitive 00674 /// compare). 00675 /// - Positive integer, if str[pos:pos+n) > pattern (case-insensitive 00676 /// compare). 00677 /// @sa 00678 /// Other forms of overloaded CompareNocase() with differences in 00679 /// argument types: char* vs. string& 00680 static int CompareNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00681 const string& pattern); 00682 00683 /// Case-insensitive compare of two strings -- char* version. 00684 /// 00685 /// @param s1 00686 /// String to be compared -- operand 1. 00687 /// @param s2 00688 /// String to be compared -- operand 2. 00689 /// @return 00690 /// - 0, if s1 == s2 (case-insensitive compare). 00691 /// - Negative integer, if s1 < s2 (case-insensitive compare). 00692 /// - Positive integer, if s1 > s2 (case-insensitive compare). 00693 /// @sa 00694 /// CompareCase(), Compare() versions with same argument types. 00695 static int CompareNocase(const char* s1, const char* s2); 00696 00697 /// Case-insensitive compare of two strings -- string& version. 00698 /// 00699 /// @param s1 00700 /// String to be compared -- operand 1. 00701 /// @param s2 00702 /// String to be compared -- operand 2. 
00703 /// @return 00704 /// - 0, if s1 == s2 (case-insensitive compare). 00705 /// - Negative integer, if s1 < s2 (case-insensitive compare). 00706 /// - Positive integer, if s1 > s2 (case-insensitive compare). 00707 /// @sa 00708 /// CompareCase(), Compare() versions with same argument types. 00709 static int CompareNocase(const string& s1, const string& s2); 00710 00711 /// Comparison of a substring with a pattern. 00712 /// 00713 /// @param str 00714 /// String containing the substring to be compared. 00715 /// @param pos 00716 /// Start position of substring to be compared. 00717 /// @param n 00718 /// Number of characters in substring to be compared. 00719 /// @param pattern 00720 /// String pattern (char*) to be compared with substring. 00721 /// @param use_case 00722 /// Whether to do a case-sensitive compare (eCase -- default), or a 00723 /// case-insensitive compare (eNocase). 00724 /// @return 00725 /// - 0, if str[pos:pos+n) == pattern. 00726 /// - Negative integer, if str[pos:pos+n) < pattern. 00727 /// - Positive integer, if str[pos:pos+n) > pattern. 00728 /// @sa 00729 /// Other forms of overloaded Compare() with differences in argument 00730 /// types: char* vs. string& 00731 static int Compare(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00732 const char* pattern, ECase use_case = eCase); 00733 00734 /// Comparison of a substring with a pattern. 00735 /// 00736 /// @param str 00737 /// String containing the substring to be compared. 00738 /// @param pos 00739 /// Start position of substring to be compared. 00740 /// @param n 00741 /// Number of characters in substring to be compared. 00742 /// @param pattern 00743 /// String pattern (string&) to be compared with substring. 00744 /// @param use_case 00745 /// Whether to do a case-sensitive compare (default is eCase), or a 00746 /// case-insensitive compare (eNocase). 00747 /// @return 00748 /// - 0, if str[pos:pos+n) == pattern. 00749 /// - Negative integer, if str[pos:pos+n) < pattern. 
00750 /// - Positive integer, if str[pos:pos+n) > pattern. 00751 /// @sa 00752 /// Other forms of overloaded Compare() with differences in argument 00753 /// types: char* vs. string& 00754 static int Compare(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00755 const string& pattern, ECase use_case = eCase); 00756 00757 /// Compare two strings -- char* version. 00758 /// 00759 /// @param s1 00760 /// String to be compared -- operand 1. 00761 /// @param s2 00762 /// String to be compared -- operand 2. 00763 /// @param use_case 00764 /// Whether to do a case-sensitive compare (default is eCase), or a 00765 /// case-insensitive compare (eNocase). 00766 /// @return 00767 /// - 0, if s1 == s2. 00768 /// - Negative integer, if s1 < s2. 00769 /// - Positive integer, if s1 > s2. 00770 /// @sa 00771 /// CompareCase(), CompareNocase(), Compare() versions with similar argument types. 00772 static int Compare(const char* s1, const char* s2, 00773 ECase use_case = eCase); 00774 00775 /// Compare two strings -- string&, char* version. 00776 /// 00777 /// @param s1 00778 /// String to be compared -- operand 1. 00779 /// @param s2 00780 /// String to be compared -- operand 2. 00781 /// @param use_case 00782 /// Whether to do a case-sensitive compare (default is eCase), or a 00783 /// case-insensitive compare (eNocase). 00784 /// @return 00785 /// - 0, if s1 == s2. 00786 /// - Negative integer, if s1 < s2. 00787 /// - Positive integer, if s1 > s2. 00788 /// @sa 00789 /// CompareCase(), CompareNocase(), Compare() versions with similar argument types. 00790 static int Compare(const string& s1, const char* s2, 00791 ECase use_case = eCase); 00792 00793 /// Compare two strings -- char*, string& version. 00794 /// 00795 /// @param s1 00796 /// String to be compared -- operand 1. 00797 /// @param s2 00798 /// String to be compared -- operand 2. 00799 /// @param use_case 00800 /// Whether to do a case-sensitive compare (default is eCase), or a 00801 /// case-insensitive compare (eNocase). 
00802 /// @return 00803 /// - 0, if s1 == s2. 00804 /// - Negative integer, if s1 < s2. 00805 /// - Positive integer, if s1 > s2. 00806 /// @sa 00807 /// CompareCase(), CompareNocase(), Compare() versions with similar argument types. 00808 static int Compare(const char* s1, const string& s2, 00809 ECase use_case = eCase); 00810 00811 /// Compare two strings -- string& version. 00812 /// 00813 /// @param s1 00814 /// String to be compared -- operand 1. 00815 /// @param s2 00816 /// String to be compared -- operand 2. 00817 /// @param use_case 00818 /// Whether to do a case-sensitive compare (default is eCase), or a 00819 /// case-insensitive compare (eNocase). 00820 /// @return 00821 /// - 0, if s1 == s2. 00822 /// - Negative integer, if s1 < s2. 00823 /// - Positive integer, if s1 > s2. 00824 /// @sa 00825 /// CompareCase(), CompareNocase(), Compare() versions with similar argument types. 00826 static int Compare(const string& s1, const string& s2, 00827 ECase use_case = eCase); 00828 00829 /// Case-sensitive equality of a substring with a pattern. 00830 /// 00831 /// @param str 00832 /// String containing the substring to be compared. 00833 /// @param pos 00834 /// Start position of substring to be compared. 00835 /// @param n 00836 /// Number of characters in substring to be compared. 00837 /// @param pattern 00838 /// String pattern (char*) to be compared with substring. 00839 /// @return 00840 /// - true, if str[pos:pos+n) equals pattern. 00841 /// - false, otherwise 00842 /// @sa 00843 /// Other forms of overloaded EqualCase() with differences in argument 00844 /// types: char* vs. string& 00845 static bool EqualCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00846 const char* pattern); 00847 00848 /// Case-sensitive equality of a substring with a pattern. 00849 /// 00850 /// @param str 00851 /// String containing the substring to be compared. 00852 /// @param pos 00853 /// Start position of substring to be compared. 
00854 /// @param n 00855 /// Number of characters in substring to be compared. 00856 /// @param pattern 00857 /// String pattern (string&) to be compared with substring. 00858 /// @return 00859 /// - true, if str[pos:pos+n) equals pattern. 00860 /// - false, otherwise. 00861 /// @sa 00862 /// Other forms of overloaded EqualCase() with differences in argument 00863 /// types: char* vs. string& 00864 static bool EqualCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00865 const string& pattern); 00866 00867 /// Case-sensitive equality of two strings -- char* version. 00868 /// 00869 /// @param s1 00870 /// String to be compared -- operand 1. 00871 /// @param s2 00872 /// String to be compared -- operand 2. 00873 /// @return 00874 /// - true, if s1 equals s2. 00875 /// - false, otherwise. 00876 /// @sa 00877 /// EqualNocase(), Equal() versions with same argument types. 00878 static bool EqualCase(const char* s1, const char* s2); 00879 00880 /// Case-sensitive equality of two strings -- string& version. 00881 /// 00882 /// @param s1 00883 /// String to be compared -- operand 1. 00884 /// @param s2 00885 /// String to be compared -- operand 2. 00886 /// @return 00887 /// - true, if s1 equals s2. 00888 /// - false, otherwise. 00889 /// @sa 00890 /// EqualNocase(), Equal() versions with same argument types. 00891 static bool EqualCase(const string& s1, const string& s2); 00892 00893 /// Case-insensitive equality of a substring with a pattern. 00894 /// 00895 /// @param str 00896 /// String containing the substring to be compared. 00897 /// @param pos 00898 /// Start position of substring to be compared. 00899 /// @param n 00900 /// Number of characters in substring to be compared. 00901 /// @param pattern 00902 /// String pattern (char*) to be compared with substring. 00903 /// @return 00904 /// - true, if str[pos:pos+n) equals pattern (case-insensitive compare). 00905 /// - false, otherwise.
00906 /// @sa 00907 /// Other forms of overloaded EqualNocase() with differences in 00908 /// argument types: char* vs. string& 00909 static bool EqualNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00910 const char* pattern); 00911 00912 /// Case-insensitive equality of a substring with a pattern. 00913 /// 00914 /// @param str 00915 /// String containing the substring to be compared. 00916 /// @param pos 00917 /// Start position of substring to be compared. 00918 /// @param n 00919 /// Number of characters in substring to be compared. 00920 /// @param pattern 00921 /// String pattern (string&) to be compared with substring. 00922 /// @return 00923 /// - true, if str[pos:pos+n) equals pattern (case-insensitive compare). 00924 /// - false, otherwise. 00925 /// @sa 00926 /// Other forms of overloaded EqualNocase() with differences in 00927 /// argument types: char* vs. string& 00928 static bool EqualNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00929 const string& pattern); 00930 00931 /// Case-insensitive equality of two strings -- char* version. 00932 /// 00933 /// @param s1 00934 /// String to be compared -- operand 1. 00935 /// @param s2 00936 /// String to be compared -- operand 2. 00937 /// @return 00938 /// - true, if s1 equals s2 (case-insensitive compare). 00939 /// - false, otherwise. 00940 /// @sa 00941 /// EqualCase(), Equal() versions with same argument types. 00942 static bool EqualNocase(const char* s1, const char* s2); 00943 00944 /// Case-insensitive equality of two strings -- string& version. 00945 /// 00946 /// @param s1 00947 /// String to be compared -- operand 1. 00948 /// @param s2 00949 /// String to be compared -- operand 2. 00950 /// @return 00951 /// - true, if s1 equals s2 (case-insensitive compare). 00952 /// - false, otherwise. 00953 /// @sa 00954 /// EqualCase(), Equal() versions with same argument types. 
00955 static bool EqualNocase(const string& s1, const string& s2); 00956 00957 /// Test for equality of a substring with a pattern. 00958 /// 00959 /// @param str 00960 /// String containing the substring to be compared. 00961 /// @param pos 00962 /// Start position of substring to be compared. 00963 /// @param n 00964 /// Number of characters in substring to be compared. 00965 /// @param pattern 00966 /// String pattern (char*) to be compared with substring. 00967 /// @param use_case 00968 /// Whether to do a case sensitive compare (eCase -- default), or a 00969 /// case-insensitive compare (eNocase). 00970 /// @return 00971 /// - true, if str[pos:pos+n) equals pattern. 00972 /// - false, otherwise. 00973 /// @sa 00974 /// Other forms of overloaded Equal() with differences in argument 00975 /// types: char* vs. string& 00976 static bool Equal(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 00977 const char* pattern, ECase use_case = eCase); 00978 00979 /// Test for equality of a substring with a pattern. 00980 /// 00981 /// @param str 00982 /// String containing the substring to be compared. 00983 /// @param pos 00984 /// Start position of substring to be compared. 00985 /// @param n 00986 /// Number of characters in substring to be compared. 00987 /// @param pattern 00988 /// String pattern (string&) to be compared with substring. 00989 /// @param use_case 00990 /// Whether to do a case sensitive compare (default is eCase), or a 00991 /// case-insensitive compare (eNocase). 00992 /// @return 00993 /// - true, if str[pos:pos+n) equals pattern (compared according to 00994 /// "use_case"). 00995 /// - false, otherwise. 00996 /// @sa 00997 /// Other forms of overloaded Equal() with differences in argument 00998 /// types: char* vs. string& 00999 static bool Equal(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 01000 const string& pattern, ECase use_case = eCase); 01001 01002 /// Test for equality of two strings -- char* version.
01003 /// 01004 /// @param s1 01005 /// String to be compared -- operand 1. 01006 /// @param s2 01007 /// String to be compared -- operand 2. 01008 /// @param use_case 01009 /// Whether to do a case sensitive compare (default is eCase), or a 01010 /// case-insensitive compare (eNocase). 01011 /// @return 01012 /// - true, if s1 equals s2 (compared according to 01013 /// "use_case"). 01014 /// - false, otherwise. 01015 /// @sa 01016 /// EqualNocase(), Equal() versions with similar argument types. 01017 static bool Equal(const char* s1, const char* s2, 01018 ECase use_case = eCase); 01019 01020 /// Test for equality of two strings -- string&, char* version. 01021 /// 01022 /// @param s1 01023 /// String to be compared -- operand 1. 01024 /// @param s2 01025 /// String to be compared -- operand 2. 01026 /// @param use_case 01027 /// Whether to do a case sensitive compare (default is eCase), or a 01028 /// case-insensitive compare (eNocase). 01029 /// @return 01030 /// - true, if s1 equals s2. 01031 /// - false, otherwise. 01032 /// @sa 01033 /// EqualNocase(), Equal() versions with similar argument types. 01034 static bool Equal(const string& s1, const char* s2, 01035 ECase use_case = eCase); 01036 01037 /// Test for equality of two strings -- char*, string& version. 01038 /// 01039 /// @param s1 01040 /// String to be compared -- operand 1. 01041 /// @param s2 01042 /// String to be compared -- operand 2. 01043 /// @param use_case 01044 /// Whether to do a case sensitive compare (default is eCase), or a 01045 /// case-insensitive compare (eNocase). 01046 /// @return 01047 /// - true, if s1 equals s2. 01048 /// - false, otherwise. 01049 /// @sa 01050 /// EqualNocase(), Equal() versions with similar argument types. 01051 static bool Equal(const char* s1, const string& s2, 01052 ECase use_case = eCase); 01053 01054 /// Test for equality of two strings -- string& version. 01055 /// 01056 /// @param s1 01057 /// String to be compared -- operand 1.
01058 /// @param s2 01059 /// String to be compared -- operand 2. 01060 /// @param use_case 01061 /// Whether to do a case sensitive compare (default is eCase), or a 01062 /// case-insensitive compare (eNocase). 01063 /// @return 01064 /// - true, if s1 equals s2. 01065 /// - false, otherwise. 01066 /// @sa 01067 /// EqualNocase(), Equal() versions with similar argument types. 01068 static bool Equal(const string& s1, const string& s2, 01069 ECase use_case = eCase); 01070 01071 // NOTE. On some platforms, "strn[case]cmp()" can work faster than their 01072 // "Compare***()" counterparts. 01073 01074 /// String compare. 01075 /// 01076 /// @param s1 01077 /// String to be compared -- operand 1. 01078 /// @param s2 01079 /// String to be compared -- operand 2. 01080 /// @return 01081 /// - 0, if s1 == s2. 01082 /// - Negative integer, if s1 < s2. 01083 /// - Positive integer, if s1 > s2. 01084 /// @sa 01085 /// strncmp(), strcasecmp(), strncasecmp() 01086 static int strcmp(const char* s1, const char* s2); 01087 01088 /// String compare up to a specified number of characters. 01089 /// 01090 /// @param s1 01091 /// String to be compared -- operand 1. 01092 /// @param s2 01093 /// String to be compared -- operand 2. 01094 /// @param n 01095 /// Maximum number of characters to compare. 01096 /// @return 01097 /// - 0, if s1 == s2. 01098 /// - Negative integer, if s1 < s2. 01099 /// - Positive integer, if s1 > s2. 01100 /// @sa 01101 /// strcmp(), strcasecmp(), strncasecmp() 01102 static int strncmp(const char* s1, const char* s2, size_t n); 01103 01104 /// Case-insensitive string compare. 01105 /// 01106 /// @param s1 01107 /// String to be compared -- operand 1. 01108 /// @param s2 01109 /// String to be compared -- operand 2. 01110 /// @return 01111 /// - 0, if s1 == s2. 01112 /// - Negative integer, if s1 < s2. 01113 /// - Positive integer, if s1 > s2.
01114 /// @sa 01115 /// strcmp(), strncmp(), strncasecmp() 01116 static int strcasecmp(const char* s1, const char* s2); 01117 01118 /// Case-insensitive string compare up to a specified number of characters ("n"). 01119 /// 01120 /// @param s1 01121 /// String to be compared -- operand 1. 01122 /// @param s2 01123 /// String to be compared -- operand 2. 01124 /// @return 01125 /// - 0, if s1 == s2. 01126 /// - Negative integer, if s1 < s2. 01127 /// - Positive integer, if s1 > s2. 01128 /// @sa 01129 /// strcmp(), strncmp(), strcasecmp() 01130 static int strncasecmp(const char* s1, const char* s2, size_t n); 01131 01132 /// Wrapper for the function strftime() that corrects handling of %D and %T 01133 /// time formats on MS Windows. 01134 static size_t strftime (char* s, size_t maxsize, const char* format, 01135 const struct tm* timeptr); 01136 01137 /// Match "str" against the "mask". 01138 /// 01139 /// This function does not use regular expressions. 01140 /// @param str 01141 /// String to match. 01142 /// @param mask 01143 /// Mask used to match string "str". It can contain the following 01144 /// wildcard characters: 01145 /// ? - matches any one symbol in the string. 01146 /// * - matches any number of symbols in the string. 01147 /// @param use_case 01148 /// Whether to do a case sensitive compare (eCase -- default), or a 01149 /// case-insensitive compare (eNocase). 01150 /// @return 01151 /// Return TRUE if "str" matches "mask", and FALSE otherwise. 01152 /// @sa 01153 /// CRegexp, CRegexpUtil 01154 static bool MatchesMask(const char *str, const char *mask, 01155 ECase use_case = eCase); 01156 01157 /// Match "str" against the "mask". 01158 /// 01159 /// This function does not use regular expressions. 01160 /// @param str 01161 /// String to match. 01162 /// @param mask 01163 /// Mask used to match string "str". It can contain the following 01164 /// wildcard characters: 01165 /// ? - matches any one symbol in the string.
01166 /// * - matches any number of symbols in the string. 01167 /// @param use_case 01168 /// Whether to do a case sensitive compare (eCase -- default), or a 01169 /// case-insensitive compare (eNocase). 01170 /// @return 01171 /// Return TRUE if "str" matches "mask", and FALSE otherwise. 01172 /// @sa 01173 /// CRegexp, CRegexpUtil 01174 static bool MatchesMask(const string& str, const string& mask, 01175 ECase use_case = eCase); 01176 01177 // The following 4 methods change the passed string, then return it 01178 01179 /// Convert string to lower case -- string& version. 01180 /// 01181 /// @param str 01182 /// String to be converted. 01183 /// @return 01184 /// Lower cased string (the passed "str" itself, after conversion). 01185 static string& ToLower(string& str); 01186 01187 /// Convert string to lower case -- char* version. 01188 /// 01189 /// @param str 01190 /// String to be converted. 01191 /// @return 01192 /// Lower cased string (the passed "str" itself, after conversion). 01193 static char* ToLower(char* str); 01194 01195 /// Convert string to upper case -- string& version. 01196 /// 01197 /// @param str 01198 /// String to be converted. 01199 /// @return 01200 /// Upper cased string (the passed "str" itself, after conversion). 01201 static string& ToUpper(string& str); 01202 01203 /// Convert string to upper case -- char* version. 01204 /// 01205 /// @param str 01206 /// String to be converted. 01207 /// @return 01208 /// Upper cased string (the passed "str" itself, after conversion). 01209 static char* ToUpper(char* str); 01210 01211 private: 01212 /// Privatized ToLower() with const char* parameter to prevent passing of 01213 /// constant strings. 01214 static void/*dummy*/ ToLower(const char* /*dummy*/); 01215 01216 /// Privatized ToUpper() with const char* parameter to prevent passing of 01217 /// constant strings. 01218 static void/*dummy*/ ToUpper(const char* /*dummy*/); 01219 01220 public: 01221 /// Check if a string starts with a specified prefix value. 01222 /// 01223 /// @param str 01224 /// String to check. 01225 /// @param start 01226 /// Prefix value to check for.
01227 /// @param use_case 01228 /// Whether to do a case sensitive compare (default is eCase), or a 01229 /// case-insensitive compare (eNocase) while checking. 01230 static bool StartsWith(const string& str, const string& start, 01231 ECase use_case = eCase); 01232 01233 /// Check if a string starts with a specified prefix value. 01234 /// 01235 /// @param str 01236 /// String to check. 01237 /// @param start 01238 /// Prefix value to check for. 01239 /// @param use_case 01240 /// Whether to do a case sensitive compare (default is eCase), or a 01241 /// case-insensitive compare (eNocase) while checking. 01242 static bool StartsWith(const string& str, const char* start, 01243 ECase use_case = eCase); 01244 01245 /// Check if a string starts with a specified character value. 01246 /// 01247 /// @param str 01248 /// String to check. 01249 /// @param start 01250 /// Character value to check for. 01251 /// @param use_case 01252 /// Whether to do a case sensitive compare (default is eCase), or a 01253 /// case-insensitive compare (eNocase) while checking. 01254 static bool StartsWith(const string& str, char start, 01255 ECase use_case = eCase); 01256 01257 /// Check if a string ends with a specified suffix value. 01258 /// 01259 /// @param str 01260 /// String to check. 01261 /// @param end 01262 /// Suffix value to check for. 01263 /// @param use_case 01264 /// Whether to do a case sensitive compare (default is eCase), or a 01265 /// case-insensitive compare (eNocase) while checking. 01266 static bool EndsWith(const string& str, const string& end, 01267 ECase use_case = eCase); 01268 01269 /// Check if a string ends with a specified character value. 01270 /// 01271 /// @param str 01272 /// String to check. 01273 /// @param end 01274 /// Character value to check for. 01275 /// @param use_case 01276 /// Whether to do a case sensitive compare (default is eCase), or a 01277 /// case-insensitive compare (eNocase) while checking.
01278 static bool EndsWith(const string& str, char end, 01279 ECase use_case = eCase); 01280 01281 /// Check if a string is blank (has no text). 01282 /// 01283 /// @param str 01284 /// String to check. 01285 /// @param pos 01286 /// Starting position (default 0). 01287 static bool IsBlank(const string& str, SIZE_TYPE pos = 0); 01288 01289 /// Whether it is the first or last occurrence. 01290 enum EOccurrence { 01291 eFirst, ///< First occurrence 01292 eLast ///< Last occurrence 01293 }; 01294 01295 /// Find the pattern in the specified range of a string. 01296 /// 01297 /// @param str 01298 /// String to search. 01299 /// @param pattern 01300 /// Pattern to search for in "str". 01301 /// @param start 01302 /// Position in "str" to start search from -- default of 0 means start 01303 /// the search from the beginning of the string. 01304 /// @param end 01305 /// Position in "str" to search up to -- default of NPOS means 01306 /// to search to the end of the string. 01307 /// @param which 01308 /// When set to eFirst, this means to find the first occurrence of 01309 /// "pattern" in "str". When set to eLast, this means to find the last 01310 /// occurrence of "pattern" in "str". 01311 /// @param use_case 01312 /// Whether to do a case sensitive compare (default is eCase), or a 01313 /// case-insensitive compare (eNocase) while searching for the pattern. 01314 /// @return 01315 /// - The start of the first or last (depending on "which" parameter) 01316 /// occurrence of "pattern" in "str", within the string interval 01317 /// ["start", "end"], or 01318 /// - NPOS if there is no occurrence of the pattern. 01319 static SIZE_TYPE Find(const string& str, const string& pattern, 01320 SIZE_TYPE start = 0, SIZE_TYPE end = NPOS, 01321 EOccurrence which = eFirst, 01322 ECase use_case = eCase); 01323 01324 /// Find the pattern in the specified range of a string using a case 01325 /// sensitive search. 01326 /// 01327 /// @param str 01328 /// String to search.
01329 /// @param pattern 01330 /// Pattern to search for in "str". 01331 /// @param start 01332 /// Position in "str" to start search from -- default of 0 means start 01333 /// the search from the beginning of the string. 01334 /// @param end 01335 /// Position in "str" to search up to -- default of NPOS means 01336 /// to search to the end of the string. 01337 /// @param which 01338 /// When set to eFirst, this means to find the first occurrence of 01339 /// "pattern" in "str". When set to eLast, this means to find the last 01340 /// occurrence of "pattern" in "str". 01341 /// @return 01342 /// - The start of the first or last (depending on "which" parameter) 01343 /// occurrence of "pattern" in "str", within the string interval 01344 /// ["start", "end"], or 01345 /// - NPOS if there is no occurrence of the pattern. 01346 static SIZE_TYPE FindCase (const string& str, const string& pattern, 01347 SIZE_TYPE start = 0, SIZE_TYPE end = NPOS, 01348 EOccurrence which = eFirst); 01349 01350 /// Find the pattern in the specified range of a string using a case 01351 /// insensitive search. 01352 /// 01353 /// @param str 01354 /// String to search. 01355 /// @param pattern 01356 /// Pattern to search for in "str". 01357 /// @param start 01358 /// Position in "str" to start search from -- default of 0 means start 01359 /// the search from the beginning of the string. 01360 /// @param end 01361 /// Position in "str" to search up to -- default of NPOS means 01362 /// to search to the end of the string. 01363 /// @param which 01364 /// When set to eFirst, this means to find the first occurrence of 01365 /// "pattern" in "str". When set to eLast, this means to find the last 01366 /// occurrence of "pattern" in "str". 01367 /// @return 01368 /// - The start of the first or last (depending on "which" parameter) 01369 /// occurrence of "pattern" in "str", within the string interval 01370 /// ["start", "end"], or 01371 /// - NPOS if there is no occurrence of the pattern.
01372 static SIZE_TYPE FindNoCase(const string& str, const string& pattern, 01373 SIZE_TYPE start = 0, SIZE_TYPE end = NPOS, 01374 EOccurrence which = eFirst); 01375 01376 /// Test for presence of a given string in a list or vector of strings 01377 01378 static const string* Find(const list<string>& lst, const string& val, 01379 ECase use_case = eCase); 01380 01381 static const string* FindCase(const list<string>& lst, const string& val); 01382 01383 static const string* FindNoCase(const list<string>& lst, const string& val); 01384 01385 static const string* Find(const vector<string>& vec, const string& val, 01386 ECase use_case = eCase); 01387 01388 static const string* FindCase(const vector<string>& vec, const string& val); 01389 01390 static const string* FindNoCase(const vector<string>& vec, 01391 const string& val); 01392 01393 01394 /// Which end to truncate a string. 01395 enum ETrunc { 01396 eTrunc_Begin, ///< Truncate leading spaces only 01397 eTrunc_End, ///< Truncate trailing spaces only 01398 eTrunc_Both ///< Truncate spaces at both begin and end of string 01399 }; 01400 01401 /// Truncate spaces in a string. 01402 /// 01403 /// @param str 01404 /// String to truncate spaces from. 01405 /// @param where 01406 /// Which end of the string to truncate space from. Default is to 01407 /// truncate space from both ends (eTrunc_Both). 01408 static string TruncateSpaces(const string& str, ETrunc where=eTrunc_Both); 01409 static CTempString TruncateSpaces(const CTempString& str, 01410 ETrunc where=eTrunc_Both); 01411 static CTempString TruncateSpaces(const char* str, 01412 ETrunc where=eTrunc_Both); 01413 01414 /// Truncate spaces in a string (in-place) 01415 /// 01416 /// @param str 01417 /// String to truncate spaces from. 01418 /// @param where 01419 /// Which end of the string to truncate space from. Default is to 01420 /// truncate space from both ends (eTrunc_Both). 
01421 static void TruncateSpacesInPlace(string& str, ETrunc where=eTrunc_Both); 01422 01423 /// Replace occurrences of a substring within a string. 01424 /// 01425 /// @param src 01426 /// Source string from which specified substring occurrences are 01427 /// replaced. 01428 /// @param search 01429 /// Substring value in "src" that is replaced. 01430 /// @param replace 01431 /// Replace "search" substring with this value. 01432 /// @param dst 01433 /// Result of replacing the "search" string with "replace" in "src". 01434 /// This value is also returned by the function. 01435 /// @param start_pos 01436 /// Position to start search from. 01437 /// @param max_replace 01438 /// Replace no more than "max_replace" occurrences of substring "search". 01439 /// If "max_replace" is zero (default), then replace all occurrences with 01440 /// "replace". 01441 /// @return 01442 /// Result of replacing the "search" string with "replace" in "src". This 01443 /// value is placed in "dst" as well. 01444 /// @sa 01445 /// Version of Replace() that returns a new string. 01446 static string& Replace(const string& src, 01447 const string& search, 01448 const string& replace, 01449 string& dst, 01450 SIZE_TYPE start_pos = 0, 01451 SIZE_TYPE max_replace = 0); 01452 01453 /// Replace occurrences of a substring within a string and return the 01454 /// result as a new string. 01455 /// 01456 /// @param src 01457 /// Source string from which specified substring occurrences are 01458 /// replaced. 01459 /// @param search 01460 /// Substring value in "src" that is replaced. 01461 /// @param replace 01462 /// Replace "search" substring with this value. 01463 /// @param start_pos 01464 /// Position to start search from. 01465 /// @param max_replace 01466 /// Replace no more than "max_replace" occurrences of substring "search". 01467 /// If "max_replace" is zero (default), then replace all occurrences with 01468 /// "replace".
01469 /// @return 01470 /// A new string containing the result of replacing the "search" string 01471 /// with "replace" in "src". 01472 /// @sa 01473 /// Version of Replace() that has a destination parameter to accept 01474 /// result. 01475 static string Replace(const string& src, 01476 const string& search, 01477 const string& replace, 01478 SIZE_TYPE start_pos = 0, 01479 SIZE_TYPE max_replace = 0); 01480 01481 /// Replace occurrences of a substring within a string. 01482 /// 01483 /// On some platforms this function is much faster than Replace() 01484 /// if sizes of "search" and "replace" strings are equal. 01485 /// Otherwise, the performance is mainly the same. 01486 /// @param src 01487 /// String where specified substring occurrences are replaced. 01488 /// This value is also returned by the function. 01489 /// @param search 01490 /// Substring value in "src" that is replaced. 01491 /// @param replace 01492 /// Replace "search" substring with this value. 01493 /// @param start_pos 01494 /// Position to start search from. 01495 /// @param max_replace 01496 /// Replace no more than "max_replace" occurrences of substring "search". 01497 /// If "max_replace" is zero (default), then replace all occurrences with 01498 /// "replace". 01499 /// @return 01500 /// Result of replacing the "search" string with "replace" in "src". 01501 /// @sa 01502 /// Replace 01503 static string& ReplaceInPlace(string& src, 01504 const string& search, 01505 const string& replace, 01506 SIZE_TYPE start_pos = 0, 01507 SIZE_TYPE max_replace = 0); 01508 01509 /// Whether to merge adjacent delimiters in Split and Tokenize. 01510 enum EMergeDelims { 01511 eNoMergeDelims, ///< No merging of delimiters -- default for 01512 ///< Tokenize() 01513 eMergeDelims ///< Merge the delimiters -- default for Split() 01514 }; 01515 01516 01517 /// Split a string using specified delimiters. 01518 /// 01519 /// @param str 01520 /// String to be split.
01521 /// @param delim 01522 /// Delimiters used to split string "str". 01523 /// @param arr 01524 /// The split tokens are added to the list "arr" and also returned 01525 /// by the function. 01526 /// @param merge 01527 /// Whether to merge the delimiters or not. The default setting of 01528 /// eMergeDelims means that delimiters that immediately follow each other 01529 /// are treated as one delimiter. 01530 /// @param token_pos 01531 /// Optional array for the tokens' positions in "str". 01532 /// @return 01533 /// The list "arr" is also returned. 01534 /// @sa 01535 /// Tokenize() 01536 static list<string>& Split(const string& str, 01537 const string& delim, 01538 list<string>& arr, 01539 EMergeDelims merge = eMergeDelims, 01540 vector<SIZE_TYPE>* token_pos = NULL); 01541 01542 /// Tokenize a string using the specified set of char delimiters. 01543 /// 01544 /// @param str 01545 /// String to be tokenized. 01546 /// @param delim 01547 /// Set of char delimiters used to tokenize string "str". 01548 /// If delimiter is empty, then input string is appended to "arr" as is. 01549 /// @param arr 01550 /// The tokens defined in "str" by using symbols from "delim" are added 01551 /// to the vector "arr" and also returned by the function. 01552 /// @param merge 01553 /// Whether to merge the delimiters or not. The default setting of 01554 /// eNoMergeDelims means that delimiters that immediately follow each 01555 /// other are treated as separate delimiters. 01556 /// @param token_pos 01557 /// Optional array for the tokens' positions in "str". 01558 /// @return 01559 /// The vector "arr" is also returned. 01560 /// @sa 01561 /// Split, TokenizePattern, SplitInTwo 01562 static vector<string>& Tokenize(const string& str, 01563 const string& delim, 01564 vector<string>& arr, 01565 EMergeDelims merge = eNoMergeDelims, 01566 vector<SIZE_TYPE>* token_pos = NULL); 01567 01568 /// Tokenize a string using the specified delimiter (string).
01569 /// 01570 /// @param str 01571 /// String to be tokenized. 01572 /// @param delim 01573 /// Delimiter used to tokenize string "str". 01574 /// If delimiter is empty, then input string is appended to "arr" as is. 01575 /// @param arr 01576 /// The tokens defined in "str" by using delimiter "delim" are added 01577 /// to the vector "arr" and also returned by the function. 01578 /// @param merge 01579 /// Whether to merge the delimiters or not. The default setting of 01580 /// eNoMergeDelims means that delimiters that immediately follow each 01581 /// other are treated as separate delimiters. 01582 /// @param token_pos 01583 /// Optional array for the tokens' positions in "str". 01584 /// @return 01585 /// The vector "arr" is also returned. 01586 /// @sa 01587 /// Split, Tokenize 01588 static 01589 vector<string>& TokenizePattern(const string& str, 01590 const string& delim, 01591 vector<string>& arr, 01592 EMergeDelims merge = eNoMergeDelims, 01593 vector<SIZE_TYPE>* token_pos = NULL); 01594 01595 /// Split a string into two pieces using the specified delimiters. 01596 /// 01597 /// @param str 01598 /// String to be split. 01599 /// @param delim 01600 /// Delimiters used to split string "str". 01601 /// @param str1 01602 /// The sub-string of "str" before the first character of "delim". 01603 /// It will not contain any characters in "delim". 01604 /// Will be empty if "str" begins with a "delim" character. 01605 /// @param str2 01606 /// The sub-string of "str" after the first character of "delim" found. 01607 /// May contain "delim" characters. 01608 /// Will be empty if "str" had no "delim" characters or ended 01609 /// with the first "delim" character. 01610 /// @return 01611 /// true if a symbol from "delim" was found in "str", false if not. 01612 /// This lets you distinguish when there were no delimiters and when 01613 /// the very last character was the first delimiter.
01614 /// @sa 01615 /// Split, Tokenize, TokenizePattern 01616 static bool SplitInTwo(const string& str, 01617 const string& delim, 01618 string& str1, 01619 string& str2); 01620 01621 01622 /// Join strings using the specified delimiter. 01623 /// 01624 /// @param arr 01625 /// Array of strings to be joined. 01626 /// @param delim 01627 /// Delimiter used to join the strings. 01628 /// @return 01629 /// The strings in "arr" are joined into a single string, separated 01630 /// with "delim". 01631 static string Join(const list<string>& arr, const string& delim); 01632 static string Join(const vector<string>& arr, const string& delim); 01633 01634 /// How to display printable strings. 01635 /// 01636 /// Assists in making a printable version of "str". 01637 enum EPrintableMode { 01638 fNewLine_Quote = 0, ///< Display "\n" instead of actual linebreak 01639 eNewLine_Quote = fNewLine_Quote, 01640 fNewLine_Passthru = 1, ///< Break the line at every "\n" occurrence 01641 eNewLine_Passthru = fNewLine_Passthru, 01642 fPrintable_Full = 2 ///< Show all octal digits at all times 01643 }; 01644 typedef int TPrintableMode; ///< Bitwise OR of EPrintableMode flags 01645 01646 /// Get a printable version of the specified string. 01647 /// 01648 /// All non-printable characters will be represented as "\r", "\n", "\v", 01649 /// "\t", "\"", "\\", etc, or "\ooo" where 'ooo' is the octal code of the 01650 /// character. The resultant string is a well-formed C string literal, 01651 /// which, without alterations, can be compiled by a C/C++ compiler. 01652 /// In many instances, octal representations of non-printable characters 01653 /// can be reduced to take less than all 3 digits, if there is no 01654 /// ambiguity in the interpretation. fPrintable_Full cancels the 01655 /// reduction, and forces full 3-digit octal codes throughout. 01656 /// 01657 /// @param str 01658 /// The string whose printable version is wanted.
01659 /// @param mode 01660 /// How to display the string. The default setting of fNewLine_Quote 01661 /// displays the new lines as "\n", and uses the octal code reduction. 01662 /// When set to fNewLine_Passthru, line breaks are actually 01663 /// produced on output but preceded with trailing backslashes. 01664 /// @return 01665 /// Return a printable version of "str". 01666 /// @sa 01667 /// ParseEscapes 01668 static string PrintableString(const string& str, 01669 TPrintableMode mode = eNewLine_Quote); 01670 01671 /// Parse C-style escape sequences in the specified string, including 01672 /// all those produced by PrintableString. 01673 static string ParseEscapes(const string& str); 01674 01675 /// How to wrap the words in a string to a new line. 01676 enum EWrapFlags { 01677 fWrap_Hyphenate = 0x1, ///< Add a hyphen when breaking words? 01678 fWrap_HTMLPre = 0x2, ///< Wrap as preformatted HTML? 01679 fWrap_FlatFile = 0x4 ///< Wrap for flat file use. 01680 }; 01681 typedef int TWrapFlags; ///< Bitwise OR of "EWrapFlags" 01682 01683 /// Encode a string for C/C++. 01684 /// 01685 /// Synonym for PrintableString(). 01686 /// @sa PrintableString 01687 static string CEncode(const string& str); 01688 01689 /// Encode a string for JavaScript. 01690 /// 01691 /// Similar to CEncode(), but processes some symbols in a different way. 01692 /// @sa PrintableString, CEncode 01693 static string JavaScriptEncode(const string& str); 01694 01695 /// Encode a string for XML. 01696 /// 01697 /// Replace relevant characters by predefined entities. 01698 static string XmlEncode(const string& str); 01699 01700 /// Encode a string for JSON. 01701 static string JsonEncode(const string& str); 01702 01703 /// URL-encode flags 01704 enum EUrlEncode { 01705 eUrlEnc_SkipMarkChars, ///< Do not convert chars like '!', '(' etc.
01706 eUrlEnc_ProcessMarkChars, ///< Convert all non-alphanum chars, 01707 ///< spaces are converted to '+' 01708 eUrlEnc_PercentOnly, ///< Convert all non-alphanum chars including 01709 ///< space and '%' to %## format 01710 eUrlEnc_Path, ///< Same as ProcessMarkChars but preserves 01711 ///< valid path characters ('/', '.') 01712 01713 eUrlEnc_URIScheme, ///< Encode scheme part of an URI. 01714 eUrlEnc_URIUserinfo, ///< Encode userinfo part of an URI. 01715 eUrlEnc_URIHost, ///< Encode host part of an URI. 01716 eUrlEnc_URIPath, ///< Encode path part of an URI. 01717 eUrlEnc_URIQueryName, ///< Encode query part of an URI, arg name. 01718 eUrlEnc_URIQueryValue, ///< Encode query part of an URI, arg value. 01719 eUrlEnc_URIFragment, ///< Encode fragment part of an URI. 01720 01721 eUrlEnc_None ///< Do not encode 01722 }; 01723 /// URL decode flags 01724 enum EUrlDecode { 01725 eUrlDec_All, ///< Decode '+' to space 01726 eUrlDec_Percent ///< Decode only %XX 01727 }; 01728 /// URL-encode string 01729 static string URLEncode(const string& str, 01730 EUrlEncode flag = eUrlEnc_SkipMarkChars); 01731 01732 /// SQL-encode string 01733 /// 01734 /// There are some assumptions/notes about the function: 01735 /// 1. Only for MS SQL and Sybase. 01736 /// 2. Only for string values in WHERE and LIKE clauses. 01737 /// 3. The ' symbol must not be used as an escape symbol in LIKE clause. 01738 /// 4. It must not be used for non-string values. 01739 /// 5. It expects a string without any outer quotes, and 01740 /// it adds single quotes to the returned string. 01741 /// 6. It expects UTF-8 (including its subsets, ASCII and Latin1) or 01742 /// Win1252 string, and the input encoding is preserved. 
01743 /// @param str 01744 /// The string to encode 01745 /// @return 01746 /// Encoded string with added outer single quotes 01747 static CStringUTF8 SQLEncode(const CStringUTF8& str); 01748 01749 /// URL-decode string 01750 static string URLDecode(const string& str, 01751 EUrlDecode flag = eUrlDec_All); 01752 /// URL-decode string to itself 01753 static void URLDecodeInPlace(string& str, 01754 EUrlDecode flag = eUrlDec_All); 01755 /// Check if the string needs the reqested URL-encoding 01756 static bool NeedsURLEncoding(const string& str, 01757 EUrlEncode flag = eUrlEnc_SkipMarkChars); 01758 01759 /// Check if the string contains a valid IP address 01760 static bool IsIPAddress(const string& ip); 01761 01762 /// Wrap the specified string into lines of a specified width -- prefix, 01763 /// prefix1 default version. 01764 /// 01765 /// Split string "str" into lines of width "width" and add the 01766 /// resulting lines to the list "arr". Normally, all 01767 /// lines will begin with "prefix" (counted against "width"), 01768 /// but the first line will instead begin with "prefix1" if 01769 /// you supply it. 01770 /// 01771 /// @param str 01772 /// String to be split into wrapped lines. 01773 /// @param width 01774 /// Width of each wrapped line. 01775 /// @param arr 01776 /// List of strings containing wrapped lines. 01777 /// @param flags 01778 /// How to wrap the words to a new line. See EWrapFlags documentation. 01779 /// @param prefix 01780 /// The prefix string added to each wrapped line, except the first line, 01781 /// unless "prefix1" is set. 01782 /// If "prefix" is set to 0(default), do not add a prefix string to the 01783 /// wrapped lines. 01784 /// @param prefix1 01785 /// The prefix string for the first line. Use this for the first line 01786 /// instead of "prefix". 01787 /// If "prefix1" is set to 0(default), do not add a prefix string to the 01788 /// first line. 01789 /// @return 01790 /// Return "arr", the list of wrapped lines. 
01791 static list<string>& Wrap(const string& str, SIZE_TYPE width, 01792 list<string>& arr, TWrapFlags flags = 0, 01793 const string* prefix = 0, 01794 const string* prefix1 = 0); 01795 01796 /// Wrap the specified string into lines of a specified width -- prefix1 01797 /// default version. 01798 /// 01799 /// Split string "str" into lines of width "width" and add the 01800 /// resulting lines to the list "arr". Normally, all 01801 /// lines will begin with "prefix" (counted against "width"), 01802 /// but the first line will instead begin with "prefix1" if 01803 /// you supply it. 01804 /// 01805 /// @param str 01806 /// String to be split into wrapped lines. 01807 /// @param width 01808 /// Width of each wrapped line. 01809 /// @param arr 01810 /// List of strings containing wrapped lines. 01811 /// @param flags 01812 /// How to wrap the words to a new line. See EWrapFlags documentation. 01813 /// @param prefix 01814 /// The prefix string added to each wrapped line, except the first line, 01815 /// unless "prefix1" is set. 01816 /// If "prefix" is set to 0, do not add a prefix string to the wrapped 01817 /// lines. 01818 /// @param prefix1 01819 /// The prefix string for the first line. Use this for the first line 01820 /// instead of "prefix". 01821 /// If "prefix1" is set to 0(default), do not add a prefix string to the 01822 /// first line. 01823 /// @return 01824 /// Return "arr", the list of wrapped lines. 01825 static list<string>& Wrap(const string& str, SIZE_TYPE width, 01826 list<string>& arr, TWrapFlags flags, 01827 const string& prefix, const string* prefix1 = 0); 01828 01829 /// Wrap the specified string into lines of a specified width. 01830 /// 01831 /// Split string "str" into lines of width "width" and add the 01832 /// resulting lines to the list "arr". Normally, all 01833 /// lines will begin with "prefix" (counted against "width"), 01834 /// but the first line will instead begin with "prefix1" if 01835 /// you supply it. 
01836 /// 01837 /// @param str 01838 /// String to be split into wrapped lines. 01839 /// @param width 01840 /// Width of each wrapped line. 01841 /// @param arr 01842 /// List of strings containing wrapped lines. 01843 /// @param flags 01844 /// How to wrap the words to a new line. See EWrapFlags documentation. 01845 /// @param prefix 01846 /// The prefix string added to each wrapped line, except the first line, 01847 /// unless "prefix1" is set. 01848 /// If "prefix" is set to 0, do not add a prefix string to the wrapped 01849 /// lines. 01850 /// @param prefix1 01851 /// The prefix string for the first line. Use this for the first line 01852 /// instead of "prefix". 01853 /// If "prefix1" is set to 0, do not add a prefix string to the first 01854 /// line. 01855 /// @return 01856 /// Return "arr", the list of wrapped lines. 01857 static list<string>& Wrap(const string& str, SIZE_TYPE width, 01858 list<string>& arr, TWrapFlags flags, 01859 const string& prefix, const string& prefix1); 01860 01861 01862 /// Wrap the list using the specified criteria -- default prefix, 01863 /// prefix1 version. 01864 /// 01865 /// WrapList() is similar to Wrap(), but tries to avoid splitting any 01866 /// elements of the list to be wrapped. Also, the "delim" only applies 01867 /// between elements on the same line; if you want everything to end with 01868 /// commas or such, you should add them first. 01869 /// 01870 /// @param l 01871 /// The list to be wrapped. 01872 /// @param width 01873 /// Width of each wrapped line. 01874 /// @param delim 01875 /// Delimiters used to split elements on the same line. 01876 /// @param arr 01877 /// List containing the wrapped list result. 01878 /// @param flags 01879 /// How to wrap the words to a new line. See EWrapFlags documentation. 01880 /// @param prefix 01881 /// The prefix string added to each wrapped line, except the first line, 01882 /// unless "prefix1" is set. 
01883 /// If "prefix" is set to 0(default), do not add a prefix string to the 01884 /// wrapped lines. 01885 /// @param prefix1 01886 /// The prefix string for the first line. Use this for the first line 01887 /// instead of "prefix". 01888 /// If "prefix1" is set to 0(default), do not add a prefix string to the 01889 /// first line. 01890 /// @return 01891 /// Return "arr", the wrapped list. 01892 static list<string>& WrapList(const list<string>& l, SIZE_TYPE width, 01893 const string& delim, list<string>& arr, 01894 TWrapFlags flags = 0, 01895 const string* prefix = 0, 01896 const string* prefix1 = 0); 01897 01898 /// Wrap the list using the specified criteria -- default prefix1 version. 01899 /// 01900 /// WrapList() is similar to Wrap(), but tries to avoid splitting any 01901 /// elements of the list to be wrapped. Also, the "delim" only applies 01902 /// between elements on the same line; if you want everything to end with 01903 /// commas or such, you should add them first. 01904 /// 01905 /// @param l 01906 /// The list to be wrapped. 01907 /// @param width 01908 /// Width of each wrapped line. 01909 /// @param delim 01910 /// Delimiters used to split elements on the same line. 01911 /// @param arr 01912 /// List containing the wrapped list result. 01913 /// @param flags 01914 /// How to wrap the words to a new line. See EWrapFlags documentation. 01915 /// @param prefix 01916 /// The prefix string added to each wrapped line, except the first line, 01917 /// unless "prefix1" is set. 01918 /// If "prefix" is set to 0, do not add a prefix string to the 01919 /// wrapped lines. 01920 /// @param prefix1 01921 /// The prefix string for the first line. Use this for the first line 01922 /// instead of "prefix". 01923 /// If "prefix1" is set to 0(default), do not add a prefix string to the 01924 /// first line. 01925 /// @return 01926 /// Return "arr", the wrappe list. 
01927 static list<string>& WrapList(const list<string>& l, SIZE_TYPE width, 01928 const string& delim, list<string>& arr, 01929 TWrapFlags flags, const string& prefix, 01930 const string* prefix1 = 0); 01931 01932 /// Wrap the list using the specified criteria. 01933 /// 01934 /// WrapList() is similar to Wrap(), but tries to avoid splitting any 01935 /// elements of the list to be wrapped. Also, the "delim" only applies 01936 /// between elements on the same line; if you want everything to end with 01937 /// commas or such, you should add them first. 01938 /// 01939 /// @param l 01940 /// The list to be wrapped. 01941 /// @param width 01942 /// Width of each wrapped line. 01943 /// @param delim 01944 /// Delimiters used to split elements on the same line. 01945 /// @param arr 01946 /// List containing the wrapped list result. 01947 /// @param flags 01948 /// How to wrap the words to a new line. See EWrapFlags documentation. 01949 /// @param prefix 01950 /// The prefix string added to each wrapped line, except the first line, 01951 /// unless "prefix1" is set. 01952 /// If "prefix" is set to 0, do not add a prefix string to the 01953 /// wrapped lines. 01954 /// @param prefix1 01955 /// The prefix string for the first line. Use this for the first line 01956 /// instead of "prefix". 01957 /// If "prefix1" is set to 0, do not add a prefix string to the 01958 /// first line. 01959 /// @return 01960 /// Return "arr", the wrapped list. 01961 static list<string>& WrapList(const list<string>& l, SIZE_TYPE width, 01962 const string& delim, list<string>& arr, 01963 TWrapFlags flags, const string& prefix, 01964 const string& prefix1); 01965 01966 /// Search for a field 01967 /// 01968 /// @param str 01969 /// C or C++ string to search in. 01970 /// @param field_no 01971 /// Zero based field number. 01972 /// @param delimiters 01973 /// Single character delimiters. 01974 /// @param merge 01975 /// Whether to merge or not adjacent delimiters. Default: not to merge. 
01976 /// @return 01977 /// Found field; or empty string if the required field is not found. 01978 static string GetField(const CTempString& str, 01979 size_t field_no, 01980 const CTempString& delimiters, 01981 EMergeDelims merge = eNoMergeDelims); 01982 01983 /// Search for a field 01984 /// 01985 /// @param str 01986 /// C or C++ string to search in. 01987 /// @param field_no 01988 /// Zero based field number. 01989 /// @param delimiter 01990 /// Single character delimiter. 01991 /// @param merge 01992 /// Whether to merge or not adjacent delimiters. Default: not to merge. 01993 /// @return 01994 /// Found field; or empty string if the required field is not found. 01995 static string GetField(const CTempString& str, 01996 size_t field_no, 01997 char delimiter, 01998 EMergeDelims merge = eNoMergeDelims); 01999 02000 /// Search for a field 02001 /// Avoid memory allocation at the expence of some usage safety. 02002 /// 02003 /// @param str 02004 /// C or C++ string to search in. 02005 /// @param field_no 02006 /// Zero based field number. 02007 /// @param delimiters 02008 /// Single character delimiters. 02009 /// @param merge 02010 /// Whether to merge or not adjacent delimiters. Default: not to merge. 02011 /// @return 02012 /// Found field; or empty string if the required field is not found. 02013 /// @warning 02014 /// The return value stores a pointer to the input string 'str' so 02015 /// the return object validity time matches lifetime of the input 'str' 02016 static 02017 CTempString GetField_Unsafe(const CTempString& str, 02018 size_t field_no, 02019 const CTempString& delimiters, 02020 EMergeDelims merge = eNoMergeDelims); 02021 02022 /// Search for a field. 02023 /// Avoid memory allocation at the expence of some usage safety. 02024 /// 02025 /// @param str 02026 /// C or C++ string to search in. 02027 /// @param field_no 02028 /// Zero-based field number. 02029 /// @param delimiters 02030 /// Single character delimiter. 
02031 /// @param merge 02032 /// Whether to merge or not adjacent delimiters. Default: not to merge. 02033 /// @return 02034 /// Found field; or empty string if the required field is not found. 02035 /// @warning 02036 /// The return value stores a pointer to the input string 'str' so 02037 /// the return object validity time matches lifetime of the input 'str' 02038 static 02039 CTempString GetField_Unsafe(const CTempString& str, 02040 size_t field_no, 02041 char delimiter, 02042 EMergeDelims merge = eNoMergeDelims); 02043 02044 }; // class NStr 02045 02046 02047 /// Type for character in UCS-2 encoding 02048 typedef Uint2 TCharUCS2; 02049 /// Type for string in UCS-2 encoding 02050 typedef basic_string<TCharUCS2> TStringUCS2; 02051 02052 02053 02054 ///////////////////////////////////////////////////////////////////////////// 02055 /// 02056 /// CStringUTF8 -- 02057 /// 02058 /// An UTF-8 string. 02059 /// Stores character data in UTF-8 encoding form. 02060 /// Being initialized, converts source characters into UTF-8. 02061 /// Can convert data back into a particular encoding form (non-UTF8) 02062 /// Supported encodings: 02063 /// ISO 8859-1 (Latin1) 02064 /// Microsoft Windows code page 1252 02065 /// UCS-2, UCS-4 (no surrogates) 02066 02067 enum EEncoding { 02068 eEncoding_Unknown, 02069 eEncoding_UTF8, 02070 eEncoding_Ascii, 02071 eEncoding_ISO8859_1, 02072 eEncoding_Windows_1252 02073 }; 02074 typedef Uint4 TUnicodeSymbol; 02075 02076 class CStringUTF8 : public string 02077 { 02078 public: 02079 enum EValidate { 02080 eNoValidate, 02081 eValidate 02082 }; 02083 02084 /// Default constructor. 02085 CStringUTF8(void) 02086 { 02087 } 02088 02089 /// Destructor. 02090 ~CStringUTF8(void) 02091 { 02092 } 02093 02094 /// Copy constructor. 
02095 /// 02096 /// @param src 02097 /// Source UTF-8 string 02098 /// @param validate 02099 /// Verify that the source character encoding is really UTF-8 02100 CStringUTF8(const CStringUTF8& src, EValidate validate = eNoValidate) 02101 : string(src) 02102 { 02103 if (validate == eValidate) { 02104 x_Validate(); 02105 } 02106 } 02107 02108 /// Constructor from a C++ string 02109 /// 02110 /// @param src 02111 /// Source string 02112 /// @param encoding 02113 /// Character encoding of the source string 02114 /// @param validate 02115 /// Verify the character encoding of the source 02116 CStringUTF8(const string& src, 02117 EEncoding encoding = eEncoding_ISO8859_1, 02118 EValidate validate = eNoValidate) 02119 : string() 02120 { 02121 x_Append(src.c_str(), encoding, validate); 02122 } 02123 02124 /// Constructor from a C string 02125 /// 02126 /// @param src 02127 /// Source zero-terminated character buffer 02128 /// @param encoding 02129 /// Character encoding of the source string 02130 /// @param validate 02131 /// Verify the character encoding of the source 02132 CStringUTF8(const char* src, 02133 EEncoding encoding = eEncoding_ISO8859_1, 02134 EValidate validate = eNoValidate) 02135 : string() 02136 { 02137 x_Append(src, encoding, validate); 02138 } 02139 02140 /// Constructor from any string (ISO8859-1, USC-2 or USC-4, 02141 /// depending on the size of TChar). 02142 template <class T> 02143 CStringUTF8(const basic_string<T>& src) 02144 : string() 02145 { 02146 x_Append(src.c_str()); 02147 } 02148 02149 /// Constructor from any character sequence (ISO8859-1, USC-2 or USC-4, 02150 /// depending on the size of TChar). 
02151 template <typename TChar> 02152 CStringUTF8(const TChar* src) 02153 : string() 02154 { 02155 x_Append(src); 02156 } 02157 02158 /// Assign to UTF8 string 02159 CStringUTF8& operator= (const CStringUTF8& src) 02160 { 02161 string::operator= (src); 02162 return *this; 02163 } 02164 02165 /// Assign to C++ string in ISO8859-1, USC-2 or USC-4 (depending on the 02166 /// size of TChar) 02167 template <typename TChar> 02168 CStringUTF8& operator= (const basic_string<TChar>& src) 02169 { 02170 erase(); 02171 x_Append(src.c_str()); 02172 return *this; 02173 } 02174 02175 /// Assign to C string in ISO8859-1, USC-2 or USC-4 (depending on the 02176 /// size of TChar) 02177 template <typename TChar> 02178 CStringUTF8& operator= (const TChar* src) 02179 { 02180 erase(); 02181 x_Append(src); 02182 return *this; 02183 } 02184 02185 /// Append a string in UTF8 encoding 02186 CStringUTF8& operator+= (const CStringUTF8& src) 02187 { 02188 string::operator+= (src); 02189 return *this; 02190 } 02191 02192 /// Append a C++ string in ISO8859-1, USC-2 or USC-4 (depending on the 02193 /// size of TChar) 02194 template <typename TChar> 02195 CStringUTF8& operator+= (const basic_string<TChar>& src) 02196 { 02197 x_Append(src.c_str()); 02198 return *this; 02199 } 02200 02201 /// Append a C string in ISO8859-1, USC-2 or USC-4 (depending on the 02202 /// size of TChar) 02203 template <typename TChar> 02204 CStringUTF8& operator+= (const TChar* src) 02205 { 02206 x_Append(src); 02207 return *this; 02208 } 02209 02210 /// Assign to C++ string 02211 /// 02212 /// @param src 02213 /// Source string 02214 /// @param encoding 02215 /// Character encoding of the source string 02216 /// @param validate 02217 /// Verify the character encoding of the source 02218 CStringUTF8& Assign(const string& src, 02219 EEncoding encoding, 02220 EValidate validate = eNoValidate) 02221 { 02222 erase(); 02223 x_Append(src.c_str(), encoding, validate); 02224 return *this; 02225 } 02226 02227 /// Assign to C 
string 02228 /// 02229 /// @param src 02230 /// Source zero-terminated character buffer 02231 /// @param encoding 02232 /// Character encoding of the source string 02233 /// @param validate 02234 /// Verify the character encoding of the source 02235 CStringUTF8& Assign(const char* src, 02236 EEncoding encoding, 02237 EValidate validate = eNoValidate) 02238 { 02239 erase(); 02240 x_Append(src, encoding, validate); 02241 return *this; 02242 } 02243 02244 /// Assign to C++ string in ISO8859-1, USC-2 or USC-4 (depending on the 02245 /// size of TChar) 02246 /// 02247 /// @param src 02248 /// Source string 02249 template <typename TChar> 02250 CStringUTF8& Assign(const basic_string<TChar>& src) 02251 { 02252 erase(); 02253 x_Append(src.c_str()); 02254 return *this; 02255 } 02256 02257 /// Assign to C string in ISO8859-1, USC-2 or USC-4 (depending on the 02258 /// size of TChar) 02259 /// 02260 /// @param src 02261 /// Source zero-terminated character buffer 02262 template <typename TChar> 02263 CStringUTF8& Assign(const TChar* src) 02264 { 02265 erase(); 02266 x_Append(src); 02267 return *this; 02268 } 02269 02270 /// Assign to a single character 02271 /// 02272 /// @param ch 02273 /// Character 02274 /// @param encoding 02275 /// Character encoding 02276 CStringUTF8& Assign(char ch, 02277 EEncoding encoding) 02278 { 02279 erase(); 02280 x_AppendChar( CharToSymbol( ch, encoding ) ); 02281 return *this; 02282 } 02283 02284 /// Append a C++ string 02285 /// 02286 /// @param src 02287 /// Source string 02288 /// @param encoding 02289 /// Character encoding of the source string 02290 /// @param validate 02291 /// Verify the character encoding of the source 02292 CStringUTF8& Append(const string& src, 02293 EEncoding encoding, 02294 EValidate validate = eNoValidate) 02295 { 02296 x_Append(src.c_str(), encoding, validate); 02297 return *this; 02298 } 02299 02300 /// Append a C string 02301 /// 02302 /// @param src 02303 /// Source zero-terminated character buffer 02304 /// 
@param encoding 02305 /// Character encoding of the source string 02306 /// @param validate 02307 /// Verify the character encoding of the source 02308 CStringUTF8& Append(const char* src, 02309 EEncoding encoding, 02310 EValidate validate = eNoValidate) 02311 { 02312 x_Append(src, encoding, validate); 02313 return *this; 02314 } 02315 02316 /// Append a C++ string in ISO8859-1, USC-2 or USC-4 (depending on the 02317 /// size of TChar) 02318 /// 02319 /// @param src 02320 /// Source string 02321 template <typename TChar> 02322 CStringUTF8& Append(const basic_string<TChar>& src) 02323 { 02324 x_Append(src.c_str()); 02325 return *this; 02326 } 02327 02328 /// Append a C string in ISO8859-1, USC-2 or USC-4 (depending on the 02329 /// size of TChar) 02330 /// 02331 /// @param src 02332 /// Source zero-terminated character buffer 02333 template <typename TChar> 02334 CStringUTF8& Append(const TChar* src) 02335 { 02336 x_Append(src); 02337 return *this; 02338 } 02339 02340 /// Append single character 02341 /// 02342 /// @param ch 02343 /// Character 02344 /// @param encoding 02345 /// Character encoding 02346 CStringUTF8& Append(char ch, 02347 EEncoding encoding) 02348 { 02349 x_AppendChar( CharToSymbol( ch, encoding ) ); 02350 return *this; 02351 } 02352 02353 /// Append single Unicode code point 02354 /// 02355 /// @param ch 02356 /// Unicode code point 02357 CStringUTF8& Append(TUnicodeSymbol ch) 02358 { 02359 x_AppendChar(ch); 02360 return *this; 02361 } 02362 02363 /// Get the number of symbols (code points) in the string 02364 /// 02365 /// @return 02366 /// Number of symbols (code points) 02367 SIZE_TYPE GetSymbolCount(void) const; 02368 02369 /// Get the number of valid UTF-8 symbols (code points) in the buffer 02370 /// 02371 /// @param src 02372 /// Character buffer 02373 /// @param buf_size 02374 /// The number of bytes in the buffer 02375 /// @return 02376 /// Number of valid symbols (no exception thrown) 02377 static SIZE_TYPE GetValidSymbolCount(const char* 
src, SIZE_TYPE buf_size); 02378 02379 /// Get the number of valid UTF-8 bytes (code units) in the buffer 02380 /// 02381 /// @param src 02382 /// Character buffer 02383 /// @param buf_size 02384 /// The number of bytes in the buffer 02385 /// @return 02386 /// Number of valid bytes (no exception thrown) 02387 static SIZE_TYPE GetValidBytesCount(const char* src, SIZE_TYPE buf_size); 02388 02389 /// Check that the character encoding of the string is valid UTF-8 02390 /// 02391 /// @return 02392 /// Result of the check 02393 bool IsValid(void) const 02394 { 02395 return MatchEncoding(c_str(), eEncoding_UTF8); 02396 } 02397 /// Convert to ISO 8859-1 (Latin1) character representation 02398 /// 02399 /// Can throw a CStringException if the conversion is impossible 02400 /// or the string has invalid UTF-8 format. 02401 /// @param substitute_on_error 02402 /// If the conversion is impossible, append the provided string 02403 /// or, if substitute_on_error equals 0, throw the exception 02404 string AsLatin1(const char* substitute_on_error = 0) const 02405 { 02406 return AsSingleByteString(eEncoding_ISO8859_1,substitute_on_error); 02407 } 02408 02409 /// Convert the string to a single-byte character representation 02410 /// 02411 /// Can throw a CStringException if the conversion is impossible 02412 /// or the string has invalid UTF-8 format. 02413 /// @param encoding 02414 /// Desired encoding 02415 /// @param substitute_on_error 02416 /// If the conversion is impossible, append the provided string 02417 /// or, if substitute_on_error equals 0, throw the exception 02418 /// @return 02419 /// C++ string 02420 string AsSingleByteString(EEncoding encoding, 02421 const char* substitute_on_error = 0) const; 02422 02423 #if defined(HAVE_WSTRING) 02424 /// Convert to Unicode (UCS-2 with no surrogates where 02425 /// sizeof(wchar_t) == 2 and UCS-4 where sizeof(wchar_t) == 4). 
02426 /// 02427 /// Can throw a CStringException if the conversion is impossible 02428 /// or the string has invalid UTF-8 format. 02429 /// Defined only if wstring is supported by the compiler. 02430 /// 02431 /// @param substitute_on_error 02432 /// If the conversion is impossible, append the provided string 02433 /// or, if substitute_on_error equals 0, throw the exception 02434 wstring AsUnicode(const wchar_t* substitute_on_error = 0) const 02435 { 02436 return x_AsBasicString<wchar_t>(substitute_on_error); 02437 } 02438 #endif // HAVE_WSTRING 02439 02440 /// Convert to UCS-2 for all platforms 02441 /// 02442 /// Can throw a CStringException if the conversion is impossible 02443 /// or the string has invalid UTF-8 format. 02444 /// 02445 /// @param substitute_on_error 02446 /// If the conversion is impossible, append the provided string 02447 /// or, if substitute_on_error equals 0, throw the exception 02448 TStringUCS2 AsUCS2(const TCharUCS2* substitute_on_error = 0) const 02449 { 02450 return x_AsBasicString<TCharUCS2>(substitute_on_error); 02451 } 02452 02453 /// Guess the encoding of the C string 02454 /// 02455 /// It can distinguish between UTF-8, Latin1, and Win1252 only 02456 /// @param src 02457 /// Source zero-terminated character buffer 02458 /// @return 02459 /// Encoding 02460 static EEncoding GuessEncoding( const char* src); 02461 02462 /// Guess the encoding of the C++ string 02463 /// 02464 /// It can distinguish between UTF-8, Latin1, and Win1252 only 02465 /// @param src 02466 /// Source string 02467 /// @return 02468 /// Encoding 02469 static EEncoding GuessEncoding( const string& src) 02470 { 02471 return GuessEncoding( src.c_str()); 02472 } 02473 02474 /// Check the encoding of the C string 02475 /// 02476 /// Check that the encoding of the source is the same, or 02477 /// is compatible with the specified one 02478 /// @param src 02479 /// Source string 02480 /// @param encoding 02481 /// Character encoding form to check against 02482 /// 
/// @return
    ///   Boolean result: encoding is same or compatible
    static bool MatchEncoding( const char* src, EEncoding encoding);

    /// Check the encoding of the C++ string
    ///
    /// Check that the encoding of the source is the same as, or
    /// is compatible with, the specified one. Forwards to the C-string
    /// overload using src.c_str().
    /// @param src
    ///   Source string
    /// @param encoding
    ///   Character encoding form to check against
    /// @return
    ///   Boolean result: encoding is same or compatible
    static bool MatchEncoding( const string& src, EEncoding encoding)
    {
        return MatchEncoding( src.c_str(), encoding);
    }

    /// Convert encoded character into Unicode code point
    ///
    /// @param ch
    ///   Encoded character
    /// @param encoding
    ///   Character encoding
    /// @return
    ///   Code point
    static TUnicodeSymbol CharToSymbol(char ch, EEncoding encoding);

    /// Convert Unicode code point into encoded character
    ///
    /// @param sym
    ///   Code point
    /// @param encoding
    ///   Character encoding
    /// @return
    ///   Encoded character
    static char SymbolToChar(TUnicodeSymbol sym, EEncoding encoding);

    /// Convert sequence of UTF8 code units into Unicode code point
    ///
    /// @param src
    ///   UTF8 zero-terminated buffer. Taken by non-const reference.
    ///   NOTE(review): presumably left pointing at the last code unit
    ///   consumed (x_AsBasicString increments it once after each call) --
    ///   confirm against the implementation.
    /// @return
    ///   Unicode code point
    static TUnicodeSymbol Decode(const char*& src);

    /// Convert first character of UTF8 sequence into Unicode
    ///
    /// @param ch
    ///   character
    /// @param more
    ///   if the character is valid, - how many more characters to expect
    /// @return
    ///   non-zero, if the character is valid
    static TUnicodeSymbol DecodeFirst(char ch, SIZE_TYPE& more);

    /// Convert next character of UTF8 sequence into Unicode
    ///
    /// @param chU
    ///   Unicode character
    /// @param ch
    ///   character
    /// @return
    ///   non-zero, if the character is valid
    static TUnicodeSymbol DecodeNext(TUnicodeSymbol chU, char ch);

private:
    /// Function AsAscii is deprecated - use AsLatin1() instead.
    /// Kept private and implemented as a plain forward to AsLatin1().
    string AsAscii(void) const
    {
        return AsLatin1();
    }

    /// Conversion to basic_string with any base type we need
    ///
    /// @param substitute_on_error
    ///   Sequence appended in place of a symbol that does not fit into
    ///   TChar; if NULL, a CStringException is thrown instead.
    template <typename TChar>
    basic_string<TChar> x_AsBasicString(const TChar* substitute_on_error) const;

    /// NOTE(review): presumably checks that the stored content is valid
    /// UTF8 -- the implementation is not visible here, confirm.
    void x_Validate(void) const;
    /// Convert Unicode code point into UTF8 and append
    void x_AppendChar(TUnicodeSymbol ch);
    /// Convert coded character sequence into UTF8 and append
    void x_Append(const char* src,
                  EEncoding encoding = eEncoding_ISO8859_1,
                  EValidate validate = eNoValidate);

    /// Convert Unicode character sequence into UTF8 and append
    /// Sequence can be in UCS-4 (TChar == (U)Int4), UCS-2 (TChar == (U)Int2)
    /// or in ISO8859-1 (TChar == char)
    template <typename TChar>
    void x_Append(const TChar* src);

    /// Check how many bytes are needed to represent the code point in UTF8
    static SIZE_TYPE x_BytesNeeded(TUnicodeSymbol ch);
    /// Check if the character is a valid first code unit of UTF8
    static bool   x_EvalFirst(char ch, SIZE_TYPE& more);
    /// Check if the character is a valid non-first code unit of UTF8
    static bool   x_EvalNext(char ch);
};



/////////////////////////////////////////////////////////////////////////////
///
/// CParseTemplException --
///
/// Define template class for parsing exception. This class is used to define
/// exceptions for complex parsing tasks and includes an additional m_Pos
/// data member. The constructor requires that an additional positional
/// parameter be supplied along with the description message.
02592 02593 template <class TBase> 02594 class CParseTemplException : EXCEPTION_VIRTUAL_BASE public TBase 02595 { 02596 public: 02597 /// Error types that for exception class. 02598 enum EErrCode { 02599 eErr ///< Generic error 02600 }; 02601 02602 /// Translate from the error code value to its string representation. 02603 virtual const char* GetErrCodeString(void) const 02604 { 02605 switch (GetErrCode()) { 02606 case eErr: return "eErr"; 02607 default: return CException::GetErrCodeString(); 02608 } 02609 } 02610 02611 /// Constructor. 02612 /// 02613 /// Report "pos" along with "what". 02614 CParseTemplException(const CDiagCompileInfo &info, 02615 const CException* prev_exception, 02616 EErrCode err_code,const string& message, 02617 string::size_type pos, EDiagSev severity = eDiag_Error) 02618 : TBase(info, prev_exception, 02619 (typename TBase::EErrCode)(CException::eInvalid), 02620 message), m_Pos(pos) 02621 { 02622 this->x_Init(info, 02623 string("{") + NStr::UIntToString((unsigned long)m_Pos) + 02624 "} " + message, 02625 prev_exception, 02626 severity); 02627 this->x_InitErrCode((CException::EErrCode) err_code); 02628 } 02629 02630 /// Constructor. 02631 CParseTemplException(const CParseTemplException<TBase>& other) 02632 : TBase(other) 02633 { 02634 m_Pos = other.m_Pos; 02635 x_Assign(other); 02636 } 02637 02638 /// Destructor. 02639 virtual ~CParseTemplException(void) throw() {} 02640 02641 /// Report error position. 02642 virtual void ReportExtra(ostream& out) const 02643 { 02644 out << "m_Pos = " << (unsigned long)m_Pos; 02645 } 02646 02647 // Attributes. 02648 02649 /// Get exception class type. 02650 virtual const char* GetType(void) const { return "CParseTemplException"; } 02651 02652 /// Get error code. 02653 EErrCode GetErrCode(void) const 02654 { 02655 return typeid(*this) == typeid(CParseTemplException<TBase>) ? 
02656 (typename CParseTemplException<TBase>::EErrCode) 02657 this->x_GetErrCode() : 02658 (typename CParseTemplException<TBase>::EErrCode) 02659 CException::eInvalid; 02660 } 02661 02662 /// Get error position. 02663 string::size_type GetPos(void) const throw() { return m_Pos; } 02664 02665 protected: 02666 /// Constructor. 02667 CParseTemplException(void) 02668 { 02669 m_Pos = 0; 02670 } 02671 02672 /// Helper clone method. 02673 virtual const CException* x_Clone(void) const 02674 { 02675 return new CParseTemplException<TBase>(*this); 02676 } 02677 02678 private: 02679 string::size_type m_Pos; ///< Error position 02680 }; 02681 02682 02683 ///////////////////////////////////////////////////////////////////////////// 02684 /// 02685 /// CStringException -- 02686 /// 02687 /// Define exceptions generated by string classes. 02688 /// 02689 /// CStringException inherits its basic functionality from 02690 /// CParseTemplException<CCoreException> and defines additional error codes 02691 /// for string parsing. 02692 02693 class CStringException : public CParseTemplException<CCoreException> 02694 { 02695 public: 02696 /// Error types that string classes can generate. 02697 enum EErrCode { 02698 eConvert, ///< Failure to convert string 02699 eBadArgs, ///< Bad arguments to string methods 02700 eFormat ///< Wrong format for any input to string methods 02701 }; 02702 02703 /// Translate from the error code value to its string representation. 02704 virtual const char* GetErrCodeString(void) const; 02705 02706 // Standard exception boilerplate code. 02707 NCBI_EXCEPTION_DEFAULT2(CStringException, 02708 CParseTemplException<CCoreException>, std::string::size_type); 02709 }; 02710 02711 02712 02713 ///////////////////////////////////////////////////////////////////////////// 02714 /// 02715 /// CStringPairsParser -- 02716 /// 02717 /// Base class for parsing a string to a set of name-value pairs. 02718 02719 02720 /// Decoder interface. 
Names and values can be decoded with different rules. 02721 class IStringDecoder 02722 { 02723 public: 02724 /// Type of string to be decoded 02725 enum EStringType { 02726 eName, 02727 eValue 02728 }; 02729 /// Decode the string. Must throw CStringException if the source string 02730 /// is not valid. 02731 virtual string Decode(const string& src, EStringType stype) const = 0; 02732 virtual ~IStringDecoder(void) {} 02733 }; 02734 02735 02736 /// Encoder interface. Names and values can be encoded with different rules. 02737 class IStringEncoder 02738 { 02739 public: 02740 /// Type of string to be decoded 02741 enum EStringType { 02742 eName, 02743 eValue 02744 }; 02745 /// Encode the string. 02746 virtual string Encode(const string& src, EStringType stype) const = 0; 02747 virtual ~IStringEncoder(void) {} 02748 }; 02749 02750 02751 /// URL-decoder for string pairs parser 02752 class CStringDecoder_Url : public IStringDecoder 02753 { 02754 public: 02755 CStringDecoder_Url(NStr::EUrlDecode flag = NStr::eUrlDec_All); 02756 02757 virtual string Decode(const string& src, EStringType stype) const; 02758 02759 private: 02760 NStr::EUrlDecode m_Flag; 02761 }; 02762 02763 02764 /// URL-encoder for string pairs parser 02765 class CStringEncoder_Url : public IStringEncoder 02766 { 02767 public: 02768 CStringEncoder_Url(NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars); 02769 02770 virtual string Encode(const string& src, EStringType stype) const; 02771 02772 private: 02773 NStr::EUrlEncode m_Flag; 02774 }; 02775 02776 02777 /// Template for parsing string into pairs of name and value or merging 02778 /// them back into a single string. 02779 /// The container class must hold pairs of strings (pair<string, string>). 02780 template<class TContainer> 02781 class CStringPairs 02782 { 02783 public: 02784 typedef TContainer TStrPairs; 02785 /// The container's value type must be pair<string, string> 02786 /// or a compatible type. 
02787 typedef typename TContainer::value_type TStrPair; 02788 02789 /// Create parser with the specified decoder/encoder and default separators. 02790 /// 02791 /// @param decoder 02792 /// String decoder (Url, Xml etc.) 02793 /// @param own_decoder 02794 /// Decoder ownership flag 02795 /// @param decoder 02796 /// String encoder (Url, Xml etc.), optional 02797 /// @param own_encoder 02798 /// Encoder ownership flag, optional 02799 CStringPairs(IStringDecoder* decoder = NULL, 02800 EOwnership own_decoder = eTakeOwnership, 02801 IStringEncoder* encoder = NULL, 02802 EOwnership own_encoder = eTakeOwnership) 02803 : m_ArgSep("&"), 02804 m_ValSep("="), 02805 m_Decoder(decoder, own_decoder), 02806 m_Encoder(encoder, own_encoder) 02807 { 02808 } 02809 02810 /// Create parser with the specified parameters. 02811 /// 02812 /// @param arg_sep 02813 /// Separator between name+value pairs 02814 /// @param val_sep 02815 /// Separator between name and value 02816 /// @param decoder 02817 /// String decoder (Url, Xml etc.) 02818 /// @param own_decoder 02819 /// Decoder ownership flag 02820 /// @param encoder 02821 /// String encoder (Url, Xml etc.) 02822 /// @param own_encoder 02823 /// Encoder ownership flag 02824 CStringPairs(const string& arg_sep, 02825 const string& val_sep, 02826 IStringDecoder* decoder = NULL, 02827 EOwnership own_decoder = eTakeOwnership, 02828 IStringEncoder* encoder = NULL, 02829 EOwnership own_encoder = eTakeOwnership) 02830 : m_ArgSep(arg_sep), 02831 m_ValSep(val_sep), 02832 m_Decoder(decoder, own_decoder), 02833 m_Encoder(encoder, own_encoder) 02834 { 02835 } 02836 02837 /// Create parser with the selected URL-encoding/decoding options 02838 /// and default separators. 
02839 /// 02840 /// @param decode_flag 02841 /// URL-decoding flag 02842 /// @param encode_flag 02843 /// URL-encoding flag 02844 CStringPairs(NStr::EUrlDecode decode_flag, 02845 NStr::EUrlEncode encode_flag) 02846 : m_ArgSep("&"), 02847 m_ValSep("="), 02848 m_Decoder(new CStringDecoder_Url(decode_flag), eTakeOwnership), 02849 m_Encoder(new CStringEncoder_Url(encode_flag), eTakeOwnership) 02850 { 02851 } 02852 02853 virtual ~CStringPairs(void) {} 02854 02855 /// Set string decoder. 02856 /// 02857 /// @param decoder 02858 /// String decoder (Url, Xml etc.) 02859 /// @param own 02860 /// Decoder ownership flag 02861 void SetDecoder(IStringDecoder* decoder, EOwnership own = eTakeOwnership) 02862 { m_Decoder.reset(decoder, own); } 02863 /// Get decoder or NULL. Does not affect decoder ownership. 02864 IStringDecoder* GetDecoder(void) { return m_Decoder.get(); } 02865 02866 /// Set string encoder. 02867 /// 02868 /// @param encoder 02869 /// String encoder (Url, Xml etc.) 02870 /// @param own 02871 /// Encoder ownership flag 02872 void SetEncoder(IStringEncoder* encoder, EOwnership own = eTakeOwnership) 02873 { m_Encoder.reset(encoder, own); } 02874 /// Get encoder or NULL. Does not affect encoder ownership. 02875 IStringDecoder* GetEncoder(void) { return m_Encoder.get(); } 02876 02877 /// Parse the string. 02878 /// 02879 /// @param str 02880 /// String to parse. The parser assumes the string is formatted like 02881 /// "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and 02882 /// value is passed to the decoder (if not NULL) before storing the pair. 02883 /// @param merge_argsep 02884 /// Flag for merging separators between pairs. By default the separators 02885 /// are merged to prevent pairs where both name and value are empty. 
02886 void Parse(const CTempString& str, 02887 NStr::EMergeDelims merge_argsep = NStr::eMergeDelims) 02888 { 02889 Parse(m_Data, str, m_ArgSep, m_ValSep, 02890 m_Decoder.get(), eNoOwnership, merge_argsep); 02891 } 02892 02893 /// Parse the string using the provided decoder, put data into the 02894 /// container. 02895 /// 02896 /// @param pairs 02897 /// Container to be filled with the parsed name/value pairs 02898 /// @param str 02899 /// String to parse. The parser assumes the string is formatted like 02900 /// "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and 02901 /// value is passed to the decoder (if not NULL) before storing the pair. 02902 /// @param decoder 02903 /// String decoder (Url, Xml etc.) 02904 /// @param own 02905 /// Flag indicating if the decoder must be deleted by the function. 02906 /// @param merge_argsep 02907 /// Flag for merging separators between pairs. By default the separators 02908 /// are merged to prevent pairs where both name and value are empty. 
02909 static void Parse(TStrPairs& pairs, 02910 const CTempString& str, 02911 const string& arg_sep, 02912 const string& val_sep, 02913 IStringDecoder* decoder = NULL, 02914 EOwnership own = eTakeOwnership, 02915 NStr::EMergeDelims merge_argsep = NStr::eMergeDelims) 02916 { 02917 AutoPtr<IStringDecoder> decoder_guard(decoder, own); 02918 list<string> lst; 02919 NStr::Split(str, arg_sep, lst, merge_argsep); 02920 pairs.clear(); 02921 ITERATE(list<string>, it, lst) { 02922 string name, val; 02923 NStr::SplitInTwo(*it, val_sep, name, val); 02924 if ( decoder ) { 02925 try { 02926 name = decoder->Decode(name, IStringDecoder::eName); 02927 val = decoder->Decode(val, IStringDecoder::eValue); 02928 } 02929 catch (CStringException) { 02930 // Discard all data 02931 pairs.clear(); 02932 throw; 02933 } 02934 } 02935 pairs.insert(pairs.end(), TStrPair(name, val)); 02936 } 02937 } 02938 02939 /// Merge name-value pairs into a single string using the currently set 02940 /// separators and the provided encoder if any. 02941 string Merge(void) const 02942 { 02943 return Merge(m_Data, m_ArgSep, m_ValSep, 02944 m_Encoder.get(), eNoOwnership); 02945 } 02946 02947 /// Merge name-value pairs from the provided container, separators 02948 /// and encoder. Delete the encoder if the ownership flag allows. 02949 /// 02950 /// @param pairs 02951 /// Container with the name/value pairs to be merged. 02952 /// @param arg_sep 02953 /// Separator to be inserted bewteen pairs. 02954 /// @param val_sep 02955 /// Separator to be inserted bewteen name and value. 02956 /// @param encoder 02957 /// String encoder (Url, Xml etc.) 02958 /// @param own 02959 /// Flag indicating if the encoder must be deleted by the function. 
02960 static string Merge(const TStrPairs& pairs, 02961 const string& arg_sep, 02962 const string& val_sep, 02963 IStringEncoder* encoder = NULL, 02964 EOwnership own = eTakeOwnership) 02965 { 02966 AutoPtr<IStringEncoder> encoder_guard(encoder, own); 02967 string ret; 02968 ITERATE(typename TStrPairs, it, pairs) { 02969 if ( !ret.empty() ) { 02970 ret += arg_sep; 02971 } 02972 if ( encoder ) { 02973 ret += encoder->Encode(it->first, IStringEncoder::eName) + 02974 val_sep + 02975 encoder->Encode(it->second, IStringEncoder::eValue); 02976 } 02977 else { 02978 ret += it->first + val_sep + it->second; 02979 } 02980 } 02981 return ret; 02982 } 02983 02984 /// Read data 02985 const TStrPairs& GetPairs(void) const { return m_Data; } 02986 /// Get non-const data 02987 TStrPairs& GetPairs(void) { return m_Data; } 02988 02989 private: 02990 string m_ArgSep; // Separator between name+value pairs ("&") 02991 string m_ValSep; // Separator between name and value ("=") 02992 AutoPtr<IStringDecoder> m_Decoder; // String decoder (Url, Xml etc.) 02993 AutoPtr<IStringEncoder> m_Encoder; // String encoder (Url, Xml etc.) 02994 TStrPairs m_Data; // Parsed data 02995 }; 02996 02997 02998 typedef vector<pair<string, string> > TStringPairsVector; 02999 typedef CStringPairs<TStringPairsVector> CStringPairsParser; 03000 03001 03002 ///////////////////////////////////////////////////////////////////////////// 03003 /// 03004 /// CEncodedString -- 03005 /// 03006 /// Class to detect if a string needs to be URL-encoded and hold both 03007 /// encoded and original versions. 03008 /// 03009 03010 class CEncodedString 03011 { 03012 public: 03013 CEncodedString(void) {} 03014 CEncodedString(const string& s, 03015 NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars); 03016 03017 /// Set new original string 03018 void SetString(const string& s, 03019 NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars); 03020 03021 /// Check if the original string was encoded. 
03022 bool IsEncoded(void) const { return m_Encoded.get() != 0; } 03023 /// Get the original unencoded string 03024 const string& GetOriginalString(void) const { return m_Original; } 03025 /// Get encoded string 03026 const string& GetEncodedString(void) const 03027 { return IsEncoded() ? *m_Encoded : m_Original; } 03028 03029 /// Check if the string is empty 03030 bool IsEmpty(void) const { return m_Original.empty(); } 03031 03032 private: 03033 string m_Original; 03034 auto_ptr<string> m_Encoded; 03035 }; 03036 03037 03038 ///////////////////////////////////////////////////////////////////////////// 03039 // Predicates 03040 // 03041 03042 03043 03044 ///////////////////////////////////////////////////////////////////////////// 03045 /// 03046 /// Define Case-sensitive string comparison methods. 03047 /// 03048 /// Used as arguments to template functions for specifying the type of 03049 /// comparison. 03050 03051 template <typename T> 03052 struct PCase_Generic 03053 { 03054 /// Return difference between "s1" and "s2". 03055 int Compare(const T& s1, const T& s2) const; 03056 03057 /// Return TRUE if s1 < s2. 03058 bool Less(const T& s1, const T& s2) const; 03059 03060 /// Return TRUE if s1 == s2. 03061 bool Equals(const T& s1, const T& s2) const; 03062 03063 /// Return TRUE if s1 < s2. 03064 bool operator()(const T& s1, const T& s2) const; 03065 }; 03066 03067 typedef PCase_Generic<string> PCase; 03068 typedef PCase_Generic<const char *> PCase_CStr; 03069 03070 03071 03072 ///////////////////////////////////////////////////////////////////////////// 03073 /// 03074 /// Define Case-insensitive string comparison methods. 03075 /// 03076 /// Used as arguments to template functions for specifying the type of 03077 /// comparison. 03078 /// 03079 /// @sa PNocase_Conditional_Generic 03080 03081 template <typename T> 03082 struct PNocase_Generic 03083 { 03084 /// Return difference between "s1" and "s2". 
03085 int Compare(const T& s1, const T& s2) const; 03086 03087 /// Return TRUE if s1 < s2. 03088 bool Less(const T& s1, const T& s2) const; 03089 03090 /// Return TRUE if s1 == s2. 03091 bool Equals(const T& s1, const T& s2) const; 03092 03093 /// Return TRUE if s1 < s2 ignoring case. 03094 bool operator()(const T& s1, const T& s2) const; 03095 }; 03096 03097 typedef PNocase_Generic<string> PNocase; 03098 typedef PNocase_Generic<const char *> PNocase_CStr; 03099 03100 03101 ///////////////////////////////////////////////////////////////////////////// 03102 /// 03103 /// Define Case-insensitive string comparison methods. 03104 /// Case sensitivity can be turned on and off at runtime. 03105 /// 03106 /// Used as arguments to template functions for specifying the type of 03107 /// comparison. 03108 /// 03109 /// @sa PNocase_Generic 03110 03111 template <typename T> 03112 class PNocase_Conditional_Generic 03113 { 03114 public: 03115 /// Construction 03116 PNocase_Conditional_Generic(NStr::ECase case_sens = NStr::eCase); 03117 03118 /// Get comparison type 03119 NStr::ECase GetCase() const { return m_CaseSensitive; } 03120 03121 /// Set comparison type 03122 void SetCase(NStr::ECase case_sens) { m_CaseSensitive = case_sens; } 03123 03124 /// Return difference between "s1" and "s2". 03125 int Compare(const T& s1, const T& s2) const; 03126 03127 /// Return TRUE if s1 < s2. 03128 bool Less(const T& s1, const T& s2) const; 03129 03130 /// Return TRUE if s1 == s2. 03131 bool Equals(const T& s1, const T& s2) const; 03132 03133 /// Return TRUE if s1 < s2 ignoring case. 
03134 bool operator()(const T& s1, const T& s2) const; 03135 private: 03136 NStr::ECase m_CaseSensitive; ///< case sensitive when TRUE 03137 }; 03138 03139 typedef PNocase_Conditional_Generic<string> PNocase_Conditional; 03140 typedef PNocase_Conditional_Generic<const char *> PNocase_Conditional_CStr; 03141 03142 03143 ///////////////////////////////////////////////////////////////////////////// 03144 /// 03145 /// PQuickStringLess implements an ordering of strings, 03146 /// that is more efficient than usual lexicographical order. 03147 /// It can be used in cases when no specific order is required, 03148 /// e.g. only simple key lookup is needed. 03149 /// Current implementation first compares lengths of strings, 03150 /// and will compare string data only when lengths are the same. 03151 /// 03152 struct PQuickStringLess 03153 { 03154 bool operator()(const CTempString& s1, const CTempString& s2) const { 03155 size_t len1 = s1.size(), len2 = s2.size(); 03156 return len1 < len2 || 03157 (len1 == len2 && ::memcmp(s1.data(), s2.data(), len1) < 0); 03158 } 03159 }; 03160 03161 03162 ///////////////////////////////////////////////////////////////////////////// 03163 // Algorithms 03164 // 03165 03166 03167 /// Check equivalence of arguments using predicate. 
03168 template<class Arg1, class Arg2, class Pred> 03169 inline 03170 bool AStrEquiv(const Arg1& x, const Arg2& y, Pred pr) 03171 { 03172 return pr.Equals(x, y); 03173 } 03174 03175 03176 /* @} */ 03177 03178 03179 03180 ///////////////////////////////////////////////////////////////////////////// 03181 // 03182 // IMPLEMENTATION of INLINE functions 03183 // 03184 ///////////////////////////////////////////////////////////////////////////// 03185 03186 03187 ///////////////////////////////////////////////////////////////////////////// 03188 // CNcbiEmptyString:: 03189 // 03190 #if !defined(NCBI_OS_MSWIN) && !( defined(NCBI_OS_LINUX) && defined(NCBI_COMPILER_GCC) ) 03191 inline 03192 const string& CNcbiEmptyString::Get(void) 03193 { 03194 const string* str = m_Str; 03195 return str ? *str: FirstGet(); 03196 } 03197 #endif 03198 03199 03200 03201 ///////////////////////////////////////////////////////////////////////////// 03202 // NStr:: 03203 // 03204 03205 inline 03206 string NStr::IntToString(long value, 03207 TNumToStringFlags flags, int base) 03208 { 03209 string ret; 03210 IntToString(ret, value, flags, base); 03211 return ret; 03212 } 03213 03214 inline 03215 string NStr::UIntToString(unsigned long value, 03216 TNumToStringFlags flags, int base) 03217 { 03218 string ret; 03219 UIntToString(ret, value, flags, base); 03220 return ret; 03221 } 03222 03223 inline 03224 int NStr::HexChar(char ch) 03225 { 03226 unsigned int rc = ch - '0'; 03227 if (rc <= 9) { 03228 return rc; 03229 } else { 03230 rc = (ch | ' ') - 'a'; 03231 return rc <= 5 ? 
int(rc + 10) : -1; 03232 } 03233 } 03234 03235 inline 03236 bool NStr::MatchesMask(const string& str, const string& mask, ECase use_case) 03237 { 03238 return MatchesMask(str.c_str(), mask.c_str(), use_case); 03239 } 03240 03241 inline 03242 int NStr::strcmp(const char* s1, const char* s2) 03243 { 03244 return ::strcmp(s1, s2); 03245 } 03246 03247 inline 03248 int NStr::strncmp(const char* s1, const char* s2, size_t n) 03249 { 03250 return ::strncmp(s1, s2, n); 03251 } 03252 03253 inline 03254 int NStr::strcasecmp(const char* s1, const char* s2) 03255 { 03256 #if defined(HAVE_STRICMP) 03257 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400) 03258 return ::_stricmp(s1, s2); 03259 #else 03260 return ::stricmp(s1, s2); 03261 #endif 03262 03263 #elif defined(HAVE_STRCASECMP_LC) 03264 return ::strcasecmp(s1, s2); 03265 03266 #else 03267 int diff = 0; 03268 for ( ;; ++s1, ++s2) { 03269 char c1 = *s1; 03270 // calculate difference 03271 diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2)); 03272 // if end of string or different 03273 if (!c1 || diff) 03274 break; // return difference 03275 } 03276 return diff; 03277 #endif 03278 } 03279 03280 inline 03281 int NStr::strncasecmp(const char* s1, const char* s2, size_t n) 03282 { 03283 #if defined(HAVE_STRICMP) 03284 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400) 03285 return ::_strnicmp(s1, s2, n); 03286 #else 03287 return ::strnicmp(s1, s2, n); 03288 #endif 03289 03290 #elif defined(HAVE_STRCASECMP_LC) 03291 return ::strncasecmp(s1, s2, n); 03292 03293 #else 03294 int diff = 0; 03295 for ( ; ; ++s1, ++s2, --n) { 03296 if (n == 0) 03297 return 0; 03298 char c1 = *s1; 03299 // calculate difference 03300 diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2)); 03301 // if end of string or different 03302 if (!c1 || diff) 03303 break; // return difference 03304 } 03305 return diff; 03306 #endif 03307 } 03308 03309 inline 03310 size_t NStr::strftime(char* s, size_t maxsize, const char* format, 03311 const struct 
tm* timeptr) 03312 { 03313 #if defined(NCBI_COMPILER_MSVC) 03314 string x_format = Replace(format, "%T", "%H:%M:%S"); 03315 ReplaceInPlace(x_format, "%D", "%m/%d/%y"); 03316 return ::strftime(s, maxsize, x_format.c_str(), timeptr); 03317 #else 03318 return ::strftime(s, maxsize, format, timeptr); 03319 #endif 03320 } 03321 03322 03323 inline 03324 int NStr::Compare(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 03325 const char* pattern, ECase use_case) 03326 { 03327 return use_case == eCase ? 03328 CompareCase(str, pos, n, pattern): CompareNocase(str, pos, n, pattern); 03329 } 03330 03331 inline 03332 int NStr::Compare(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 03333 const string& pattern, ECase use_case) 03334 { 03335 return use_case == eCase ? 03336 CompareCase(str, pos, n, pattern): CompareNocase(str, pos, n, pattern); 03337 } 03338 03339 inline 03340 int NStr::CompareCase(const char* s1, const char* s2) 03341 { 03342 return NStr::strcmp(s1, s2); 03343 } 03344 03345 inline 03346 int NStr::CompareNocase(const char* s1, const char* s2) 03347 { 03348 return NStr::strcasecmp(s1, s2); 03349 } 03350 03351 inline 03352 int NStr::Compare(const char* s1, const char* s2, ECase use_case) 03353 { 03354 return use_case == eCase ? 
CompareCase(s1, s2): CompareNocase(s1, s2); 03355 } 03356 03357 inline 03358 int NStr::Compare(const string& s1, const char* s2, ECase use_case) 03359 { 03360 return Compare(s1.c_str(), s2, use_case); 03361 } 03362 03363 inline 03364 int NStr::Compare(const char* s1, const string& s2, ECase use_case) 03365 { 03366 return Compare(s1, s2.c_str(), use_case); 03367 } 03368 03369 inline 03370 int NStr::Compare(const string& s1, const string& s2, ECase use_case) 03371 { 03372 return Compare(s1.c_str(), s2.c_str(), use_case); 03373 } 03374 03375 inline 03376 int NStr::CompareCase(const string& s1, const string& s2) 03377 { 03378 return CompareCase(s1.c_str(), s2.c_str()); 03379 } 03380 03381 inline 03382 int NStr::CompareNocase(const string& s1, const string& s2) 03383 { 03384 return CompareNocase(s1.c_str(), s2.c_str()); 03385 } 03386 03387 inline 03388 bool NStr::Equal(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 03389 const char* pattern, ECase use_case) 03390 { 03391 return use_case == eCase ? 03392 EqualCase(str, pos, n, pattern) : EqualNocase(str, pos, n, pattern); 03393 } 03394 03395 inline 03396 bool NStr::Equal(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 03397 const string& pattern, ECase use_case) 03398 { 03399 return use_case == eCase ? 
03400 EqualCase(str, pos, n, pattern) : EqualNocase(str, pos, n, pattern); 03401 } 03402 03403 inline 03404 bool NStr::EqualCase(const char* s1, const char* s2) 03405 { 03406 return NStr::strcmp(s1, s2) == 0; 03407 } 03408 03409 inline 03410 bool NStr::EqualNocase(const char* s1, const char* s2) 03411 { 03412 return NStr::strcasecmp(s1, s2) == 0; 03413 } 03414 03415 inline 03416 bool NStr::EqualCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 03417 const char* pattern) 03418 { 03419 return NStr::CompareCase(str, pos, n, pattern) == 0; 03420 } 03421 03422 inline 03423 bool NStr::EqualCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 03424 const string& pattern) 03425 { 03426 return NStr::CompareCase(str, pos, n, pattern) == 0; 03427 } 03428 03429 inline 03430 bool NStr::Equal(const char* s1, const char* s2, ECase use_case) 03431 { 03432 return (use_case == eCase ? EqualCase(s1, s2) : EqualNocase(s1, s2)); 03433 } 03434 03435 inline 03436 bool NStr::Equal(const string& s1, const char* s2, ECase use_case) 03437 { 03438 return Equal(s1.c_str(), s2, use_case); 03439 } 03440 03441 inline 03442 bool NStr::Equal(const char* s1, const string& s2, ECase use_case) 03443 { 03444 return Equal(s1, s2.c_str(), use_case); 03445 } 03446 03447 inline 03448 bool NStr::Equal(const string& s1, const string& s2, ECase use_case) 03449 { 03450 return Equal(s1.c_str(), s2.c_str(), use_case); 03451 } 03452 03453 inline 03454 bool NStr::EqualCase(const string& s1, const string& s2) 03455 { 03456 // return EqualCase(s1.c_str(), s2.c_str()); 03457 return s1 == s2; 03458 } 03459 03460 inline 03461 bool NStr::EqualNocase(const string& s1, const string& s2) 03462 { 03463 return EqualNocase(s1.c_str(), s2.c_str()); 03464 } 03465 03466 inline 03467 bool NStr::EqualNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n, 03468 const char* pattern) 03469 { 03470 return CompareNocase(str, pos, n, pattern) == 0; 03471 } 03472 03473 inline 03474 bool NStr::EqualNocase(const string& str, SIZE_TYPE pos, 
SIZE_TYPE n, 03475 const string& pattern) 03476 { 03477 return CompareNocase(str, pos, n, pattern) == 0; 03478 } 03479 03480 inline 03481 bool NStr::StartsWith(const string& str, const string& start, ECase use_case) 03482 { 03483 return str.size() >= start.size() && 03484 Compare(str, 0, start.size(), start, use_case) == 0; 03485 } 03486 03487 inline 03488 bool NStr::StartsWith(const string& str, const char* start, ECase use_case) 03489 { 03490 size_t start_size = strlen(start); 03491 return str.size() >= start_size && 03492 Compare(str, 0, start_size, start, use_case) == 0; 03493 } 03494 03495 inline 03496 bool NStr::StartsWith(const string& str, char start, ECase use_case) 03497 { 03498 return !str.empty() && 03499 ((use_case == eCase) ? (str[0] == start) : 03500 (toupper((unsigned char) str[0]) == start || 03501 tolower((unsigned char) str[0]))); 03502 } 03503 03504 inline 03505 bool NStr::EndsWith(const string& str, const string& end, ECase use_case) 03506 { 03507 return str.size() >= end.size() && 03508 Compare(str, str.size() - end.size(), end.size(), end, use_case) == 0; 03509 } 03510 03511 inline 03512 bool NStr::EndsWith(const string& str, char end, ECase use_case) 03513 { 03514 if (!str.empty()) { 03515 char last = str[str.length() - 1]; 03516 return (use_case == eCase) ? (last == end) : 03517 (toupper((unsigned char) last) == end || 03518 tolower((unsigned char) last) == end); 03519 } 03520 return false; 03521 } 03522 03523 inline 03524 SIZE_TYPE NStr::Find(const string& str, const string& pattern, 03525 SIZE_TYPE start, SIZE_TYPE end, EOccurrence where, 03526 ECase use_case) 03527 { 03528 return use_case == eCase ? 
FindCase(str, pattern, start, end, where) 03529 : FindNoCase(str, pattern, start, end, where); 03530 } 03531 03532 inline 03533 SIZE_TYPE NStr::FindCase(const string& str, const string& pattern, 03534 SIZE_TYPE start, SIZE_TYPE end, EOccurrence where) 03535 { 03536 if (where == eFirst) { 03537 SIZE_TYPE result = str.find(pattern, start); 03538 return (result == NPOS || result > end) ? NPOS : result; 03539 } else { 03540 SIZE_TYPE result = str.rfind(pattern, end); 03541 return (result == NPOS || result < start) ? NPOS : result; 03542 } 03543 } 03544 03545 inline 03546 const string* NStr::FindCase(const list<string>& lst, const string& val) 03547 { 03548 return Find(lst, val, eCase); 03549 } 03550 03551 inline 03552 const string* NStr::FindNoCase(const list <string>& lst, const string& val) 03553 { 03554 return Find(lst, val, eNocase); 03555 } 03556 03557 inline 03558 const string* NStr::FindCase(const vector <string>& vec, const string& val) 03559 { 03560 return Find(vec, val, eCase); 03561 } 03562 03563 inline 03564 const string* NStr::FindNoCase(const vector <string>& vec, const string& val) 03565 { 03566 return Find(vec, val, eNocase); 03567 } 03568 03569 03570 inline 03571 string NStr::CEncode(const string& str) 03572 { 03573 return PrintableString(str); 03574 } 03575 03576 03577 inline 03578 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr, 03579 NStr::TWrapFlags flags, const string& prefix, 03580 const string* prefix1) 03581 { 03582 return Wrap(str, width, arr, flags, &prefix, prefix1); 03583 } 03584 03585 03586 inline 03587 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr, 03588 NStr::TWrapFlags flags, const string& prefix, 03589 const string& prefix1) 03590 { 03591 return Wrap(str, width, arr, flags, &prefix, &prefix1); 03592 } 03593 03594 03595 inline 03596 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width, 03597 const string& delim, list<string>& arr, 03598 NStr::TWrapFlags 
flags, const string& prefix, 03599 const string* prefix1) 03600 { 03601 return WrapList(l, width, delim, arr, flags, &prefix, prefix1); 03602 } 03603 03604 03605 inline 03606 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width, 03607 const string& delim, list<string>& arr, 03608 NStr::TWrapFlags flags, const string& prefix, 03609 const string& prefix1) 03610 { 03611 return WrapList(l, width, delim, arr, flags, &prefix, &prefix1); 03612 } 03613 03614 03615 03616 ///////////////////////////////////////////////////////////////////////////// 03617 // CStringUTF8:: 03618 // 03619 03620 template <typename TChar> 03621 inline 03622 basic_string<TChar> CStringUTF8::x_AsBasicString( 03623 const TChar* substitute_on_error) const 03624 { 03625 TUnicodeSymbol max_char = (TUnicodeSymbol)numeric_limits<TChar>::max(); 03626 basic_string<TChar> result; 03627 result.reserve( GetSymbolCount()+1 ); 03628 for (const char* src = c_str(); *src; ++src) { 03629 TUnicodeSymbol ch = Decode(src); 03630 if (ch > max_char) { 03631 if (substitute_on_error) { 03632 result.append(substitute_on_error); 03633 continue; 03634 } else { 03635 NCBI_THROW2(CStringException, eConvert, 03636 "Failed to convert symbol to wide character", 03637 (SIZE_TYPE)(src - c_str())); 03638 } 03639 } 03640 result.append(1, (TChar)ch); 03641 } 03642 return result; 03643 } 03644 03645 03646 template <typename TChar> 03647 inline 03648 void CStringUTF8::x_Append(const TChar* src) 03649 { 03650 const TChar* srcBuf; 03651 SIZE_TYPE needed = 0; 03652 03653 for (srcBuf = src; *srcBuf; ++srcBuf) { 03654 needed += x_BytesNeeded( *srcBuf ); 03655 } 03656 if ( !needed ) { 03657 return; 03658 } 03659 reserve(max(capacity(),length()+needed+1)); 03660 for (srcBuf = src; *srcBuf; ++srcBuf) { 03661 x_AppendChar( *srcBuf ); 03662 } 03663 } 03664 03665 03666 03667 03668 ///////////////////////////////////////////////////////////////////////////// 03669 // PCase_Generic:: 03670 // 03671 03672 template <typename T> 03673 
inline 03674 int PCase_Generic<T>::Compare(const T& s1, const T& s2) const 03675 { 03676 return NStr::Compare(s1, s2, NStr::eCase); 03677 } 03678 03679 template <typename T> 03680 inline 03681 bool PCase_Generic<T>::Less(const T& s1, const T& s2) const 03682 { 03683 return Compare(s1, s2) < 0; 03684 } 03685 03686 template <typename T> 03687 inline 03688 bool PCase_Generic<T>::Equals(const T& s1, const T& s2) const 03689 { 03690 return Compare(s1, s2) == 0; 03691 } 03692 03693 template <typename T> 03694 inline 03695 bool PCase_Generic<T>::operator()(const T& s1, const T& s2) const 03696 { 03697 return Less(s1, s2); 03698 } 03699 03700 03701 03702 //////////////////////////////////////////////////////////////////////////// 03703 // PNocase_Generic<T>:: 03704 // 03705 03706 03707 template <typename T> 03708 inline 03709 int PNocase_Generic<T>::Compare(const T& s1, const T& s2) const 03710 { 03711 return NStr::Compare(s1, s2, NStr::eNocase); 03712 } 03713 03714 template <typename T> 03715 inline 03716 bool PNocase_Generic<T>::Less(const T& s1, const T& s2) const 03717 { 03718 return Compare(s1, s2) < 0; 03719 } 03720 03721 template <typename T> 03722 inline 03723 bool PNocase_Generic<T>::Equals(const T& s1, const T& s2) const 03724 { 03725 return Compare(s1, s2) == 0; 03726 } 03727 03728 template <typename T> 03729 inline 03730 bool PNocase_Generic<T>::operator()(const T& s1, const T& s2) const 03731 { 03732 return Less(s1, s2); 03733 } 03734 03735 //////////////////////////////////////////////////////////////////////////// 03736 // PNocase_Conditional_Generic<T>:: 03737 // 03738 03739 template <typename T> 03740 inline 03741 PNocase_Conditional_Generic<T>::PNocase_Conditional_Generic(NStr::ECase cs) 03742 : m_CaseSensitive(cs) 03743 {} 03744 03745 template <typename T> 03746 inline 03747 int PNocase_Conditional_Generic<T>::Compare(const T& s1, const T& s2) const 03748 { 03749 return NStr::Compare(s1, s2, m_CaseSensitive); 03750 } 03751 03752 template <typename T> 
03753 inline 03754 bool PNocase_Conditional_Generic<T>::Less(const T& s1, const T& s2) const 03755 { 03756 return Compare(s1, s2) < 0; 03757 } 03758 03759 template <typename T> 03760 inline 03761 bool PNocase_Conditional_Generic<T>::Equals(const T& s1, const T& s2) const 03762 { 03763 return Compare(s1, s2) == 0; 03764 } 03765 03766 template <typename T> 03767 inline 03768 bool PNocase_Conditional_Generic<T>::operator()(const T& s1, const T& s2) const 03769 { 03770 return Less(s1, s2); 03771 } 03772 03773 03774 03775 END_NCBI_SCOPE 03776 03777 #endif /* CORELIB___NCBISTR__HPP */ 03778 03779
1.4.6
Modified on Mon Dec 07 16:20:35 2009 by modify_doxy.py rev. 173732