include/corelib/ncbistr.hpp

Go to the documentation of this file.
00001 #ifndef CORELIB___NCBISTR__HPP
00002 #define CORELIB___NCBISTR__HPP
00003 
00004 /*  $Id: ncbistr.hpp 172519 2009-10-06 14:23:09Z vasilche $
00005  * ===========================================================================
00006  *
00007  *                            PUBLIC DOMAIN NOTICE
00008  *               National Center for Biotechnology Information
00009  *
00010  *  This software/database is a "United States Government Work" under the
00011  *  terms of the United States Copyright Act.  It was written as part of
00012  *  the author's official duties as a United States Government employee and
00013  *  thus cannot be copyrighted.  This software/database is freely available
00014  *  to the public for use. The National Library of Medicine and the U.S.
00015  *  Government have not placed any restriction on its use or reproduction.
00016  *
00017  *  Although all reasonable efforts have been taken to ensure the accuracy
00018  *  and reliability of the software and data, the NLM and the U.S.
00019  *  Government do not and cannot warrant the performance or results that
00020  *  may be obtained by using this software or data. The NLM and the U.S.
00021  *  Government disclaim all warranties, express or implied, including
00022  *  warranties of performance, merchantability or fitness for any particular
00023  *  purpose.
00024  *
00025  *  Please cite the author in any work or product based on this material.
00026  *
00027  * ===========================================================================
00028  *
00029  * Authors:  Eugene Vasilchenko, Denis Vakatov
00030  *
00031  *
00032  */
00033 
00034 /// @file ncbistr.hpp
00035 /// The NCBI C++ standard methods for dealing with std::string
00036 
00037 
00038 #include <corelib/tempstr.hpp>
00039 #include <corelib/ncbi_limits.hpp>
00040 #ifdef NCBI_OS_OSF1
00041 #  include <strings.h>
00042 #endif
00043 #include <stdarg.h>
00044 #include <time.h>
00045 #include <vector>
00046 
00047 
00048 
00049 BEGIN_NCBI_SCOPE
00050 
00051 /** @addtogroup String
00052  *
00053  * @{
00054  */
00055 
00056 /// Empty "C" string (points to a '\0').
00057  extern const char *const kEmptyCStr;
00058 #define NcbiEmptyCStr NCBI_NS_NCBI::kEmptyCStr
00059 
00060 
00061 /// Empty "C++" string.
00062 #if defined(NCBI_OS_MSWIN) || ( defined(NCBI_OS_LINUX)  &&  defined(NCBI_COMPILER_GCC) )
00063 class CNcbiEmptyString
00064 {
00065 public:
00066     /// Return a reference to a function-local static empty string.
00067     static const string& Get(void)
00068     {
00069         static string empty_str;  // default-constructed, hence empty
00070         return empty_str;
00071     }
00072 };
00073 #else
00074 class  CNcbiEmptyString
00075 {
00076 public:
00077     /// Get the empty string (only declared here; defined out of line).
00078     static const string& Get(void);
00079 private:
00080     /// Helper method to initialize the private data member (m_Str) and
00081     /// return the null (empty) string.
00082     static const string& FirstGet(void);
00083     static const string* m_Str;     ///< Null (empty) string pointer.
00084 };
00085 #endif // NCBI_OS_MSWIN....
00086 
00087 
00088 #define NcbiEmptyString NCBI_NS_NCBI::CNcbiEmptyString::Get()
00089 #define kEmptyStr NcbiEmptyString
00090 
00091 
00092 // SIZE_TYPE and NPOS
00093 
00094 /// Define size type.
00095 typedef NCBI_NS_STD::string::size_type SIZE_TYPE;
00096 
00097 /// Define NPOS constant as the special value "std::string::npos" which is
00098 /// returned when a substring search fails, or to indicate an unspecified
00099 /// string position.
00100 static const SIZE_TYPE NPOS = NCBI_NS_STD::string::npos;
00101 
00102 
00103 
00104 /////////////////////////////////////////////////////////////////////////////
00105 ///
00106 /// NStr --
00107 ///
00108 /// Encapsulates class-wide string processing functions.
00109 
00110 class  NStr
00111 {
00112 public:
00113     /// Convert string to numeric value.
00114     ///
00115     /// @param str
00116     ///   String containing digits.
00117     /// @return
00118     ///   - Convert "str" to a (non-negative) "int" value and return
00119     ///     this value.
00120     ///   - -1 if "str" contains any symbols other than [0-9], or
00121     ///     if it represents a number that does not fit into "int".
00122     static int StringToNumeric(const string& str);
00123 
00124 
00125     /// Number to string conversion flags.
00126     ///
00127     /// NOTE: 
00128     ///   If the base specified in the *ToString() methods is not the default
00129     ///   10, then some flags like fWithSign and fWithCommas will be ignored.
00130     enum ENumToStringFlags {
00131         fWithSign        = (1 <<  9), ///< Prefix the output value with a sign
00132         fWithCommas      = (1 << 10), ///< Use commas as thousands separator
00133         fDoubleFixed     = (1 << 11), ///< Use n.nnnn format for double
00134         fDoubleScientific= (1 << 12), ///< Use scientific format for double
00135         fDoubleGeneral   = fDoubleFixed | fDoubleScientific ///< Both bits set
00136     };
00137     typedef int TNumToStringFlags;    ///< Bitwise OR of "ENumToStringFlags"
00138 
00139     /// String to number conversion flags.
00140     enum EStringToNumFlags {
00141         fConvErr_NoThrow      = (1 <<  9),   ///< Return "natural null"
00142         ///< value on error, instead of throwing (by default) an exception
00143         
00144         fMandatorySign        = (1 << 10),   ///< See 'fWithSign'
00145         fAllowCommas          = (1 << 11),   ///< See 'fWithCommas'
00146         fAllowLeadingSpaces   = (1 << 12),   ///< Can have leading spaces
00147         fAllowLeadingSymbols  = (1 << 13) | fAllowLeadingSpaces,
00148                                              ///< Can have leading non-nums
00149         fAllowTrailingSpaces  = (1 << 14),   ///< Can have trailing spaces
00150         fAllowTrailingSymbols = (1 << 15) | fAllowTrailingSpaces,
00151                                              ///< Can have trailing non-nums
00152         fAllStringToNumFlags  = 0x7F00 // NOTE(review): covers bits 8..14, but the flags above use bits 9..15 (0xFE00) -- confirm
00153     };
00154     typedef int TStringToNumFlags;   ///< Binary OR of "EStringToNumFlags"
00155 
00156     /// Convert string to int.
00157     ///
00158     /// @param str
00159     ///   String to be converted.
00160     /// @param flags
00161     ///   How to convert string to value.
00162     /// @param base
00163     ///   Radix base. Default is 10. Allowed values are 0, 2..32.
00164     /// @return
00165     ///   - Convert "str" to "int" value and return it.
00166     ///   - 0 if "str" contains illegal symbols, or if it represents a number
00167     ///     that does not fit into range, and flag fConvErr_NoThrow is set.
00168     ///   - Throw an exception otherwise.
00169     static int StringToInt(const CTempString& str,
00170                            TStringToNumFlags  flags = 0,
00171                            int                base  = 10);
00172 
00173     /// Convert string to unsigned int.
00174     ///
00175     /// @param str
00176     ///   String to be converted.
00177     /// @param flags
00178     ///   How to convert string to value.
00179     /// @param base
00180     ///   Radix base. Default is 10. Allowed values are 0, 2..32.
00181     /// @return
00182     ///   - Convert "str" to "unsigned int" value and return it.
00183     ///   - 0 if "str" contains illegal symbols, or if it represents a number
00184     ///     that does not fit into range, and flag fConvErr_NoThrow is set.
00185     ///   - Throw an exception otherwise.
00186     static unsigned int StringToUInt(const CTempString& str,
00187                                      TStringToNumFlags  flags = 0,
00188                                      int                base  = 10);
00189 
00190     /// Convert string to long.
00191     ///
00192     /// @param str
00193     ///   String to be converted.
00194     /// @param flags
00195     ///   How to convert string to value.
00196     /// @param base
00197     ///   Radix base. Default is 10. Allowed values are 0, 2..32.
00198     /// @return
00199     ///   - Convert "str" to "long" value and return it.
00200     ///   - 0 if "str" contains illegal symbols, or if it represents a number
00201     ///     that does not fit into range, and flag fConvErr_NoThrow is set.
00202     ///   - Throw an exception otherwise.
00203     static long StringToLong(const CTempString& str,
00204                              TStringToNumFlags  flags = 0,
00205                              int                base  = 10);
00206 
00207     /// Convert string to unsigned long.
00208     ///
00209     /// @param str
00210     ///   String to be converted.
00211     /// @param flags
00212     ///   How to convert string to value.
00213     /// @param base
00214     ///   Numeric base of the number symbols (default = 10).
00215     /// @return
00216     ///   - Convert "str" to "unsigned long" value and return it.
00217     ///   - 0 if "str" contains illegal symbols, or if it represents a number
00218     ///     that does not fit into range, and flag fConvErr_NoThrow is set.
00219     ///   - Throw an exception otherwise.
00220     static unsigned long StringToULong(const CTempString& str,
00221                                        TStringToNumFlags  flags = 0,
00222                                        int                base  = 10);
00223 
00224     /// Convert string to double.
00225     ///
00226     /// @param str
00227     ///   String to be converted.
00228     /// @param flags
00229     ///   How to convert string to value.
00230     ///   Do not support fAllowCommas flag.
00231     /// @return
00232     ///   - Convert "str" to "double" value and return it.
00233     ///   - 0 if "str" contains illegal symbols, or if it represents a number
00234     ///     that does not fit into range, and flag fConvErr_NoThrow is set.
00235     ///   - Throw an exception otherwise.
00236     static double StringToDouble(const CTempStringEx& str,
00237                                  TStringToNumFlags  flags = 0);
00238 
00239     /// This version accepts zero-terminated string
00240     static double StringToDoubleEx(const char* str, size_t size,
00241                                    TStringToNumFlags  flags = 0);
00242 
00243     /// Convert string to Int8.
00244     ///
00245     /// @param str
00246     ///   String to be converted.
00247     /// @param flags
00248     ///   How to convert string to value.
00249     /// @param base
00250     ///   Radix base. Default is 10. Allowed values are 0, 2..32.
00251     /// @return
00252     ///   - Convert "str" to "Int8" value and return it.
00253     ///   - 0 if "str" contains illegal symbols, or if it represents a number
00254     ///     that does not fit into range, and flag fConvErr_NoThrow is set.
00255     ///   - Throw an exception otherwise.
00256     static Int8 StringToInt8(const CTempString& str,
00257                              TStringToNumFlags  flags = 0,
00258                              int                base  = 10);
00259 
00260     /// Convert string to Uint8.
00261     ///
00262     /// @param str
00263     ///   String to be converted.
00264     /// @param flags
00265     ///   How to convert string to value.
00266     /// @param base
00267     ///   Radix base. Default is 10. Allowed values are 0, 2..32.
00268     /// @return
00269     ///   - Convert "str" to "Uint8" value and return it.
00270     ///   - 0 if "str" contains illegal symbols, or if it represents a number
00271     ///     that does not fit into range, and flag fConvErr_NoThrow is set.
00272     ///   - Throw an exception otherwise.
00273     static Uint8 StringToUInt8(const CTempString& str,
00274                                TStringToNumFlags  flags = 0,
00275                                int                base  = 10);
00276 
00277     /// Convert string to number of bytes. 
00278     ///
00279     /// String can contain "software" qualifiers: MB(megabyte), KB (kilobyte)..
00280     /// Example: 100MB, 1024KB
00281     /// Note the qualifiers are power-of-2 based, aka kibi-, mebi- etc, so that
00282     /// 1KB = 1024B (not 1000B), 1MB = 1024KB = 1048576B, etc.
00283     ///
00284     /// @param str
00285     ///   String to be converted.
00286     /// @param flags
00287     ///   How to convert string to value.
00288     /// @param base
00289     ///   Numeric base of the number (before the qualifier).
00290     ///   Default is 10. Allowed values are 0, 2..20.
00291     /// @return
00292     ///   - Convert "str" to "Uint8" value and return it.
00293     ///   - 0 if "str" contains illegal symbols, or if it represents a number
00294     ///     that does not fit into range, and flag fConvErr_NoThrow is set.
00295     ///   - Throw an exception otherwise.
00296     static Uint8 StringToUInt8_DataSize(const CTempString& str,
00297                                         TStringToNumFlags  flags = 0,
00298                                         int                base  = 10);
00299 
00300     /// Convert string to pointer.
00301     ///
00302     /// @param str
00303     ///   String to be converted.
00304     /// @return
00305     ///   Pointer value corresponding to its string representation.
00306     static const void* StringToPtr(const string& str);
00307 
00308     /// Convert character to integer.
00309     ///
00310     /// @param ch
00311     ///   Character to be converted.
00312     /// @return
00313     ///   Integer (0..15) corresponding to the "ch" as a hex digit.
00314     ///   Return -1 on error.
00315     static int HexChar(char ch);
00316 
00317     /// Convert Int to String.
00318     ///
00319     /// @param value
00320     ///   Integer value (long) to be converted.
00321     /// @param flags
00322     ///   How to convert value to string.
00323     /// @param base
00324     ///   Radix base. Default is 10. Allowed values are 2..32.
00325     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
00326     ///   If necessary you should add it yourself.
00327     /// @return
00328     ///   Converted string value.
00329     static string IntToString(long value, TNumToStringFlags flags = 0,
00330                               int  base = 10);
00331 
00332     /// Convert Int to String.
00333     ///
00334     /// @param out_str
00335     ///   Output string variable.
00336     /// @param value
00337     ///   Integer value (long) to be converted.
00338     /// @param flags
00339     ///   How to convert value to string.
00340     /// @param base
00341     ///   Radix base. Default is 10. Allowed values are 2..32.
00342     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
00343     ///   If necessary you should add it yourself.
00344     static void IntToString(string& out_str, long value, 
00345                             TNumToStringFlags flags = 0,
00346                             int               base  = 10);
00347 
00348     /// Convert UInt to string.
00349     ///
00350     /// @param value
00351     ///   Integer value (unsigned long) to be converted.
00352     /// @param flags
00353     ///   How to convert value to string.
00354     /// @param base
00355     ///   Radix base. Default is 10. Allowed values are 2..32.
00356     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
00357     ///   If necessary you should add it yourself.
00358     /// @return
00359     ///   Converted string value.
00360     static string UIntToString(unsigned long value,
00361                                TNumToStringFlags flags = 0,
00362                                int               base  = 10);
00363 
00364     /// Convert UInt to string.
00365     ///
00366     /// @param out_str
00367     ///   Output string variable
00368     /// @param value
00369     ///   Integer value (unsigned long) to be converted.
00370     /// @param flags
00371     ///   How to convert value to string.
00372     /// @param base
00373     ///   Radix base. Default is 10. Allowed values are 2..32.
00374     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
00375     ///   If necessary you should add it yourself.
00376     static void UIntToString(string& out_str, unsigned long value,
00377                              TNumToStringFlags flags = 0,
00378                              int               base  = 10);
00379 
00380     /// Convert Int8 to string.
00381     ///
00382     /// @param value
00383     ///   Integer value (Int8) to be converted.
00384     /// @param flags
00385     ///   How to convert value to string.
00386     /// @param base
00387     ///   Radix base. Default is 10. Allowed values are 2..32.
00388     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
00389     ///   If necessary you should add it yourself.
00390     /// @return
00391     ///   Converted string value.
00392     static string Int8ToString(Int8 value,
00393                                TNumToStringFlags flags = 0,
00394                                int               base  = 10);
00395 
00396     /// Convert Int8 to string.
00397     ///
00398     /// @param out_str
00399     ///   Output string variable
00400     /// @param value
00401     ///   Integer value (Int8) to be converted.
00402     /// @param flags
00403     ///   How to convert value to string.
00404     /// @param base
00405     ///   Radix base. Default is 10. Allowed values are 2..32.
00406     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
00407     ///   If necessary you should add it yourself.
00408     static void Int8ToString(string& out_str, Int8 value,
00409                              TNumToStringFlags flags = 0,
00410                              int               base  = 10);
00411 
00412     /// Convert UInt8 to string.
00413     ///
00414     /// @param value
00415     ///   Integer value (Uint8) to be converted.
00416     /// @param flags
00417     ///   How to convert value to string.
00418     /// @param base
00419     ///   Radix base. Default is 10. Allowed values are 2..32.
00420     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
00421     ///   If necessary you should add it yourself.
00422     /// @return
00423     ///   Converted string value.
00424     static string UInt8ToString(Uint8 value,
00425                                 TNumToStringFlags flags = 0,
00426                                 int               base  = 10);
00427 
00428     /// Convert UInt8 to string.
00429     ///
00430     /// @param out_str
00431     ///   Output string variable
00432     /// @param value
00433     ///   Integer value (Uint8) to be converted.
00434     /// @param flags
00435     ///   How to convert value to string.
00436     /// @param base
00437     ///   Radix base. Default is 10. Allowed values are 2..32.
00438     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
00439     ///   If necessary you should add it yourself.
00440     static void UInt8ToString(string& out_str, Uint8 value,
00441                               TNumToStringFlags flags = 0,
00442                               int               base  = 10);
00443 
00444     /// Convert double to string.
00445     ///
00446     /// @param value
00447     ///   Double value to be converted.
00448     /// @param precision
00449     ///   Precision value for conversion. If precision is more than the
00450     ///   maximum for current platform, it will be truncated to this maximum.
00451     ///   If it is negative, then the double will be converted to a number
00452     ///   in scientific notation.
00453     /// @param flags
00454     ///   How to convert value to string.
00455     ///   If double format flags are not specified, then the following output
00456     ///   format will be used by default:
00457     ///     - fDoubleFixed,   if 'precision' >= 0.
00458     ///     - fDoubleGeneral, if 'precision' < 0.
00459     /// @return
00460     ///   Converted string value.
00461     static string DoubleToString(double value, int precision = -1,
00462                                  TNumToStringFlags flags = 0);
00463 
00464     /// Convert double to string.
00465     ///
00466     /// @param out_str
00467     ///   Output string variable
00468     /// @param value
00469     ///   Double value to be converted.
00470     /// @param precision
00471     ///   Precision value for conversion. If precision is more than the
00472     ///   maximum for current platform, it will be truncated to this maximum.
00473     ///   If it is negative, then the double will be converted to a number
00474     ///   in scientific notation.
00475     /// @param flags
00476     ///   How to convert value to string.
00477     ///   If double format flags are not specified, then the following output
00478     ///   format will be used by default:
00479     ///     - fDoubleFixed,   if 'precision' >= 0.
00480     ///     - fDoubleGeneral, if 'precision' < 0.
00481     static void DoubleToString(string& out_str, double value,
00482                                int precision = -1,
00483                                TNumToStringFlags flags = 0);
00484 
00485     /// Convert double to string with specified precision and place the result
00486     /// in the specified buffer.
00487     ///
00488     /// @param value
00489     ///   Double value to be converted.
00490     /// @param precision
00491     ///   Precision value for conversion. If precision is more than the
00492     ///   maximum for current platform, it will be truncated to this maximum.
00493     /// @param buf
00494     ///   Put result of the conversion into this buffer.
00495     /// @param buf_size
00496     ///   Size of buffer, "buf".
00497     /// @param flags
00498     ///   How to convert value to string.
00499     ///   Default output format is fDoubleFixed.
00500     /// @return
00501     ///   The number of bytes stored in "buf", not counting the
00502     ///   terminating '\0'.
00503     static SIZE_TYPE DoubleToString(double value, unsigned int precision,
00504                                     char* buf, SIZE_TYPE buf_size,
00505                                     TNumToStringFlags flags = 0);
00506 
00507     /// Convert pointer to string.
00508     ///
00509     /// @param out_str
00510     ///   Output string variable
00511     /// @param ptr
00512     ///   Pointer to be converted.
00513     static void PtrToString(string& out_str, const void* ptr);
00514 
00515     /// Convert pointer to string.
00516     ///
00517     /// @param ptr
00518     ///   Pointer to be converted.
00519     /// @return
00520     ///   String value representing the pointer.
00521     static string PtrToString(const void* ptr);
00522 
00523     /// Convert bool to string.
00524     ///
00525     /// @param value
00526     ///   Boolean value to be converted.
00527     /// @return
00528     ///   One of: 'true', 'false'
00529     static const string BoolToString(bool value);
00530 
00531     /// Convert string to bool.
00532     ///
00533     /// @param str
00534     ///   Boolean string value to be converted.  Can recognize
00535     ///   case-insensitive version as one of:  'true', 't', 'yes', 'y'
00536     ///   for TRUE; and  'false', 'f', 'no', 'n' for FALSE.
00537     /// @return
00538     ///   TRUE or FALSE.
00539     static bool StringToBool(const string& str);
00540 
00541 
00542     /// Handle an arbitrary printf-style format string.
00543     ///
00544     /// This method exists only to support third-party code that insists on
00545     /// representing messages in this format; please stick to type-checked
00546     /// means of formatting such as the above ToString methods and I/O
00547     /// streams whenever possible.
00548     static string FormatVarargs(const char* format, va_list args);
00549 
00550 
00551     /// Which type of string comparison.
00552     enum ECase {
00553         eCase,      ///< Case-sensitive comparison
00554         eNocase     ///< Case-insensitive comparison
00555     };
00556 
00557     // ATTENTION.  Be aware that:
00558     //
00559     // 1) "Compare***(..., SIZE_TYPE pos, SIZE_TYPE n, ...)" functions
00560     //    follow the ANSI C++ comparison rules a la "basic_string::compare()":
00561     //       str[pos:pos+n) == pattern   --> return 0
00562     //       str[pos:pos+n) <  pattern   --> return negative value
00563     //       str[pos:pos+n) >  pattern   --> return positive value
00564     //
00565     // 2) "strn[case]cmp()" functions follow the ANSI C comparison rules:
00566     //       str[0:n) == pattern[0:n)   --> return 0
00567     //       str[0:n) <  pattern[0:n)   --> return negative value
00568     //       str[0:n) >  pattern[0:n)   --> return positive value
00569 
00570 
00571     /// Case-sensitive compare of a substring with a pattern.
00572     ///
00573     /// @param str
00574     ///   String containing the substring to be compared.
00575     /// @param pos
00576     ///   Start position of substring to be compared.
00577     /// @param n
00578     ///   Number of characters in substring to be compared.
00579     /// @param pattern
00580     ///   String pattern (char*) to be compared with substring.
00581     /// @return
00582     ///   - 0, if str[pos:pos+n) == pattern.   
00583     ///   - Negative integer, if str[pos:pos+n) <  pattern.   
00584     ///   - Positive integer, if str[pos:pos+n) >  pattern.   
00585     /// @sa
00586     ///   Other forms of overloaded CompareCase() with differences in argument
00587     ///   types: char* vs. string&
00588     static int CompareCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00589                            const char* pattern);
00590 
00591     /// Case-sensitive compare of a substring with a pattern.
00592     ///
00593     /// @param str
00594     ///   String containing the substring to be compared.
00595     /// @param pos
00596     ///   Start position of substring to be compared.
00597     /// @param n
00598     ///   Number of characters in substring to be compared.
00599     /// @param pattern
00600     ///   String pattern (string&) to be compared with substring.
00601     /// @return
00602     ///   - 0, if str[pos:pos+n) == pattern.   
00603     ///   - Negative integer, if str[pos:pos+n) <  pattern.   
00604     ///   - Positive integer, if str[pos:pos+n) >  pattern.   
00605     /// @sa
00606     ///   Other forms of overloaded CompareCase() with differences in argument
00607     ///   types: char* vs. string&
00608     static int CompareCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00609                            const string& pattern);
00610 
00611     /// Case-sensitive compare of two strings -- char* version.
00612     ///
00613     /// @param s1
00614     ///   String to be compared -- operand 1.
00615     /// @param s2
00616     ///   String to be compared -- operand 2.
00617     /// @return
00618     ///   - 0, if s1 == s2.   
00619     ///   - Negative integer, if s1 < s2.   
00620     ///   - Positive integer, if s1 > s2.   
00621     /// @sa
00622     ///   CompareNocase(), Compare() versions with same argument types.
00623     static int CompareCase(const char* s1, const char* s2);
00624 
00625     /// Case-sensitive compare of two strings -- string& version.
00626     ///
00627     /// @param s1
00628     ///   String to be compared -- operand 1.
00629     /// @param s2
00630     ///   String to be compared -- operand 2.
00631     /// @return
00632     ///   - 0, if s1 == s2.   
00633     ///   - Negative integer, if s1 < s2.   
00634     ///   - Positive integer, if s1 > s2.   
00635     /// @sa
00636     ///   CompareNocase(), Compare() versions with same argument types.
00637     static int CompareCase(const string& s1, const string& s2);
00638 
00639     /// Case-insensitive compare of a substring with a pattern.
00640     ///
00641     /// @param str
00642     ///   String containing the substring to be compared.
00643     /// @param pos
00644     ///   Start position of substring to be compared.
00645     /// @param n
00646     ///   Number of characters in substring to be compared.
00647     /// @param pattern
00648     ///   String pattern (char*) to be compared with substring.
00649     /// @return
00650     ///   - 0, if str[pos:pos+n) == pattern (case-insensitive compare).   
00651     ///   - Negative integer, if str[pos:pos+n) <  pattern (case-insensitive
00652     ///     compare).
00653     ///   - Positive integer, if str[pos:pos+n) >  pattern (case-insensitive
00654     ///     compare).
00655     /// @sa
00656     ///   Other forms of overloaded CompareNocase() with differences in
00657     ///   argument types: char* vs. string&
00658     static int CompareNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00659                              const char* pattern);
00660 
00661     /// Case-insensitive compare of a substring with a pattern.
00662     ///
00663     /// @param str
00664     ///   String containing the substring to be compared.
00665     /// @param pos
00666     ///   Start position of substring to be compared.
00667     /// @param n
00668     ///   Number of characters in substring to be compared.
00669     /// @param pattern
00670     ///   String pattern (string&) to be compared with substring.
00671     /// @return
00672     ///   - 0, if str[pos:pos+n) == pattern (case-insensitive compare).   
00673     ///   - Negative integer, if str[pos:pos+n) <  pattern (case-insensitive
00674     ///     compare).
00675     ///   - Positive integer, if str[pos:pos+n) >  pattern (case-insensitive
00676     ///     compare).
00677     /// @sa
00678     ///   Other forms of overloaded CompareNocase() with differences in
00679     ///   argument types: char* vs. string&
00680     static int CompareNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00681                              const string& pattern);
00682 
00683     /// Case-insensitive compare of two strings -- char* version.
00684     ///
00685     /// @param s1
00686     ///   String to be compared -- operand 1.
00687     /// @param s2
00688     ///   String to be compared -- operand 2.
00689     /// @return
00690     ///   - 0, if s1 == s2 (case-insensitive compare).      
00691     ///   - Negative integer, if s1 < s2 (case-insensitive compare).      
00692     ///   - Positive integer, if s1 > s2 (case-insensitive compare).    
00693     /// @sa
00694     ///   CompareCase(), Compare() versions with same argument types.
00695     static int CompareNocase(const char* s1, const char* s2);
00696 
00697     /// Case-insensitive compare of two strings -- string& version.
00698     ///
00699     /// @param s1
00700     ///   String to be compared -- operand 1.
00701     /// @param s2
00702     ///   String to be compared -- operand 2.
00703     /// @return
00704     ///   - 0, if s1 == s2 (case-insensitive compare).      
00705     ///   - Negative integer, if s1 < s2 (case-insensitive compare).      
00706     ///   - Positive integer, if s1 > s2 (case-insensitive compare).    
00707     /// @sa
00708     ///   CompareCase(), Compare() versions with same argument types.
00709     static int CompareNocase(const string& s1, const string& s2);
00710 
00711     /// Compare of a substring with a pattern.
00712     ///
00713     /// @param str
00714     ///   String containing the substring to be compared.
00715     /// @param pos
00716     ///   Start position of substring to be compared.
00717     /// @param n
00718     ///   Number of characters in substring to be compared.
00719     /// @param pattern
00720     ///   String pattern (char*) to be compared with substring.
00721     /// @param use_case
00722     ///   Whether to do a case sensitive compare(eCase -- default), or a
00723     ///   case-insensitive compare (eNocase).
00724     /// @return
00725     ///   - 0, if str[pos:pos+n) == pattern.   
00726     ///   - Negative integer, if str[pos:pos+n) <  pattern.   
00727     ///   - Positive integer, if str[pos:pos+n) >  pattern.   
00728     /// @sa
00729     ///   Other forms of overloaded Compare() with differences in argument
00730     ///   types: char* vs. string&
00731     static int Compare(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00732                        const char* pattern, ECase use_case = eCase);
00733 
00734     /// Compare of a substring with a pattern.
00735     ///
00736     /// @param str
00737     ///   String containing the substring to be compared.
00738     /// @param pos
00739     ///   Start position of substring to be compared.
00740     /// @param n
00741     ///   Number of characters in substring to be compared.
00742     /// @param pattern
00743     ///   String pattern (string&) to be compared with substring.
00744     /// @param use_case
00745     ///   Whether to do a case sensitive compare(default is eCase), or a
00746     ///   case-insensitive compare (eNocase).
00747     /// @return
00748     ///   - 0, if str[pos:pos+n) == pattern.   
00749     ///   - Negative integer, if str[pos:pos+n) <  pattern.   
00750     ///   - Positive integer, if str[pos:pos+n) >  pattern.   
00751     /// @sa
00752     ///   Other forms of overloaded Compare() with differences in argument
00753     ///   types: char* vs. string&
00754     static int Compare(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00755                        const string& pattern, ECase use_case = eCase);
00756 
00757     /// Compare two strings -- char* version.
00758     ///
00759     /// @param s1
00760     ///   String to be compared -- operand 1.
00761     /// @param s2
00762     ///   String to be compared -- operand 2.
00763     /// @param use_case
00764     ///   Whether to do a case sensitive compare(default is eCase), or a
00765     ///   case-insensitive compare (eNocase).
00766     /// @return
00767     ///   - 0, if s1 == s2.   
00768     ///   - Negative integer, if s1 < s2.   
00769     ///   - Positive integer, if s1 > s2.   
00770     /// @sa
00771     ///   CompareNocase(), Compare() versions with similar argument types.
00772     static int Compare(const char* s1, const char* s2,
00773                        ECase use_case = eCase);
00774 
00775     /// Compare two strings -- string&, char* version.
00776     ///
00777     /// @param s1
00778     ///   String to be compared -- operand 1.
00779     /// @param s2
00780     ///   String to be compared -- operand 2.
00781     /// @param use_case
00782     ///   Whether to do a case sensitive compare(default is eCase), or a
00783     ///   case-insensitive compare (eNocase).
00784     /// @return
00785     ///   - 0, if s1 == s2.   
00786     ///   - Negative integer, if s1 < s2.   
00787     ///   - Positive integer, if s1 > s2.   
00788     /// @sa
00789     ///   CompareNocase(), Compare() versions with similar argument types.
00790     static int Compare(const string& s1, const char* s2,
00791                        ECase use_case = eCase);
00792 
00793     /// Compare two strings -- char*, string& version.
00794     ///
00795     /// @param s1
00796     ///   String to be compared -- operand 1.
00797     /// @param s2
00798     ///   String to be compared -- operand 2.
00799     /// @param use_case
00800     ///   Whether to do a case sensitive compare(default is eCase), or a
00801     ///   case-insensitive compare (eNocase).
00802     /// @return
00803     ///   - 0, if s1 == s2.   
00804     ///   - Negative integer, if s1 < s2.   
00805     ///   - Positive integer, if s1 > s2.   
00806     /// @sa
00807     ///   CompareNocase(), Compare() versions with similar argument types.
00808     static int Compare(const char* s1, const string& s2,
00809                        ECase use_case = eCase);
00810 
00811     /// Compare two strings -- string& version.
00812     ///
00813     /// @param s1
00814     ///   String to be compared -- operand 1.
00815     /// @param s2
00816     ///   String to be compared -- operand 2.
00817     /// @param use_case
00818     ///   Whether to do a case sensitive compare(default is eCase), or a
00819     ///   case-insensitive compare (eNocase).
00820     /// @return
00821     ///   - 0, if s1 == s2.   
00822     ///   - Negative integer, if s1 < s2.   
00823     ///   - Positive integer, if s1 > s2.   
00824     /// @sa
00825     ///   CompareNocase(), Compare() versions with similar argument types.
00826     static int Compare(const string& s1, const string& s2,
00827                        ECase use_case = eCase);
00828 
00829     /// Case-sensitive equality of a substring with a pattern.
00830     ///
00831     /// @param str
00832     ///   String containing the substring to be compared.
00833     /// @param pos
00834     ///   Start position of substring to be compared.
00835     /// @param n
00836     ///   Number of characters in substring to be compared.
00837     /// @param pattern
00838     ///   String pattern (char*) to be compared with substring.
00839     /// @return
00840     ///   - true, if str[pos:pos+n) equals pattern.   
00841     ///   - false, otherwise
00842     /// @sa
00843     ///   Other forms of overloaded EqualCase() with differences in argument
00844     ///   types: char* vs. string&
00845     static bool EqualCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00846                            const char* pattern);
00847 
00848     /// Case-sensitive equality of a substring with a pattern.
00849     ///
00850     /// @param str
00851     ///   String containing the substring to be compared.
00852     /// @param pos
00853     ///   Start position of substring to be compared.
00854     /// @param n
00855     ///   Number of characters in substring to be compared.
00856     /// @param pattern
00857     ///   String pattern (string&) to be compared with substring.
00858     /// @return
00859     ///   - true, if str[pos:pos+n) equals pattern.   
00860     ///   - false, otherwise
00861     /// @sa
00862     ///   Other forms of overloaded EqualCase() with differences in argument
00863     ///   types: char* vs. string&
00864     static bool EqualCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00865                            const string& pattern);
00866 
00867     /// Case-sensitive equality of two strings -- char* version.
00868     ///
00869     /// @param s1
00870     ///   String to be compared -- operand 1.
00871     /// @param s2
00872     ///   String to be compared -- operand 2.
00873     /// @return
00874     ///   - true, if s1 equals s2 (case-sensitive compare).
00875     ///   - false, otherwise.
00876     /// @sa
00877     ///   EqualNocase(), Equal() versions with same argument types.
00878     static bool EqualCase(const char* s1, const char* s2);
00879 
00880     /// Case-sensitive equality of two strings -- string& version.
00881     ///
00882     /// @param s1
00883     ///   String to be compared -- operand 1.
00884     /// @param s2
00885     ///   String to be compared -- operand 2.
00886     /// @return
00887     ///   - true, if s1 equals s2 (case-sensitive compare).
00888     ///   - false, otherwise.
00889     /// @sa
00890     ///   EqualNocase(), Equal() versions with same argument types.
00891     static bool EqualCase(const string& s1, const string& s2);
00892 
00893     /// Case-insensitive equality of a substring with a pattern.
00894     ///
00895     /// @param str
00896     ///   String containing the substring to be compared.
00897     /// @param pos
00898     ///   Start position of substring to be compared.
00899     /// @param n
00900     ///   Number of characters in substring to be compared.
00901     /// @param pattern
00902     ///   String pattern (char*) to be compared with substring.
00903     /// @return
00904     ///   - true, if str[pos:pos+n) equals pattern (case-insensitive compare).
00905     ///   - false, otherwise.
00906     /// @sa
00907     ///   Other forms of overloaded EqualNocase() with differences in
00908     ///   argument types: char* vs. string&
00909     static bool EqualNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00910                              const char* pattern);
00911 
00912     /// Case-insensitive equality of a substring with a pattern.
00913     ///
00914     /// @param str
00915     ///   String containing the substring to be compared.
00916     /// @param pos
00917     ///   Start position of substring to be compared.
00918     /// @param n
00919     ///   Number of characters in substring to be compared.
00920     /// @param pattern
00921     ///   String pattern (string&) to be compared with substring.
00922     /// @return
00923     ///   - true, if str[pos:pos+n) equals pattern (case-insensitive compare).
00924     ///   - false, otherwise.
00925     /// @sa
00926     ///   Other forms of overloaded EqualNocase() with differences in
00927     ///   argument types: char* vs. string&
00928     static bool EqualNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00929                              const string& pattern);
00930 
00931     /// Case-insensitive equality of two strings -- char* version.
00932     ///
00933     /// @param s1
00934     ///   String to be compared -- operand 1.
00935     /// @param s2
00936     ///   String to be compared -- operand 2.
00937     /// @return
00938     ///   - true, if s1 equals s2 (case-insensitive compare).      
00939     ///   - false, otherwise.
00940     /// @sa
00941     ///   EqualCase(), Equal() versions with same argument types.
00942     static bool EqualNocase(const char* s1, const char* s2);
00943 
00944     /// Case-insensitive equality of two strings -- string& version.
00945     ///
00946     /// @param s1
00947     ///   String to be compared -- operand 1.
00948     /// @param s2
00949     ///   String to be compared -- operand 2.
00950     /// @return
00951     ///   - true, if s1 equals s2 (case-insensitive compare).      
00952     ///   - false, otherwise.
00953     /// @sa
00954     ///   EqualCase(), Equal() versions with same argument types.
00955     static bool EqualNocase(const string& s1, const string& s2);
00956 
00957     /// Test for equality of a substring with a pattern.
00958     ///
00959     /// @param str
00960     ///   String containing the substring to be compared.
00961     /// @param pos
00962     ///   Start position of substring to be compared.
00963     /// @param n
00964     ///   Number of characters in substring to be compared.
00965     /// @param pattern
00966     ///   String pattern (char*) to be compared with substring.
00967     /// @param use_case
00968     ///   Whether to do a case sensitive compare(eCase -- default), or a
00969     ///   case-insensitive compare (eNocase).
00970     /// @return
00971     ///   - true, if str[pos:pos+n) equals pattern.   
00972     ///   - false, otherwise.
00973     /// @sa
00974     ///   Other forms of overloaded Equal() with differences in argument
00975     ///   types: char* vs. string&
00976     static bool Equal(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
00977                        const char* pattern, ECase use_case = eCase);
00978 
00979     /// Test for equality of a substring with a pattern.
00980     ///
00981     /// @param str
00982     ///   String containing the substring to be compared.
00983     /// @param pos
00984     ///   Start position of substring to be compared.
00985     /// @param n
00986     ///   Number of characters in substring to be compared.
00987     /// @param pattern
00988     ///   String pattern (string&) to be compared with substring.
00989     /// @param use_case
00990     ///   Whether to do a case sensitive compare (default is eCase), or a
00991     ///   case-insensitive compare (eNocase).
00992     /// @return
00993     ///   - true, if str[pos:pos+n) equals pattern under the requested
00994     ///     case sensitivity.
00995     ///   - false, otherwise.
00996     /// @sa
00997     ///   Other forms of overloaded Equal() with differences in argument
00998     ///   types: char* vs. string&
00999     static bool Equal(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
01000                        const string& pattern, ECase use_case = eCase);
01001 
01002     /// Test for equality of two strings -- char* version.
01003     ///
01004     /// @param s1
01005     ///   String to be compared -- operand 1.
01006     /// @param s2
01007     ///   String to be compared -- operand 2.
01008     /// @param use_case
01009     ///   Whether to do a case sensitive compare (default is eCase), or a
01010     ///   case-insensitive compare (eNocase).
01011     /// @return
01012     ///   - true, if s1 equals s2 under the requested case
01013     ///     sensitivity.
01014     ///   - false, otherwise.
01015     /// @sa
01016     ///   EqualNocase(), Equal() versions with similar argument types.
01017     static bool Equal(const char* s1, const char* s2,
01018                        ECase use_case = eCase);
01019 
01020     /// Test for equality of two strings -- string&, char* version.
01021     ///
01022     /// @param s1
01023     ///   String to be compared -- operand 1.
01024     /// @param s2
01025     ///   String to be compared -- operand 2.
01026     /// @param use_case
01027     ///   Whether to do a case sensitive compare(default is eCase), or a
01028     ///   case-insensitive compare (eNocase).
01029     /// @return
01030     ///   - true, if s1 equals s2.   
01031     ///   - false, otherwise.
01032     /// @sa
01033     ///   EqualNocase(), Equal() versions with similar argument types.
01034     static bool Equal(const string& s1, const char* s2,
01035                        ECase use_case = eCase);
01036 
01037     /// Test for equality of two strings -- char*, string& version.
01038     ///
01039     /// @param s1
01040     ///   String to be compared -- operand 1.
01041     /// @param s2
01042     ///   String to be compared -- operand 2.
01043     /// @param use_case
01044     ///   Whether to do a case sensitive compare(default is eCase), or a
01045     ///   case-insensitive compare (eNocase).
01046     /// @return
01047     ///   - true, if s1 equals s2.   
01048     ///   - false, otherwise.
01049     /// @sa
01050     ///   EqualNocase(), Equal() versions with similar argument types.
01051     static bool Equal(const char* s1, const string& s2,
01052                        ECase use_case = eCase);
01053 
01054     /// Test for equality of two strings -- string& version.
01055     ///
01056     /// @param s1
01057     ///   String to be compared -- operand 1.
01058     /// @param s2
01059     ///   String to be compared -- operand 2.
01060     /// @param use_case
01061     ///   Whether to do a case sensitive compare(default is eCase), or a
01062     ///   case-insensitive compare (eNocase).
01063     /// @return
01064     ///   - true, if s1 equals s2.   
01065     ///   - false, otherwise.
01066     /// @sa
01067     ///   EqualNocase(), Equal() versions with similar argument types.
01068     static bool Equal(const string& s1, const string& s2,
01069                        ECase use_case = eCase);
01070 
01071     // NOTE.  On some platforms, "strn[case]cmp()" can work faster than their
01072     //        "Compare***()" counterparts.
01073 
01074     /// String compare.
01075     ///
01076     /// @param s1
01077     ///   String to be compared -- operand 1.
01078     /// @param s2
01079     ///   String to be compared -- operand 2.
01080     /// @return
01081     ///   - 0, if s1 == s2.   
01082     ///   - Negative integer, if s1 < s2.   
01083     ///   - Positive integer, if s1 > s2.   
01084     /// @sa
01085     ///   strncmp(), strcasecmp(), strncasecmp()
01086     static int strcmp(const char* s1, const char* s2);
01087 
01088     /// String compare up to a specified number of characters.
01089     ///
01090     /// @param s1
01091     ///   String to be compared -- operand 1.
01092     /// @param s2
01093     ///   String to be compared -- operand 2.
01094     /// @param n
01095     ///   Maximum number of characters to compare.
01096     /// @return
01097     ///   - 0, if s1 == s2.
01098     ///   - Negative integer, if s1 < s2.
01099     ///   - Positive integer, if s1 > s2.
01100     /// @sa
01101     ///   strcmp(), strcasecmp(), strncasecmp()
01102     static int strncmp(const char* s1, const char* s2, size_t n);
01103 
01104     /// Case-insensitive string compare.
01105     ///
01106     /// @param s1
01107     ///   String to be compared -- operand 1.
01108     /// @param s2
01109     ///   String to be compared -- operand 2.
01110     /// @return
01111     ///   - 0, if s1 == s2.   
01112     ///   - Negative integer, if s1 < s2.   
01113     ///   - Positive integer, if s1 > s2.   
01114     /// @sa
01115     ///   strcmp(), strncmp(), strncasecmp()
01116     static int strcasecmp(const char* s1, const char* s2);
01117 
01118     /// Case-insensitive string compare up to a specified number of chars.
01119     ///
01120     /// @param s1
01121     ///   String to be compared -- operand 1.
01122     /// @param s2
01123     ///   String to be compared -- operand 2.
01124     /// @param n
01125     ///   Maximum number of characters to compare.
01126     /// @return
01127     ///   0 if s1 == s2, negative if s1 < s2, positive if s1 > s2.
01128     /// @sa
01129     ///   strcmp(), strncmp(), strcasecmp()
01130     static int strncasecmp(const char* s1, const char* s2, size_t n);
01131 
01132     /// Wrapper for the function strftime() that corrects handling %D and %T
01133     /// time formats on MS Windows.
01134     static size_t strftime (char* s, size_t maxsize, const char* format,
01135                             const struct tm* timeptr);
01136 
01137     /// Match "str" against the "mask".
01138     ///
01139     /// This function does not use regular expressions.
01140     /// @param str
01141     ///   String to match.
01142     /// @param mask
01143     ///   Mask used to match string "str". It can contain the following
01144     ///   wildcard characters:
01145     ///     ? - matches any single character in the string.
01146     ///     * - matches any number of characters in the string.
01147     /// @param use_case
01148     ///   Whether to do a case sensitive compare (eCase -- default), or a
01149     ///   case-insensitive compare (eNocase).
01150     /// @return
01151     ///   TRUE if "str" matches "mask", and FALSE otherwise.
01152     /// @sa
01153     ///    CRegexp, CRegexpUtil
01154     static bool MatchesMask(const char *str, const char *mask,
01155                             ECase use_case = eCase);
01156 
01157     /// Match "str" against the "mask".
01158     ///
01159     /// This function does not use regular expressions.
01160     /// @param str
01161     ///   String to match.
01162     /// @param mask
01163     ///   Mask used to match string "str". It can contain the following
01164     ///   wildcard characters:
01165     ///     ? - matches any single character in the string.
01166     ///     * - matches any number of characters in the string.
01167     /// @param use_case
01168     ///   Whether to do a case sensitive compare (eCase -- default), or a
01169     ///   case-insensitive compare (eNocase).
01170     /// @return
01171     ///   TRUE if "str" matches "mask", and FALSE otherwise.
01172     /// @sa
01173     ///    CRegexp, CRegexpUtil
01174     static bool MatchesMask(const string& str, const string& mask,
01175                             ECase use_case = eCase);
01176 
01177     // The following 4 methods change the passed string, then return it
01178 
01179     /// Convert string to lower case -- string& version.
01180     /// 
01181     /// @param str
01182     ///   String to be converted.
01183     /// @return
01184     ///   Lower cased string.
01185     static string& ToLower(string& str);
01186 
01187     /// Convert string to lower case -- char* version.
01188     /// 
01189     /// @param str
01190     ///   String to be converted.
01191     /// @return
01192     ///   Lower cased string.
01193     static char* ToLower(char*   str);
01194 
01195     /// Convert string to upper case -- string& version.
01196     /// 
01197     /// @param str
01198     ///   String to be converted.
01199     /// @return
01200     ///   Upper cased string.
01201     static string& ToUpper(string& str);
01202 
01203     /// Convert string to upper case -- char* version.
01204     /// 
01205     /// @param str
01206     ///   String to be converted.
01207     /// @return
01208     ///   Upper cased string.
01209     static char* ToUpper(char*   str);
01210 
01211 private:
01212     /// Privatized ToLower() with const char* parameter to prevent passing of 
01213     /// constant strings.
01214     static void/*dummy*/ ToLower(const char* /*dummy*/);
01215 
01216     /// Privatized ToUpper() with const char* parameter to prevent passing of 
01217     /// constant strings.
01218     static void/*dummy*/ ToUpper(const char* /*dummy*/);
01219 
01220 public:
01221     /// Check if a string starts with a specified prefix value.
01222     ///
01223     /// @param str
01224     ///   String to check.
01225     /// @param start
01226     ///   Prefix value to check for.
01227     /// @param use_case
01228     ///   Whether to do a case sensitive compare(default is eCase), or a
01229     ///   case-insensitive compare (eNocase) while checking.
01230     static bool StartsWith(const string& str, const string& start,
01231                            ECase use_case = eCase);
01232 
01233     /// Check if a string starts with a specified prefix value.
01234     ///
01235     /// @param str
01236     ///   String to check.
01237     /// @param start
01238     ///   Prefix value to check for.
01239     /// @param use_case
01240     ///   Whether to do a case sensitive compare(default is eCase), or a
01241     ///   case-insensitive compare (eNocase) while checking.
01242     static bool StartsWith(const string& str, const char* start,
01243                            ECase use_case = eCase);
01244 
01245     /// Check if a string starts with a specified character value.
01246     ///
01247     /// @param str
01248     ///   String to check.
01249     /// @param start
01250     ///   Character value to check for.
01251     /// @param use_case
01252     ///   Whether to do a case sensitive compare(default is eCase), or a
01253     ///   case-insensitive compare (eNocase) while checking.
01254     static bool StartsWith(const string& str, char start,
01255                            ECase use_case = eCase);
01256 
01257     /// Check if a string ends with a specified suffix value.
01258     ///
01259     /// @param str
01260     ///   String to check.
01261     /// @param end
01262     ///   Suffix value to check for.
01263     /// @param use_case
01264     ///   Whether to do a case sensitive compare(default is eCase), or a
01265     ///   case-insensitive compare (eNocase) while checking.
01266     static bool EndsWith(const string& str, const string& end,
01267                          ECase use_case = eCase);
01268 
01269     /// Check if a string ends with a specified character value.
01270     ///
01271     /// @param str
01272     ///   String to check.
01273     /// @param end
01274     ///   Character value to check for.
01275     /// @param use_case
01276     ///   Whether to do a case sensitive compare(default is eCase), or a
01277     ///   case-insensitive compare (eNocase) while checking.
01278     static bool EndsWith(const string& str, char end,
01279                          ECase use_case = eCase);
01280 
01281     /// Check if a string is blank (has no text).
01282     ///
01283     /// @param str
01284     ///   String to check.
01285     /// @param pos
01286     ///   starting position (default 0)
01287     static bool IsBlank(const string& str, SIZE_TYPE pos = 0);
01288 
01289     /// Whether it is the first or last occurrence.
01290     enum EOccurrence {
01291         eFirst,             ///< First occurrence
01292         eLast               ///< Last occurrence
01293     };
01294 
01295     /// Find the pattern in the specified range of a string.
01296     ///
01297     /// @param str
01298     ///   String to search.
01299     /// @param pattern
01300     ///   Pattern to search for in "str".
01301     /// @param start
01302     ///   Position in "str" to start search from -- default of 0 means start
01303     ///   the search from the beginning of the string.
01304     /// @param end
01305     ///   Position in "str" to search up to -- default of NPOS means
01306     ///   to search to the end of the string.
01307     /// @param which
01308     ///   When set to eFirst, this means to find the first occurrence of
01309     ///   "pattern" in "str". When set to eLast, this means to find the last
01310     ///    occurrence of "pattern" in "str".
01311     /// @param use_case
01312     ///   Whether to do a case sensitive compare (default is eCase), or a
01313     ///   case-insensitive compare (eNocase) while searching for the pattern.
01314     /// @return
01315     ///   - The start of the first or last (depending on "which" parameter)
01316     ///   occurrence of "pattern" in "str", within the string interval
01317     ///   ["start", "end"], or
01318     ///   - NPOS if there is no occurrence of the pattern.
01319     static SIZE_TYPE Find(const string& str, const string& pattern,
01320                           SIZE_TYPE start = 0, SIZE_TYPE end = NPOS,
01321                           EOccurrence which = eFirst,
01322                           ECase use_case = eCase);
01323 
01324     /// Find the pattern in the specified range of a string using a case
01325     /// sensitive search.
01326     ///
01327     /// @param str
01328     ///   String to search.
01329     /// @param pattern
01330     ///   Pattern to search for in "str".
01331     /// @param start
01332     ///   Position in "str" to start search from -- default of 0 means start
01333     ///   the search from the beginning of the string.
01334     /// @param end
01335     ///   Position in "str" to search up to -- default of NPOS means
01336     ///   to search to the end of the string.
01337     /// @param which
01338     ///   When set to eFirst, this means to find the first occurrence of
01339     ///   "pattern" in "str". When set to eLast, this means to find the last
01340     ///    occurrence of "pattern" in "str".
01341     /// @return
01342     ///   - The start of the first or last (depending on "which" parameter)
01343     ///   occurrence of "pattern" in "str", within the string interval
01344     ///   ["start", "end"], or
01345     ///   - NPOS if there is no occurrence of the pattern.
01346     static SIZE_TYPE FindCase  (const string& str, const string& pattern,
01347                                 SIZE_TYPE start = 0, SIZE_TYPE end = NPOS,
01348                                 EOccurrence which = eFirst);
01349 
01350     /// Find the pattern in the specified range of a string using a case
01351     /// insensitive search.
01352     ///
01353     /// @param str
01354     ///   String to search.
01355     /// @param pattern
01356     ///   Pattern to search for in "str".
01357     /// @param start
01358     ///   Position in "str" to start search from -- default of 0 means start
01359     ///   the search from the beginning of the string.
01360     /// @param end
01361     ///   Position in "str" to search up to -- default of NPOS means
01362     ///   to search to the end of the string.
01363     /// @param which
01364     ///   When set to eFirst, this means to find the first occurrence of
01365     ///   "pattern" in "str". When set to eLast, this means to find the last
01366     ///    occurrence of "pattern" in "str".
01367     /// @return
01368     ///   - The start of the first or last (depending on "which" parameter)
01369     ///   occurrence of "pattern" in "str", within the string interval
01370     ///   ["start", "end"], or
01371     ///   - NPOS if there is no occurrence of the pattern.
01372     static SIZE_TYPE FindNoCase(const string& str, const string& pattern,
01373                                 SIZE_TYPE start = 0, SIZE_TYPE end = NPOS,
01374                                 EOccurrence which = eFirst);
01375 
01376     /// Test for presence of a given string in a list or vector of strings
01377 
01378     static const string* Find(const list<string>& lst, const string& val,
01379                                ECase use_case = eCase);
01380 
01381     static const string* FindCase(const list<string>& lst, const string& val);
01382 
01383     static const string* FindNoCase(const list<string>& lst, const string& val);
01384 
01385     static const string* Find(const vector<string>& vec, const string& val,
01386                               ECase use_case = eCase);
01387 
01388     static const string* FindCase(const vector<string>& vec, const string& val);
01389 
01390     static const string* FindNoCase(const vector<string>& vec,
01391                                     const string& val);
01392 
01393 
01394     /// Which end to truncate a string.
01395     enum ETrunc {
01396         eTrunc_Begin,  ///< Truncate leading spaces only
01397         eTrunc_End,    ///< Truncate trailing spaces only
01398         eTrunc_Both    ///< Truncate spaces at both begin and end of string
01399     };
01400 
01401     /// Truncate spaces in a string.
01402     ///
01403     /// @param str
01404     ///   String to truncate spaces from.
01405     /// @param where
01406     ///   Which end of the string to truncate space from. Default is to
01407     ///   truncate space from both ends (eTrunc_Both).
01408     static string TruncateSpaces(const string& str, ETrunc where=eTrunc_Both);
01409     static CTempString TruncateSpaces(const CTempString& str,
01410                                       ETrunc where=eTrunc_Both);
01411     static CTempString TruncateSpaces(const char* str,
01412                                       ETrunc where=eTrunc_Both);
01413 
01414     /// Truncate spaces in a string (in-place)
01415     ///
01416     /// @param str
01417     ///   String to truncate spaces from.
01418     /// @param where
01419     ///   Which end of the string to truncate space from. Default is to
01420     ///   truncate space from both ends (eTrunc_Both).
01421     static void TruncateSpacesInPlace(string& str, ETrunc where=eTrunc_Both);
01422     
01423     /// Replace occurrences of a substring within a string.
01424     ///
01425     /// @param src
01426     ///   Source string from which specified substring occurrences are
01427     ///   replaced.
01428     /// @param search
01429     ///   Substring value in "src" that is replaced.
01430     /// @param replace
01431     ///   Replace "search" substring with this value.
01432     /// @param dst
01433     ///   Result of replacing the "search" string with "replace" in "src".
01434     ///   This value is also returned by the function.
01435     /// @param start_pos
01436     ///   Position to start search from.
01437     /// @param max_replace
01438     ///   Replace no more than "max_replace" occurrences of substring "search"
01439     ///   If "max_replace" is zero(default), then replace all occurrences with
01440     ///   "replace".
01441     /// @return
01442     ///   Result of replacing the "search" string with "replace" in "src". This
01443     ///   value is placed in "dst" as well.
01444     /// @sa
01445     ///   Version of Replace() that returns a new string.
01446     static string& Replace(const string& src,
01447                            const string& search,
01448                            const string& replace,
01449                            string&       dst,
01450                            SIZE_TYPE     start_pos = 0,
01451                            SIZE_TYPE     max_replace = 0);
01452 
01453     /// Replace occurrences of a substring within a string and return the
01454     /// result as a new string.
01455     ///
01456     /// @param src
01457     ///   Source string from which specified substring occurrences are
01458     ///   replaced.
01459     /// @param search
01460     ///   Substring value in "src" that is replaced.
01461     /// @param replace
01462     ///   Replace "search" substring with this value.
01463     /// @param start_pos
01464     ///   Position to start search from.
01465     /// @param max_replace
01466     ///   Replace no more than "max_replace" occurrences of substring "search".
01467     ///   If "max_replace" is zero (default), then replace all occurrences with
01468     ///   "replace".
01469     /// @return
01470     ///   A new string containing the result of replacing the "search" string
01471     ///   with "replace" in "src".
01472     /// @sa
01473     ///   Version of Replace() that has a destination parameter to accept
01474     ///   result.
01475     static string Replace(const string& src,
01476                           const string& search,
01477                           const string& replace,
01478                           SIZE_TYPE     start_pos = 0,
01479                           SIZE_TYPE     max_replace = 0);
01480 
01481     /// Replace occurrences of a substring within a string.
01482     ///
01483     /// On some platforms this function is much faster than Replace()
01484     /// if sizes of "search" and "replace" strings are equal.
01485     /// Otherwise, the performance is mainly the same.
01486     /// @param src
01487     ///   String where specified substring occurrences are replaced.
01488     ///   This value is also returned by the function.
01489     /// @param search
01490     ///   Substring value in "src" that is replaced.
01491     /// @param replace
01492     ///   Replace "search" substring with this value.
01493     /// @param start_pos
01494     ///   Position to start search from.
01495     /// @param max_replace
01496     ///   Replace no more than "max_replace" occurrences of substring "search".
01497     ///   If "max_replace" is zero (default), then replace all occurrences with
01498     ///   "replace".
01499     /// @return
01500     ///   Result of replacing the "search" string with "replace" in "src".
01501     /// @sa
01502     ///   Replace
01503     static string& ReplaceInPlace(string& src,
01504                                   const string& search,
01505                                   const string& replace,
01506                                   SIZE_TYPE     start_pos = 0,
01507                                   SIZE_TYPE     max_replace = 0);
01508 
01509     /// Whether to merge adjacent delimiters in Split and Tokenize.
01510     enum EMergeDelims {
01511         eNoMergeDelims,     ///< No merging of delimiters -- default for
01512                             ///< Tokenize()
01513         eMergeDelims        ///< Merge the delimiters -- default for Split()
01514     };
01515 
01516 
01517     /// Split a string using specified delimiters.
01518     ///
01519     /// @param str
01520     ///   String to be split.
01521     /// @param delim
01522     ///   Delimiters used to split string "str".
01523     /// @param arr
01524     ///   The split tokens are added to the list "arr" and also returned
01525     ///   by the function. 
01526     /// @param merge
01527     ///   Whether to merge the delimiters or not. The default setting of
01528     ///   eMergeDelims means that delimiters that immediately follow each other
01529     ///   are treated as one delimiter.
01530     /// @param token_pos
01531     ///   Optional array for the tokens' positions in "str".
01532     /// @return 
01533     ///   The list "arr" is also returned.
01534     /// @sa
01535     ///   Tokenize()
01536     static list<string>& Split(const string& str,
01537                                const string& delim,
01538                                list<string>& arr,
01539                                EMergeDelims  merge = eMergeDelims,
01540                                vector<SIZE_TYPE>* token_pos = NULL);
01541 
01542     /// Tokenize a string using the specified set of char delimiters.
01543     ///
01544     /// @param str
01545     ///   String to be tokenized.
01546     /// @param delim
01547     ///   Set of char delimiters used to tokenize string "str".
01548     ///   If delimiter is empty, then input string is appended to "arr" as is.
01549     /// @param arr
01550     ///   The tokens defined in "str" by using symbols from "delim" are added
01551     ///   to the vector "arr" and also returned by the function.
01552     /// @param merge
01553     ///   Whether to merge the delimiters or not. The default setting of
01554     ///   eNoMergeDelims means that delimiters that immediately follow each
01555     ///   other are treated as separate delimiters.
01556     /// @param token_pos
01557     ///   Optional array for the tokens' positions in "str".
01558     /// @return
01559     ///   The vector "arr" is also returned.
01560     /// @sa
01561     ///   Split, TokenizePattern, SplitInTwo
01562     static vector<string>& Tokenize(const string&      str,
01563                                     const string&      delim,
01564                                     vector<string>&    arr,
01565                                     EMergeDelims       merge = eNoMergeDelims,
01566                                     vector<SIZE_TYPE>* token_pos = NULL);
01567 
01568     /// Tokenize a string using the specified delimiter (string).
01569     ///
01570     /// @param str
01571     ///   String to be tokenized.
01572     /// @param delim
01573     ///   Delimiter used to tokenize string "str".
01574     ///   If delimiter is empty, then input string is appended to "arr" as is.
01575     /// @param arr
01576     ///   The tokens defined in "str" by using delimiter "delim" are added
01577     ///   to the vector "arr" and also returned by the function.
01578     /// @param merge
01579     ///   Whether to merge the delimiters or not. The default setting of
01580     ///   eNoMergeDelims means that delimiters that immediately follow each
01581     ///   other are treated as separate delimiters.
01582     /// @param token_pos
01583     ///   Optional array for the tokens' positions in "str".
01584     /// @return
01585     ///   The vector "arr" is also returned.
01586     /// @sa
01587     ///   Split, Tokenize
01588     static
01589     vector<string>& TokenizePattern(const string&      str,
01590                                     const string&      delim,
01591                                     vector<string>&    arr,
01592                                     EMergeDelims       merge = eNoMergeDelims,
01593                                     vector<SIZE_TYPE>* token_pos = NULL);
01594 
01595     /// Split a string into two pieces using the specified delimiters.
01596     ///
01597     /// @param str
01598     ///   String to be split.
01599     /// @param delim
01600     ///   Delimiters used to split string "str".
01601     /// @param str1
01602     ///   The sub-string of "str" before the first character of "delim".
01603     ///   It will not contain any characters in "delim".
01604     ///   Will be empty if "str" begins with a "delim" character.
01605     /// @param str2
01606     ///   The sub-string of "str" after the first character of "delim" found.
01607     ///   May contain "delim" characters.
01608     ///   Will be empty if "str" had no "delim" characters or ended
01609     ///   with the first "delim" character.
01610     /// @return
01611     ///   true if a symbol from "delim" was found in "str", false if not.
01612     ///   This lets you distinguish when there were no delimiters and when
01613     ///   the very last character was the first delimiter.
01614     /// @sa
01615     ///   Split, Tokenize, TokenizePattern
01616     static bool SplitInTwo(const string& str, 
01617                            const string& delim,
01618                            string&       str1,
01619                            string&       str2);
01620                          
01621 
01622     /// Join strings using the specified delimiter.
01623     ///
01624     /// @param arr
01625     ///   Array of strings to be joined.
01626     /// @param delim
01627     ///   Delimiter used to join the string.
01628     /// @return 
01629     ///   The strings in "arr" are joined into a single string, separated
01630     ///   with "delim".
01631     static string Join(const list<string>& arr,   const string& delim);
01632     static string Join(const vector<string>& arr, const string& delim);
01633 
01634     /// How to display printable strings.
01635     ///
01636     /// Assists in making a printable version of "str".
01637     enum EPrintableMode {
01638         fNewLine_Quote    = 0,   ///< Display "\n" instead of actual linebreak
01639         eNewLine_Quote    = fNewLine_Quote,
01640         fNewLine_Passthru = 1,   ///< Break the line at every "\n" occurrence
01641         eNewLine_Passthru = fNewLine_Passthru,
01642         fPrintable_Full   = 2    ///< Show all octal digits at all times
01643     };
01644     typedef int TPrintableMode;  ///< Bitwise OR of EPrintableMode flags
01645 
01646     /// Get a printable version of the specified string. 
01647     ///
01648     /// All non-printable characters will be represented as "\r", "\n", "\v",
01649     /// "\t", "\"", "\\", etc, or "\ooo" where 'ooo' is the octal code of the
01650     /// character.  The resultant string is a well-formed C string literal,
01651     /// which, without alterations, can be compiled by a C/C++ compiler.
01652     /// In many instances, octal representations of non-printable characters
01653     /// can be reduced to take less than all 3 digits, if there is no
01654     /// ambiguity in the interpretation.  fPrintable_Full cancels the
01655     /// reduction, and forces to produce full 3-digit octal codes throughout.
01656     ///
01657     /// @param str
01658     ///   The string whose printable version is wanted.
01659     /// @param mode
01660     ///   How to display the string.  The default setting of fNewLine_Quote
01661     ///   displays the new lines as "\n", and uses the octal code reduction.
01662     ///   When set to fNewLine_Passthru, line breaks are actually
01663     ///   produced on output but preceded with trailing backslashes.
01664     /// @return
01665     ///   Return a printable version of "str".
01666     /// @sa
01667     ///   ParseEscapes
01668     static string PrintableString(const string&  str,
01669                                   TPrintableMode mode = eNewLine_Quote);
01670 
01671     /// Parse C-style escape sequences in the specified string, including
01672     /// all those produced by PrintableString.
01673     static string ParseEscapes(const string& str);
01674 
01675     /// How to wrap the words in a string to a new line.
01676     enum EWrapFlags {
01677         fWrap_Hyphenate  = 0x1, ///< Add a hyphen when breaking words?
01678         fWrap_HTMLPre    = 0x2, ///< Wrap as preformatted HTML?
01679         fWrap_FlatFile   = 0x4  ///< Wrap for flat file use.
01680     };
01681     typedef int TWrapFlags;     ///< Binary OR of "EWrapFlags"
01682 
01683     /// Encode a string for C/C++.
01684     ///
01685     /// Synonym for PrintableString().
01686     /// @sa PrintableString
01687     static string CEncode(const string& str);
01688 
01689     /// Encode a string for JavaScript.
01690     ///
01691     /// Similar to CEncode(), but processes some symbols in a different way.
01692     /// @sa PrintableString, CEncode
01693     static string JavaScriptEncode(const string& str);
01694 
01695     /// Encode a string for XML.
01696     ///
01697     /// Replace relevant characters by predefined entities.
01698     static string XmlEncode(const string& str);
01699 
01700     /// Encode a string for JSON.
01701     static string JsonEncode(const string& str);
01702 
01703     /// URL-encode flags
01704     enum EUrlEncode {
01705         eUrlEnc_SkipMarkChars,    ///< Do not convert chars like '!', '(' etc.
01706         eUrlEnc_ProcessMarkChars, ///< Convert all non-alphanum chars,
01707                                   ///< spaces are converted to '+'
01708         eUrlEnc_PercentOnly,      ///< Convert all non-alphanum chars including
01709                                   ///< space and '%' to %## format
01710         eUrlEnc_Path,             ///< Same as ProcessMarkChars but preserves
01711                                   ///< valid path characters ('/', '.')
01712 
01713         eUrlEnc_URIScheme,        ///< Encode scheme part of a URI.
01714         eUrlEnc_URIUserinfo,      ///< Encode userinfo part of a URI.
01715         eUrlEnc_URIHost,          ///< Encode host part of a URI.
01716         eUrlEnc_URIPath,          ///< Encode path part of a URI.
01717         eUrlEnc_URIQueryName,     ///< Encode query part of a URI, arg name.
01718         eUrlEnc_URIQueryValue,    ///< Encode query part of a URI, arg value.
01719         eUrlEnc_URIFragment,      ///< Encode fragment part of a URI.
01720 
01721         eUrlEnc_None              ///< Do not encode
01722     };
01723     /// URL decode flags
01724     enum EUrlDecode {
01725         eUrlDec_All,              ///< Decode '+' to space
01726         eUrlDec_Percent           ///< Decode only %XX
01727     };
01728     /// URL-encode string
01729     static string URLEncode(const string& str,
01730                             EUrlEncode flag = eUrlEnc_SkipMarkChars);
01731 
01732     /// SQL-encode string
01733     ///
01734     /// There are some assumptions/notes about the function:
01735     /// 1. Only for MS SQL and Sybase.
01736     /// 2. Only for string values in WHERE and LIKE clauses.
01737     /// 3. The ' symbol must not be used as an escape symbol in LIKE clause.
01738     /// 4. It must not be used for non-string values.
01739     /// 5. It expects a string without any outer quotes, and
01740     ///    it adds single quotes to the returned string.
01741     /// 6. It expects UTF-8 (including its subsets, ASCII and Latin1) or
01742     ///    Win1252 string, and the input encoding is preserved.
01743     /// @param str
01744     ///   The string to encode
01745     /// @return
01746     ///   Encoded string with added outer single quotes
01747     static CStringUTF8 SQLEncode(const CStringUTF8& str);
01748 
01749     /// URL-decode string
01750     static string URLDecode(const string& str,
01751                             EUrlDecode flag = eUrlDec_All);
01752     /// URL-decode string to itself
01753     static void URLDecodeInPlace(string& str,
01754                                  EUrlDecode flag = eUrlDec_All);
01755     /// Check if the string needs the requested URL-encoding
01756     static bool NeedsURLEncoding(const string& str,
01757                                 EUrlEncode flag = eUrlEnc_SkipMarkChars);
01758 
01759     /// Check if the string contains a valid IP address
01760     static bool IsIPAddress(const string& ip);
01761 
01762     /// Wrap the specified string into lines of a specified width -- prefix,
01763     /// prefix1 default version.
01764     ///
01765     /// Split string "str" into lines of width "width" and add the
01766     /// resulting lines to the list "arr". Normally, all
01767     /// lines will begin with "prefix" (counted against "width"),
01768     /// but the first line will instead begin with "prefix1" if
01769     /// you supply it.
01770     ///
01771     /// @param str
01772     ///   String to be split into wrapped lines.
01773     /// @param width
01774     ///   Width of each wrapped line.
01775     /// @param arr
01776     ///   List of strings containing wrapped lines.
01777     /// @param flags
01778     ///   How to wrap the words to a new line. See EWrapFlags documentation.
01779     /// @param prefix
01780     ///   The prefix string added to each wrapped line, except the first line,
01781     ///   unless "prefix1" is set.
01782     ///   If "prefix" is set to 0(default), do not add a prefix string to the
01783     ///   wrapped lines.
01784     /// @param prefix1
01785     ///   The prefix string for the first line. Use this for the first line
01786     ///   instead of "prefix".
01787     ///   If "prefix1" is set to 0(default), do not add a prefix string to the
01788     ///   first line.
01789     /// @return
01790     ///   Return "arr", the list of wrapped lines.
01791     static list<string>& Wrap(const string& str, SIZE_TYPE width,
01792                               list<string>& arr, TWrapFlags flags = 0,
01793                               const string* prefix = 0,
01794                               const string* prefix1 = 0);
01795 
01796     /// Wrap the specified string into lines of a specified width -- prefix1
01797     /// default version.
01798     ///
01799     /// Split string "str" into lines of width "width" and add the
01800     /// resulting lines to the list "arr". Normally, all
01801     /// lines will begin with "prefix" (counted against "width"),
01802     /// but the first line will instead begin with "prefix1" if
01803     /// you supply it.
01804     ///
01805     /// @param str
01806     ///   String to be split into wrapped lines.
01807     /// @param width
01808     ///   Width of each wrapped line.
01809     /// @param arr
01810     ///   List of strings containing wrapped lines.
01811     /// @param flags
01812     ///   How to wrap the words to a new line. See EWrapFlags documentation.
01813     /// @param prefix
01814     ///   The prefix string added to each wrapped line, except the first line,
01815     ///   unless "prefix1" is set.
01816     ///   If "prefix" is set to 0, do not add a prefix string to the wrapped
01817     ///   lines.
01818     /// @param prefix1
01819     ///   The prefix string for the first line. Use this for the first line
01820     ///   instead of "prefix".
01821     ///   If "prefix1" is set to 0(default), do not add a prefix string to the
01822     ///   first line.
01823     /// @return
01824     ///   Return "arr", the list of wrapped lines.
01825     static list<string>& Wrap(const string& str, SIZE_TYPE width,
01826                               list<string>& arr, TWrapFlags flags,
01827                               const string& prefix, const string* prefix1 = 0);
01828 
01829     /// Wrap the specified string into lines of a specified width.
01830     ///
01831     /// Split string "str" into lines of width "width" and add the
01832     /// resulting lines to the list "arr". Normally, all
01833     /// lines will begin with "prefix" (counted against "width"),
01834     /// but the first line will instead begin with "prefix1" if
01835     /// you supply it.
01836     ///
01837     /// @param str
01838     ///   String to be split into wrapped lines.
01839     /// @param width
01840     ///   Width of each wrapped line.
01841     /// @param arr
01842     ///   List of strings containing wrapped lines.
01843     /// @param flags
01844     ///   How to wrap the words to a new line. See EWrapFlags documentation.
01845     /// @param prefix
01846     ///   The prefix string added to each wrapped line, except the first line,
01847     ///   unless "prefix1" is set.
01848     ///   If "prefix" is set to 0, do not add a prefix string to the wrapped
01849     ///   lines.
01850     /// @param prefix1
01851     ///   The prefix string for the first line. Use this for the first line
01852     ///   instead of "prefix".
01853     ///   If "prefix1" is set to 0, do not add a prefix string to the first
01854     ///   line.
01855     /// @return
01856     ///   Return "arr", the list of wrapped lines.
01857     static list<string>& Wrap(const string& str, SIZE_TYPE width,
01858                               list<string>& arr, TWrapFlags flags,
01859                               const string& prefix, const string& prefix1);
01860 
01861 
01862     /// Wrap the list using the specified criteria -- default prefix, 
01863     /// prefix1 version.
01864     ///
01865     /// WrapList() is similar to Wrap(), but tries to avoid splitting any
01866     /// elements of the list to be wrapped. Also, the "delim" only applies
01867     /// between elements on the same line; if you want everything to end with
01868     /// commas or such, you should add them first.
01869     ///
01870     /// @param l
01871     ///   The list to be wrapped.
01872     /// @param width
01873     ///   Width of each wrapped line.
01874     /// @param delim
01875     ///   Delimiters used to split elements on the same line.
01876     /// @param arr
01877     ///   List containing the wrapped list result.
01878     /// @param flags
01879     ///   How to wrap the words to a new line. See EWrapFlags documentation.
01880     /// @param prefix
01881     ///   The prefix string added to each wrapped line, except the first line,
01882     ///   unless "prefix1" is set.
01883     ///   If "prefix" is set to 0(default), do not add a prefix string to the
01884     ///   wrapped lines.
01885     /// @param prefix1
01886     ///   The prefix string for the first line. Use this for the first line
01887     ///   instead of "prefix".
01888     ///   If "prefix1" is set to 0(default), do not add a prefix string to the
01889     ///   first line.
01890     /// @return
01891     ///   Return "arr", the wrapped list.
01892     static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
01893                                   const string& delim, list<string>& arr,
01894                                   TWrapFlags flags = 0,
01895                                   const string* prefix = 0,
01896                                   const string* prefix1 = 0);
01897 
01898     /// Wrap the list using the specified criteria -- default prefix1 version.
01899     ///
01900     /// WrapList() is similar to Wrap(), but tries to avoid splitting any
01901     /// elements of the list to be wrapped. Also, the "delim" only applies
01902     /// between elements on the same line; if you want everything to end with
01903     /// commas or such, you should add them first.
01904     ///
01905     /// @param l
01906     ///   The list to be wrapped.
01907     /// @param width
01908     ///   Width of each wrapped line.
01909     /// @param delim
01910     ///   Delimiters used to split elements on the same line.
01911     /// @param arr
01912     ///   List containing the wrapped list result.
01913     /// @param flags
01914     ///   How to wrap the words to a new line. See EWrapFlags documentation.
01915     /// @param prefix
01916     ///   The prefix string added to each wrapped line, except the first line,
01917     ///   unless "prefix1" is set.
01918     ///   If "prefix" is set to 0, do not add a prefix string to the
01919     ///   wrapped lines.
01920     /// @param prefix1
01921     ///   The prefix string for the first line. Use this for the first line
01922     ///   instead of "prefix".
01923     ///   If "prefix1" is set to 0(default), do not add a prefix string to the
01924     ///   first line.
01925     /// @return
01926     ///   Return "arr", the wrapped list.
01927     static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
01928                                   const string& delim, list<string>& arr,
01929                                   TWrapFlags flags, const string& prefix,
01930                                   const string* prefix1 = 0);
01931         
01932     /// Wrap the list using the specified criteria.
01933     ///
01934     /// WrapList() is similar to Wrap(), but tries to avoid splitting any
01935     /// elements of the list to be wrapped. Also, the "delim" only applies
01936     /// between elements on the same line; if you want everything to end with
01937     /// commas or such, you should add them first.
01938     ///
01939     /// @param l
01940     ///   The list to be wrapped.
01941     /// @param width
01942     ///   Width of each wrapped line.
01943     /// @param delim
01944     ///   Delimiters used to split elements on the same line.
01945     /// @param arr
01946     ///   List containing the wrapped list result.
01947     /// @param flags
01948     ///   How to wrap the words to a new line. See EWrapFlags documentation.
01949     /// @param prefix
01950     ///   The prefix string added to each wrapped line, except the first line,
01951     ///   unless "prefix1" is set.
01952     ///   If "prefix" is set to 0, do not add a prefix string to the
01953     ///   wrapped lines.
01954     /// @param prefix1
01955     ///   The prefix string for the first line. Use this for the first line
01956     ///   instead of "prefix".
01957     ///   If "prefix1" is set to 0, do not add a prefix string to the
01958     ///   first line.
01959     /// @return
01960     ///   Return "arr", the wrapped list.
01961     static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
01962                                   const string& delim, list<string>& arr,
01963                                   TWrapFlags flags, const string& prefix,
01964                                   const string& prefix1);
01965 
01966     /// Search for a field
01967     ///
01968     /// @param str
01969     ///   C or C++ string to search in.
01970     /// @param field_no
01971     ///   Zero based field number.
01972     /// @param delimiters
01973     ///   Single character delimiters.
01974     /// @param merge
01975     ///   Whether to merge or not adjacent delimiters. Default: not to merge.
01976     /// @return
01977     ///   Found field; or empty string if the required field is not found.
01978     static string GetField(const CTempString& str,
01979                            size_t             field_no,
01980                            const CTempString& delimiters,
01981                            EMergeDelims       merge = eNoMergeDelims);
01982 
01983     /// Search for a field
01984     ///
01985     /// @param str
01986     ///   C or C++ string to search in.
01987     /// @param field_no
01988     ///   Zero based field number.
01989     /// @param delimiter
01990     ///   Single character delimiter.
01991     /// @param merge
01992     ///   Whether to merge or not adjacent delimiters. Default: not to merge.
01993     /// @return
01994     ///   Found field; or empty string if the required field is not found.
01995     static string GetField(const CTempString& str,
01996                            size_t             field_no,
01997                            char               delimiter,
01998                            EMergeDelims       merge = eNoMergeDelims);
01999 
02000     /// Search for a field.
02001     /// Avoid memory allocation at the expense of some usage safety.
02002     ///
02003     /// @param str
02004     ///   C or C++ string to search in.
02005     /// @param field_no
02006     ///   Zero based field number.
02007     /// @param delimiters
02008     ///   Single character delimiters.
02009     /// @param merge
02010     ///   Whether to merge or not adjacent delimiters. Default: not to merge.
02011     /// @return
02012     ///   Found field; or empty string if the required field is not found.
02013     /// @warning
02014     ///   The return value stores a pointer to the input string 'str' so
02015     ///   the return object validity time matches lifetime of the input 'str'
02016     static
02017     CTempString GetField_Unsafe(const CTempString& str,
02018                                 size_t             field_no,
02019                                 const CTempString& delimiters,
02020                                 EMergeDelims       merge = eNoMergeDelims);
02021 
02022     /// Search for a field.
02023     /// Avoid memory allocation at the expense of some usage safety.
02024     ///
02025     /// @param str
02026     ///   C or C++ string to search in.
02027     /// @param field_no
02028     ///   Zero-based field number.
02029     /// @param delimiter
02030     ///   Single character delimiter.
02031     /// @param merge
02032     ///   Whether to merge or not adjacent delimiters. Default: not to merge.
02033     /// @return
02034     ///   Found field; or empty string if the required field is not found.
02035     /// @warning
02036     ///   The return value stores a pointer to the input string 'str' so
02037     ///   the return object validity time matches lifetime of the input 'str'
02038     static
02039     CTempString GetField_Unsafe(const CTempString& str,
02040                                 size_t             field_no,
02041                                 char               delimiter,
02042                                 EMergeDelims       merge = eNoMergeDelims);
02043 
02044 }; // class NStr
02045 
02046 
02047 /// Type for character in UCS-2 encoding
02048 typedef Uint2 TCharUCS2;
02049 /// Type for string in UCS-2 encoding
02050 typedef basic_string<TCharUCS2> TStringUCS2;
02051 
02052 
02053 
02054 /////////////////////////////////////////////////////////////////////////////
02055 ///
02056 /// CStringUTF8 --
02057 ///
02058 ///   A UTF-8 string.
02059 ///   Stores character data in UTF-8 encoding form.
02060 ///   On initialization, converts source characters into UTF-8.
02061 ///   Can convert data back into a particular encoding form (non-UTF8)
02062 ///   Supported encodings:
02063 ///      ISO 8859-1 (Latin1)
02064 ///      Microsoft Windows code page 1252
02065 ///      UCS-2, UCS-4 (no surrogates)
02066 
02067 enum EEncoding {
02068     eEncoding_Unknown,
02069     eEncoding_UTF8,
02070     eEncoding_Ascii,
02071     eEncoding_ISO8859_1,
02072     eEncoding_Windows_1252
02073 };
02074 typedef Uint4 TUnicodeSymbol;
02075 
02076 class  CStringUTF8 : public string
02077 {
02078 public:
02079     enum EValidate {
02080         eNoValidate,
02081         eValidate
02082     };
02083 
02084     /// Default constructor.
02085     CStringUTF8(void)
02086     {
02087     }
02088 
02089     /// Destructor.
02090     ~CStringUTF8(void)
02091     {
02092     }
02093 
02094     /// Copy constructor.
02095     ///
02096     /// @param src
02097     ///   Source UTF-8 string
02098     /// @param validate
02099     ///   Verify that the source character encoding is really UTF-8
02100     CStringUTF8(const CStringUTF8& src, EValidate validate = eNoValidate)
02101         : string(src)
02102     {
02103         if (validate == eValidate) {
02104             x_Validate();
02105         }
02106     }
02107 
02108     /// Constructor from a C++ string
02109     ///
02110     /// @param src
02111     ///   Source string
02112     /// @param encoding
02113     ///   Character encoding of the source string
02114     /// @param validate
02115     ///   Verify the character encoding of the source
02116     CStringUTF8(const string& src,
02117                 EEncoding encoding = eEncoding_ISO8859_1,
02118                 EValidate validate = eNoValidate)
02119         : string()
02120     {
02121         x_Append(src.c_str(), encoding, validate);
02122     }
02123 
02124     /// Constructor from a C string
02125     ///
02126     /// @param src
02127     ///   Source zero-terminated character buffer
02128     /// @param encoding
02129     ///   Character encoding of the source string
02130     /// @param validate
02131     ///   Verify the character encoding of the source
02132     CStringUTF8(const char* src,
02133                 EEncoding encoding = eEncoding_ISO8859_1,
02134                 EValidate validate = eNoValidate)
02135         : string()
02136     {
02137         x_Append(src, encoding, validate);
02138     }
02139 
02140     /// Constructor from any string (ISO8859-1, UCS-2 or UCS-4,
02141     /// depending on the size of T).
02142     template <class T>
02143     CStringUTF8(const basic_string<T>& src)
02144         : string()
02145     {
02146         x_Append(src.c_str());
02147     }
02148 
02149     /// Constructor from any character sequence (ISO8859-1, USC-2 or USC-4,
02150     /// depending on the size of TChar).
02151     template <typename TChar>
02152     CStringUTF8(const TChar* src)
02153         : string()
02154     {
02155         x_Append(src);
02156     }
02157 
02158     /// Assign to UTF8 string
02159     CStringUTF8& operator= (const CStringUTF8& src)
02160     {
02161         string::operator= (src);
02162         return *this;
02163     }
02164 
02165     /// Assign to C++ string in ISO8859-1, USC-2 or USC-4 (depending on the
02166     /// size of TChar)
02167     template <typename TChar>
02168     CStringUTF8& operator= (const basic_string<TChar>& src)
02169     {
02170         erase();
02171         x_Append(src.c_str());
02172         return *this;
02173     }
02174 
02175     /// Assign to C string in ISO8859-1, USC-2 or USC-4 (depending on the
02176     /// size of TChar)
02177     template <typename TChar>
02178     CStringUTF8& operator= (const TChar* src)
02179     {
02180         erase();
02181         x_Append(src);
02182         return *this;
02183     }
02184 
02185     /// Append a string in UTF8 encoding
02186     CStringUTF8& operator+= (const CStringUTF8& src)
02187     {
02188         string::operator+= (src);
02189         return *this;
02190     }
02191 
02192     /// Append a C++ string in ISO8859-1, USC-2 or USC-4 (depending on the
02193     /// size of TChar)
02194     template <typename TChar>
02195     CStringUTF8& operator+= (const basic_string<TChar>& src)
02196     {
02197         x_Append(src.c_str());
02198         return *this;
02199     }
02200 
02201     /// Append a C string in ISO8859-1, USC-2 or USC-4 (depending on the
02202     /// size of TChar)
02203     template <typename TChar>
02204     CStringUTF8& operator+= (const TChar* src)
02205     {
02206         x_Append(src);
02207         return *this;
02208     }
02209 
02210     /// Assign to C++ string
02211     ///
02212     /// @param src
02213     ///   Source string
02214     /// @param encoding
02215     ///   Character encoding of the source string
02216     /// @param validate
02217     ///   Verify the character encoding of the source
02218     CStringUTF8& Assign(const string& src,
02219                         EEncoding encoding,
02220                         EValidate validate = eNoValidate)
02221     {
02222         erase();
02223         x_Append(src.c_str(), encoding, validate);
02224         return *this;
02225     }
02226 
02227     /// Assign to C string
02228     ///
02229     /// @param src
02230     ///   Source zero-terminated character buffer
02231     /// @param encoding
02232     ///   Character encoding of the source string
02233     /// @param validate
02234     ///   Verify the character encoding of the source
02235     CStringUTF8& Assign(const char* src,
02236                         EEncoding encoding,
02237                         EValidate validate = eNoValidate)
02238     {
02239         erase();
02240         x_Append(src, encoding, validate);
02241         return *this;
02242     }
02243 
02244     /// Assign to C++ string in ISO8859-1, USC-2 or USC-4 (depending on the
02245     /// size of TChar)
02246     ///
02247     /// @param src
02248     ///   Source string
02249     template <typename TChar>
02250     CStringUTF8& Assign(const basic_string<TChar>& src)
02251     {
02252         erase();
02253         x_Append(src.c_str());
02254         return *this;
02255     }
02256 
02257     /// Assign to C string in ISO8859-1, USC-2 or USC-4 (depending on the
02258     /// size of TChar)
02259     ///
02260     /// @param src
02261     ///   Source zero-terminated character buffer
02262     template <typename TChar>
02263     CStringUTF8& Assign(const TChar* src)
02264     {
02265         erase();
02266         x_Append(src);
02267         return *this;
02268     }
02269 
02270     /// Assign to a single character
02271     ///
02272     /// @param ch
02273     ///   Character
02274     /// @param encoding
02275     ///   Character encoding
02276     CStringUTF8& Assign(char ch,
02277                         EEncoding encoding)
02278     {
02279         erase();
02280         x_AppendChar( CharToSymbol( ch, encoding ) );
02281         return *this;
02282     }
02283 
02284     /// Append a C++ string
02285     ///
02286     /// @param src
02287     ///   Source string
02288     /// @param encoding
02289     ///   Character encoding of the source string
02290     /// @param validate
02291     ///   Verify the character encoding of the source
02292     CStringUTF8& Append(const string& src,
02293                         EEncoding encoding,
02294                         EValidate validate = eNoValidate)
02295     {
02296         x_Append(src.c_str(), encoding, validate);
02297         return *this;
02298     }
02299 
02300     /// Append a C string
02301     ///
02302     /// @param src
02303     ///   Source zero-terminated character buffer
02304     /// @param encoding
02305     ///   Character encoding of the source string
02306     /// @param validate
02307     ///   Verify the character encoding of the source
02308     CStringUTF8& Append(const char* src,
02309                         EEncoding encoding,
02310                         EValidate validate = eNoValidate)
02311     {
02312         x_Append(src, encoding, validate);
02313         return *this;
02314     }
02315 
02316     /// Append a C++ string in ISO8859-1, USC-2 or USC-4 (depending on the
02317     /// size of TChar)
02318     ///
02319     /// @param src
02320     ///   Source string
02321     template <typename TChar>
02322     CStringUTF8& Append(const basic_string<TChar>& src)
02323     {
02324         x_Append(src.c_str());
02325         return *this;
02326     }
02327 
02328     /// Append a C string in ISO8859-1, USC-2 or USC-4 (depending on the
02329     /// size of TChar)
02330     ///
02331     /// @param src
02332     ///   Source zero-terminated character buffer
02333     template <typename TChar>
02334     CStringUTF8& Append(const TChar* src)
02335     {
02336         x_Append(src);
02337         return *this;
02338     }
02339 
02340     /// Append single character
02341     ///
02342     /// @param ch
02343     ///   Character
02344     /// @param encoding
02345     ///   Character encoding
02346     CStringUTF8& Append(char ch,
02347                         EEncoding encoding)
02348     {
02349         x_AppendChar( CharToSymbol( ch, encoding ) );
02350         return *this;
02351     }
02352 
02353     /// Append single Unicode code point
02354     ///
02355     /// @param ch
02356     ///   Unicode code point
02357     CStringUTF8& Append(TUnicodeSymbol ch)
02358     {
02359         x_AppendChar(ch);
02360         return *this;
02361     }
02362 
/// Get the number of symbols (code points) in the string.
///
/// Declared here only; the definition is not part of this section.
///
/// @return
///   Number of symbols (code points)
SIZE_TYPE GetSymbolCount(void) const;
02368     
/// Get the number of valid UTF-8 symbols (code points) in the buffer.
///
/// Declared here only; the definition is not part of this section.
///
/// @param src
///   Character buffer
/// @param buf_size
///   The number of bytes in the buffer
/// @return
///   Number of valid symbols (no exception thrown)
static SIZE_TYPE GetValidSymbolCount(const char* src, SIZE_TYPE buf_size);
02378     
/// Get the number of valid UTF-8 bytes (code units) in the buffer.
///
/// Declared here only; the definition is not part of this section.
///
/// @param src
///   Character buffer
/// @param buf_size
///   The number of bytes in the buffer
/// @return
///   Number of valid bytes (no exception thrown)
static SIZE_TYPE GetValidBytesCount(const char* src, SIZE_TYPE buf_size);
02388 
02389     /// Check that the character encoding of the string is valid UTF-8
02390     ///
02391     /// @return
02392     ///   Result of the check
02393     bool IsValid(void) const
02394     {
02395         return MatchEncoding(c_str(), eEncoding_UTF8);
02396     }
02397     /// Convert to ISO 8859-1 (Latin1) character representation
02398     ///
02399     /// Can throw a CStringException if the conversion is impossible
02400     /// or the string has invalid UTF-8 format.
02401     /// @param substitute_on_error
02402     ///   If the conversion is impossible, append the provided string
02403     ///   or, if substitute_on_error equals 0, throw the exception
02404     string AsLatin1(const char* substitute_on_error = 0) const
02405     {
02406         return AsSingleByteString(eEncoding_ISO8859_1,substitute_on_error);
02407     }
02408     
/// Convert the string to a single-byte character representation.
///
/// Can throw a CStringException if the conversion is impossible
/// or the string has invalid UTF-8 format.
/// Declared here only; the definition is not part of this section.
///
/// @param encoding
///   Desired encoding
/// @param substitute_on_error
///   If the conversion is impossible, append the provided string
///   or, if substitute_on_error equals 0, throw the exception
/// @return
///   C++ string
string AsSingleByteString(EEncoding encoding,
    const char* substitute_on_error = 0) const;
02422 
02423 #if defined(HAVE_WSTRING)
02424     /// Convert to Unicode (UCS-2 with no surrogates where
02425     /// sizeof(wchar_t) == 2 and UCS-4 where sizeof(wchar_t) == 4).
02426     ///
02427     /// Can throw a CStringException if the conversion is impossible
02428     /// or the string has invalid UTF-8 format.
02429     /// Defined only if wstring is supported by the compiler.
02430     ///
02431     /// @param substitute_on_error
02432     ///   If the conversion is impossible, append the provided string
02433     ///   or, if substitute_on_error equals 0, throw the exception
02434     wstring AsUnicode(const wchar_t* substitute_on_error = 0) const
02435     {
02436         return x_AsBasicString<wchar_t>(substitute_on_error);
02437     }
02438 #endif // HAVE_WSTRING
02439 
02440     /// Convert to UCS-2 for all platforms
02441     ///
02442     /// Can throw a CStringException if the conversion is impossible
02443     /// or the string has invalid UTF-8 format.
02444     ///
02445     /// @param substitute_on_error
02446     ///   If the conversion is impossible, append the provided string
02447     ///   or, if substitute_on_error equals 0, throw the exception
02448     TStringUCS2 AsUCS2(const TCharUCS2* substitute_on_error = 0) const
02449     {
02450         return x_AsBasicString<TCharUCS2>(substitute_on_error);
02451     }
02452 
/// Guess the encoding of the C string.
///
/// It can distinguish between UTF-8, Latin1, and Win1252 only.
/// Declared here only; the definition is not part of this section.
///
/// @param src
///   Source zero-terminated character buffer
/// @return
///   Encoding
static EEncoding GuessEncoding( const char* src);
02461 
02462     /// Guess the encoding of the C++ string
02463     ///
02464     /// It can distinguish between UTF-8, Latin1, and Win1252 only
02465     /// @param src
02466     ///   Source string
02467     /// @return
02468     ///   Encoding
02469     static EEncoding GuessEncoding( const string& src)
02470     {
02471         return GuessEncoding( src.c_str());
02472     }
02473 
/// Check the encoding of the C string.
///
/// Check that the encoding of the source is the same as, or
/// is compatible with, the specified one.
/// Declared here only; the definition is not part of this section.
///
/// @param src
///   Source zero-terminated character buffer
/// @param encoding
///   Character encoding form to check against
/// @return
///   Boolean result: encoding is same or compatible
static bool MatchEncoding( const char* src, EEncoding encoding);
02485 
02486     /// Check the encoding of the C++ string
02487     ///
02488     /// Check that the encoding of the source is the same, or
02489     /// is compatible with the specified one
02490     /// @param src
02491     ///   Source string
02492     /// @param encoding
02493     ///   Character encoding form to check against
02494     /// @return
02495     ///   Boolean result: encoding is same or compatible
02496     static bool MatchEncoding( const string& src, EEncoding encoding)
02497     {
02498         return MatchEncoding( src.c_str(), encoding);
02499     }
02500     
/// Convert an encoded character into a Unicode code point.
///
/// @param ch
///   Encoded character
/// @param encoding
///   Character encoding of @a ch
/// @return
///   Code point
static TUnicodeSymbol CharToSymbol(char ch, EEncoding encoding);
02510     
/// Convert a Unicode code point into an encoded character.
///
/// @param sym
///   Code point
/// @param encoding
///   Character encoding of the result
/// @return
///   Encoded character
static char SymbolToChar(TUnicodeSymbol sym, EEncoding encoding);
02520 
/// Convert a sequence of UTF8 code units into a Unicode code point.
///
/// @param src
///   UTF8 zero-terminated buffer. The pointer is taken by non-const
///   reference; NOTE(review): presumably it is advanced past the bytes
///   consumed -- confirm against the definition.
/// @return
///   Unicode code point
static TUnicodeSymbol Decode(const char*& src);
02528 
/// Convert the first character of a UTF8 sequence into Unicode.
///
/// @param ch
///   First character (code unit) of the sequence
/// @param more
///   Output: if the character is valid, how many more characters
///   to expect
/// @return
///   non-zero, if the character is valid
static TUnicodeSymbol  DecodeFirst(char ch, SIZE_TYPE& more);
02538 
/// Convert the next character of a UTF8 sequence into Unicode.
///
/// @param chU
///   Unicode character (the original comment listed it as "ch16").
///   NOTE(review): presumably the value accumulated so far by
///   DecodeFirst() / earlier DecodeNext() calls -- confirm.
/// @param ch
///   Next character (code unit) of the sequence
/// @return
///   non-zero, if the character is valid
static TUnicodeSymbol  DecodeNext(TUnicodeSymbol chU, char ch);
02548 
02549 private:
02550     /// Function AsAscii is deprecated - use AsLatin1() instead
02551     string AsAscii(void) const
02552     {
02553         return AsLatin1();
02554     }
02555 
/// Conversion to basic_string with any base type we need.
/// Used by AsUnicode() (TChar == wchar_t) and AsUCS2() (TChar == TCharUCS2).
///
/// @param substitute_on_error
///   Passed through unchanged from AsUnicode() / AsUCS2()
template <typename TChar>
basic_string<TChar> x_AsBasicString(const TChar* substitute_on_error) const;
02559 
/// Validation helper; invoked by the copy constructor when it is asked
/// to validate (eValidate). Defined outside this section.
void   x_Validate(void) const;
/// Convert a Unicode code point into UTF8 and append it.
void   x_AppendChar(TUnicodeSymbol ch);
/// Convert a coded character sequence into UTF8 and append it.
/// Defaults: ISO 8859-1 source encoding, no validation.
void   x_Append(const char* src,
                EEncoding encoding = eEncoding_ISO8859_1,
                EValidate validate = eNoValidate);
02567 
/// Convert a Unicode character sequence into UTF8 and append it.
/// The sequence can be in UCS-4 (TChar == (U)Int4), UCS-2 (TChar == (U)Int2)
/// or in ISO8859-1 (TChar == char).
template <typename TChar>
void x_Append(const TChar* src);
02573 
/// Check how many bytes are needed to represent the code point in UTF8.
static SIZE_TYPE x_BytesNeeded(TUnicodeSymbol ch);
/// Check if the character is a valid first code unit of UTF8.
/// NOTE(review): `more` is an output parameter, presumably the number of
/// code units still expected (as for DecodeFirst) -- confirm.
static bool   x_EvalFirst(char ch, SIZE_TYPE& more);
/// Check if the character is a valid non-first code unit of UTF8.
static bool   x_EvalNext(char ch);
02580 };
02581 
02582 
02583 
02584 /////////////////////////////////////////////////////////////////////////////
02585 ///
02586 /// CParseTemplException --
02587 ///
02588 /// Define template class for parsing exception. This class is used to define
02589 /// exceptions for complex parsing tasks and includes an additional m_Pos
02590 /// data member. The constructor requires that an additional postional
02591 /// parameter be supplied along with the description message.
02592 
02593 template <class TBase>
02594 class CParseTemplException : EXCEPTION_VIRTUAL_BASE public TBase
02595 {
02596 public:
02597     /// Error types that for exception class.
02598     enum EErrCode {
02599         eErr        ///< Generic error 
02600     };
02601 
02602     /// Translate from the error code value to its string representation.
02603     virtual const char* GetErrCodeString(void) const
02604     {
02605         switch (GetErrCode()) {
02606         case eErr: return "eErr";
02607         default:   return CException::GetErrCodeString();
02608         }
02609     }
02610 
02611     /// Constructor.
02612     ///
02613     /// Report "pos" along with "what".
02614     CParseTemplException(const CDiagCompileInfo &info,
02615         const CException* prev_exception,
02616         EErrCode err_code,const string& message,
02617         string::size_type pos, EDiagSev severity = eDiag_Error)
02618           : TBase(info, prev_exception,
02619             (typename TBase::EErrCode)(CException::eInvalid),
02620             message), m_Pos(pos)
02621     {
02622         this->x_Init(info,
02623                      string("{") + NStr::UIntToString((unsigned long)m_Pos) +
02624                      "} " + message,
02625                      prev_exception,
02626                      severity);
02627         this->x_InitErrCode((CException::EErrCode) err_code);
02628     }
02629 
02630     /// Constructor.
02631     CParseTemplException(const CParseTemplException<TBase>& other)
02632         : TBase(other)
02633     {
02634         m_Pos = other.m_Pos;
02635         x_Assign(other);
02636     }
02637 
02638     /// Destructor.
02639     virtual ~CParseTemplException(void) throw() {}
02640 
02641     /// Report error position.
02642     virtual void ReportExtra(ostream& out) const
02643     {
02644         out << "m_Pos = " << (unsigned long)m_Pos;
02645     }
02646 
02647     // Attributes.
02648 
02649     /// Get exception class type.
02650     virtual const char* GetType(void) const { return "CParseTemplException"; }
02651 
02652     /// Get error code.
02653     EErrCode GetErrCode(void) const
02654     {
02655         return typeid(*this) == typeid(CParseTemplException<TBase>) ?
02656             (typename CParseTemplException<TBase>::EErrCode)
02657                 this->x_GetErrCode() :
02658             (typename CParseTemplException<TBase>::EErrCode)
02659                 CException::eInvalid;
02660     }
02661 
02662     /// Get error position.
02663     string::size_type GetPos(void) const throw() { return m_Pos; }
02664 
02665 protected:
02666     /// Constructor.
02667     CParseTemplException(void)
02668     {
02669         m_Pos = 0;
02670     }
02671 
02672     /// Helper clone method.
02673     virtual const CException* x_Clone(void) const
02674     {
02675         return new CParseTemplException<TBase>(*this);
02676     }
02677 
02678 private:
02679     string::size_type m_Pos;    ///< Error position
02680 };
02681 
02682 
/////////////////////////////////////////////////////////////////////////////
///
/// CStringException --
///
/// Define exceptions generated by string classes.
///
/// CStringException inherits its basic functionality from
/// CParseTemplException<CCoreException> and defines additional error codes
/// for string parsing. Through that base every exception also carries an
/// error position (see NCBI_EXCEPTION_DEFAULT2 below, which is given
/// std::string::size_type as the extra argument type).

class  CStringException : public CParseTemplException<CCoreException>
{
public:
    /// Error types that string classes can generate.
    enum EErrCode {
        eConvert,       ///< Failure to convert string
        eBadArgs,       ///< Bad arguments to string methods
        eFormat         ///< Wrong format for any input to string methods
    };

    /// Translate from the error code value to its string representation.
    /// Declared here only; the definition is not part of this header section.
    virtual const char* GetErrCodeString(void) const;

    // Standard exception boilerplate code.
    NCBI_EXCEPTION_DEFAULT2(CStringException,
        CParseTemplException<CCoreException>, std::string::size_type);
};
02710 
02711 
02712 
02713 /////////////////////////////////////////////////////////////////////////////
02714 ///
02715 /// CStringPairsParser --
02716 ///
02717 /// Base class for parsing a string to a set of name-value pairs.
02718 
02719 
/// Decoder interface. Names and values can be decoded with different rules.
/// Implementations are handed to CStringPairs, which calls Decode() once
/// for the name and once for the value of every parsed pair.
class IStringDecoder
{
public:
    /// Type of string to be decoded
    enum EStringType {
        eName,   ///< The string is the name part of a pair
        eValue   ///< The string is the value part of a pair
    };
    /// Decode the string. Must throw CStringException if the source string
    /// is not valid.
    ///
    /// @param src
    ///   String to decode
    /// @param stype
    ///   Whether @a src is a name or a value
    /// @return
    ///   Decoded string
    virtual string Decode(const string& src, EStringType stype) const = 0;
    /// Virtual destructor, so implementations can be deleted through
    /// a pointer to the interface.
    virtual ~IStringDecoder(void) {}
};
02734 
02735 
/// Encoder interface. Names and values can be encoded with different rules.
/// Implementations are handed to CStringPairs, which calls Encode() once
/// for the name and once for the value of every merged pair.
class IStringEncoder
{
public:
    /// Type of string to be encoded
    enum EStringType {
        eName,   ///< The string is the name part of a pair
        eValue   ///< The string is the value part of a pair
    };
    /// Encode the string.
    ///
    /// @param src
    ///   String to encode
    /// @param stype
    ///   Whether @a src is a name or a value
    /// @return
    ///   Encoded string
    virtual string Encode(const string& src, EStringType stype) const = 0;
    /// Virtual destructor, so implementations can be deleted through
    /// a pointer to the interface.
    virtual ~IStringEncoder(void) {}
};
02749 
02750 
/// URL-decoder for string pairs parser
class  CStringDecoder_Url : public IStringDecoder
{
public:
    /// @param flag
    ///   URL-decoding flag (NStr::eUrlDec_All by default).
    ///   NOTE(review): presumably stored in m_Flag and applied by Decode();
    ///   the definitions are not part of this section -- confirm.
    CStringDecoder_Url(NStr::EUrlDecode flag = NStr::eUrlDec_All);

    /// IStringDecoder implementation (defined outside this section).
    virtual string Decode(const string& src, EStringType stype) const;

private:
    NStr::EUrlDecode m_Flag;   // URL-decoding flag
};
02762 
02763 
/// URL-encoder for string pairs parser
class  CStringEncoder_Url : public IStringEncoder
{
public:
    /// @param flag
    ///   URL-encoding flag (NStr::eUrlEnc_SkipMarkChars by default).
    ///   NOTE(review): presumably stored in m_Flag and applied by Encode();
    ///   the definitions are not part of this section -- confirm.
    CStringEncoder_Url(NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);

    /// IStringEncoder implementation (defined outside this section).
    virtual string Encode(const string& src, EStringType stype) const;

private:
    NStr::EUrlEncode m_Flag;   // URL-encoding flag
};
02775 
02776 
02777 /// Template for parsing string into pairs of name and value or merging
02778 /// them back into a single string.
02779 /// The container class must hold pairs of strings (pair<string, string>).
02780 template<class TContainer>
02781 class CStringPairs
02782 {
02783 public:
02784     typedef TContainer TStrPairs;
02785     /// The container's value type must be pair<string, string>
02786     /// or a compatible type.
02787     typedef typename TContainer::value_type TStrPair;
02788 
02789     /// Create parser with the specified decoder/encoder and default separators.
02790     ///
02791     /// @param decoder
02792     ///   String decoder (Url, Xml etc.)
02793     /// @param own_decoder
02794     ///   Decoder ownership flag
02795     /// @param decoder
02796     ///   String encoder (Url, Xml etc.), optional
02797     /// @param own_encoder
02798     ///   Encoder ownership flag, optional
02799     CStringPairs(IStringDecoder* decoder = NULL,
02800                  EOwnership      own_decoder = eTakeOwnership,
02801                  IStringEncoder* encoder = NULL,
02802                  EOwnership      own_encoder = eTakeOwnership)
02803         : m_ArgSep("&"),
02804           m_ValSep("="),
02805           m_Decoder(decoder, own_decoder),
02806           m_Encoder(encoder, own_encoder)
02807     {
02808     }
02809 
02810     /// Create parser with the specified parameters.
02811     ///
02812     /// @param arg_sep
02813     ///   Separator between name+value pairs
02814     /// @param val_sep
02815     ///   Separator between name and value
02816     /// @param decoder
02817     ///   String decoder (Url, Xml etc.)
02818     /// @param own_decoder
02819     ///   Decoder ownership flag
02820     /// @param encoder
02821     ///   String encoder (Url, Xml etc.)
02822     /// @param own_encoder
02823     ///   Encoder ownership flag
02824     CStringPairs(const string&   arg_sep,
02825                  const string&   val_sep,
02826                  IStringDecoder* decoder = NULL,
02827                  EOwnership      own_decoder = eTakeOwnership,
02828                  IStringEncoder* encoder = NULL,
02829                  EOwnership      own_encoder = eTakeOwnership)
02830         : m_ArgSep(arg_sep),
02831           m_ValSep(val_sep),
02832           m_Decoder(decoder, own_decoder),
02833           m_Encoder(encoder, own_encoder)
02834     {
02835     }
02836 
02837     /// Create parser with the selected URL-encoding/decoding options
02838     /// and default separators.
02839     ///
02840     /// @param decode_flag
02841     ///   URL-decoding flag
02842     /// @param encode_flag
02843     ///   URL-encoding flag
02844     CStringPairs(NStr::EUrlDecode decode_flag,
02845                  NStr::EUrlEncode encode_flag)
02846         : m_ArgSep("&"),
02847           m_ValSep("="),
02848           m_Decoder(new CStringDecoder_Url(decode_flag), eTakeOwnership),
02849           m_Encoder(new CStringEncoder_Url(encode_flag), eTakeOwnership)
02850     {
02851     }
02852 
02853     virtual ~CStringPairs(void) {}
02854 
02855     /// Set string decoder.
02856     ///
02857     /// @param decoder
02858     ///   String decoder (Url, Xml etc.)
02859     /// @param own
02860     ///   Decoder ownership flag
02861     void SetDecoder(IStringDecoder* decoder, EOwnership own = eTakeOwnership)
02862         { m_Decoder.reset(decoder, own); }
02863     /// Get decoder or NULL. Does not affect decoder ownership.
02864     IStringDecoder* GetDecoder(void) { return m_Decoder.get(); }
02865 
02866     /// Set string encoder.
02867     ///
02868     /// @param encoder
02869     ///   String encoder (Url, Xml etc.)
02870     /// @param own
02871     ///   Encoder ownership flag
02872     void SetEncoder(IStringEncoder* encoder, EOwnership own = eTakeOwnership)
02873         { m_Encoder.reset(encoder, own); }
02874     /// Get encoder or NULL. Does not affect encoder ownership.
02875     IStringDecoder* GetEncoder(void) { return m_Encoder.get(); }
02876 
02877     /// Parse the string.
02878     ///
02879     /// @param str
02880     ///   String to parse. The parser assumes the string is formatted like
02881     ///   "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and
02882     ///   value is passed to the decoder (if not NULL) before storing the pair.
02883     /// @param merge_argsep
02884     ///   Flag for merging separators between pairs. By default the separators
02885     ///   are merged to prevent pairs where both name and value are empty.
02886     void Parse(const CTempString& str,
02887                NStr::EMergeDelims merge_argsep = NStr::eMergeDelims)
02888     {
02889         Parse(m_Data, str, m_ArgSep, m_ValSep,
02890               m_Decoder.get(), eNoOwnership, merge_argsep);
02891     }
02892 
02893     /// Parse the string using the provided decoder, put data into the
02894     /// container.
02895     ///
02896     /// @param pairs
02897     ///   Container to be filled with the parsed name/value pairs
02898     /// @param str
02899     ///   String to parse. The parser assumes the string is formatted like
02900     ///   "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and
02901     ///   value is passed to the decoder (if not NULL) before storing the pair.
02902     /// @param decoder
02903     ///   String decoder (Url, Xml etc.)
02904     /// @param own
02905     ///   Flag indicating if the decoder must be deleted by the function.
02906     /// @param merge_argsep
02907     ///   Flag for merging separators between pairs. By default the separators
02908     ///   are merged to prevent pairs where both name and value are empty.
02909     static void Parse(TStrPairs&         pairs,
02910                       const CTempString& str,
02911                       const string&      arg_sep,
02912                       const string&      val_sep,
02913                       IStringDecoder*    decoder = NULL,
02914                       EOwnership         own = eTakeOwnership,
02915                       NStr::EMergeDelims merge_argsep = NStr::eMergeDelims)
02916     {
02917         AutoPtr<IStringDecoder> decoder_guard(decoder, own);
02918         list<string> lst;
02919         NStr::Split(str, arg_sep, lst, merge_argsep);
02920         pairs.clear();
02921         ITERATE(list<string>, it, lst) {
02922             string name, val;
02923             NStr::SplitInTwo(*it, val_sep, name, val);
02924             if ( decoder ) {
02925                 try {
02926                     name = decoder->Decode(name, IStringDecoder::eName);
02927                     val = decoder->Decode(val, IStringDecoder::eValue);
02928                 }
02929                 catch (CStringException) {
02930                     // Discard all data
02931                     pairs.clear();
02932                     throw;
02933                 }
02934             }
02935             pairs.insert(pairs.end(), TStrPair(name, val));
02936         }
02937     }
02938 
02939     /// Merge name-value pairs into a single string using the currently set
02940     /// separators and the provided encoder if any.
02941     string Merge(void) const
02942     {
02943         return Merge(m_Data, m_ArgSep, m_ValSep,
02944                      m_Encoder.get(), eNoOwnership);
02945     }
02946 
02947     /// Merge name-value pairs from the provided container, separators
02948     /// and encoder. Delete the encoder if the ownership flag allows.
02949     ///
02950     /// @param pairs
02951     ///   Container with the name/value pairs to be merged.
02952     /// @param arg_sep
02953     ///   Separator to be inserted bewteen pairs.
02954     /// @param val_sep
02955     ///   Separator to be inserted bewteen name and value.
02956     /// @param encoder
02957     ///   String encoder (Url, Xml etc.)
02958     /// @param own
02959     ///   Flag indicating if the encoder must be deleted by the function.
02960     static string Merge(const TStrPairs&      pairs,
02961                         const string&         arg_sep,
02962                         const string&         val_sep,
02963                         IStringEncoder*       encoder = NULL,
02964                         EOwnership            own = eTakeOwnership)
02965     {
02966         AutoPtr<IStringEncoder> encoder_guard(encoder, own);
02967         string ret;
02968         ITERATE(typename TStrPairs, it, pairs) {
02969             if ( !ret.empty() ) {
02970                 ret += arg_sep;
02971             }
02972             if ( encoder ) {
02973                 ret += encoder->Encode(it->first, IStringEncoder::eName) +
02974                     val_sep +
02975                     encoder->Encode(it->second, IStringEncoder::eValue);
02976             }
02977             else {
02978                 ret += it->first + val_sep + it->second;
02979             }
02980         }
02981         return ret;
02982     }
02983 
02984     /// Read data
02985     const TStrPairs& GetPairs(void) const { return m_Data; }
02986     /// Get non-const data
02987     TStrPairs& GetPairs(void) { return m_Data; }
02988 
02989 private:
02990     string                  m_ArgSep;   // Separator between name+value pairs ("&")
02991     string                  m_ValSep;   // Separator between name and value ("=")
02992     AutoPtr<IStringDecoder> m_Decoder;  // String decoder (Url, Xml etc.)
02993     AutoPtr<IStringEncoder> m_Encoder;  // String encoder (Url, Xml etc.)
02994     TStrPairs               m_Data;     // Parsed data
02995 };
02996 
02997 
/// Name/value pairs stored in a vector, in insertion order.
typedef vector<pair<string, string> > TStringPairsVector;
/// CStringPairs instantiated for the vector-based container.
typedef CStringPairs<TStringPairsVector> CStringPairsParser;
03000 
03001 
03002 /////////////////////////////////////////////////////////////////////////////
03003 ///
03004 /// CEncodedString --
03005 ///
03006 /// Class to detect if a string needs to be URL-encoded and hold both
03007 /// encoded and original versions.
03008 ///
03009 
03010 class  CEncodedString
03011 {
03012 public:
03013     CEncodedString(void) {}
03014     CEncodedString(const string& s,
03015                    NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);
03016 
03017     /// Set new original string
03018     void SetString(const string& s,
03019                    NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);
03020 
03021     /// Check if the original string was encoded.
03022     bool IsEncoded(void) const { return m_Encoded.get() != 0; }
03023     /// Get the original unencoded string
03024     const string& GetOriginalString(void) const { return m_Original; }
03025     /// Get encoded string
03026     const string& GetEncodedString(void) const
03027         { return IsEncoded() ? *m_Encoded : m_Original; }
03028 
03029     /// Check if the string is empty
03030     bool IsEmpty(void) const { return m_Original.empty(); }
03031 
03032 private:
03033     string           m_Original;
03034     auto_ptr<string> m_Encoded;
03035 };
03036 
03037 
03038 /////////////////////////////////////////////////////////////////////////////
03039 //  Predicates
03040 //
03041 
03042 
03043 
03044 /////////////////////////////////////////////////////////////////////////////
03045 ///
03046 /// Define Case-sensitive string comparison methods.
03047 ///
03048 /// Used as arguments to template functions for specifying the type of 
03049 /// comparison.
03050 
03051 template <typename T>
03052 struct PCase_Generic
03053 {
03054     /// Return difference between "s1" and "s2".
03055     int Compare(const T& s1, const T& s2) const;
03056 
03057     /// Return TRUE if s1 < s2.
03058     bool Less(const T& s1, const T& s2) const;
03059 
03060     /// Return TRUE if s1 == s2.
03061     bool Equals(const T& s1, const T& s2) const;
03062 
03063     /// Return TRUE if s1 < s2.
03064     bool operator()(const T& s1, const T& s2) const;
03065 };
03066 
03067 typedef PCase_Generic<string>       PCase;
03068 typedef PCase_Generic<const char *> PCase_CStr;
03069 
03070 
03071 
03072 /////////////////////////////////////////////////////////////////////////////
03073 ///
03074 /// Define Case-insensitive string comparison methods.
03075 ///
03076 /// Used as arguments to template functions for specifying the type of 
03077 /// comparison.
03078 ///
03079 /// @sa PNocase_Conditional_Generic
03080 
03081 template <typename T>
03082 struct PNocase_Generic
03083 {
03084     /// Return difference between "s1" and "s2".
03085     int Compare(const T& s1, const T& s2) const;
03086 
03087     /// Return TRUE if s1 < s2.
03088     bool Less(const T& s1, const T& s2) const;
03089 
03090     /// Return TRUE if s1 == s2.
03091     bool Equals(const T& s1, const T& s2) const;
03092 
03093     /// Return TRUE if s1 < s2 ignoring case.
03094     bool operator()(const T& s1, const T& s2) const;
03095 };
03096 
03097 typedef PNocase_Generic<string>       PNocase;
03098 typedef PNocase_Generic<const char *> PNocase_CStr;
03099 
03100 
03101 /////////////////////////////////////////////////////////////////////////////
03102 ///
03103 /// Define Case-insensitive string comparison methods.
03104 /// Case sensitivity can be turned on and off at runtime.
03105 ///
03106 /// Used as arguments to template functions for specifying the type of 
03107 /// comparison.
03108 ///
03109 /// @sa PNocase_Generic
03110 
03111 template <typename T>
03112 class PNocase_Conditional_Generic
03113 {
03114 public:
03115     /// Construction
03116     PNocase_Conditional_Generic(NStr::ECase case_sens = NStr::eCase);
03117 
03118     /// Get comparison type
03119     NStr::ECase GetCase() const { return m_CaseSensitive; }
03120 
03121     /// Set comparison type
03122     void SetCase(NStr::ECase case_sens) { m_CaseSensitive = case_sens; }
03123 
03124     /// Return difference between "s1" and "s2".
03125     int Compare(const T& s1, const T& s2) const;
03126 
03127     /// Return TRUE if s1 < s2.
03128     bool Less(const T& s1, const T& s2) const;
03129 
03130     /// Return TRUE if s1 == s2.
03131     bool Equals(const T& s1, const T& s2) const;
03132 
03133     /// Return TRUE if s1 < s2 ignoring case.
03134     bool operator()(const T& s1, const T& s2) const;
03135 private:
03136     NStr::ECase m_CaseSensitive; ///< case sensitive when TRUE
03137 };
03138 
03139 typedef PNocase_Conditional_Generic<string>       PNocase_Conditional;
03140 typedef PNocase_Conditional_Generic<const char *> PNocase_Conditional_CStr;
03141 
03142 
03143 /////////////////////////////////////////////////////////////////////////////
03144 ///
03145 /// PQuickStringLess implements an ordering of strings,
03146 /// that is more efficient than usual lexicographical order.
03147 /// It can be used in cases when no specific order is required,
03148 /// e.g. only simple key lookup is needed.
03149 /// Current implementation first compares lengths of strings,
03150 /// and will compare string data only when lengths are the same.
03151 ///
03152 struct PQuickStringLess
03153 {
03154     bool operator()(const CTempString& s1, const CTempString& s2) const {
03155         size_t len1 = s1.size(), len2 = s2.size();
03156         return len1 < len2 ||
03157             (len1 == len2 && ::memcmp(s1.data(), s2.data(), len1) < 0);
03158     }
03159 };
03160 
03161 
03162 /////////////////////////////////////////////////////////////////////////////
03163 //  Algorithms
03164 //
03165 
03166 
/// Check equivalence of arguments using predicate.
///
/// The predicate object must provide an Equals(x, y) member; its result
/// (converted to bool) is returned.
template<typename Arg1, typename Arg2, typename Pred>
inline
bool AStrEquiv(const Arg1& x, const Arg2& y, Pred pr)
{
    const bool equivalent = pr.Equals(x, y);
    return equivalent;
}
03174 
03175 
03176 /* @} */
03177 
03178 
03179 
03180 /////////////////////////////////////////////////////////////////////////////
03181 //
03182 //  IMPLEMENTATION of INLINE functions
03183 //
03184 /////////////////////////////////////////////////////////////////////////////
03185 
03186 
03187 /////////////////////////////////////////////////////////////////////////////
03188 //  CNcbiEmptyString::
03189 //
03190 #if !defined(NCBI_OS_MSWIN) && !( defined(NCBI_OS_LINUX)  &&  defined(NCBI_COMPILER_GCC) )
03191 inline
03192 const string& CNcbiEmptyString::Get(void)
03193 {
03194     const string* str = m_Str;
03195     return str ? *str: FirstGet();
03196 }
03197 #endif
03198 
03199 
03200 
03201 /////////////////////////////////////////////////////////////////////////////
03202 //  NStr::
03203 //
03204 
03205 inline
03206 string NStr::IntToString(long value,
03207                          TNumToStringFlags flags, int base)
03208 {
03209     string ret;
03210     IntToString(ret, value, flags, base);
03211     return ret;
03212 }
03213 
03214 inline
03215 string NStr::UIntToString(unsigned long value,
03216                           TNumToStringFlags flags, int base)
03217 {
03218     string ret;
03219     UIntToString(ret, value, flags, base);
03220     return ret;
03221 }
03222 
03223 inline
03224 int NStr::HexChar(char ch)
03225 {
03226     unsigned int rc = ch - '0';
03227     if (rc <= 9) {
03228         return rc;
03229     } else {
03230         rc = (ch | ' ') - 'a';
03231         return rc <= 5 ? int(rc + 10) : -1;
03232     }
03233 }
03234 
03235 inline
03236 bool NStr::MatchesMask(const string& str, const string& mask, ECase use_case)
03237 {
03238     return MatchesMask(str.c_str(), mask.c_str(), use_case);
03239 }
03240 
03241 inline
03242 int NStr::strcmp(const char* s1, const char* s2)
03243 {
03244     return ::strcmp(s1, s2);
03245 }
03246 
03247 inline
03248 int NStr::strncmp(const char* s1, const char* s2, size_t n)
03249 {
03250     return ::strncmp(s1, s2, n);
03251 }
03252 
03253 inline
03254 int NStr::strcasecmp(const char* s1, const char* s2)
03255 {
03256 #if defined(HAVE_STRICMP)
03257 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400)
03258     return ::_stricmp(s1, s2);
03259 #else
03260     return ::stricmp(s1, s2);
03261 #endif
03262 
03263 #elif defined(HAVE_STRCASECMP_LC)
03264     return ::strcasecmp(s1, s2);
03265 
03266 #else
03267     int diff = 0;
03268     for ( ;; ++s1, ++s2) {
03269         char c1 = *s1;
03270         // calculate difference
03271         diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2));
03272         // if end of string or different
03273         if (!c1  ||  diff)
03274             break; // return difference
03275     }
03276     return diff;
03277 #endif
03278 }
03279 
03280 inline
03281 int NStr::strncasecmp(const char* s1, const char* s2, size_t n)
03282 {
03283 #if defined(HAVE_STRICMP)
03284 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400)
03285     return ::_strnicmp(s1, s2, n);
03286 #else
03287     return ::strnicmp(s1, s2, n);
03288 #endif
03289 
03290 #elif defined(HAVE_STRCASECMP_LC)
03291     return ::strncasecmp(s1, s2, n);
03292 
03293 #else
03294     int diff = 0;
03295     for ( ; ; ++s1, ++s2, --n) {
03296         if (n == 0)
03297             return 0;
03298         char c1 = *s1;
03299         // calculate difference
03300         diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2));
03301         // if end of string or different
03302         if (!c1  ||  diff)
03303             break; // return difference
03304     }
03305     return diff;
03306 #endif
03307 }
03308 
03309 inline
03310 size_t NStr::strftime(char* s, size_t maxsize, const char* format,
03311                       const struct tm* timeptr)
03312 {
03313 #if defined(NCBI_COMPILER_MSVC)
03314     string x_format = Replace(format, "%T", "%H:%M:%S");
03315     ReplaceInPlace(x_format,          "%D", "%m/%d/%y");
03316     return ::strftime(s, maxsize, x_format.c_str(), timeptr);
03317 #else
03318     return ::strftime(s, maxsize, format, timeptr);
03319 #endif
03320 }
03321 
03322 
03323 inline
03324 int NStr::Compare(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
03325                   const char* pattern, ECase use_case)
03326 {
03327     return use_case == eCase ?
03328         CompareCase(str, pos, n, pattern): CompareNocase(str, pos, n, pattern);
03329 }
03330 
03331 inline
03332 int NStr::Compare(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
03333                   const string& pattern, ECase use_case)
03334 {
03335     return use_case == eCase ?
03336         CompareCase(str, pos, n, pattern): CompareNocase(str, pos, n, pattern);
03337 }
03338 
03339 inline
03340 int NStr::CompareCase(const char* s1, const char* s2)
03341 {
03342     return NStr::strcmp(s1, s2);
03343 }
03344 
03345 inline
03346 int NStr::CompareNocase(const char* s1, const char* s2)
03347 {
03348     return NStr::strcasecmp(s1, s2);
03349 }
03350 
03351 inline
03352 int NStr::Compare(const char* s1, const char* s2, ECase use_case)
03353 {
03354     return use_case == eCase ? CompareCase(s1, s2): CompareNocase(s1, s2);
03355 }
03356 
03357 inline
03358 int NStr::Compare(const string& s1, const char* s2, ECase use_case)
03359 {
03360     return Compare(s1.c_str(), s2, use_case);
03361 }
03362 
03363 inline
03364 int NStr::Compare(const char* s1, const string& s2, ECase use_case)
03365 {
03366     return Compare(s1, s2.c_str(), use_case);
03367 }
03368 
03369 inline
03370 int NStr::Compare(const string& s1, const string& s2, ECase use_case)
03371 {
03372     return Compare(s1.c_str(), s2.c_str(), use_case);
03373 }
03374 
03375 inline
03376 int NStr::CompareCase(const string& s1, const string& s2)
03377 {
03378     return CompareCase(s1.c_str(), s2.c_str());
03379 }
03380 
03381 inline
03382 int NStr::CompareNocase(const string& s1, const string& s2)
03383 {
03384     return CompareNocase(s1.c_str(), s2.c_str());
03385 }
03386 
03387 inline
03388 bool NStr::Equal(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
03389                   const char* pattern, ECase use_case)
03390 {
03391     return use_case == eCase ?
03392         EqualCase(str, pos, n, pattern) : EqualNocase(str, pos, n, pattern);
03393 }
03394 
03395 inline
03396 bool NStr::Equal(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
03397                   const string& pattern, ECase use_case)
03398 {
03399     return use_case == eCase ?
03400         EqualCase(str, pos, n, pattern) : EqualNocase(str, pos, n, pattern);
03401 }
03402 
03403 inline
03404 bool NStr::EqualCase(const char* s1, const char* s2)
03405 {
03406     return NStr::strcmp(s1, s2) == 0;
03407 }
03408 
03409 inline
03410 bool NStr::EqualNocase(const char* s1, const char* s2)
03411 {
03412     return NStr::strcasecmp(s1, s2) == 0;
03413 }
03414 
03415 inline
03416 bool NStr::EqualCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
03417                      const char* pattern)
03418 {
03419     return NStr::CompareCase(str, pos, n, pattern) == 0;
03420 }
03421 
03422 inline
03423 bool NStr::EqualCase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
03424                      const string& pattern)
03425 {
03426     return NStr::CompareCase(str, pos, n, pattern) == 0;
03427 }
03428 
03429 inline
03430 bool NStr::Equal(const char* s1, const char* s2, ECase use_case)
03431 {
03432     return (use_case == eCase ? EqualCase(s1, s2) : EqualNocase(s1, s2));
03433 }
03434 
03435 inline
03436 bool NStr::Equal(const string& s1, const char* s2, ECase use_case)
03437 {
03438     return Equal(s1.c_str(), s2, use_case);
03439 }
03440 
03441 inline
03442 bool NStr::Equal(const char* s1, const string& s2, ECase use_case)
03443 {
03444     return Equal(s1, s2.c_str(), use_case);
03445 }
03446 
03447 inline
03448 bool NStr::Equal(const string& s1, const string& s2, ECase use_case)
03449 {
03450     return Equal(s1.c_str(), s2.c_str(), use_case);
03451 }
03452 
03453 inline
03454 bool NStr::EqualCase(const string& s1, const string& s2)
03455 {
03456     // return EqualCase(s1.c_str(), s2.c_str());
03457     return s1 == s2;
03458 }
03459 
03460 inline
03461 bool NStr::EqualNocase(const string& s1, const string& s2)
03462 {
03463     return EqualNocase(s1.c_str(), s2.c_str());
03464 }
03465 
03466 inline
03467 bool NStr::EqualNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
03468                              const char* pattern)
03469 {
03470     return CompareNocase(str, pos, n, pattern) == 0;
03471 }
03472 
03473 inline
03474 bool NStr::EqualNocase(const string& str, SIZE_TYPE pos, SIZE_TYPE n,
03475                              const string& pattern)
03476 {
03477     return CompareNocase(str, pos, n, pattern) == 0;
03478 }
03479 
03480 inline
03481 bool NStr::StartsWith(const string& str, const string& start, ECase use_case)
03482 {
03483     return str.size() >= start.size()  &&
03484         Compare(str, 0, start.size(), start, use_case) == 0;
03485 }
03486 
03487 inline
03488 bool NStr::StartsWith(const string& str, const char* start, ECase use_case)
03489 {
03490     size_t start_size = strlen(start);
03491     return str.size() >= start_size  &&
03492         Compare(str, 0, start_size, start, use_case) == 0;
03493 }
03494 
03495 inline
03496 bool NStr::StartsWith(const string& str, char start, ECase use_case)
03497 {
03498     return !str.empty()  &&
03499         ((use_case == eCase) ? (str[0] == start) :
03500          (toupper((unsigned char) str[0]) == start  ||
03501           tolower((unsigned char) str[0])));
03502 }
03503 
03504 inline
03505 bool NStr::EndsWith(const string& str, const string& end, ECase use_case)
03506 {
03507     return str.size() >= end.size()  &&
03508         Compare(str, str.size() - end.size(), end.size(), end, use_case) == 0;
03509 }
03510 
03511 inline
03512 bool NStr::EndsWith(const string& str, char end, ECase use_case)
03513 {
03514     if (!str.empty()) {
03515         char last = str[str.length() - 1];
03516         return (use_case == eCase) ? (last == end) :
03517                (toupper((unsigned char) last) == end  ||
03518                 tolower((unsigned char) last) == end);
03519     }
03520     return false;
03521 }
03522 
03523 inline
03524 SIZE_TYPE NStr::Find(const string& str, const string& pattern,
03525                      SIZE_TYPE start, SIZE_TYPE end, EOccurrence where,
03526                      ECase use_case)
03527 {
03528     return use_case == eCase ? FindCase(str, pattern, start, end, where)
03529         : FindNoCase(str, pattern, start, end, where);
03530 }
03531 
03532 inline
03533 SIZE_TYPE NStr::FindCase(const string& str, const string& pattern,
03534                          SIZE_TYPE start, SIZE_TYPE end, EOccurrence where)
03535 {
03536     if (where == eFirst) {
03537         SIZE_TYPE result = str.find(pattern, start);
03538         return (result == NPOS  ||  result > end) ? NPOS : result;
03539     } else {
03540         SIZE_TYPE result = str.rfind(pattern, end);
03541         return (result == NPOS  ||  result < start) ? NPOS : result;
03542     }
03543 }
03544 
03545 inline
03546 const string* NStr::FindCase(const list<string>& lst, const string& val)
03547 {
03548     return Find(lst, val, eCase);
03549 }
03550 
03551 inline
03552 const string* NStr::FindNoCase(const list <string>& lst, const string& val)
03553 {
03554     return Find(lst, val, eNocase);
03555 }
03556 
03557 inline
03558 const string* NStr::FindCase(const vector <string>& vec, const string& val)
03559 {
03560     return Find(vec, val, eCase);
03561 }
03562 
03563 inline
03564 const string* NStr::FindNoCase(const vector <string>& vec, const string& val)
03565 {
03566     return Find(vec, val, eNocase);
03567 }
03568 
03569 
03570 inline
03571 string NStr::CEncode(const string& str)
03572 {
03573     return PrintableString(str);
03574 }
03575 
03576 
03577 inline
03578 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr,
03579                          NStr::TWrapFlags flags, const string& prefix,
03580                          const string* prefix1)
03581 {
03582     return Wrap(str, width, arr, flags, &prefix, prefix1);
03583 }
03584 
03585 
03586 inline
03587 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr,
03588                          NStr::TWrapFlags flags, const string& prefix,
03589                          const string& prefix1)
03590 {
03591     return Wrap(str, width, arr, flags, &prefix, &prefix1);
03592 }
03593 
03594 
03595 inline
03596 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,
03597                              const string& delim, list<string>& arr,
03598                              NStr::TWrapFlags flags, const string& prefix,
03599                              const string* prefix1)
03600 {
03601     return WrapList(l, width, delim, arr, flags, &prefix, prefix1);
03602 }
03603 
03604 
03605 inline
03606 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,
03607                              const string& delim, list<string>& arr,
03608                              NStr::TWrapFlags flags, const string& prefix,
03609                              const string& prefix1)
03610 {
03611     return WrapList(l, width, delim, arr, flags, &prefix, &prefix1);
03612 }
03613 
03614 
03615 
03616 /////////////////////////////////////////////////////////////////////////////
03617 //  CStringUTF8::
03618 //
03619 
03620 template <typename TChar>
03621 inline
03622 basic_string<TChar> CStringUTF8::x_AsBasicString(
03623     const TChar* substitute_on_error) const
03624 {
03625     TUnicodeSymbol max_char = (TUnicodeSymbol)numeric_limits<TChar>::max();
03626     basic_string<TChar> result;
03627     result.reserve( GetSymbolCount()+1 );
03628     for (const char* src = c_str(); *src; ++src) {
03629         TUnicodeSymbol ch = Decode(src);
03630         if (ch > max_char) {
03631             if (substitute_on_error) {
03632                 result.append(substitute_on_error);
03633                 continue;
03634             } else {
03635                 NCBI_THROW2(CStringException, eConvert,
03636                     "Failed to convert symbol to wide character",
03637                     (SIZE_TYPE)(src - c_str()));
03638             }
03639         }
03640         result.append(1, (TChar)ch);
03641     }
03642     return result;
03643 }
03644 
03645 
03646 template <typename TChar>
03647 inline
03648 void CStringUTF8::x_Append(const TChar* src)
03649 {
03650     const TChar* srcBuf;
03651     SIZE_TYPE needed = 0;
03652 
03653     for (srcBuf = src; *srcBuf; ++srcBuf) {
03654         needed += x_BytesNeeded( *srcBuf );
03655     }
03656     if ( !needed ) {
03657         return;
03658     }
03659     reserve(max(capacity(),length()+needed+1));
03660     for (srcBuf = src; *srcBuf; ++srcBuf) {
03661         x_AppendChar( *srcBuf );
03662     }
03663 }
03664 
03665 
03666 
03667 
03668 /////////////////////////////////////////////////////////////////////////////
03669 //  PCase_Generic::
03670 //
03671 
03672 template <typename T>
03673 inline
03674 int PCase_Generic<T>::Compare(const T& s1, const T& s2) const
03675 {
03676     return NStr::Compare(s1, s2, NStr::eCase);
03677 }
03678 
03679 template <typename T>
03680 inline
03681 bool PCase_Generic<T>::Less(const T& s1, const T& s2) const
03682 {
03683     return Compare(s1, s2) < 0;
03684 }
03685 
03686 template <typename T>
03687 inline
03688 bool PCase_Generic<T>::Equals(const T& s1, const T& s2) const
03689 {
03690     return Compare(s1, s2) == 0;
03691 }
03692 
03693 template <typename T>
03694 inline
03695 bool PCase_Generic<T>::operator()(const T& s1, const T& s2) const
03696 {
03697     return Less(s1, s2);
03698 }
03699 
03700 
03701 
03702 ////////////////////////////////////////////////////////////////////////////
03703 //  PNocase_Generic<T>::
03704 //
03705 
03706 
03707 template <typename T>
03708 inline
03709 int PNocase_Generic<T>::Compare(const T& s1, const T& s2) const
03710 {
03711     return NStr::Compare(s1, s2, NStr::eNocase);
03712 }
03713 
03714 template <typename T>
03715 inline
03716 bool PNocase_Generic<T>::Less(const T& s1, const T& s2) const
03717 {
03718     return Compare(s1, s2) < 0;
03719 }
03720 
03721 template <typename T>
03722 inline
03723 bool PNocase_Generic<T>::Equals(const T& s1, const T& s2) const
03724 {
03725     return Compare(s1, s2) == 0;
03726 }
03727 
03728 template <typename T>
03729 inline
03730 bool PNocase_Generic<T>::operator()(const T& s1, const T& s2) const
03731 {
03732     return Less(s1, s2);
03733 }
03734 
03735 ////////////////////////////////////////////////////////////////////////////
03736 //  PNocase_Conditional_Generic<T>::
03737 //
03738 
03739 template <typename T>
03740 inline
03741 PNocase_Conditional_Generic<T>::PNocase_Conditional_Generic(NStr::ECase cs)
03742     : m_CaseSensitive(cs)
03743 {}
03744 
03745 template <typename T>
03746 inline
03747 int PNocase_Conditional_Generic<T>::Compare(const T& s1, const T& s2) const
03748 {
03749     return NStr::Compare(s1, s2, m_CaseSensitive);
03750 }
03751 
03752 template <typename T>
03753 inline
03754 bool PNocase_Conditional_Generic<T>::Less(const T& s1, const T& s2) const
03755 {
03756     return Compare(s1, s2) < 0;
03757 }
03758 
03759 template <typename T>
03760 inline
03761 bool PNocase_Conditional_Generic<T>::Equals(const T& s1, const T& s2) const
03762 {
03763     return Compare(s1, s2) == 0;
03764 }
03765 
03766 template <typename T>
03767 inline
03768 bool PNocase_Conditional_Generic<T>::operator()(const T& s1, const T& s2) const
03769 {
03770     return Less(s1, s2);
03771 }
03772 
03773 
03774 
03775 END_NCBI_SCOPE
03776 
03777 #endif  /* CORELIB___NCBISTR__HPP */
03778 
03779 

Generated on Sun Dec 6 22:01:27 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:35 2009 by modify_doxy.py rev. 173732