#include <ncbistr.hpp>
Definition at line 2076 of file ncbistr.hpp.
Public Types | |
| enum | EValidate { eNoValidate, eValidate } |
Public Member Functions | |
| CStringUTF8 (void) | |
| Default constructor. | |
| ~CStringUTF8 (void) | |
| Destructor. | |
| CStringUTF8 (const CStringUTF8 &src, EValidate validate=eNoValidate) | |
| Copy constructor. | |
| CStringUTF8 (const string &src, EEncoding encoding=eEncoding_ISO8859_1, EValidate validate=eNoValidate) | |
| Constructor from a C++ string. | |
| CStringUTF8 (const char *src, EEncoding encoding=eEncoding_ISO8859_1, EValidate validate=eNoValidate) | |
| Constructor from a C string. | |
| template<class T> | |
| CStringUTF8 (const basic_string< T > &src) | |
| Constructor from any string (ISO8859-1, UCS-2 or UCS-4, depending on the size of TChar). | |
| template<typename TChar> | |
| CStringUTF8 (const TChar *src) | |
| Constructor from any character sequence (ISO8859-1, UCS-2 or UCS-4, depending on the size of TChar). | |
| CStringUTF8 & | operator= (const CStringUTF8 &src) |
| Assign to UTF8 string. | |
| template<typename TChar> | |
| CStringUTF8 & | operator= (const basic_string< TChar > &src) |
| Assign to C++ string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar). | |
| template<typename TChar> | |
| CStringUTF8 & | operator= (const TChar *src) |
| Assign to C string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar). | |
| CStringUTF8 & | operator+= (const CStringUTF8 &src) |
| Append a string in UTF8 encoding. | |
| template<typename TChar> | |
| CStringUTF8 & | operator+= (const basic_string< TChar > &src) |
| Append a C++ string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar). | |
| template<typename TChar> | |
| CStringUTF8 & | operator+= (const TChar *src) |
| Append a C string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar). | |
| CStringUTF8 & | Assign (const string &src, EEncoding encoding, EValidate validate=eNoValidate) |
| Assign to C++ string. | |
| CStringUTF8 & | Assign (const char *src, EEncoding encoding, EValidate validate=eNoValidate) |
| Assign to C string. | |
| template<typename TChar> | |
| CStringUTF8 & | Assign (const basic_string< TChar > &src) |
| Assign to C++ string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar). | |
| template<typename TChar> | |
| CStringUTF8 & | Assign (const TChar *src) |
| Assign to C string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar). | |
| CStringUTF8 & | Assign (char ch, EEncoding encoding) |
| Assign to a single character. | |
| CStringUTF8 & | Append (const string &src, EEncoding encoding, EValidate validate=eNoValidate) |
| Append a C++ string. | |
| CStringUTF8 & | Append (const char *src, EEncoding encoding, EValidate validate=eNoValidate) |
| Append a C string. | |
| template<typename TChar> | |
| CStringUTF8 & | Append (const basic_string< TChar > &src) |
| Append a C++ string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar). | |
| template<typename TChar> | |
| CStringUTF8 & | Append (const TChar *src) |
| Append a C string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar). | |
| CStringUTF8 & | Append (char ch, EEncoding encoding) |
| Append single character. | |
| CStringUTF8 & | Append (TUnicodeSymbol ch) |
| Append single Unicode code point. | |
| SIZE_TYPE | GetSymbolCount (void) const |
| Get the number of symbols (code points) in the string. | |
| bool | IsValid (void) const |
| Check that the character encoding of the string is valid UTF-8. | |
| string | AsLatin1 (const char *substitute_on_error=0) const |
| Convert to ISO 8859-1 (Latin1) character representation. | |
| string | AsSingleByteString (EEncoding encoding, const char *substitute_on_error=0) const |
| Convert the string to a single-byte character representation. | |
| TStringUCS2 | AsUCS2 (const TCharUCS2 *substitute_on_error=0) const |
| Convert to UCS-2 for all platforms. | |
Static Public Member Functions | |
| static SIZE_TYPE | GetValidSymbolCount (const char *src, SIZE_TYPE buf_size) |
| Get the number of valid UTF-8 symbols (code points) in the buffer. | |
| static SIZE_TYPE | GetValidBytesCount (const char *src, SIZE_TYPE buf_size) |
| Get the number of valid UTF-8 bytes (code units) in the buffer. | |
| static EEncoding | GuessEncoding (const char *src) |
| Guess the encoding of the C string. | |
| static EEncoding | GuessEncoding (const string &src) |
| Guess the encoding of the C++ string. | |
| static bool | MatchEncoding (const char *src, EEncoding encoding) |
| Check the encoding of the C string. | |
| static bool | MatchEncoding (const string &src, EEncoding encoding) |
| Check the encoding of the C++ string. | |
| static TUnicodeSymbol | CharToSymbol (char ch, EEncoding encoding) |
| Convert encoded character into Unicode code point. | |
| static char | SymbolToChar (TUnicodeSymbol sym, EEncoding encoding) |
| Convert Unicode code point into encoded character. | |
| static TUnicodeSymbol | Decode (const char *&src) |
| Convert sequence of UTF8 code units into Unicode code point. | |
| static TUnicodeSymbol | DecodeFirst (char ch, SIZE_TYPE &more) |
| Convert first character of UTF8 sequence into Unicode. | |
| static TUnicodeSymbol | DecodeNext (TUnicodeSymbol chU, char ch) |
| Convert next character of UTF8 sequence into Unicode. | |
Private Member Functions | |
| string | AsAscii (void) const |
| Function AsAscii is deprecated - use AsLatin1() instead. | |
| template<typename TChar> | |
| basic_string< TChar > | x_AsBasicString (const TChar *substitute_on_error) const |
| Conversion to basic_string with any base type we need. | |
| void | x_Validate (void) const |
| void | x_AppendChar (TUnicodeSymbol ch) |
| Convert Unicode code point into UTF8 and append. | |
| void | x_Append (const char *src, EEncoding encoding=eEncoding_ISO8859_1, EValidate validate=eNoValidate) |
| Convert coded character sequence into UTF8 and append. | |
| template<typename TChar> | |
| void | x_Append (const TChar *src) |
| Convert Unicode character sequence into UTF8 and append. Sequence can be in UCS-4 (TChar == (U)Int4), UCS-2 (TChar == (U)Int2) or in ISO8859-1 (TChar == char). | |
Static Private Member Functions | |
| static SIZE_TYPE | x_BytesNeeded (TUnicodeSymbol ch) |
| Check how many bytes are needed to represent the code point in UTF8. | |
| static bool | x_EvalFirst (char ch, SIZE_TYPE &more) |
| Check if the character is valid first code unit of UTF8. | |
| static bool | x_EvalNext (char ch) |
| Check if the character is valid non-first code unit of UTF8. | |
|
|
Definition at line 2079 of file ncbistr.hpp. |
|
|
Default constructor.
Definition at line 2085 of file ncbistr.hpp. |
|
|
Destructor.
Definition at line 2090 of file ncbistr.hpp. |
|
||||||||||||
|
Copy constructor.
Definition at line 2100 of file ncbistr.hpp. |
|
||||||||||||||||
|
Constructor from a C++ string.
Definition at line 2116 of file ncbistr.hpp. |
|
||||||||||||||||
|
Constructor from a C string.
Definition at line 2132 of file ncbistr.hpp. |
|
||||||||||
|
Constructor from any string (ISO8859-1, UCS-2 or UCS-4, depending on the size of TChar).
Definition at line 2143 of file ncbistr.hpp. |
|
||||||||||
|
Constructor from any character sequence (ISO8859-1, UCS-2 or UCS-4, depending on the size of TChar).
Definition at line 2152 of file ncbistr.hpp. |
|
|
Append single Unicode code point.
Definition at line 2357 of file ncbistr.hpp. |
|
||||||||||||
|
Append single character.
Definition at line 2346 of file ncbistr.hpp. |
|
||||||||||
|
Append a C string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar).
Definition at line 2334 of file ncbistr.hpp. |
|
||||||||||
|
Append a C++ string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar).
Definition at line 2322 of file ncbistr.hpp. |
|
||||||||||||||||
|
Append a C string.
Definition at line 2308 of file ncbistr.hpp. |
|
||||||||||||||||
|
Append a C++ string.
Definition at line 2292 of file ncbistr.hpp. |
|
|
Function AsAscii is deprecated - use AsLatin1() instead.
Definition at line 2551 of file ncbistr.hpp. |
|
|
Convert to ISO 8859-1 (Latin1) character representation. Can throw a CStringException if the conversion is impossible or the string has invalid UTF-8 format.
Definition at line 2404 of file ncbistr.hpp. References eEncoding_ISO8859_1. |
|
||||||||||||
|
Assign to a single character.
Definition at line 2276 of file ncbistr.hpp. |
|
||||||||||
|
Assign to C string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar).
Definition at line 2263 of file ncbistr.hpp. |
|
||||||||||
|
Assign to C++ string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar).
Definition at line 2250 of file ncbistr.hpp. |
|
||||||||||||||||
|
Assign to C string.
Definition at line 2235 of file ncbistr.hpp. |
|
||||||||||||||||
|
Assign to C++ string.
Definition at line 2218 of file ncbistr.hpp. Referenced by CObjectOStreamXml::WriteEncodedChar(), CObjectOStreamJson::WriteEncodedChar(), and CWString::x_StringToUTF8(). |
|
||||||||||||
|
Convert the string to a single-byte character representation. Can throw a CStringException if the conversion is impossible or the string has invalid UTF-8 format.
Definition at line 3260 of file ncbistr.cpp. References Decode(), GetSymbolCount(), and SymbolToChar(). Referenced by CWString::x_UTF8ToString(). |
|
|
Convert to UCS-2 for all platforms. Can throw a CStringException if the conversion is impossible or the string has invalid UTF-8 format.
Definition at line 2448 of file ncbistr.hpp. |
|
||||||||||||
|
Convert encoded character into Unicode code point.
Definition at line 3362 of file ncbistr.cpp. References eEncoding_Ascii, eEncoding_ISO8859_1, eEncoding_Unknown, eEncoding_UTF8, eEncoding_Windows_1252, and NCBI_THROW2. Referenced by CObjectOStreamXml::WriteEncodedChar(), and x_Append(). |
|
|
Convert sequence of UTF8 code units into Unicode code point.
Definition at line 3523 of file ncbistr.cpp. References NCBI_THROW2. Referenced by AsSingleByteString(), CObjectOStreamXml::WriteEncodedChar(), and x_AsBasicString(). |
|
||||||||||||
|
Convert first character of UTF8 sequence into Unicode.
Definition at line 3556 of file ncbistr.cpp. Referenced by CObjectIStreamXml::ReadUtf8Char(). |
|
||||||||||||
|
Convert next character of UTF8 sequence into Unicode.
Definition at line 3576 of file ncbistr.cpp. Referenced by CObjectIStreamXml::ReadUtf8Char(). |
|
|
Get the number of symbols (code points) in the string.
Definition at line 3202 of file ncbistr.cpp. References NCBI_THROW2, s_DiffPtr(), x_EvalFirst(), and x_EvalNext(). Referenced by AsSingleByteString(), CWString::GetSymbolNum(), and x_AsBasicString(). |
|
||||||||||||
|
Get the number of valid UTF-8 bytes (code units) in the buffer.
Definition at line 3238 of file ncbistr.cpp. References x_EvalFirst(), and x_EvalNext(). Referenced by CODBC_BCPInCmd::Send(), CODBC_SendDataCmd::SendChunk(), CTL_SendDataCmd::SendChunk(), and CODBC_Connection::x_SendData(). |
|
||||||||||||
|
Get the number of valid UTF-8 symbols (code points) in the buffer.
Definition at line 3221 of file ncbistr.cpp. References x_EvalFirst(), and x_EvalNext(). |
|
|
Guess the encoding of the C++ string. It can distinguish between UTF-8, Latin1, and Win1252 only.
Definition at line 2469 of file ncbistr.hpp. |
|
|
Guess the encoding of the C string. It can distinguish between UTF-8, Latin1, and Win1252 only.
Definition at line 3282 of file ncbistr.cpp. References skip, utf8, and x_EvalNext(). Referenced by CHTMLHelper::HTMLDecode(), MatchEncoding(), and x_Append(). |
|
|
Check that the character encoding of the string is valid UTF-8.
Definition at line 2393 of file ncbistr.hpp. References eEncoding_UTF8. Referenced by x_Validate(). |
|
||||||||||||
|
Check the encoding of the C++ string. Check that the encoding of the source is the same, or is compatible with the specified one
Definition at line 2496 of file ncbistr.hpp. |
|
||||||||||||
|
Check the encoding of the C string. Check that the encoding of the source is the same, or is compatible with the specified one
Definition at line 3329 of file ncbistr.cpp. References eEncoding_Ascii, eEncoding_ISO8859_1, eEncoding_Unknown, eEncoding_UTF8, eEncoding_Windows_1252, and GuessEncoding(). Referenced by x_Append(). |
|
||||||||||
|
Append a C string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar).
Definition at line 2204 of file ncbistr.hpp. |
|
||||||||||
|
Append a C++ string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar).
Definition at line 2195 of file ncbistr.hpp. |
|
|
Append a string in UTF8 encoding.
Definition at line 2186 of file ncbistr.hpp. References operator+=(). |
|
||||||||||
|
Assign to C string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar).
Definition at line 2178 of file ncbistr.hpp. |
|
||||||||||
|
Assign to C++ string in ISO8859-1, UCS-2 or UCS-4 (depending on the size of TChar).
Definition at line 2168 of file ncbistr.hpp. |
|
|
Assign to UTF8 string.
Definition at line 2159 of file ncbistr.hpp. |
|
||||||||||||
|
Convert Unicode code point into encoded character.
Definition at line 3387 of file ncbistr.cpp. References eEncoding_Unknown, eEncoding_UTF8, eEncoding_Windows_1252, and NCBI_THROW2. Referenced by AsSingleByteString(), and CObjectOStreamXml::WriteEncodedChar(). |
|
||||||||||
|
Convert Unicode character sequence into UTF8 and append. Sequence can be in UCS-4 (TChar == (U)Int4), UCS-2 (TChar == (U)Int2) or in ISO8859-1 (TChar == char).
Definition at line 3648 of file ncbistr.hpp. References x_BytesNeeded(). |
|
||||||||||||||||
|
Convert coded character sequence into UTF8 and append.
Definition at line 3442 of file ncbistr.cpp. References CharToSymbol(), eEncoding_Ascii, eEncoding_Unknown, eEncoding_UTF8, eValidate, GuessEncoding(), MatchEncoding(), NCBI_THROW2, and x_BytesNeeded(). |
|
|
Convert Unicode code point into UTF8 and append.
Definition at line 3420 of file ncbistr.cpp. |
|
||||||||||
|
Conversion to basic_string with any base type we need.
Definition at line 3622 of file ncbistr.hpp. References Decode(), and GetSymbolCount(). |
|
|
Check how many bytes are needed to represent the code point in UTF8.
Definition at line 3477 of file ncbistr.cpp. Referenced by x_Append(). |
|
||||||||||||
|
Check if the character is valid first code unit of UTF8.
Definition at line 3491 of file ncbistr.cpp. Referenced by GetSymbolCount(), GetValidBytesCount(), and GetValidSymbolCount(). |
|
|
Check if the character is valid non-first code unit of UTF8.
Definition at line 3517 of file ncbistr.cpp. Referenced by GetSymbolCount(), GetValidBytesCount(), GetValidSymbolCount(), and GuessEncoding(). |
|
|
Definition at line 3411 of file ncbistr.cpp. References IsValid(), and NCBI_THROW2. |
1.4.6
Modified on Mon Dec 07 16:24:11 2009 by modify_doxy.py rev. 173732