00001 #ifndef UTIL_COMPRESS__ZLIB__HPP 00002 #define UTIL_COMPRESS__ZLIB__HPP 00003 00004 /* $Id: zlib.hpp 139071 2008-09-02 15:06:17Z ivanov $ 00005 * =========================================================================== 00006 * 00007 * PUBLIC DOMAIN NOTICE 00008 * National Center for Biotechnology Information 00009 * 00010 * This software/database is a "United States Government Work" under the 00011 * terms of the United States Copyright Act. It was written as part of 00012 * the author's official duties as a United States Government employee and 00013 * thus cannot be copyrighted. This software/database is freely available 00014 * to the public for use. The National Library of Medicine and the U.S. 00015 * Government have not placed any restriction on its use or reproduction. 00016 * 00017 * Although all reasonable efforts have been taken to ensure the accuracy 00018 * and reliability of the software and data, the NLM and the U.S. 00019 * Government do not and cannot warrant the performance or results that 00020 * may be obtained by using this software or data. The NLM and the U.S. 00021 * Government disclaim all warranties, express or implied, including 00022 * warranties of performance, merchantability or fitness for any particular 00023 * purpose. 00024 * 00025 * Please cite the author in any work or product based on this material. 00026 * 00027 * =========================================================================== 00028 * 00029 * Author: Vladimir Ivanov 00030 * 00031 */ 00032 00033 /// @file zlib.hpp 00034 /// ZLib Compression API. 00035 /// 00036 /// CZipCompression - base methods for compression/decompression 00037 /// memory buffers and files. 00038 /// CZipCompressionFile - allow read/write operations on files in 00039 /// zlib or gzip (.gz) format. 00040 /// CZipCompressor - zlib based compressor 00041 /// (used in CZipStreamCompressor). 00042 /// CZipDecompressor - zlib based decompressor 00043 /// (used in CZipStreamDecompressor). 
00044 /// CZipStreamCompressor - zlib based compression stream processor 00045 /// (see util/compress/stream.hpp for details). 00046 /// CZipStreamDecompressor - zlib based decompression stream processor 00047 /// (see util/compress/stream.hpp for details). 00048 /// 00049 /// The zlib documentation can be found here: 00050 /// http://zlib.org, or 00051 /// http://www.gzip.org/zlib/manual.html 00052 00053 00054 #include <util/compress/stream.hpp> 00055 00056 /** @addtogroup Compression 00057 * 00058 * @{ 00059 */ 00060 00061 BEGIN_NCBI_SCOPE 00062 00063 00064 ////////////////////////////////////////////////////////////////////////////// 00065 // 00066 // Special compressor's parameters (description from zlib docs) 00067 // 00068 // <window_bits> 00069 // This parameter is the base two logarithm of the window size 00070 // (the size of the history buffer). It should be in the range 8..15 for 00071 // this version of the library. Larger values of this parameter result 00072 // in better compression at the expense of memory usage. 00073 // 00074 // <mem_level> 00075 // The "mem_level" parameter specifies how much memory should be 00076 // allocated for the internal compression state. mem_level=1 uses minimum 00077 // memory but is slow and reduces compression ratio; mem_level=9 uses 00078 // maximum memory for optimal speed. The default value is 8. See zconf.h 00079 // for total memory usage as a function of windowBits and memLevel. 00080 // 00081 // <strategy> 00082 // The strategy parameter is used to tune the compression algorithm. 00083 // Use the value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data 00084 // produced by a filter (or predictor), or Z_HUFFMAN_ONLY to force 00085 // Huffman encoding only (no string match). Filtered data consists mostly 00086 // of small values with a somewhat random distribution. In this case, 00087 // the compression algorithm is tuned to compress them better. 
The effect 00088 // of Z_FILTERED is to force more Huffman coding and less string matching; 00089 // it is somewhat intermediate between Z_DEFAULT and Z_HUFFMAN_ONLY. 00090 // The strategy parameter only affects the compression ratio but not the 00091 // correctness of the compressed output even if it is not set appropriately. 00092 00093 // Use default values, defined in zlib library 00094 const int kZlibDefaultWbits = -1; 00095 const int kZlibDefaultMemLevel = -1; 00096 const int kZlibDefaultStrategy = -1; 00097 const int kZlibDefaultCompression = -1; 00098 00099 00100 ///////////////////////////////////////////////////////////////////////////// 00101 /// 00102 /// CZipCompression -- 00103 /// 00104 /// Define a base methods for compression/decompression memory buffers 00105 /// and files. 00106 00107 class CZipCompression : public CCompression 00108 { 00109 public: 00110 /// Compression/decompression flags. 00111 enum EFlags { 00112 ///< Allow transparent reading data from buffer/file/stream 00113 ///< regardless is it compressed or not. But be aware, 00114 ///< if data source contains broken data and API cannot detect that 00115 ///< it is compressed data, that you can get binary instead of 00116 ///< decompressed data. By default this flag is OFF. 00117 ///< NOTE: zlib v1.1.4 and earlier have a bug in decoding. 00118 ///< In some cases decompressor can produce output data on invalid 00119 ///< compressed data. So, this is not recommended to use this flag 00120 ///< with old zlib versions. 00121 fAllowTransparentRead = (1<<0), 00122 ///< Check (and skip) file header for decompression stream 00123 fCheckFileHeader = (1<<1), 00124 ///< Use gzip (.gz) file format to write into compression stream 00125 ///< (the archive also can store file name and file modification 00126 ///< date in this format) 00127 fWriteGZipFormat = (1<<2), 00128 ///< This flag can be used only with DecompressFile[IntoDir](). 
00129 ///< It allow to restore the original file name and/or time stamp stored 00130 ///< in the file header, if present. 00131 ///< @sa DecompressFile, DecompressFileIntoDir 00132 fRestoreFileAttr = (1<<3) 00133 }; 00134 00135 /// Constructor. 00136 CZipCompression( 00137 ELevel level = eLevel_Default, 00138 int window_bits = kZlibDefaultWbits, // [8..15] 00139 int mem_level = kZlibDefaultMemLevel, // [1..9] 00140 int strategy = kZlibDefaultStrategy // [0..2] 00141 ); 00142 00143 /// Destructor. 00144 virtual ~CZipCompression(void); 00145 00146 /// Return name and version of the compression library. 00147 virtual CVersionInfo GetVersion(void) const; 00148 00149 /// Returns default compression level for a compression algorithm. 00150 virtual ELevel GetDefaultLevel(void) const 00151 { return ELevel(kZlibDefaultCompression); }; 00152 00153 // 00154 // Utility functions 00155 // 00156 00157 /// Compress data in the buffer. 00158 /// 00159 /// Altogether, the total size of the destination buffer must be little 00160 /// more then size of the source buffer. 00161 /// @param src_buf 00162 /// Source buffer. 00163 /// @param src_len 00164 /// Size of data in source buffer. 00165 /// @param dst_buf 00166 /// Destination buffer. 00167 /// @param dst_size 00168 /// Size of destination buffer. 00169 /// @param dst_len 00170 /// Size of compressed data in destination buffer. 00171 /// @return 00172 /// Return TRUE if operation was succesfully or FALSE otherwise. 00173 /// On success, 'dst_buf' contains compressed data of dst_len size. 00174 /// @sa 00175 /// EstimateCompressionBufferSize, DecompressBuffer 00176 virtual bool CompressBuffer( 00177 const void* src_buf, size_t src_len, 00178 void* dst_buf, size_t dst_size, 00179 /* out */ size_t* dst_len 00180 ); 00181 00182 /// Compress data in the buffer. 00183 /// 00184 /// Altogether, the total size of the destination buffer must be little 00185 /// more then size of the source buffer. 
00186 /// @param src_buf 00187 /// Source buffer. 00188 /// @param src_len 00189 /// Size of data in source buffer. 00190 /// @param dst_buf 00191 /// Destination buffer. 00192 /// @param dst_size 00193 /// Size of destination buffer. 00194 /// @param dst_len 00195 /// Size of compressed data in destination buffer. 00196 /// @return 00197 /// Return TRUE if operation was succesfully or FALSE otherwise. 00198 /// On success, 'dst_buf' contains compressed data of dst_len size. 00199 /// @sa 00200 /// CompressBuffer 00201 virtual bool DecompressBuffer( 00202 const void* src_buf, size_t src_len, 00203 void* dst_buf, size_t dst_size, 00204 /* out */ size_t* dst_len 00205 ); 00206 00207 /// Estimate buffer size for data compression. 00208 /// 00209 /// The function shall estimate the size of buffer required to compress 00210 /// specified number of bytes of data using the CompressBuffer() function. 00211 /// This function may return a conservative value that may be larger 00212 /// than 'src_len'. 00213 /// @param src_len 00214 /// Size of compressed data. 00215 /// @return 00216 /// Estimated buffer size. 00217 /// Return -1 on error, or if this method is not supported by current 00218 /// version of the zlib library. 00219 /// @sa 00220 /// CompressBuffer 00221 long EstimateCompressionBufferSize(size_t src_len); 00222 00223 /// Compress file. 00224 /// 00225 /// @param src_file 00226 /// File name of source file. 00227 /// @param dst_file 00228 /// File name of result file. 00229 /// @param buf_size 00230 /// Buffer size used to read/write files. 00231 /// @return 00232 /// Return TRUE on success, FALSE on error. 00233 /// @sa 00234 /// DecompressFile, DecompressFileIntoDir 00235 /// @note 00236 /// This method, as well as some gzip utilities, always 00237 /// keeps the original file name and timestamp in 00238 /// the compressed file. On this moment DecompressFile() 00239 /// do not use original file name at all, but be aware... 
00240 /// If you assign different base name to destination 00241 /// compressed file, that behavior of decompression utilities 00242 /// on different platforms may differ. 00243 /// For example, WinZip on MS Windows always restore 00244 /// original file name and timestamp stored in the file. 00245 /// UNIX gunzip have -N option for this, but by default 00246 /// do not use it, and just creates a decompressed file with 00247 /// the name of the compressed file without .gz extention. 00248 virtual bool CompressFile( 00249 const string& src_file, 00250 const string& dst_file, 00251 size_t buf_size = kCompressionDefaultBufSize 00252 ); 00253 00254 /// Decompress file. 00255 /// 00256 /// @param src_file 00257 /// File name of source file. 00258 /// @param dst_file 00259 /// File name of result file. 00260 /// @param buf_size 00261 /// Buffer size used to read/write files. 00262 /// @return 00263 /// Return TRUE on success, FALSE on error. 00264 /// @sa 00265 /// CompressFile, DecompressFileIntoDir 00266 /// @note 00267 /// CompressFile() method, as well as some gzip utilities, 00268 /// always keeps the original file name and timestamp in 00269 /// the compressed file. If fRestoreFileAttr flag is set, 00270 /// that time stamp, stored in the file header will be restored. 00271 /// The original file name cannot be restored here, 00272 /// see DecompressFileIntoDir(). 00273 virtual bool DecompressFile( 00274 const string& src_file, 00275 const string& dst_file, 00276 size_t buf_size = kCompressionDefaultBufSize 00277 ); 00278 00279 /// Decompress file into specified directory. 00280 /// 00281 /// @param src_file 00282 /// File name of source file. 00283 /// @param dst_dir 00284 /// Destination directory. 00285 /// @param buf_size 00286 /// Buffer size used to read/write files. 00287 /// @return 00288 /// Return TRUE on success, FALSE on error. 
00289 /// @sa 00290 /// CompressFile, DecompressFile 00291 /// @note 00292 /// CompressFile() method, as well as some gzip utilities, 00293 /// always keeps the original file name and timestamp in 00294 /// the compressed file. If fRestoreFileAttr flag is set, 00295 /// that original file name and time stamp, stored in 00296 /// the file header will be restored. If not, that destination 00297 /// file will be named as archive name without extention. 00298 virtual bool DecompressFileIntoDir( 00299 const string& src_file, 00300 const string& dst_dir, 00301 size_t buf_size = kCompressionDefaultBufSize 00302 ); 00303 00304 /// Structure to keep compressed file information. 00305 struct SFileInfo { 00306 string name; 00307 string comment; 00308 time_t mtime; 00309 SFileInfo(void) : mtime(0) {}; 00310 }; 00311 00312 protected: 00313 /// Format string with last error description 00314 string FormatErrorMessage(string where, bool use_stream_data =true) const; 00315 00316 protected: 00317 void* m_Stream; ///< Compressor stream. 00318 int m_WindowBits; ///< The base two logarithm of the window size 00319 ///< (the size of the history buffer). 00320 int m_MemLevel; ///< The allocation memory level for the 00321 ///< internal compression state. 00322 int m_Strategy; ///< The parameter to tune compression algorithm. 00323 }; 00324 00325 00326 ///////////////////////////////////////////////////////////////////////////// 00327 /// 00328 /// CZipCompressionFile -- 00329 /// 00330 /// Allow read/write operations on files in zlib or gzip (.gz) formats. 00331 /// Throw exceptions on critical errors. 00332 00333 class CZipCompressionFile : public CZipCompression, 00334 public CCompressionFile 00335 { 00336 public: 00337 /// Constructor. 00338 /// For a special parameters description see CZipCompression. 
00339 CZipCompressionFile( 00340 const string& file_name, 00341 EMode mode, 00342 ELevel level = eLevel_Default, 00343 int window_bits = kZlibDefaultWbits, 00344 int mem_level = kZlibDefaultMemLevel, 00345 int strategy = kZlibDefaultStrategy 00346 ); 00347 /// Conventional constructor. 00348 /// For a special parameters description see CZipCompression. 00349 CZipCompressionFile( 00350 ELevel level = eLevel_Default, 00351 int window_bits = kZlibDefaultWbits, 00352 int mem_level = kZlibDefaultMemLevel, 00353 int strategy = kZlibDefaultStrategy 00354 ); 00355 00356 /// Destructor 00357 ~CZipCompressionFile(void); 00358 00359 /// Opens a compressed file for reading or writing. 00360 /// 00361 /// For reading/writing gzip (.gz) files the appropriate 00362 /// CZipCompression::EFlags flags should be set before Open() call. 00363 /// @param file_name 00364 /// File name of the file to open. 00365 /// @param mode 00366 /// File open mode. 00367 /// @return 00368 /// TRUE if file was opened succesfully or FALSE otherwise. 00369 /// @sa 00370 /// CZipCompression, Read, Write, Close 00371 virtual bool Open(const string& file_name, EMode mode); 00372 00373 /// Opens a compressed file for reading or writing. 00374 /// 00375 /// Do the same as standard Open(), but can also get/set file info. 00376 /// @param file_name 00377 /// File name of the file to open. 00378 /// @param mode 00379 /// File open mode. 00380 /// @param info 00381 /// Pointer to file information structure. If it is not NULL, 00382 /// that it will be used to get information about compressed file 00383 /// in the read mode, and set it in the write mode for gzip files. 00384 /// @return 00385 /// TRUE if file was opened succesfully or FALSE otherwise. 00386 /// @sa 00387 /// CZipCompression, Read, Write, Close 00388 virtual bool Open(const string& file_name, EMode mode, SFileInfo* info); 00389 00390 /// Read data from compressed file. 
00391 /// 00392 /// Read up to "len" uncompressed bytes from the compressed file "file" 00393 /// into the buffer "buf". 00394 /// @param buf 00395 /// Buffer for requested data. 00396 /// @param len 00397 /// Number of bytes to read. 00398 /// @return 00399 /// Number of bytes actually read (0 for end of file, -1 for error). 00400 /// The number of really readed bytes can be less than requested. 00401 /// @sa 00402 /// Open, Write, Close 00403 virtual long Read(void* buf, size_t len); 00404 00405 /// Write data to compressed file. 00406 /// 00407 /// Writes the given number of uncompressed bytes from the buffer 00408 /// into the compressed file. 00409 /// @param buf 00410 /// Buffer with written data. 00411 /// @param len 00412 /// Number of bytes to write. 00413 /// @return 00414 /// Number of bytes actually written or -1 for error. 00415 /// @sa 00416 /// Open, Read, Close 00417 virtual long Write(const void* buf, size_t len); 00418 00419 /// Close compressed file. 00420 /// 00421 /// Flushes all pending output if necessary, closes the compressed file. 00422 /// @return 00423 /// TRUE on success, FALSE on error. 00424 /// @sa 00425 /// Open, Read, Write 00426 virtual bool Close(void); 00427 00428 private: 00429 EMode m_Mode; ///< I/O mode (read/write). 00430 CNcbiFstream* m_File; ///< File stream. 00431 CCompressionIOStream* m_Zip; ///< [De]comression stream. 00432 }; 00433 00434 00435 ///////////////////////////////////////////////////////////////////////////// 00436 /// 00437 /// CZipCompressor -- zlib based compressor 00438 /// 00439 /// Used in CZipStreamCompressor. 00440 /// @sa CZipStreamCompressor, CZipCompression, CCompressionProcessor 00441 00442 class CZipCompressor : public CZipCompression, 00443 public CCompressionProcessor 00444 { 00445 public: 00446 /// Constructor. 
00447 CZipCompressor( 00448 ELevel level = eLevel_Default, 00449 int window_bits = kZlibDefaultWbits, 00450 int mem_level = kZlibDefaultMemLevel, 00451 int strategy = kZlibDefaultStrategy, 00452 CCompression::TFlags flags = 0 00453 ); 00454 /// Destructor. 00455 virtual ~CZipCompressor(void); 00456 00457 /// Set information about compressed file. 00458 /// 00459 /// Used for compression of gzip files. 00460 void SetFileInfo(const SFileInfo& info); 00461 00462 protected: 00463 virtual EStatus Init (void); 00464 virtual EStatus Process(const char* in_buf, size_t in_len, 00465 char* out_buf, size_t out_size, 00466 /* out */ size_t* in_avail, 00467 /* out */ size_t* out_avail); 00468 virtual EStatus Flush (char* out_buf, size_t out_size, 00469 /* out */ size_t* out_avail); 00470 virtual EStatus Finish (char* out_buf, size_t out_size, 00471 /* out */ size_t* out_avail); 00472 virtual EStatus End (void); 00473 00474 private: 00475 unsigned long m_CRC32; ///< CRC32 for compressed data. 00476 string m_Cache; ///< Buffer to cache small pieces of data. 00477 bool m_NeedWriteHeader; 00478 ///< Is true if needed to write a file header. 00479 SFileInfo m_FileInfo; ///< Compressed file info. 00480 }; 00481 00482 00483 00484 ///////////////////////////////////////////////////////////////////////////// 00485 /// 00486 /// CZipCompressor -- zlib based decompressor 00487 /// 00488 /// Used in CZipStreamCompressor. 00489 /// @sa CZipStreamCompressor, CZipCompression, CCompressionProcessor 00490 00491 class CZipDecompressor : public CZipCompression, 00492 public CCompressionProcessor 00493 { 00494 public: 00495 /// Constructor. 00496 CZipDecompressor( 00497 int window_bits = kZlibDefaultWbits, 00498 CCompression::TFlags flags = 0 00499 ); 00500 /// Destructor. 
00501 virtual ~CZipDecompressor(void); 00502 00503 protected: 00504 virtual EStatus Init (void); 00505 virtual EStatus Process(const char* in_buf, size_t in_len, 00506 char* out_buf, size_t out_size, 00507 /* out */ size_t* in_avail, 00508 /* out */ size_t* out_avail); 00509 virtual EStatus Flush (char* out_buf, size_t out_size, 00510 /* out */ size_t* out_avail); 00511 virtual EStatus Finish (char* out_buf, size_t out_size, 00512 /* out */ size_t* out_avail); 00513 virtual EStatus End (void); 00514 00515 private: 00516 bool m_NeedCheckHeader; ///< Is TRUE if needed to check at file header. 00517 string m_Cache; ///< Buffer to cache small pieces of data. 00518 }; 00519 00520 00521 00522 ///////////////////////////////////////////////////////////////////////////// 00523 /// 00524 /// CZipStreamCompressor -- zlib based compression stream processor 00525 /// 00526 /// See util/compress/stream.hpp for details. 00527 /// @sa CCompressionStreamProcessor 00528 00529 class CZipStreamCompressor 00530 : public CCompressionStreamProcessor 00531 { 00532 public: 00533 /// Full constructor 00534 CZipStreamCompressor( 00535 CCompression::ELevel level, 00536 streamsize in_bufsize, 00537 streamsize out_bufsize, 00538 int window_bits, 00539 int mem_level, 00540 int strategy, 00541 CCompression::TFlags flags = 0 00542 ) 00543 : CCompressionStreamProcessor( 00544 new CZipCompressor(level,window_bits,mem_level,strategy,flags), 00545 eDelete, in_bufsize, out_bufsize) 00546 {} 00547 00548 /// Conventional constructor 00549 CZipStreamCompressor( 00550 CCompression::ELevel level, 00551 CCompression::TFlags flags = 0 00552 ) 00553 : CCompressionStreamProcessor( 00554 new CZipCompressor(level, kZlibDefaultWbits, 00555 kZlibDefaultMemLevel, kZlibDefaultStrategy, 00556 flags), 00557 eDelete, kCompressionDefaultBufSize, kCompressionDefaultBufSize) 00558 {} 00559 00560 /// Conventional constructor 00561 CZipStreamCompressor(CCompression::TFlags flags = 0) 00562 : CCompressionStreamProcessor( 
00563 new CZipCompressor(CCompression::eLevel_Default, 00564 kZlibDefaultWbits, kZlibDefaultMemLevel, 00565 kZlibDefaultStrategy, flags), 00566 eDelete, kCompressionDefaultBufSize, kCompressionDefaultBufSize) 00567 {} 00568 }; 00569 00570 00571 ///////////////////////////////////////////////////////////////////////////// 00572 /// 00573 /// CZipStreamDecompressor -- zlib based decompression stream processor 00574 /// 00575 /// See util/compress/stream.hpp for details. 00576 /// @sa CCompressionStreamProcessor 00577 00578 class CZipStreamDecompressor 00579 : public CCompressionStreamProcessor 00580 { 00581 public: 00582 /// Full constructor 00583 CZipStreamDecompressor( 00584 streamsize in_bufsize, 00585 streamsize out_bufsize, 00586 int window_bits, 00587 CCompression::TFlags flags 00588 ) 00589 : CCompressionStreamProcessor( 00590 new CZipDecompressor(window_bits, flags), 00591 eDelete, in_bufsize, out_bufsize) 00592 {} 00593 00594 /// Conventional constructor 00595 CZipStreamDecompressor(CCompression::TFlags flags = 0) 00596 : CCompressionStreamProcessor( 00597 new CZipDecompressor(kZlibDefaultWbits, flags), 00598 eDelete, kCompressionDefaultBufSize, kCompressionDefaultBufSize) 00599 {} 00600 }; 00601 00602 00603 END_NCBI_SCOPE 00604 00605 00606 /* @} */ 00607 00608 #endif /* UTIL_COMPRESS__ZLIB__HPP */ 00609 00610
// 1.4.6
// Modified on Mon Oct 13 17:28:19 2008 by modify_doxy.py rev. 117643