00001 #ifndef UTIL_COMPRESS__BZIP2__HPP 00002 #define UTIL_COMPRESS__BZIP2__HPP 00003 00004 /* $Id: bzip2.hpp 169067 2009-08-25 13:41:52Z ivanov $ 00005 * =========================================================================== 00006 * 00007 * PUBLIC DOMAIN NOTICE 00008 * National Center for Biotechnology Information 00009 * 00010 * This software/database is a "United States Government Work" under the 00011 * terms of the United States Copyright Act. It was written as part of 00012 * the author's official duties as a United States Government employee and 00013 * thus cannot be copyrighted. This software/database is freely available 00014 * to the public for use. The National Library of Medicine and the U.S. 00015 * Government have not placed any restriction on its use or reproduction. 00016 * 00017 * Although all reasonable efforts have been taken to ensure the accuracy 00018 * and reliability of the software and data, the NLM and the U.S. 00019 * Government do not and cannot warrant the performance or results that 00020 * may be obtained by using this software or data. The NLM and the U.S. 00021 * Government disclaim all warranties, express or implied, including 00022 * warranties of performance, merchantability or fitness for any particular 00023 * purpose. 00024 * 00025 * Please cite the author in any work or product based on this material. 
00026 * 00027 * =========================================================================== 00028 * 00029 * Author: Vladimir Ivanov 00030 * 00031 * File Description: BZip2 Compression API 00032 * 00033 * NOTE: The bzip2 documentation can be found here: 00034 * http://sources.redhat.com/bzip2/ 00035 */ 00036 00037 #include <util/compress/stream.hpp> 00038 #include <stdio.h> 00039 00040 /** @addtogroup Compression 00041 * 00042 * @{ 00043 */ 00044 00045 BEGIN_NCBI_SCOPE 00046 00047 00048 ////////////////////////////////////////////////////////////////////////////// 00049 // 00050 // Special compression parameters (description from bzip2 docs) 00051 // 00052 // <verbosity> 00053 // This parameter should be set to a number between 0 and 4 inclusive. 00054 // 0 is silent, and greater numbers give increasingly verbose 00055 // monitoring/debugging output. If the library has been compiled with 00056 // -DBZ_NO_STDIO, no such output will appear for any verbosity setting. 00057 // 00058 // <work_factor> 00059 // Parameter work_factor controls how the compression phase behaves when 00060 // presented with worst case, highly repetitive, input data. 00061 // If compression runs into difficulties caused by repetitive data, the 00062 // library switches from the standard sorting algorithm to a fallback 00063 // algorithm. The fallback is slower than the standard algorithm by 00064 // perhaps a factor of three, but always behaves reasonably, no matter 00065 // how bad the input. Lower values of work_factor reduce the amount of 00066 // effort the standard algorithm will expend before resorting to the 00067 // fallback. You should set this parameter carefully; too low, and many 00068 // inputs will be handled by the fallback algorithm and so compress 00069 // rather slowly, too high, and your average-to-worst case compression 00070 // times can become very large. The default value of 30 gives reasonable 00071 // behaviour over a wide range of circumstances. 
Allowable values range 00072 // from 0 to 250 inclusive. 0 is a special case, equivalent to using 00073 // the default value of 30. 00074 // 00075 // <small_decompress> 00076 // If it is nonzero, the library will use an alternative decompression 00077 // algorithm which uses less memory but at the cost of decompressing more 00078 // slowly (roughly speaking, half the speed, but the maximum memory 00079 // requirement drops to around 2300k). 00080 // 00081 00082 00083 ////////////////////////////////////////////////////////////////////////////// 00084 /// 00085 /// CBZip2Compression -- 00086 /// 00087 /// Define a base methods for compression/decompression memory buffers 00088 /// and files. 00089 00090 class CBZip2Compression : public CCompression 00091 { 00092 public: 00093 /// Compression/decompression flags. 00094 enum EFlags { 00095 ///< Allow transparent reading data from buffer/file/stream 00096 ///< regardless is it compressed or not. But be aware, 00097 ///< if data source contains broken data and API cannot detect that 00098 ///< it is compressed data, that you can get binary instead of 00099 ///< decompressed data. By default this flag is OFF. 00100 fAllowTransparentRead = (1<<0) 00101 }; 00102 typedef CBZip2Compression::TFlags TBZip2Flags; ///< Bitwise OR of EFlags 00103 00104 /// Constructor. 00105 CBZip2Compression( 00106 ELevel level = eLevel_Default, 00107 int verbosity = 0, // [0..4] 00108 int work_factor = 0, // [0..250] 00109 int small_decompress = 0 // [0,1] 00110 ); 00111 00112 /// Destructor. 00113 virtual ~CBZip2Compression(void); 00114 00115 /// Return name and version of the compression library. 00116 virtual CVersionInfo GetVersion(void) const; 00117 00118 /// Get compression level. 00119 /// 00120 /// NOTE: BZip2 algorithm do not support zero level compression. 00121 /// So the "eLevel_NoCompression" will be translated to 00122 /// "eLevel_Lowest". 
00123 virtual ELevel GetLevel(void) const; 00124 00125 /// Return default compression level for a compression algorithm 00126 virtual ELevel GetDefaultLevel(void) const 00127 { return eLevel_VeryHigh; }; 00128 00129 // 00130 // Utility functions 00131 // 00132 00133 /// Compress data in the buffer. 00134 /// 00135 /// Altogether, the total size of the destination buffer must be little 00136 /// more then size of the source buffer. 00137 /// @param src_buf 00138 /// [in] Source buffer. 00139 /// @param src_len 00140 /// [in] Size of data in source buffer. 00141 /// @param dst_buf 00142 /// [in] Destination buffer. 00143 /// @param dst_size 00144 /// [in] Size of destination buffer. 00145 /// @param dst_len 00146 /// [out] Size of compressed data in destination buffer. 00147 /// @return 00148 /// Return TRUE if operation was succesfully or FALSE otherwise. 00149 /// On success, 'dst_buf' contains compressed data of dst_len size. 00150 /// @sa 00151 /// DecompressBuffer 00152 virtual bool CompressBuffer( 00153 const void* src_buf, size_t src_len, 00154 void* dst_buf, size_t dst_size, 00155 /* out */ size_t* dst_len 00156 ); 00157 00158 /// Decompress data in the buffer. 00159 /// 00160 /// @param src_buf 00161 /// Source buffer. 00162 /// @param src_len 00163 /// Size of data in source buffer. 00164 /// @param dst_buf 00165 /// Destination buffer. 00166 /// @param dst_len 00167 /// Size of destination buffer. 00168 /// @param dst_len 00169 /// Size of decompressed data in destination buffer. 00170 /// @return 00171 /// Return TRUE if operation was succesfully or FALSE otherwise. 00172 /// On success, 'dst_buf' contains decompressed data of dst_len size. 00173 /// @sa 00174 /// CompressBuffer 00175 virtual bool DecompressBuffer( 00176 const void* src_buf, size_t src_len, 00177 void* dst_buf, size_t dst_size, 00178 /* out */ size_t* dst_len 00179 ); 00180 00181 /// Compress file. 00182 /// 00183 /// @param src_file 00184 /// File name of source file. 
00185 /// @param dst_file 00186 /// File name of result file. 00187 /// @param buf_size 00188 /// Buffer size used to read/write files. 00189 /// @return 00190 /// Return TRUE on success, FALSE on error. 00191 /// @sa 00192 /// DecompressFile 00193 virtual bool CompressFile( 00194 const string& src_file, 00195 const string& dst_file, 00196 size_t buf_size = kCompressionDefaultBufSize 00197 ); 00198 00199 /// Decompress file. 00200 /// 00201 /// @param src_file 00202 /// File name of source file. 00203 /// @param dst_file 00204 /// File name of result file. 00205 /// @param buf_size 00206 /// Buffer size used to read/write files. 00207 /// @return 00208 /// Return TRUE on success, FALSE on error. 00209 /// @sa 00210 /// CompressFile 00211 virtual bool DecompressFile( 00212 const string& src_file, 00213 const string& dst_file, 00214 size_t buf_size = kCompressionDefaultBufSize 00215 ); 00216 00217 protected: 00218 /// Get error description for specified error code. 00219 const char* GetBZip2ErrorDescription(int errcode); 00220 00221 /// Format string with last error description. 00222 string FormatErrorMessage(string where, bool use_stream_data = true) const; 00223 00224 protected: 00225 void* m_Stream; ///< Compressor stream 00226 int m_Verbosity; ///< Verbose monitoring/debugging output level 00227 int m_WorkFactor; ///< See description above 00228 int m_SmallDecompress; ///< Use memory-frugal decompression algorithm 00229 }; 00230 00231 00232 00233 ////////////////////////////////////////////////////////////////////////////// 00234 /// 00235 /// CBZip2CompressionFile class -- 00236 /// 00237 /// Throw exceptions on critical errors. 00238 00239 class CBZip2CompressionFile : public CBZip2Compression, 00240 public CCompressionFile 00241 { 00242 public: 00243 /// Constructor. 00244 /// For a special parameters description see CBZip2Compression. 
00245 CBZip2CompressionFile( 00246 const string& file_name, 00247 EMode mode, 00248 ELevel level = eLevel_Default, 00249 int verbosity = 0, 00250 int work_factor = 0, 00251 int small_decompress = 0 00252 ); 00253 00254 /// Conventional constructor. 00255 /// For a special parameters description see CBZip2Compression. 00256 CBZip2CompressionFile( 00257 ELevel level = eLevel_Default, 00258 int verbosity = 0, 00259 int work_factor = 0, 00260 int small_decompress = 0 00261 ); 00262 00263 /// Destructor. 00264 ~CBZip2CompressionFile(void); 00265 00266 /// Opens a compressed file for reading or writing. 00267 /// 00268 /// @param file_name 00269 /// File name of the file to open. 00270 /// @param mode 00271 /// File open mode. 00272 /// @return 00273 /// TRUE if file was opened succesfully or FALSE otherwise. 00274 /// @sa 00275 /// CBZip2Compression, Read, Write, Close 00276 virtual bool Open(const string& file_name, EMode mode); 00277 00278 /// Read data from compressed file. 00279 /// 00280 /// Read up to "len" uncompressed bytes from the compressed file "file" 00281 /// into the buffer "buf". 00282 /// @param buf 00283 /// Buffer for requested data. 00284 /// @param len 00285 /// Number of bytes to read. 00286 /// @return 00287 /// Number of bytes actually read (0 for end of file, -1 for error). 00288 /// The number of really readed bytes can be less than requested. 00289 /// @sa 00290 /// Open, Write, Close 00291 virtual long Read(void* buf, size_t len); 00292 00293 /// Write data to compressed file. 00294 /// 00295 /// Writes the given number of uncompressed bytes from the buffer 00296 /// into the compressed file. 00297 /// @param buf 00298 /// Buffer with written data. 00299 /// @param len 00300 /// Number of bytes to write. 00301 /// @return 00302 /// Number of bytes actually written or -1 for error. 00303 /// @sa 00304 /// Open, Read, Close 00305 virtual long Write(const void* buf, size_t len); 00306 00307 /// Close compressed file. 
00308 /// 00309 /// Flushes all pending output if necessary, closes the compressed file. 00310 /// @return 00311 /// TRUE on success, FALSE on error. 00312 /// @sa 00313 /// Open, Read, Write 00314 virtual bool Close(void); 00315 00316 protected: 00317 FILE* m_FileStream; ///< Underlying file stream 00318 bool m_EOF; ///< EOF flag for read mode 00319 }; 00320 00321 00322 00323 ///////////////////////////////////////////////////////////////////////////// 00324 /// 00325 /// CBZip2Compressor -- bzip2 based compressor 00326 /// 00327 /// Used in CBZip2StreamCompressor. 00328 /// @sa CBZip2StreamCompressor, CBZip2Compression, CCompressionProcessor 00329 00330 class CBZip2Compressor : public CBZip2Compression, 00331 public CCompressionProcessor 00332 { 00333 public: 00334 /// Constructor. 00335 CBZip2Compressor( 00336 ELevel level = eLevel_Default, 00337 int verbosity = 0, // [0..4] 00338 int work_factor = 0, // [0..250] 00339 TBZip2Flags flags = 0 00340 ); 00341 00342 /// Destructor. 00343 virtual ~CBZip2Compressor(void); 00344 00345 protected: 00346 virtual EStatus Init (void); 00347 virtual EStatus Process(const char* in_buf, size_t in_len, 00348 char* out_buf, size_t out_size, 00349 /* out */ size_t* in_avail, 00350 /* out */ size_t* out_avail); 00351 virtual EStatus Flush (char* out_buf, size_t out_size, 00352 /* out */ size_t* out_avail); 00353 virtual EStatus Finish (char* out_buf, size_t out_size, 00354 /* out */ size_t* out_avail); 00355 virtual EStatus End (void); 00356 }; 00357 00358 00359 ///////////////////////////////////////////////////////////////////////////// 00360 /// 00361 /// CBZip2Decompressor -- bzip2 based decompressor 00362 /// 00363 /// Used in CBZip2StreamCompressor. 00364 /// @sa CBZip2StreamCompressor, CBZip2Compression, CCompressionProcessor 00365 00366 class CBZip2Decompressor : public CBZip2Compression, 00367 public CCompressionProcessor 00368 { 00369 public: 00370 /// Constructor. 
00371 CBZip2Decompressor( 00372 int verbosity = 0, // [0..4] 00373 int small_decompress = 0, // [0,1] 00374 TBZip2Flags flags = 0 00375 ); 00376 00377 /// Destructor. 00378 virtual ~CBZip2Decompressor(void); 00379 00380 protected: 00381 virtual EStatus Init (void); 00382 virtual EStatus Process(const char* in_buf, size_t in_len, 00383 char* out_buf, size_t out_size, 00384 /* out */ size_t* in_avail, 00385 /* out */ size_t* out_avail); 00386 virtual EStatus Flush (char* out_buf, size_t out_size, 00387 /* out */ size_t* out_avail); 00388 virtual EStatus Finish (char* out_buf, size_t out_size, 00389 /* out */ size_t* out_avail); 00390 virtual EStatus End (void); 00391 }; 00392 00393 00394 00395 ////////////////////////////////////////////////////////////////////////////// 00396 /// 00397 /// CBZip2StreamCompressor -- bzip2 based compression stream processor 00398 /// 00399 /// See util/compress/stream.hpp for details of stream processing. 00400 /// @sa CCompressionStreamProcessor 00401 00402 class CBZip2StreamCompressor 00403 : public CCompressionStreamProcessor 00404 { 00405 public: 00406 /// Constructor. 00407 CBZip2StreamCompressor( 00408 CBZip2Compression::ELevel level = CCompression::eLevel_Default, 00409 streamsize in_bufsize = kCompressionDefaultBufSize, 00410 streamsize out_bufsize = kCompressionDefaultBufSize, 00411 int verbosity = 0, 00412 int work_factor = 0, 00413 CBZip2Compression::TBZip2Flags flags = 0 00414 ) 00415 00416 : CCompressionStreamProcessor( 00417 new CBZip2Compressor(level, verbosity, work_factor, flags), 00418 eDelete, in_bufsize, out_bufsize) 00419 {} 00420 }; 00421 00422 00423 ///////////////////////////////////////////////////////////////////////////// 00424 /// 00425 /// CLZOStreamDecompressor -- bzip2 based decompression stream processor 00426 /// 00427 /// See util/compress/stream.hpp for details. 
00428 /// @sa CCompressionStreamProcessor 00429 00430 class CBZip2StreamDecompressor 00431 : public CCompressionStreamProcessor 00432 { 00433 public: 00434 /// Full constructor. 00435 CBZip2StreamDecompressor( 00436 streamsize in_bufsize, 00437 streamsize out_bufsize, 00438 int verbosity, 00439 int small_decompress, 00440 CBZip2Compression::TBZip2Flags flags = 0 00441 ) 00442 : CCompressionStreamProcessor( 00443 new CBZip2Decompressor(verbosity, small_decompress, flags), 00444 eDelete, in_bufsize, out_bufsize) 00445 {} 00446 00447 /// Conventional constructor. 00448 CBZip2StreamDecompressor(CBZip2Compression::TBZip2Flags flags = 0) 00449 : CCompressionStreamProcessor( 00450 new CBZip2Decompressor(0, 0, flags), 00451 eDelete, kCompressionDefaultBufSize, kCompressionDefaultBufSize) 00452 {} 00453 }; 00454 00455 00456 END_NCBI_SCOPE 00457 00458 00459 /* @} */ 00460 00461 #endif /* UTIL_COMPRESS__BZIP2__HPP */ 00462 00463
/* Listing footer:
 *   1.4.6
 *   Modified on Wed Dec 09 08:17:44 2009 by modify_doxy.py rev. 173732
 */